From 16b227871e530dbaabc8717befd42ab79bd29c19 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 9 Nov 2017 12:48:15 -0800 Subject: [PATCH 1/2] s3: smbd: kernel oplocks. Replace retry_open() with setup_kernel_oplock_poll_open(). If a O_NONBLOCK open fails with EWOULDBLOCK, this code changes smbd to do a retry open every second, until either the timeout or we get a successful open. If we're opening a file that has a kernel lease set by a non-smbd process, this is the best we can do. Prior to this, smbd would block on the second open on such a leased file (not using O_NONBLOCK) which freezes active clients. Regression test to follow. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13121 Signed-off-by: Jeremy Allison --- source3/smbd/open.c | 96 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/source3/smbd/open.c b/source3/smbd/open.c index 89a267b0634..8c52f4bba56 100644 --- a/source3/smbd/open.c +++ b/source3/smbd/open.c @@ -2410,19 +2410,40 @@ static void defer_open_done(struct tevent_req *req) } /** - * Reschedule an open for immediate execution + * Actually attempt the kernel oplock polling open. + */ + +static void kernel_oplock_poll_open_timer(struct tevent_context *ev, + struct tevent_timer *te, + struct timeval current_time, + void *private_data) +{ + bool ok; + struct smb_request *req = (struct smb_request *)private_data; + + ok = schedule_deferred_open_message_smb(req->xconn, req->mid); + if (!ok) { + exit_server("schedule_deferred_open_message_smb failed"); + } + DBG_DEBUG("kernel_oplock_poll_open_timer fired. Retying open !\n"); +} + +/** + * Reschedule an open for 1 second from now, if not timed out. **/ -static void retry_open(struct timeval request_time, +static void setup_kernel_oplock_poll_open(struct timeval request_time, struct smb_request *req, struct file_id id) { - struct deferred_open_record *open_rec = NULL; + bool ok; + struct deferred_open_record *open_rec = NULL; + /* Maximum wait time. */ + struct timeval timeout = timeval_set(OPLOCK_BREAK_TIMEOUT*2, 0); - DBG_DEBUG("request time [%s] mid [%" PRIu64 "] file_id [%s]\n", - timeval_string(talloc_tos(), &request_time, false), - req->mid, - file_id_string_tos(&id)); + if (request_timed_out(request_time, timeout)) { + return; + } open_rec = deferred_open_record_create(false, false, id); if (open_rec == NULL) { @@ -2431,17 +2452,30 @@ static void retry_open(struct timeval request_time, ok = push_deferred_open_message_smb(req, request_time, - timeval_set(0, 0), + timeout, id, open_rec); if (!ok) { exit_server("push_deferred_open_message_smb failed"); } - ok = schedule_deferred_open_message_smb(req->xconn, req->mid); - if (!ok) { - exit_server("schedule_deferred_open_message_smb failed"); + /* + * As this timer event is owned by req, it will + * disappear if req it talloc_freed. + */ + open_rec->te = tevent_add_timer(req->sconn->ev_ctx, + req, + timeval_current_ofs(1, 0), + kernel_oplock_poll_open_timer, + req); + if (open_rec->te == NULL) { + exit_server("tevent_add_timer failed"); } + + DBG_DEBUG("poll request time [%s] mid [%" PRIu64 "] file_id [%s]\n", + timeval_string(talloc_tos(), &request_time, false), + req->mid, + file_id_string_tos(&id)); } /**************************************************************************** @@ -3160,20 +3194,18 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, flags2 &= ~(O_CREAT|O_TRUNC); } - if (first_open_attempt && lp_kernel_oplocks(SNUM(conn))) { + if (lp_kernel_oplocks(SNUM(conn))) { /* * With kernel oplocks the open breaking an oplock * blocks until the oplock holder has given up the - * oplock or closed the file. We prevent this by first + * oplock or closed the file. We prevent this by always * trying to open the file with O_NONBLOCK (see "man - * fcntl" on Linux). For the second try, triggered by - * an oplock break response, we do not need this - * anymore. + * fcntl" on Linux). * - * This is true under the assumption that only Samba - * requests kernel oplocks. Once someone else like - * NFSv4 starts to use that API, we will have to - * modify this by communicating with the NFSv4 server. + * If a process that doesn't use the smbd open files + * database or communication methods holds a kernel + * oplock we must periodically poll for available open + * using O_NONBLOCK. */ flags2 |= O_NONBLOCK; } @@ -3252,9 +3284,16 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, lck = get_existing_share_mode_lock(talloc_tos(), fsp->file_id); if (lck == NULL) { - retry_open(request_time, req, fsp->file_id); - DEBUG(10, ("No share mode lock found after " - "EWOULDBLOCK, retrying sync\n")); + /* + * No oplock from Samba around. Set up a poll every 1 + * second to retry a non-blocking open until the time + * expires. + */ + setup_kernel_oplock_poll_open(request_time, + req, + fsp->file_id); + DBG_DEBUG("No Samba oplock around after EWOULDBLOCK. " + "Retrying with poll\n"); return NT_STATUS_SHARING_VIOLATION; } @@ -3275,14 +3314,15 @@ static NTSTATUS open_file_ntcreate(connection_struct *conn, } /* - * No oplock from Samba around. Immediately retry with - * a blocking open. + * No oplock from Samba around. Set up a poll every 1 + * second to retry a non-blocking open until the time + * expires. */ - retry_open(request_time, req, fsp->file_id); + setup_kernel_oplock_poll_open(request_time, req, fsp->file_id); TALLOC_FREE(lck); - DEBUG(10, ("No Samba oplock around after EWOULDBLOCK. " - "Retrying sync\n")); + DBG_DEBUG("No Samba oplock around after EWOULDBLOCK. " + "Retrying with poll\n"); return NT_STATUS_SHARING_VIOLATION; } -- 2.15.0.448.gf294e3d99a-goog From d5dd0c4b0dccc257d719f192af81e66c9042ea63 Mon Sep 17 00:00:00 2001 From: Jeremy Allison Date: Thu, 9 Nov 2017 09:59:23 -0800 Subject: [PATCH 2/2] s4: torture: kernel oplocks. Add smb2.kernel-oplocks.kernel_oplocks8 Test if the server blocks whilst waiting on a kernel lease held by a non-smbd process. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13121 Signed-off-by: Jeremy Allison --- source3/selftest/tests.py | 2 +- source4/torture/smb2/oplock.c | 236 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 237 insertions(+), 1 deletion(-) diff --git a/source3/selftest/tests.py b/source3/selftest/tests.py index 186b2235875..5b12355ac8e 100755 --- a/source3/selftest/tests.py +++ b/source3/selftest/tests.py @@ -508,7 +508,7 @@ for t in tests: plansmbtorture4testsuite(t, "simpleserver", '//$SERVER/dosmode -U$USERNAME%$PASSWORD') elif t == "smb2.kernel-oplocks": if have_linux_kernel_oplocks: - plansmbtorture4testsuite(t, "nt4_dc", '//$SERVER/kernel_oplocks -U$USERNAME%$PASSWORD') + plansmbtorture4testsuite(t, "nt4_dc", '//$SERVER/kernel_oplocks -U$USERNAME%$PASSWORD --option=torture:localdir=$SELFTEST_PREFIX/nt4_dc/share') elif t == "smb2.notify-inotify": if have_inotify: plansmbtorture4testsuite(t, "fileserver", '//$SERVER_IP/tmp -U$USERNAME%$PASSWORD') diff --git a/source4/torture/smb2/oplock.c b/source4/torture/smb2/oplock.c index 3290ed42d8c..1830a017e83 100644 --- a/source4/torture/smb2/oplock.c +++ b/source4/torture/smb2/oplock.c @@ -36,6 +36,8 @@ #include "torture/torture.h" #include "torture/smb2/proto.h" +#include "lib/util/sys_rw.h" + #define CHECK_RANGE(v, min, max) do { \ if ((v) < (min) || (v) > (max)) { \ torture_result(tctx, TORTURE_FAIL, "(%s): wrong value for %s " \ @@ -4790,6 +4792,239 @@ done: return ret; } +#if HAVE_KERNEL_OPLOCKS_LINUX + +#ifndef F_SETLEASE +#define F_SETLEASE 1024 +#endif + +#ifndef RT_SIGNAL_LEASE +#define RT_SIGNAL_LEASE (SIGRTMIN+1) +#endif + +#ifndef F_SETSIG +#define F_SETSIG 10 +#endif + +static int got_break; + +/* + * Signal handler. + */ + +static void got_rt_break(int sig) +{ + got_break = 1; +} + +/* + * Child process function. + */ + +static int do_child_process(int pipefd, const char *name) +{ + int ret = 0; + int fd = -1; + char c = 0; + struct sigaction act; + + /* Set up a signal handler for RT_SIGNAL_LEASE. */ + ZERO_STRUCT(act); + act.sa_handler = got_rt_break; + ret = sigaction(RT_SIGNAL_LEASE, &act, NULL); + if (ret == -1) { + return 1; + } + /* Open the passed in file and get a kernel oplock. */ + fd = open(name, O_RDWR, 0666); + if (fd == -1) { + return 2; + } + + ret = fcntl(fd, F_SETSIG, RT_SIGNAL_LEASE); + if (ret == -1) { + return 3; + } + + ret = fcntl(fd, F_SETLEASE, F_WRLCK); + if (ret == -1) { + return 4; + } + + /* Tell the parent we're ready. */ + ret = sys_write(pipefd, &c, 1); + if (ret != 1) { + return 5; + } + + /* Wait for RT_SIGNAL_LEASE. */ + ret = pause(); + if (ret != -1 || errno != EINTR) { + return 6; + } + + if (got_break != 1) { + return 7; + } + + /* Force the server to wait for 3 seconds. */ + sleep(3); + + /* Remove our lease. */ + ret = fcntl(fd, F_SETLEASE, F_UNLCK); + if (ret == -1) { + return 8; + } + + ret = close(fd); + if (ret == -1) { + return 9; + } + + /* All is well. */ + return 0; +} + +static bool wait_for_child_oplock(struct torture_context *tctx, + const char *localdir, + const char *fname) +{ + int fds[2]; + int ret; + pid_t pid; + char *name = talloc_asprintf(tctx, + "%s/%s", + localdir, + fname); + + torture_assert(tctx, name != NULL, "talloc failed"); + + ret = pipe(fds); + torture_assert(tctx, ret != -1, "pipe failed"); + + pid = fork(); + torture_assert(tctx, pid != (pid_t)-1, "fork failed"); + + if (pid != (pid_t)0) { + char c; + /* Parent. */ + TALLOC_FREE(name); + ret = sys_read(fds[0], &c, 1); + torture_assert(tctx, ret == 1, "read failed"); + return true; + } + + /* Child process. */ + ret = do_child_process(fds[1], name); + _exit(ret); + /* Notreached. */ +} +#else +static bool wait_for_child_oplock(struct torture_context *tctx, + const char *localdir, + const char *fname) +{ + return false; +} +#endif + +/* + * Deal with a non-smbd process holding a kernel oplock. + */ + +static bool test_smb2_kernel_oplocks8(struct torture_context *tctx, + struct smb2_tree *tree) +{ + const char *fname = "test_kernel_oplock8.dat"; + const char *fname1 = "tmp_test_kernel_oplock8.dat"; + NTSTATUS status; + bool ret = true; + struct smb2_create io; + struct smb2_request *req = NULL; + struct smb2_handle h1 = {{0}}; + struct smb2_handle h2 = {{0}}; + const char *localdir = torture_setting_string(tctx, "localdir", NULL); + time_t start; + time_t end; + +#ifndef HAVE_KERNEL_OPLOCKS_LINUX + torture_skip(tctx, "Need kernel oplocks for test"); +#endif + + if (localdir == NULL) { + torture_skip(tctx, "Need localdir for test"); + } + + smb2_util_unlink(tree, fname); + smb2_util_unlink(tree, fname1); + status = torture_smb2_testfile(tree, fname, &h1); + torture_assert_ntstatus_ok_goto(tctx, status, ret, done, + "Error creating testfile\n"); + smb2_util_close(tree, h1); + ZERO_STRUCT(h1); + + /* Take the oplock locally in a sub-process. */ + ret = wait_for_child_oplock(tctx, localdir, fname); + torture_assert_goto(tctx, ret = true, ret, done, + "Wait for child process failed.\n"); + + /* + * Now try and open. This should block for 3 seconds. + * while the child process is still alive. + */ + + ZERO_STRUCT(io); + io.in.desired_access = SEC_FLAG_MAXIMUM_ALLOWED; + io.in.file_attributes = FILE_ATTRIBUTE_NORMAL; + io.in.create_disposition = NTCREATEX_DISP_OPEN; + io.in.share_access = + NTCREATEX_SHARE_ACCESS_DELETE| + NTCREATEX_SHARE_ACCESS_READ| + NTCREATEX_SHARE_ACCESS_WRITE; + io.in.create_options = 0; + io.in.fname = fname; + + req = smb2_create_send(tree, &io); + torture_assert(tctx, req != NULL, "smb2_create_send"); + + /* Ensure while the open is blocked the smbd is + still serving other requests. */ + io.in.fname = fname1; + io.in.create_disposition = NTCREATEX_DISP_CREATE; + + /* Time the start -> end of the request. */ + start = time(NULL); + status = smb2_create(tree, tctx, &io); + end = time(NULL); + + /* Should succeed. */ + torture_assert_ntstatus_ok_goto(tctx, status, ret, done, + "Error opening the second file\n"); + h1 = io.out.file.handle; + + /* in less than 2 seconds. Otherwise the server blocks. */ + torture_assert(tctx, end - start < 2, "server was blocked !"); + + /* Pick up the return for the initial blocking open. */ + status = smb2_create_recv(req, tctx, &io); + + /* Which should also have succeeded. */ + torture_assert_ntstatus_ok_goto(tctx, status, ret, done, + "Error opening the file\n"); + h2 = io.out.file.handle; + +done: + if (!smb2_util_handle_empty(h1)) { + smb2_util_close(tree, h1); + } + if (!smb2_util_handle_empty(h2)) { + smb2_util_close(tree, h2); + } + smb2_util_unlink(tree, fname); + smb2_util_unlink(tree, fname1); + return ret; +} + struct torture_suite *torture_smb2_kernel_oplocks_init(TALLOC_CTX *ctx) { struct torture_suite *suite = @@ -4802,6 +5037,7 @@ struct torture_suite *torture_smb2_kernel_oplocks_init(TALLOC_CTX *ctx) torture_suite_add_1smb2_test(suite, "kernel_oplocks5", test_smb2_kernel_oplocks5); torture_suite_add_2smb2_test(suite, "kernel_oplocks6", test_smb2_kernel_oplocks6); torture_suite_add_2smb2_test(suite, "kernel_oplocks7", test_smb2_kernel_oplocks7); + torture_suite_add_1smb2_test(suite, "kernel_oplocks8", test_smb2_kernel_oplocks8); suite->description = talloc_strdup(suite, "SMB2-KERNEL-OPLOCK tests"); -- 2.15.0.448.gf294e3d99a-goog