tfork() test hanging in socket_wrapper lock

Andreas Schneider asn at samba.org
Mon Nov 19 10:54:20 UTC 2018


On Monday, 19 November 2018 03:59:35 CET Andrew Bartlett wrote:
> Today on sn-devel my autobuild hung running the tfork test:
> 
> It appears to be waiting forever for a mutex lock.

Anoop and myself tried to reproduce it, but we couldn't. However helgrind 
reports several errors in tfork itself.

I see a lot of:

==2163== Warning: invalid file descriptor 1024 in syscall close()

and it reports also data races.

I've run it using:

LD_PRELOAD=libuid_wrapper.so:/home/asn/workspace/projects/socket_wrapper/obj/
src/libsocket_wrapper.so UID_WRAPPER=1 SOCKET_WRAPPER_DIR=/tmp/tfork/ valgrind 
-v --trace-children=yes --tool=helgrind --error-exitcode=1 --read-var-info=yes 
--suppressions=/home/asn/workspace/projects/socket_wrapper/tests/helgrind.supp 
./bin/smbtorture ncalrpc:localhost local.tfork

I think tfork should be fixed first.


FYI: For socket_wrapper we setup a runner which runs the threading tests with 
helgrind.

https://gitlab.com/cwrap/socket_wrapper/-/jobs/120275592


I need to look into Thread Sanitizer next.


	Andreas

> 
> The command line was:
> 
> /memdisk/abartlet/a/b1628673/samba-purepy3-none-env/bin/smbtorture 
> --configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf 
> --option=fss:sequence timeout=1
> --maximum-runtime=1200
> --basedir=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/tmp
> --format=subunit
> --option=torture:progress=no
> --target=samba4ncalrpc:localhost local.tfork
> 
> pstree shows:
> 
>   ├─smbtorture,3849367,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
>   ├─smbtorture,3849391,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
>   ├─smbtorture,3849435,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
> 
> 
> 
> gdb -p 3849435
> 
> (gdb) bt full
> #0  __lll_lock_wait () at
> ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
> No locals.
> #1  0x000014a811302649 in _L_lock_909 () from /lib/x86_64-linux-
> gnu/libpthread.so.0
> No symbol table info available.
> #2  0x000014a811302470 in __GI___pthread_mutex_lock
> (mutex=0x14a8130a6120 <socket_reset_mutex>) at
> ../nptl/pthread_mutex_lock.c:79
>         __PRETTY_FUNCTION__ = "__pthread_mutex_lock"
>         type = 4294966784
> #3  0x000014a813097275 in swrap_mutex_lock (mutex=0x14a8130a6120
> <socket_reset_mutex>) at
> ../third_party/socket_wrapper/socket_wrapper.c:690
>         ret = 0
>         __func__ = "swrap_mutex_lock"
> #4  0x000014a8130a13f1 in swrap_close (fd=14) at
> ../third_party/socket_wrapper/socket_wrapper.c:5983
>         si = 0x0
>         si_index = 0
>         ret = 0
> #5  0x000014a8130a15b1 in close (fd=14) at
> ../third_party/socket_wrapper/socket_wrapper.c:6033
> No locals.
> #6  0x000014a8129d0797 in tfork_start_waiter_and_worker
> (state=0x14a80ac7a010, _event_fd=0x55aee1d365d0,
> _status_fd=0x55aee1d365d4) at ../lib/util/tfork.c:543
>         p = {95, 96}
>         status_sp_caller_fd = 14
>         status_sp_waiter_fd = 61
>         event_pipe_caller_fd = 67
>         event_pipe_waiter_fd = 78
>         ready_pipe_caller_fd = 96
>         ready_pipe_worker_fd = 95
>         nwritten = 22712065256662
>         nread = 0
>         pid = 0
>         status = 0
>         fd = 0
>         c = 0 '\000'
>         ret = 0
> #7  0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
>         state = 0x14a80ac7a010
>         t = 0x55aee1d365d0
>         pid = 5288
>         saved_errno = 5288
>         ret = 0
>         __PRETTY_FUNCTION__ = "tfork_create"
> #8  0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431
>         t = 0x0
>         status = 0
>         child = 0
>         tid = 22711974225664
>         result = 0x0
>         up = {15, 17}
>         nread = 0
>         ret = 0
> #9  0x000014a81308d42b in uwrap_pthread_create_start
> (_a=0x55aee1caf710) at ../third_party/uid_wrapper/uid_wrapper.c:719
>         a = 0x0
>         start_routine = 0x55aedf751cbe <tfork_thread>
>         arg = 0x0
>         id = 0x55aee1d35ff0
> #10 0x000014a811300184 in start_thread (arg=0x14a80b27e700) at
> pthread_create.c:312
>         __res = <optimized out>
>         pd = 0x14a80b27e700
>         now = <optimized out>
>         unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711974225664,
> -3660055873340739015, 0, 0, 22711974226368, 22711974225664,
> -1989244285778562503, -1989187013946150343}, 
>               mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0},
> data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
>         not_first_call = <optimized out>
>         pagesize_m1 = <optimized out>
>         sp = <optimized out>
> ---Type <return> to continue, or q <return> to quit---
>         freesize = <optimized out>
>         __PRETTY_FUNCTION__ = "start_thread"
> #11 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
> No locals.
> 
> (gdb) 
> gdb -p 3849367
> 
> 0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> 81	../sysdeps/unix/syscall-template.S: No such file or directory.
> (gdb) bt full
> #0  0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> No locals.
> #1  0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b67fdb1, count=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:997 No locals.
> #2  0x000014a81309febf in swrap_read (s=1, buf=0x14a80b67fdb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5463 si = 0x0
>         msg = {msg_name = 0xffffeb57f4980351, msg_namelen = 2052, msg_iov =
> 0x201, msg_iovlen = 22711787061809, msg_control = 0x14a80b67fcb0,
> msg_controllen = 64, msg_flags = 0}
>         tmp = {iov_base = 0x0, iov_len = 22712068694016}
>         saddr = {sa_socklen = 128, sa = {s = {sa_family = 0, sa_data =
> '\000' <repeats 13 times>}, in = {sin_family = 0, sin_port = 0, sin_addr =
> {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, in6 =
> {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0, sin6_addr = {__in6_u =
> { __u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0,
> 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}, un = {sun_family =
> 0, sun_path = '\000' <repeats 107 times>}, ss = {ss_family = 0, __ss_align
> = 0, __ss_padding = '\000' <repeats 111 times>}}} ret = 0
>         tret = 0
> #3  0x000014a8130a009d in read (s=1, buf=0x14a80b67fdb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5503 No locals.
> #4  0x000014a811735cbd in sys_read (fd=1, buf=0x14a80b67fdb1, count=1) at
> ../lib/util/sys_rw.c:36 ret = 1
> #5  0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0,
> _event_fd=0x55aee1d361c0, _status_fd=0x55aee1d361c4) at
> ../lib/util/tfork.c:667 p = {15, 16}
>         status_sp_caller_fd = 9
>         status_sp_waiter_fd = 1
>         event_pipe_caller_fd = 11
>         event_pipe_waiter_fd = 0
>         ready_pipe_caller_fd = 16
>         ready_pipe_worker_fd = 15
>         nwritten = 1
>         nread = 0
>         pid = 3849384
>         status = 0
>         fd = 1
>         c = 0 '\000'
>         ret = 3849384
> #6  0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
>         state = 0x14a811720010
>         t = 0x55aee1d361c0
>         pid = 5288
>         saved_errno = 5288
>         ret = 0
>         __PRETTY_FUNCTION__ = "tfork_create"
> #7  0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431 t = 0x0
>         status = 0
>         child = 0
>         tid = 22711978428160
>         result = 0x0
>         up = {6, 8}
>         nread = 0
>         ret = 0
> #8  0x000014a81308d42b in uwrap_pthread_create_start (_a=0x55aee1caf710) at
> ../third_party/uid_wrapper/uid_wrapper.c:719 a = 0x0
>         start_routine = 0x55aedf751cbe <tfork_thread>
>         arg = 0x0
>         id = 0x55aee1d35c10
> #9  0x000014a811300184 in start_thread (arg=0x14a80b680700) at
> pthread_create.c:312 __res = <optimized out>
>         pd = 0x14a80b680700
>         now = <optimized out>
>         unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711978428160,
> -3660055873340739015, 0, 0, 22711978428864, 22711978428160,
> -1989244834460634567, -1989187013946150343}, mask_was_saved = 0}}, priv =
> {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype
> = 0}}} ---Type <return> to continue, or q <return> to quit---
>         not_first_call = <optimized out>
>         pagesize_m1 = <optimized out>
>         sp = <optimized out>
>         freesize = <optimized out>
>         __PRETTY_FUNCTION__ = "start_thread"
> #10 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
> 
> gdb -p 3849391
> (gdb) bt
> #0  0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> #1  0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b47edb1, count=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:997 #2  0x000014a81309febf
> in swrap_read (s=1, buf=0x14a80b47edb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5463 #3  0x000014a8130a009d
> in read (s=1, buf=0x14a80b47edb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5503 #4  0x000014a811735cbd
> in sys_read (fd=1, buf=0x14a80b47edb1, count=1) at ../lib/util/sys_rw.c:36
> #5  0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0,
> _event_fd=0x55aee1d36450, _status_fd=0x55aee1d36454) at
> ../lib/util/tfork.c:667 #6  0x000014a8129d0c36 in tfork_create () at
> ../lib/util/tfork.c:746 #7  0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431 #8  0x000014a81308d42b in
> uwrap_pthread_create_start (_a=0x55aee1d35e10) at
> ../third_party/uid_wrapper/uid_wrapper.c:719 #9  0x000014a811300184 in
> start_thread (arg=0x14a80b47f700) at pthread_create.c:312 #10
> 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
> 
> In short, the rest of the tfork() test hangs because this lock is stopping
> it proceeding.
> 
> Can you look into this?
> 
> Andrew Bartlett


-- 
Andreas Schneider                      asn at samba.org
Samba Team                             www.samba.org
GPG-ID:     8DFF53E18F2ABC8D8F3C92237EE0FC4DCC014E3D





More information about the samba-technical mailing list