tfork() test hanging in socket_wrapper lock
Andreas Schneider
asn at samba.org
Mon Nov 19 10:54:20 UTC 2018
On Monday, 19 November 2018 03:59:35 CET Andrew Bartlett wrote:
> Today on sn-devel my autobuild hung running the tfork test:
>
> It appears to be waiting forever for a mutex lock.
Anoop and myself tried to reproduce it, but we couldn't. However helgrind
reports several errors in tfork itself.
I see a lot of:
==2163== Warning: invalid file descriptor 1024 in syscall close()
and it reports also data races.
I've run it using:
LD_PRELOAD=libuid_wrapper.so:/home/asn/workspace/projects/socket_wrapper/obj/
src/libsocket_wrapper.so UID_WRAPPER=1 SOCKET_WRAPPER_DIR=/tmp/tfork/ valgrind
-v --trace-children=yes --tool=helgrind --error-exitcode=1 --read-var-info=yes
--suppressions=/home/asn/workspace/projects/socket_wrapper/tests/helgrind.supp
./bin/smbtorture ncalrpc:localhost local.tfork
I think tfork should be fixed first.
FYI: For socket_wrapper we setup a runner which runs the threading tests with
helgrind.
https://gitlab.com/cwrap/socket_wrapper/-/jobs/120275592
I need to look into Thread Sanitizer next.
Andreas
>
> The command line was:
>
> /memdisk/abartlet/a/b1628673/samba-purepy3-none-env/bin/smbtorture
> --configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf
> --option=fss:sequence timeout=1
> --maximum-runtime=1200
> --basedir=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/tmp
> --format=subunit
> --option=torture:progress=no
> --target=samba4ncalrpc:localhost local.tfork
>
> pstree shows:
>
> ├─smbtorture,3849367,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
> ├─smbtorture,3849391,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
> ├─smbtorture,3849435,abartlet --
> configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
> env/bin/ab/client/client.conf--option=fss:seque
>
>
>
> gdb -p 3849435
>
> (gdb) bt full
> #0 __lll_lock_wait () at
> ../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
> No locals.
> #1 0x000014a811302649 in _L_lock_909 () from /lib/x86_64-linux-
> gnu/libpthread.so.0
> No symbol table info available.
> #2 0x000014a811302470 in __GI___pthread_mutex_lock
> (mutex=0x14a8130a6120 <socket_reset_mutex>) at
> ../nptl/pthread_mutex_lock.c:79
> __PRETTY_FUNCTION__ = "__pthread_mutex_lock"
> type = 4294966784
> #3 0x000014a813097275 in swrap_mutex_lock (mutex=0x14a8130a6120
> <socket_reset_mutex>) at
> ../third_party/socket_wrapper/socket_wrapper.c:690
> ret = 0
> __func__ = "swrap_mutex_lock"
> #4 0x000014a8130a13f1 in swrap_close (fd=14) at
> ../third_party/socket_wrapper/socket_wrapper.c:5983
> si = 0x0
> si_index = 0
> ret = 0
> #5 0x000014a8130a15b1 in close (fd=14) at
> ../third_party/socket_wrapper/socket_wrapper.c:6033
> No locals.
> #6 0x000014a8129d0797 in tfork_start_waiter_and_worker
> (state=0x14a80ac7a010, _event_fd=0x55aee1d365d0,
> _status_fd=0x55aee1d365d4) at ../lib/util/tfork.c:543
> p = {95, 96}
> status_sp_caller_fd = 14
> status_sp_waiter_fd = 61
> event_pipe_caller_fd = 67
> event_pipe_waiter_fd = 78
> ready_pipe_caller_fd = 96
> ready_pipe_worker_fd = 95
> nwritten = 22712065256662
> nread = 0
> pid = 0
> status = 0
> fd = 0
> c = 0 '\000'
> ret = 0
> #7 0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
> state = 0x14a80ac7a010
> t = 0x55aee1d365d0
> pid = 5288
> saved_errno = 5288
> ret = 0
> __PRETTY_FUNCTION__ = "tfork_create"
> #8 0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431
> t = 0x0
> status = 0
> child = 0
> tid = 22711974225664
> result = 0x0
> up = {15, 17}
> nread = 0
> ret = 0
> #9 0x000014a81308d42b in uwrap_pthread_create_start
> (_a=0x55aee1caf710) at ../third_party/uid_wrapper/uid_wrapper.c:719
> a = 0x0
> start_routine = 0x55aedf751cbe <tfork_thread>
> arg = 0x0
> id = 0x55aee1d35ff0
> #10 0x000014a811300184 in start_thread (arg=0x14a80b27e700) at
> pthread_create.c:312
> __res = <optimized out>
> pd = 0x14a80b27e700
> now = <optimized out>
> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711974225664,
> -3660055873340739015, 0, 0, 22711974226368, 22711974225664,
> -1989244285778562503, -1989187013946150343},
> mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0},
> data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
> not_first_call = <optimized out>
> pagesize_m1 = <optimized out>
> sp = <optimized out>
> ---Type <return> to continue, or q <return> to quit---
> freesize = <optimized out>
> __PRETTY_FUNCTION__ = "start_thread"
> #11 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
> No locals.
>
> (gdb)
> gdb -p 3849367
>
> 0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> 81 ../sysdeps/unix/syscall-template.S: No such file or directory.
> (gdb) bt full
> #0 0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> No locals.
> #1 0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b67fdb1, count=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:997 No locals.
> #2 0x000014a81309febf in swrap_read (s=1, buf=0x14a80b67fdb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5463 si = 0x0
> msg = {msg_name = 0xffffeb57f4980351, msg_namelen = 2052, msg_iov =
> 0x201, msg_iovlen = 22711787061809, msg_control = 0x14a80b67fcb0,
> msg_controllen = 64, msg_flags = 0}
> tmp = {iov_base = 0x0, iov_len = 22712068694016}
> saddr = {sa_socklen = 128, sa = {s = {sa_family = 0, sa_data =
> '\000' <repeats 13 times>}, in = {sin_family = 0, sin_port = 0, sin_addr =
> {s_addr = 0}, sin_zero = "\000\000\000\000\000\000\000"}, in6 =
> {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0, sin6_addr = {__in6_u =
> { __u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0,
> 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}, un = {sun_family =
> 0, sun_path = '\000' <repeats 107 times>}, ss = {ss_family = 0, __ss_align
> = 0, __ss_padding = '\000' <repeats 111 times>}}} ret = 0
> tret = 0
> #3 0x000014a8130a009d in read (s=1, buf=0x14a80b67fdb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5503 No locals.
> #4 0x000014a811735cbd in sys_read (fd=1, buf=0x14a80b67fdb1, count=1) at
> ../lib/util/sys_rw.c:36 ret = 1
> #5 0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0,
> _event_fd=0x55aee1d361c0, _status_fd=0x55aee1d361c4) at
> ../lib/util/tfork.c:667 p = {15, 16}
> status_sp_caller_fd = 9
> status_sp_waiter_fd = 1
> event_pipe_caller_fd = 11
> event_pipe_waiter_fd = 0
> ready_pipe_caller_fd = 16
> ready_pipe_worker_fd = 15
> nwritten = 1
> nread = 0
> pid = 3849384
> status = 0
> fd = 1
> c = 0 '\000'
> ret = 3849384
> #6 0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
> state = 0x14a811720010
> t = 0x55aee1d361c0
> pid = 5288
> saved_errno = 5288
> ret = 0
> __PRETTY_FUNCTION__ = "tfork_create"
> #7 0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431 t = 0x0
> status = 0
> child = 0
> tid = 22711978428160
> result = 0x0
> up = {6, 8}
> nread = 0
> ret = 0
> #8 0x000014a81308d42b in uwrap_pthread_create_start (_a=0x55aee1caf710) at
> ../third_party/uid_wrapper/uid_wrapper.c:719 a = 0x0
> start_routine = 0x55aedf751cbe <tfork_thread>
> arg = 0x0
> id = 0x55aee1d35c10
> #9 0x000014a811300184 in start_thread (arg=0x14a80b680700) at
> pthread_create.c:312 __res = <optimized out>
> pd = 0x14a80b680700
> now = <optimized out>
> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711978428160,
> -3660055873340739015, 0, 0, 22711978428864, 22711978428160,
> -1989244834460634567, -1989187013946150343}, mask_was_saved = 0}}, priv =
> {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype
> = 0}}} ---Type <return> to continue, or q <return> to quit---
> not_first_call = <optimized out>
> pagesize_m1 = <optimized out>
> sp = <optimized out>
> freesize = <optimized out>
> __PRETTY_FUNCTION__ = "start_thread"
> #10 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
>
> gdb -p 3849391
> (gdb) bt
> #0 0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
> #1 0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b47edb1, count=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:997 #2 0x000014a81309febf
> in swrap_read (s=1, buf=0x14a80b47edb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5463 #3 0x000014a8130a009d
> in read (s=1, buf=0x14a80b47edb1, len=1) at
> ../third_party/socket_wrapper/socket_wrapper.c:5503 #4 0x000014a811735cbd
> in sys_read (fd=1, buf=0x14a80b47edb1, count=1) at ../lib/util/sys_rw.c:36
> #5 0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0,
> _event_fd=0x55aee1d36450, _status_fd=0x55aee1d36454) at
> ../lib/util/tfork.c:667 #6 0x000014a8129d0c36 in tfork_create () at
> ../lib/util/tfork.c:746 #7 0x000055aedf751d07 in tfork_thread (p=0x0) at
> ../lib/util/tests/tfork.c:431 #8 0x000014a81308d42b in
> uwrap_pthread_create_start (_a=0x55aee1d35e10) at
> ../third_party/uid_wrapper/uid_wrapper.c:719 #9 0x000014a811300184 in
> start_thread (arg=0x14a80b47f700) at pthread_create.c:312 #10
> 0x000014a8109cb03d in clone () at
> ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
>
> In short, the rest of the tfork() test hangs because this lock is stopping
> it proceeding.
>
> Can you look into this?
>
> Andrew Bartlett
--
Andreas Schneider asn at samba.org
Samba Team www.samba.org
GPG-ID: 8DFF53E18F2ABC8D8F3C92237EE0FC4DCC014E3D
More information about the samba-technical
mailing list