tfork() test hanging in socket_wrapper lock

Andrew Bartlett abartlet at samba.org
Mon Nov 19 02:59:35 UTC 2018


Today on sn-devel my autobuild hung running the tfork test:

It appears to be waiting forever for a mutex lock. 

The command line was:

/memdisk/abartlet/a/b1628673/samba-purepy3-none-env/bin/smbtorture 
--configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
env/bin/ab/client/client.conf 
--option=fss:sequence timeout=1
--maximum-runtime=1200
--basedir=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
env/bin/ab/tmp
--format=subunit
--option=torture:progress=no
--target=samba4ncalrpc:localhost local.tfork

pstree shows:

  ├─smbtorture,3849367,abartlet --
configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
env/bin/ab/client/client.conf--option=fss:seque
  ├─smbtorture,3849391,abartlet --
configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
env/bin/ab/client/client.conf--option=fss:seque
  ├─smbtorture,3849435,abartlet --
configfile=/memdisk/abartlet/a/b1628673/samba-purepy3-none-
env/bin/ab/client/client.conf--option=fss:seque



gdb -p 3849435

(gdb) bt full
#0  __lll_lock_wait () at
../nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:135
No locals.
#1  0x000014a811302649 in _L_lock_909 () from /lib/x86_64-linux-
gnu/libpthread.so.0
No symbol table info available.
#2  0x000014a811302470 in __GI___pthread_mutex_lock
(mutex=0x14a8130a6120 <socket_reset_mutex>) at
../nptl/pthread_mutex_lock.c:79
        __PRETTY_FUNCTION__ = "__pthread_mutex_lock"
        type = 4294966784
#3  0x000014a813097275 in swrap_mutex_lock (mutex=0x14a8130a6120
<socket_reset_mutex>) at
../third_party/socket_wrapper/socket_wrapper.c:690
        ret = 0
        __func__ = "swrap_mutex_lock"
#4  0x000014a8130a13f1 in swrap_close (fd=14) at
../third_party/socket_wrapper/socket_wrapper.c:5983
        si = 0x0
        si_index = 0
        ret = 0
#5  0x000014a8130a15b1 in close (fd=14) at
../third_party/socket_wrapper/socket_wrapper.c:6033
No locals.
#6  0x000014a8129d0797 in tfork_start_waiter_and_worker
(state=0x14a80ac7a010, _event_fd=0x55aee1d365d0,
_status_fd=0x55aee1d365d4) at ../lib/util/tfork.c:543
        p = {95, 96}
        status_sp_caller_fd = 14
        status_sp_waiter_fd = 61
        event_pipe_caller_fd = 67
        event_pipe_waiter_fd = 78
        ready_pipe_caller_fd = 96
        ready_pipe_worker_fd = 95
        nwritten = 22712065256662
        nread = 0
        pid = 0
        status = 0
        fd = 0
        c = 0 '\000'
        ret = 0
#7  0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
        state = 0x14a80ac7a010
        t = 0x55aee1d365d0
        pid = 5288
        saved_errno = 5288
        ret = 0
        __PRETTY_FUNCTION__ = "tfork_create"
#8  0x000055aedf751d07 in tfork_thread (p=0x0) at
../lib/util/tests/tfork.c:431
        t = 0x0
        status = 0
        child = 0
        tid = 22711974225664
        result = 0x0
        up = {15, 17}
        nread = 0
        ret = 0
#9  0x000014a81308d42b in uwrap_pthread_create_start
(_a=0x55aee1caf710) at ../third_party/uid_wrapper/uid_wrapper.c:719
        a = 0x0
        start_routine = 0x55aedf751cbe <tfork_thread>
        arg = 0x0
        id = 0x55aee1d35ff0
#10 0x000014a811300184 in start_thread (arg=0x14a80b27e700) at
pthread_create.c:312
        __res = <optimized out>
        pd = 0x14a80b27e700
        now = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711974225664,
-3660055873340739015, 0, 0, 22711974226368, 22711974225664,
-1989244285778562503, -1989187013946150343}, 
              mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0},
data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
        not_first_call = <optimized out>
        pagesize_m1 = <optimized out>
        sp = <optimized out>
---Type <return> to continue, or q <return> to quit---
        freesize = <optimized out>
        __PRETTY_FUNCTION__ = "start_thread"
#11 0x000014a8109cb03d in clone () at
../sysdeps/unix/sysv/linux/x86_64/clone.S:111
No locals.

(gdb) 
gdb -p 3849367

0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
81	../sysdeps/unix/syscall-template.S: No such file or directory.
(gdb) bt full
#0  0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
No locals.
#1  0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b67fdb1, count=1) at ../third_party/socket_wrapper/socket_wrapper.c:997
No locals.
#2  0x000014a81309febf in swrap_read (s=1, buf=0x14a80b67fdb1, len=1) at ../third_party/socket_wrapper/socket_wrapper.c:5463
        si = 0x0
        msg = {msg_name = 0xffffeb57f4980351, msg_namelen = 2052, msg_iov = 0x201, msg_iovlen = 22711787061809, msg_control = 0x14a80b67fcb0, msg_controllen = 64, 
          msg_flags = 0}
        tmp = {iov_base = 0x0, iov_len = 22712068694016}
        saddr = {sa_socklen = 128, sa = {s = {sa_family = 0, sa_data = '\000' <repeats 13 times>}, in = {sin_family = 0, sin_port = 0, sin_addr = {s_addr = 0}, 
              sin_zero = "\000\000\000\000\000\000\000"}, in6 = {sin6_family = 0, sin6_port = 0, sin6_flowinfo = 0, sin6_addr = {__in6_u = {
                  __u6_addr8 = '\000' <repeats 15 times>, __u6_addr16 = {0, 0, 0, 0, 0, 0, 0, 0}, __u6_addr32 = {0, 0, 0, 0}}}, sin6_scope_id = 0}, un = {sun_family = 0, 
              sun_path = '\000' <repeats 107 times>}, ss = {ss_family = 0, __ss_align = 0, __ss_padding = '\000' <repeats 111 times>}}}
        ret = 0
        tret = 0
#3  0x000014a8130a009d in read (s=1, buf=0x14a80b67fdb1, len=1) at ../third_party/socket_wrapper/socket_wrapper.c:5503
No locals.
#4  0x000014a811735cbd in sys_read (fd=1, buf=0x14a80b67fdb1, count=1) at ../lib/util/sys_rw.c:36
        ret = 1
#5  0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0, _event_fd=0x55aee1d361c0, _status_fd=0x55aee1d361c4) at ../lib/util/tfork.c:667
        p = {15, 16}
        status_sp_caller_fd = 9
        status_sp_waiter_fd = 1
        event_pipe_caller_fd = 11
        event_pipe_waiter_fd = 0
        ready_pipe_caller_fd = 16
        ready_pipe_worker_fd = 15
        nwritten = 1
        nread = 0
        pid = 3849384
        status = 0
        fd = 1
        c = 0 '\000'
        ret = 3849384
#6  0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
        state = 0x14a811720010
        t = 0x55aee1d361c0
        pid = 5288
        saved_errno = 5288
        ret = 0
        __PRETTY_FUNCTION__ = "tfork_create"
#7  0x000055aedf751d07 in tfork_thread (p=0x0) at ../lib/util/tests/tfork.c:431
        t = 0x0
        status = 0
        child = 0
        tid = 22711978428160
        result = 0x0
        up = {6, 8}
        nread = 0
        ret = 0
#8  0x000014a81308d42b in uwrap_pthread_create_start (_a=0x55aee1caf710) at ../third_party/uid_wrapper/uid_wrapper.c:719
        a = 0x0
        start_routine = 0x55aedf751cbe <tfork_thread>
        arg = 0x0
        id = 0x55aee1d35c10
#9  0x000014a811300184 in start_thread (arg=0x14a80b680700) at pthread_create.c:312
        __res = <optimized out>
        pd = 0x14a80b680700
        now = <optimized out>
        unwind_buf = {cancel_jmp_buf = {{jmp_buf = {22711978428160, -3660055873340739015, 0, 0, 22711978428864, 22711978428160, -1989244834460634567, -1989187013946150343}, 
              mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
---Type <return> to continue, or q <return> to quit---
        not_first_call = <optimized out>
        pagesize_m1 = <optimized out>
        sp = <optimized out>
        freesize = <optimized out>
        __PRETTY_FUNCTION__ = "start_thread"
#10 0x000014a8109cb03d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

gdb -p 3849391
(gdb) bt
#0  0x000014a8109bc37d in read () at ../sysdeps/unix/syscall-template.S:81
#1  0x000014a813097d01 in libc_read (fd=1, buf=0x14a80b47edb1, count=1) at ../third_party/socket_wrapper/socket_wrapper.c:997
#2  0x000014a81309febf in swrap_read (s=1, buf=0x14a80b47edb1, len=1) at ../third_party/socket_wrapper/socket_wrapper.c:5463
#3  0x000014a8130a009d in read (s=1, buf=0x14a80b47edb1, len=1) at ../third_party/socket_wrapper/socket_wrapper.c:5503
#4  0x000014a811735cbd in sys_read (fd=1, buf=0x14a80b47edb1, count=1) at ../lib/util/sys_rw.c:36
#5  0x000014a8129d0a61 in tfork_start_waiter_and_worker (state=0x0, _event_fd=0x55aee1d36450, _status_fd=0x55aee1d36454) at ../lib/util/tfork.c:667
#6  0x000014a8129d0c36 in tfork_create () at ../lib/util/tfork.c:746
#7  0x000055aedf751d07 in tfork_thread (p=0x0) at ../lib/util/tests/tfork.c:431
#8  0x000014a81308d42b in uwrap_pthread_create_start (_a=0x55aee1d35e10) at ../third_party/uid_wrapper/uid_wrapper.c:719
#9  0x000014a811300184 in start_thread (arg=0x14a80b47f700) at pthread_create.c:312
#10 0x000014a8109cb03d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

In short, the rest of the tfork() test hangs because this lock is stopping it proceeding. 

Can you look into this?

Andrew Bartlett

-- 
Andrew Bartlett
https://samba.org/~abartlet/
Authentication Developer, Samba Team         https://samba.org
Samba Development and Support, Catalyst IT   
https://catalyst.net.nz/services/samba







More information about the samba-technical mailing list