inetd netbios-ns looping: problem found
David Collier-Brown
davecb at canada.sun.com
Mon Jun 4 20:41:40 GMT 2001
Ralf K. Wiegand was kind enough to run truss on a
copy of nmbd that was looping in inetd and getting
disconnected. This is from Solaris 7
The problem is in locking /var/opt/samba/locks/nmbd.pid:
the fcntl lock request reports it's already locked.
See below...
I wrote:
> If you need to run it from inetd, then run
> truss -d -f -l -p <process-id>
> on the failing process and email me the result, in hopes
> I can identify the problem for the team.
He replied:
| Here is the truss:
Pid /thread time system call result
13848/1: 172.9363 close(11) = 0
13848/1: 172.9365
close(10)
= 0
13848/1: 172.9367
close(9)
= 0
13848/1: 172.9368
close(8)
= 0
13848/1: 172.9370
close(7)
Err#9 EBADF
13848/1: 172.9371
close(5)
= 0
13848/1: 172.9373
close(4)
= 0
13848/1: 172.9375
close(3)
= 0
13848/1: 172.9376 setrlimit(RLIMIT_NOFILE,
0xFFBEFD80)
= 0
13848/1: 172.9380 auditsys(BSM_AUDITCTL,
0x00000014)
Err#22 EINVAL
13848/1: 172.9398 execve("/opt/samba/bin/nmbd",
0x0002C454,
0xFFBEFE00) argc = 1
-- this is inetd getting readt to exec nmbd
13848/1: 172.9404 stat("/opt/samba/bin/nmbd",
0xFFBEFAF8)
= 0
13848/1: 172.9407 open("/var/ld/ld.config",
O_RDONLY)
Err#2 ENOENT
13848/1: 172.9411 open("/usr/lib/libsec.so.1",
O_RDONLY)
= 3
13848/1: 172.9413 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9415 mmap(0x00000000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF3A0000
13848/1: 172.9420 mmap(0x00000000, 81920,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF380000
13848/1: 172.9422 mmap(0xFF392000, 1050,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 8192) =
0xFF392000
13848/1: 172.9424 munmap(0xFF384000,
57344)
= 0
13848/1: 172.9427 memcntl(0xFF380000, 3156, MC_ADVISE,
0x0003,
0, 0) = 0
13848/1: 172.9428 close(3)
= 0
-- this is solaris loading a shared library (more follow)
13848/1: 172.9431 open("/usr/lib/libgen.so.1",
O_RDONLY)
= 3
13848/1: 172.9433 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9435 mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1: 172.9437 mmap(0x00000000, 98304,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF360000
13848/1: 172.9440 mmap(0xFF374000, 9847,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 16384) =
0xFF374000
13848/1: 172.9442 munmap(0xFF366000,
57344)
= 0
13848/1: 172.9444 memcntl(0xFF360000, 7976, MC_ADVISE,
0x0003,
0, 0) = 0
13848/1: 172.9447
close(3)
= 0
13848/1: 172.9449 open("/usr/lib/libsocket.so.1",
O_RDONLY)
= 3
13848/1: 172.9451 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9453 mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1: 172.9455 mmap(0x00000000, 106496,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF340000
13848/1: 172.9457 mmap(0xFF356000, 11073,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 24576) =
0xFF35600013848/1: 172.9461 munmap(0xFF348000,
57344)
= 0
13848/1: 172.9463 memcntl(0xFF340000, 14492, MC_ADVISE,
0x0003,
0, 0) = 0
13848/1: 172.9465
close(3)
= 0
13848/1: 172.9467 open("/usr/lib/libnsl.so.1",
O_RDONLY)
= 3
13848/1: 172.9469 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9471 mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1: 172.9473 mmap(0x00000000, 663552,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF280000
13848/1: 172.9476 mmap(0xFF310000, 39184,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 524288) =
0xFF310000
13848/1: 172.9479 open("/dev/zero",
O_RDONLY)
= 4
13848/1: 172.9483 mmap(0xFF31A000, 29136,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 4, 0) =
0xFF31A000
13848/1: 172.9484 munmap(0xFF302000,
57344)
= 0
13848/1: 172.9487 mmap(0x00000000, 8192,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 4, 0) = 0xFF330000
13848/1: 172.9491 memcntl(0xFF280000, 89132, MC_ADVISE,
0x0003,
0, 0) = 0
13848/1: 172.9493
close(3)
= 0
13848/1: 172.9494 open("/usr/lib/libdl.so.1",
O_RDONLY)
= 3
13848/1: 172.9497 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9499 mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1: 172.9501
close(3)
= 0
13848/1: 172.9503 open("/usr/lib/libc.so.1",
O_RDONLY)
= 3
13848/1: 172.9505 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9507 mmap(0x00000000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF270000
13848/1: 172.9509 mmap(0x00000000, 778240,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF180000
13848/1: 172.9512 mmap(0xFF234000, 31832,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 671744) =
0xFF234000
13848/1: 172.9515 mmap(0xFF23C000, 5312,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 4, 0) =
0xFF23C000
13848/1: 172.9518 munmap(0xFF226000,
57344)
= 0
13848/1: 172.9523 memcntl(0xFF180000, 131808, MC_ADVISE,
0x0003, 0, 0) = 0
13848/1: 172.9524
close(3)
= 0
13848/1: 172.9527 open("/usr/lib/libmp.so.2",
O_RDONLY)
= 3
13848/1: 172.9529 fstat(3,
0xFFBEF894)
= 0
13848/1: 172.9531 mmap(0xFF270000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF270000
13848/1: 172.9533 mmap(0x00000000, 81920,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF250000
13848/1: 172.9536 mmap(0xFF262000, 3993,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 8192) =
0xFF262000
13848/1: 172.9537 munmap(0xFF254000,
57344)
= 0
13848/1: 172.9540 memcntl(0xFF250000, 3380, MC_ADVISE,
0x0003,
0, 0) = 0
13848/1: 172.9542
close(3)
= 0
13848/1: 172.9546
open("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", O_RDONLY) = 3
13848/1: 172.9549 fstat(3,
0xFFBEF6FC)
= 0
13848/1: 172.9551 mmap(0xFF270000, 8192,
PROT_READ|PROT_EXEC,
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF270000
13848/1: 172.9553 mmap(0x00000000, 16384,
PROT_READ|PROT_EXEC,
MAP_PRIVATE, 3, 0) = 0xFF170000
13848/1: 172.9556
close(3)
= 0
13848/1: 172.9593
close(4)
= 0
13848/1: 172.9594 munmap(0xFF270000,
8192)
= 0
13848/1: 172.9604
time()
= 991684364
13848/1: 172.9606
time()
= 991684364
13848/1: 172.9608
getpid()
= 13848 [193]
13848/1: 172.9611
brk(0x000A3A30)
= 0
13848/1: 172.9613
brk(0x000A5A30)
= 0
13848/1: 172.9616
time()
= 991684364
13848/1: 172.9621
open("/usr/share/lib/zoneinfo/US/Eastern",
O_RDONLY) = 3
13848/1: 172.9624 read(3, "\0\0\0\0\0\0\0\0\0\0\0\0"..,
8192)
= 1250
13848/1: 172.9626
close(3)
= 0
-- and the nmbd starts running right around here
13848/1: 172.9633
sigfillset(0xFF23C3AC)
= 0
13848/1: 172.9635 sigaction(SIGSEGV, 0xFFBEFC40,
0x00000000)
= 0
13848/1: 172.9636 sigaction(SIGBUS, 0xFFBEFCA0,
0x00000000)
= 0
13848/1: 172.9638 sigaction(SIGHUP, 0xFFBEFCA0,
0x00000000)
= 0
13848/1: 172.9640 sigaction(SIGTERM, 0xFFBEFCA0,
0x00000000)
= 0
13848/1: 172.9642 sigprocmask(SIG_BLOCK, 0xFFBEFD30,
0x00000000) = 0
13848/1: 172.9644 sigaction(SIGUSR1, 0xFFBEFCA0,
0x00000000)
= 0
13848/1: 172.9645 sigaction(SIGUSR2, 0xFFBEFCA0,
0x00000000)
= 0
13848/1: 172.9648
umask(022)
= 0
13848/1: 172.9651 open64("/var/opt/samba/log.nmb",
O_WRONLY|O_APPEND|O_CREAT, 0666) = 3
13848/1: 172.9654 llseek(3, 0,
SEEK_END)
= 14111597
13848/1: 172.9656
umask(0)
= 022
13848/1: 172.9658
time()
= 991684364
13848/1: 172.9702
getuid()
= 0 [0]
13848/1: 172.9706 write(3, " [",
1)
= 1
13848/1: 172.9708 write(3, " 2 0 0 1 / 0 6 / 0 4 1"..,
19)
= 19
13848/1: 172.9711 write(3, " , 1",
3)
= 3
13848/1: 172.9713 write(3, " ] ",
2)
= 2
13848/1: 172.9716 write(3, " n m b d / n m b d . c",
11)
= 11
13848/1: 172.9720 write(3, " : (",
2)
= 2
13848/1: 172.9722 write(3, " 7 5 7",
3)
= 3
13848/1: 172.9725 write(3, " )\n",
2)
= 2
13848/1: 172.9728
getuid()
= 0 [0]
13848/1: 172.9730 write(3, " N e t b i o s n a"..,
44)
= 44
13848/1: 172.9732
getuid()
= 0 [0]
13848/1: 172.9735 write(3, " C o p y r i g h t "..,
38)
= 38
13848/1: 172.9738 stat64("/opt/samba/lib/smb.conf",
0xFFBEEF78)
= 0
13848/1: 172.9741
brk(0x000A5A30)
= 0
13848/1: 172.9743
brk(0x000A7A30)
= 0
13848/1: 172.9746 open64("/opt/samba/lib/smb.conf",
O_RDONLY)
= 4
13848/1: 172.9749 fstat64(4,
0xFFBEF258)
= 0
13848/1: 172.9751
brk(0x000A7A30)
= 0
13848/1: 172.9752
brk(0x000A9A30)
= 0
13848/1: 172.9755 ioctl(4, TCGETA,
0xFFBEF1E4)
Err#25 ENOTTY
13848/1: 172.9757 read(4, " # S a m b a c o n f"..,
8192)
= 746
13848/1: 172.9762 read(4, 0x000A60DC,
8192)
= 0
13848/1: 172.9764 llseek(4, 0,
SEEK_CUR)
= 746
13848/1: 172.9765
close(4)
= 0
13848/1: 172.9768 stat64("",
0xFFBEF3D8)
Err#2 ENOENT
13848/1: 172.9771 stat64("/opt/samba/lib/smb.conf",
0xFFBEEF68)
= 0
13848/1: 172.9774
stat64("/opt/samba/lib/codepages/codepage.850", 0xFFBEF848) = 0
13848/1: 172.9776
open64("/opt/samba/lib/codepages/codepage.850", O_RDONLY) = 4
13848/1: 172.9779 fstat64(4,
0xFFBEF658)
= 0
13848/1: 172.9781
brk(0x000A9A30)
= 0
13848/1: 172.9782
brk(0x000ABA30)
= 0
13848/1: 172.9785 ioctl(4, TCGETA,
0xFFBEF5E4)
Err#25 ENOTTY
13848/1: 172.9786 read(4, "01\0 R03 |\0\0\085B70101"..,
8192)
= 132
13848/1: 172.9789 llseek(4, 0,
SEEK_CUR)
= 132
13848/1: 172.9791
close(4)
= 0
13848/1: 172.9793
stat64("/opt/samba/lib/codepages/unicode_map.850", 0xFFBEF7C8) = 0
13848/1: 172.9796
open64("/opt/samba/lib/codepages/unicode_map.850", O_RDONLY) = 4
13848/1: 172.9799 fstat64(4,
0xFFBEF5C0)
= 0
13848/1: 172.9800 ioctl(4, TCGETA,
0xFFBEF54C)
Err#25 ENOTTY
13848/1: 172.9802 read(4, "0180 8 5 0\0\0\0\0\0\0\0"..,
8192)
= 8192
13848/1: 172.9805
brk(0x000ABA30)
= 0
13848/1: 172.9807
brk(0x000CBA30)
= 0
13848/1: 172.9811 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9815 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9819 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9822 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9826 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9830 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9833 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9837 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9841 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9844 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9848 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9852 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9855 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9859 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9864 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 8192
13848/1: 172.9867 read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)
= 542
13848/1: 172.9883 llseek(4, 0,
SEEK_CUR)
= 131614
13848/1: 172.9885
close(4)
= 0
13848/1: 172.9888
uname(0xFFBEEEF8)
= 1
13848/1: 172.9890 stat64("",
0xFFBEF840)
Err#2 ENOENT
13848/1: 172.9892 stat64("/opt/samba/lib/smb.conf",
0xFFBEF3D0)
= 0
13848/1: 172.9895 getsockopt(0, 65535, 4104, 0xFFBEFD38,
0xFFBEFD3C, 1) = 0
13848/1: 172.9897 stat64("/var/opt/samba/locks",
0xFFBEFCA8)
= 0
13848/1: 172.9900
open64("/var/opt/samba/locks/nmbd.pid",
O_RDWR|O_NONBLOCK) = 4
13848/1: 172.9902 read(4, " 1 2 5 6 1\n\0\0\0\0\0\0"..,
19)
= 19
13848/1: 172.9905 kill(12561,
SIG#0)
= 0
-- nmbd is checking if process 12561 is still alive (it is)
13848/1: 172.9907 fcntl(4, F_SETLK64,
0xFFBEF410)
Err#11 EAGAIN
-- a lock fails with EAGAIN, which means...
EAGAIN
The cmd argument is F_SETLK or F_SETLK64, the type of
lock (l_type) is a shared (F_RDLCK) or exclusive
(F_WRLCK) lock, and the segment of a file to be locked
is already exclusive-locked by another process; or the
type is an exclusive lock and some portion of the seg-
ment of a file to be locked is already shared-locked
or exclusive-locked by another process.
Therefor /var/opt/samba/locks/nmbd.pid is locked already.
See below...
13848/1: 172.9909
close(4)
= 0
13848/1: 172.9910
time()
= 991684364
13848/1: 172.9914
getuid()
= 0 [0]
13848/1: 172.9916 write(3, " [",
1)
= 1
13848/1: 172.9918 write(3, " 2 0 0 1 / 0 6 / 0 4 1"..,
19)
= 19
13848/1: 172.9921 write(3, " , 0",
3)
= 3
13848/1: 172.9923 write(3, " ] ",
2)
= 2
13848/1: 172.9926 write(3, " l i b / p i d f i l e ."..,
13)
= 13
13848/1: 172.9928 write(3, " : ( 8 6",
4)
= 4
13848/1: 172.9931 write(3, " )\n",
2)
= 2
13848/1: 172.9933
getuid()
= 0 [0]
13848/1: 172.9935 write(3, " E R R O R : n m b"..,
109)
= 109
13848/1: 172.9939 llseek(0, 0,
SEEK_CUR)
Err#29 ESPIPE
13848/1: 172.9941 _exit(1)
-- and nmbd bails, hard!
193/1: 172.9953 Received signal #18, SIGCLD, in
poll()
[caught]
193/1: siginfo: SIGCLD CLD_EXITED pid=13848
status=0x0001
-- inetd reports the problem
193/1: 172.9955 poll(0xFFBEF620, 44,
-1)
Err#4 EINTR
193/1: 172.9957 sigprocmask(0, 0x00000000,
0xFFBEF158)
= 0
193/1: 172.9959 sigprocmask(SIG_BLOCK, 0xFFBEF158,
0xFFBEF148) = 0
193/1: 172.9961 waitid(P_ALL, 0, 0xFFBEF1C8,
WEXITED|WNOHANG)
= 0
193/1: 172.9964 fstat(3,
0xFFBEEF10)
= 0
193/1: 172.9966
time()
= 991684364
193/1: 172.9969
getpid()
= 193 [1]
193/1: 172.9972 putmsg(3, 0xFFBEE5C8, 0xFFBEE5BC,
0)
= 0
193/1: 172.9974 open("/etc/.syslog_door",
O_RDONLY)
= 7
193/1: 172.9978 door_info(7,
0xFFBEE500)
= 0
193/1: 172.9980
getpid()
= 193 [1]
193/1: 172.9982 door_call(7,
0xFFBEE4E8)
= 0
193/1: 172.9984
close(7)
= 0
193/1: 172.9985 waitid(P_ALL, 0, 0xFFBEF1C8,
WEXITED|WNOHANG)
= 0
193/1: 172.9987 sigprocmask(0, 0x00000000,
0xFFBEF158)
= 0
193/1: 172.9989 sigprocmask(SIG_SETMASK, 0xFFBEF158,
0xFFBEF148) = 0
193/1: 172.9990 setcontext(0xFFBEF308)
193/1: 172.9992
alarm(0)
= 0
193/1: 172.9994 sigaction(SIGALRM, 0xFFBEF6F8,
0xFFBEF7A8)
= 0
193/1: 172.9996 sigprocmask(SIG_BLOCK, 0xFFBEF798,
0xFFBEF788) = 0
193/1: 172.9998
alarm(1)
= 0
-----
The appears to be in pidfile_pid, called from one of
Functions calling this function: pidfile_pid
File Function Line
1 pidfile.c pidfile_create 85 pid = pidfile_pid(name);
2 smbcontrol.c send_message 140 pid = pidfile_pid(dest);
3 startstop.c stop_smbd 74 pid_t pid = pidfile_pid("smbd");
4 startstop.c stop_nmbd 86 pid_t pid = pidfile_pid("nmbd");
5 statuspage.c status_page 138 smbd_pid = pidfile_pid("smbd");
I'd giuess at pidfile_create, from the ERROR message written...
Mr. Wiegand, can you email us a page or so from the
nmbd log file, including the ERROR message?
--dave
--
David Collier-Brown, | Always do right. This will gratify
Performance & Engineering Team | some people and astonish the rest.
Americas Customer Engineering | -- Mark Twain
(905) 415-2849 | davecb at canada.sun.com
More information about the samba-technical
mailing list