inetd netbios-ns looping: problem found

David Collier-Brown davecb at canada.sun.com
Mon Jun 4 20:41:40 GMT 2001


Ralf K. Wiegand was kind enough to run truss on a
copy of nmbd that was looping in inetd and getting
disconnected.  This is from Solaris 7

The problem is in locking /var/opt/samba/locks/nmbd.pid:
the fcntl lock request reports it's already locked.
See below...


I wrote:
>   If you need to run it from inetd, then run
> 	truss -d -f -l -p <process-id>
> on the failing process and email me the result, in hopes
> I can identify the problem for the team.

He replied:
| Here is the truss:
Pid  /thread    time            system call               result
13848/1:        172.9363        close(11)                 = 0
13848/1:        172.9365       
close(10)                                     
  = 0
13848/1:        172.9367       
close(9)                                      
  = 0
13848/1:        172.9368       
close(8)                                      
  = 0
13848/1:        172.9370       
close(7)                                      
  Err#9 EBADF
13848/1:        172.9371       
close(5)                                      
  = 0
13848/1:        172.9373       
close(4)                                      
  = 0
13848/1:        172.9375       
close(3)                                      
  = 0
13848/1:        172.9376        setrlimit(RLIMIT_NOFILE,
0xFFBEFD80)          
  = 0
13848/1:        172.9380        auditsys(BSM_AUDITCTL,
0x00000014)            
  Err#22 EINVAL
13848/1:        172.9398        execve("/opt/samba/bin/nmbd",
0x0002C454, 
0xFFBEFE00)  argc = 1

	-- this is inetd getting readt to exec nmbd

13848/1:        172.9404        stat("/opt/samba/bin/nmbd",
0xFFBEFAF8)       
  = 0
13848/1:        172.9407        open("/var/ld/ld.config",
O_RDONLY)           
  Err#2 ENOENT
13848/1:        172.9411        open("/usr/lib/libsec.so.1",
O_RDONLY)        
  = 3
13848/1:        172.9413        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9415        mmap(0x00000000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF3A0000
13848/1:        172.9420        mmap(0x00000000, 81920,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF380000
13848/1:        172.9422        mmap(0xFF392000, 1050, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 8192) =
0xFF392000
13848/1:        172.9424        munmap(0xFF384000,
57344)                     
  = 0
13848/1:        172.9427        memcntl(0xFF380000, 3156, MC_ADVISE,
0x0003, 
0, 0) = 0
13848/1:        172.9428        close(3) 

 = 0
	-- this is solaris loading a shared library (more follow)
                                     

13848/1:        172.9431        open("/usr/lib/libgen.so.1",
O_RDONLY)        
  = 3
13848/1:        172.9433        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9435        mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1:        172.9437        mmap(0x00000000, 98304,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF360000
13848/1:        172.9440        mmap(0xFF374000, 9847, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 16384) =
0xFF374000 
13848/1:        172.9442        munmap(0xFF366000,
57344)                     
  = 0
13848/1:        172.9444        memcntl(0xFF360000, 7976, MC_ADVISE,
0x0003, 
0, 0) = 0
13848/1:        172.9447       
close(3)                                      
  = 0
13848/1:        172.9449        open("/usr/lib/libsocket.so.1",
O_RDONLY)     
  = 3
13848/1:        172.9451        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9453        mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1:        172.9455        mmap(0x00000000, 106496,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF340000
13848/1:        172.9457        mmap(0xFF356000, 11073, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 24576) = 
0xFF35600013848/1:        172.9461        munmap(0xFF348000,
57344)           
            = 0
13848/1:        172.9463        memcntl(0xFF340000, 14492, MC_ADVISE,
0x0003, 
0, 0) = 0
13848/1:        172.9465       
close(3)                                      
  = 0
13848/1:        172.9467        open("/usr/lib/libnsl.so.1",
O_RDONLY)        
  = 3
13848/1:        172.9469        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9471        mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1:        172.9473        mmap(0x00000000, 663552,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF280000
13848/1:        172.9476        mmap(0xFF310000, 39184, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 524288) =
0xFF310000
13848/1:        172.9479        open("/dev/zero",
O_RDONLY)                   
  = 4
13848/1:        172.9483        mmap(0xFF31A000, 29136, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 4, 0) =
0xFF31A000
13848/1:        172.9484        munmap(0xFF302000,
57344)                     
  = 0
13848/1:        172.9487        mmap(0x00000000, 8192, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 4, 0) = 0xFF330000
13848/1:        172.9491        memcntl(0xFF280000, 89132, MC_ADVISE,
0x0003, 
0, 0) = 0
13848/1:        172.9493       
close(3)                                      
  = 0
13848/1:        172.9494        open("/usr/lib/libdl.so.1",
O_RDONLY)         
  = 3
13848/1:        172.9497        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9499        mmap(0xFF3A0000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF3A0000
13848/1:        172.9501       
close(3)                                      
  = 0
13848/1:        172.9503        open("/usr/lib/libc.so.1",
O_RDONLY)          
  = 3
13848/1:        172.9505        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9507        mmap(0x00000000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF270000
13848/1:        172.9509        mmap(0x00000000, 778240,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF180000
13848/1:        172.9512        mmap(0xFF234000, 31832, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 671744) =
0xFF234000
13848/1:        172.9515        mmap(0xFF23C000, 5312, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 4, 0) =
0xFF23C000
13848/1:        172.9518        munmap(0xFF226000,
57344)                     
  = 0
13848/1:        172.9523        memcntl(0xFF180000, 131808, MC_ADVISE, 
0x0003, 0, 0) = 0
13848/1:        172.9524       
close(3)                                      
  = 0
13848/1:        172.9527        open("/usr/lib/libmp.so.2",
O_RDONLY)         
  = 3
13848/1:        172.9529        fstat(3,
0xFFBEF894)                          
  = 0
13848/1:        172.9531        mmap(0xFF270000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF270000
13848/1:        172.9533        mmap(0x00000000, 81920,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF250000
13848/1:        172.9536        mmap(0xFF262000, 3993, 
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_FIXED, 3, 8192) =
0xFF262000
13848/1:        172.9537        munmap(0xFF254000,
57344)                     
  = 0
13848/1:        172.9540        memcntl(0xFF250000, 3380, MC_ADVISE,
0x0003, 
0, 0) = 0
13848/1:        172.9542       
close(3)                                      
  = 0
13848/1:        172.9546        
open("/usr/platform/SUNW,Ultra-4/lib/libc_psr.so.1", O_RDONLY) = 3
13848/1:        172.9549        fstat(3,
0xFFBEF6FC)                          
  = 0
13848/1:        172.9551        mmap(0xFF270000, 8192,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE|MAP_FIXED, 3, 0) = 0xFF270000
13848/1:        172.9553        mmap(0x00000000, 16384,
PROT_READ|PROT_EXEC, 
MAP_PRIVATE, 3, 0) = 0xFF170000
13848/1:        172.9556       
close(3)                                      
  = 0
13848/1:        172.9593       
close(4)                                      
  = 0
13848/1:        172.9594        munmap(0xFF270000,
8192)                      
  = 0
13848/1:        172.9604       
time()                                        
  = 991684364
13848/1:        172.9606       
time()                                        
  = 991684364
13848/1:        172.9608       
getpid()                                      
  = 13848 [193]
13848/1:        172.9611       
brk(0x000A3A30)                               
  = 0
13848/1:        172.9613       
brk(0x000A5A30)                               
  = 0
13848/1:        172.9616       
time()                                        
  = 991684364
13848/1:        172.9621       
open("/usr/share/lib/zoneinfo/US/Eastern", 
O_RDONLY) = 3
13848/1:        172.9624        read(3, "\0\0\0\0\0\0\0\0\0\0\0\0"..,
8192)   
  = 1250
13848/1:        172.9626       
close(3)                                      
  = 0

	-- and the nmbd starts running right around here


13848/1:        172.9633       
sigfillset(0xFF23C3AC)                        
  = 0
13848/1:        172.9635        sigaction(SIGSEGV, 0xFFBEFC40,
0x00000000)    
  = 0
13848/1:        172.9636        sigaction(SIGBUS, 0xFFBEFCA0,
0x00000000)     
  = 0
13848/1:        172.9638        sigaction(SIGHUP, 0xFFBEFCA0,
0x00000000)     
  = 0
13848/1:        172.9640        sigaction(SIGTERM, 0xFFBEFCA0,
0x00000000)    
  = 0
13848/1:        172.9642        sigprocmask(SIG_BLOCK, 0xFFBEFD30, 
0x00000000)  = 0
13848/1:        172.9644        sigaction(SIGUSR1, 0xFFBEFCA0,
0x00000000)    
  = 0
13848/1:        172.9645        sigaction(SIGUSR2, 0xFFBEFCA0,
0x00000000)    
  = 0
13848/1:        172.9648       
umask(022)                                    
  = 0
13848/1:        172.9651        open64("/var/opt/samba/log.nmb", 
O_WRONLY|O_APPEND|O_CREAT, 0666) = 3
13848/1:        172.9654        llseek(3, 0,
SEEK_END)                        
  = 14111597
13848/1:        172.9656       
umask(0)                                      
  = 022
13848/1:        172.9658       
time()                                        
  = 991684364
13848/1:        172.9702       
getuid()                                      
  = 0 [0]
13848/1:        172.9706        write(3, " [",
1)                             
  = 1
13848/1:        172.9708        write(3, " 2 0 0 1 / 0 6 / 0 4   1"..,
19)    
  = 19
13848/1:        172.9711        write(3, " ,   1",
3)                         
  = 3
13848/1:        172.9713        write(3, " ]  ",
2)                           
  = 2
13848/1:        172.9716        write(3, " n m b d / n m b d . c",
11)        
  = 11
13848/1:        172.9720        write(3, " : (",
2)                           
  = 2
13848/1:        172.9722        write(3, " 7 5 7",
3)                         
  = 3
13848/1:        172.9725        write(3, " )\n",
2)                           
  = 2
13848/1:        172.9728       
getuid()                                      
  = 0 [0]
13848/1:        172.9730        write(3, "     N e t b i o s   n a"..,
44)    
  = 44
13848/1:        172.9732       
getuid()                                      
  = 0 [0]
13848/1:        172.9735        write(3, "     C o p y r i g h t  "..,
38)    
  = 38
13848/1:        172.9738        stat64("/opt/samba/lib/smb.conf",
0xFFBEEF78) 
  = 0
13848/1:        172.9741       
brk(0x000A5A30)                               
  = 0
13848/1:        172.9743       
brk(0x000A7A30)                               
  = 0
13848/1:        172.9746        open64("/opt/samba/lib/smb.conf",
O_RDONLY)   
  = 4
13848/1:        172.9749        fstat64(4,
0xFFBEF258)                        
  = 0
13848/1:        172.9751       
brk(0x000A7A30)                               
  = 0
13848/1:        172.9752       
brk(0x000A9A30)                               
  = 0
13848/1:        172.9755        ioctl(4, TCGETA,
0xFFBEF1E4)                  
  Err#25 ENOTTY
13848/1:        172.9757        read(4, " #   S a m b a   c o n f"..,
8192)   
  = 746
13848/1:        172.9762        read(4, 0x000A60DC,
8192)                     
  = 0
13848/1:        172.9764        llseek(4, 0,
SEEK_CUR)                        
  = 746
13848/1:        172.9765       
close(4)                                      
  = 0
13848/1:        172.9768        stat64("",
0xFFBEF3D8)                        
  Err#2 ENOENT
13848/1:        172.9771        stat64("/opt/samba/lib/smb.conf",
0xFFBEEF68) 
  = 0
13848/1:        172.9774        
stat64("/opt/samba/lib/codepages/codepage.850", 0xFFBEF848) = 0
13848/1:        172.9776        
open64("/opt/samba/lib/codepages/codepage.850", O_RDONLY) = 4
13848/1:        172.9779        fstat64(4,
0xFFBEF658)                        
  = 0
13848/1:        172.9781       
brk(0x000A9A30)                               
  = 0
13848/1:        172.9782       
brk(0x000ABA30)                               
  = 0
13848/1:        172.9785        ioctl(4, TCGETA,
0xFFBEF5E4)                  
  Err#25 ENOTTY
13848/1:        172.9786        read(4, "01\0 R03 |\0\0\085B70101"..,
8192)   
  = 132
13848/1:        172.9789        llseek(4, 0,
SEEK_CUR)                        
  = 132
13848/1:        172.9791       
close(4)                                      
  = 0
13848/1:        172.9793        
stat64("/opt/samba/lib/codepages/unicode_map.850", 0xFFBEF7C8) = 0
13848/1:        172.9796        
open64("/opt/samba/lib/codepages/unicode_map.850", O_RDONLY) = 4
13848/1:        172.9799        fstat64(4,
0xFFBEF5C0)                        
  = 0
13848/1:        172.9800        ioctl(4, TCGETA,
0xFFBEF54C)                  
  Err#25 ENOTTY
13848/1:        172.9802        read(4, "0180 8 5 0\0\0\0\0\0\0\0"..,
8192)   
  = 8192
13848/1:        172.9805       
brk(0x000ABA30)                               
  = 0
13848/1:        172.9807       
brk(0x000CBA30)                               
  = 0
13848/1:        172.9811        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9815        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9819        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9822        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9826        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9830        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9833        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9837        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9841        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9844        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9848        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9852        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9855        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9859        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9864        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 8192
13848/1:        172.9867        read(4, "\0 _\0 _\0 _\0 _\0 _\0 _"..,
8192)   
  = 542
13848/1:        172.9883        llseek(4, 0,
SEEK_CUR)                        
  = 131614
13848/1:        172.9885       
close(4)                                      
  = 0
13848/1:        172.9888       
uname(0xFFBEEEF8)                             
  = 1
13848/1:        172.9890        stat64("",
0xFFBEF840)                        
  Err#2 ENOENT
13848/1:        172.9892        stat64("/opt/samba/lib/smb.conf",
0xFFBEF3D0) 
  = 0
13848/1:        172.9895        getsockopt(0, 65535, 4104, 0xFFBEFD38, 
0xFFBEFD3C, 1) = 0
13848/1:        172.9897        stat64("/var/opt/samba/locks",
0xFFBEFCA8)    
  = 0
13848/1:        172.9900       
open64("/var/opt/samba/locks/nmbd.pid", 
O_RDWR|O_NONBLOCK) = 4
13848/1:        172.9902        read(4, " 1 2 5 6 1\n\0\0\0\0\0\0"..,
19)     
  = 19
13848/1:        172.9905        kill(12561,
SIG#0)                            
  = 0

	--  nmbd is checking if process 12561 is still alive (it is)

13848/1:        172.9907        fcntl(4, F_SETLK64,
0xFFBEF410)               
  Err#11 EAGAIN

	-- a lock fails with EAGAIN, which means...
     EAGAIN
           The cmd argument is F_SETLK or F_SETLK64, the type  of
           lock  (l_type)  is  a  shared  (F_RDLCK)  or exclusive
           (F_WRLCK) lock, and the segment of a file to be locked
           is already exclusive-locked by another process; or the
           type is an exclusive lock and some portion of the seg-
           ment  of  a file to be locked is already shared-locked
           or exclusive-locked by another process.

	Therefor /var/opt/samba/locks/nmbd.pid is locked already.
	See below...


13848/1:        172.9909       
close(4)                                      
  = 0
13848/1:        172.9910       
time()                                        
  = 991684364
13848/1:        172.9914       
getuid()                                      
  = 0 [0]
13848/1:        172.9916        write(3, " [",
1)                             
  = 1
13848/1:        172.9918        write(3, " 2 0 0 1 / 0 6 / 0 4   1"..,
19)    
  = 19
13848/1:        172.9921        write(3, " ,   0",
3)                         
  = 3
13848/1:        172.9923        write(3, " ]  ",
2)                           
  = 2
13848/1:        172.9926        write(3, " l i b / p i d f i l e ."..,
13)    
  = 13
13848/1:        172.9928        write(3, " : ( 8 6",
4)                       
  = 4
13848/1:        172.9931        write(3, " )\n",
2)                           
  = 2
13848/1:        172.9933       
getuid()                                      
  = 0 [0]
13848/1:        172.9935        write(3, "     E R R O R :   n m b"..,
109)   
  = 109
13848/1:        172.9939        llseek(0, 0,
SEEK_CUR)                        
  Err#29 ESPIPE
13848/1:        172.9941        _exit(1)

	-- and nmbd bails, hard!

193/1:          172.9953            Received signal #18, SIGCLD, in
poll() 
[caught]
193/1:                siginfo: SIGCLD CLD_EXITED pid=13848
status=0x0001

	-- inetd reports the problem

193/1:          172.9955        poll(0xFFBEF620, 44,
-1)                      
  Err#4 EINTR
193/1:          172.9957        sigprocmask(0, 0x00000000,
0xFFBEF158)        
  = 0
193/1:          172.9959        sigprocmask(SIG_BLOCK, 0xFFBEF158, 
0xFFBEF148)  = 0
193/1:          172.9961        waitid(P_ALL, 0, 0xFFBEF1C8,
WEXITED|WNOHANG) 
  = 0
193/1:          172.9964        fstat(3,
0xFFBEEF10)                          
  = 0
193/1:          172.9966       
time()                                        
  = 991684364
193/1:          172.9969       
getpid()                                      
  = 193 [1]
193/1:          172.9972        putmsg(3, 0xFFBEE5C8, 0xFFBEE5BC,
0)          
  = 0
193/1:          172.9974        open("/etc/.syslog_door",
O_RDONLY)           
  = 7
193/1:          172.9978        door_info(7,
0xFFBEE500)                      
  = 0
193/1:          172.9980       
getpid()                                      
  = 193 [1]
193/1:          172.9982        door_call(7,
0xFFBEE4E8)                      
  = 0
193/1:          172.9984       
close(7)                                      
  = 0
193/1:          172.9985        waitid(P_ALL, 0, 0xFFBEF1C8,
WEXITED|WNOHANG) 
  = 0
193/1:          172.9987        sigprocmask(0, 0x00000000,
0xFFBEF158)        
  = 0
193/1:          172.9989        sigprocmask(SIG_SETMASK, 0xFFBEF158, 
0xFFBEF148) = 0
193/1:          172.9990        setcontext(0xFFBEF308)
193/1:          172.9992       
alarm(0)                                      
  = 0
193/1:          172.9994        sigaction(SIGALRM, 0xFFBEF6F8,
0xFFBEF7A8)    
  = 0
193/1:          172.9996        sigprocmask(SIG_BLOCK, 0xFFBEF798, 
0xFFBEF788)  = 0
193/1:          172.9998       
alarm(1)                                      
  = 0

-----

The appears to be in pidfile_pid, called from one of
Functions calling this function: pidfile_pid

  File         Function         Line
1 pidfile.c    pidfile_create    85 pid = pidfile_pid(name);
2 smbcontrol.c send_message     140 pid = pidfile_pid(dest);
3 startstop.c  stop_smbd         74 pid_t pid = pidfile_pid("smbd");
4 startstop.c  stop_nmbd         86 pid_t pid = pidfile_pid("nmbd");
5 statuspage.c status_page      138 smbd_pid = pidfile_pid("smbd");

I'd giuess at pidfile_create, from the ERROR message written...

Mr. Wiegand, can you email us a page or so from the
nmbd log file, including the ERROR message?

--dave
--
David Collier-Brown,           | Always do right. This will gratify 
Performance & Engineering Team | some people and astonish the rest.
Americas Customer Engineering  |                      -- Mark Twain
(905) 415-2849                 | davecb at canada.sun.com




More information about the samba-technical mailing list