smbd spinning in loop in linux kernel 2.6 test 9

William Baker bbaker at priefert.com
Tue Dec 9 03:26:57 GMT 2003


This is probably a kernel issue, but you have to start somewhere, so 
this is where I'm starting.  Besides, there are better people than 
me to present this on the kernel list.

I am running Samba 2.2.8a on a stock RH9 distribution.  The 
machine is a single Xeon with hyperthreading enabled, and the kernel is 
compiled with SMP options, so it recognizes the machine as a 2-CPU 
SMP system.  Samba was compiled from source on the machine using 
gcc 3.2.2, as supplied in RH9.

I have a client that has disconnected, and the client machine has been 
turned off, but the smbd associated with that workstation is a runaway.  
My first clue was that CPU utilization was maxed out on a server where I 
really didn't anticipate seeing it maxed.  The runaway smbd processes 
are easy to spot.

[root@pacman bin]# ps -ax | grep smb
 3514 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 3524 ?        RN   105:55 /usr/local/samba/bin/smbd -D
 3525 ?        RN   111:09 /usr/local/samba/bin/smbd -D
 3530 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 3556 ?        SN     0:53 /usr/local/samba/bin/smbd -D
 3594 ?        SN     4:59 /usr/local/samba/bin/smbd -D
 3600 ?        SN    28:10 /usr/local/samba/bin/smbd -D
 3601 ?        SN     0:14 /usr/local/samba/bin/smbd -D
 3646 ?        SN   116:44 /usr/local/samba/bin/smbd -D
 3728 ?        SN     0:19 /usr/local/samba/bin/smbd -D
 3753 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 3755 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 3756 ?        SN     0:01 /usr/local/samba/bin/smbd -D
 3758 ?        SN     0:50 /usr/local/samba/bin/smbd -D
 3777 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 3795 ?        SN     0:18 /usr/local/samba/bin/smbd -D
 3811 ?        RN   112:25 /usr/local/samba/bin/smbd -D
 3832 ?        SN     0:28 /usr/local/samba/bin/smbd -D
 3864 ?        RN   113:34 /usr/local/samba/bin/smbd -D
 4101 ?        SN   110:46 /usr/local/samba/bin/smbd -D
 4111 ?        SN     0:43 /usr/local/samba/bin/smbd -D
 4176 ?        SN     3:59 /usr/local/samba/bin/smbd -D
 4181 ?        SN     0:25 /usr/local/samba/bin/smbd -D
 4547 ?        SN     1:19 /usr/local/samba/bin/smbd -D
 4576 ?        RN   130:44 /usr/local/samba/bin/smbd -D
 4699 ?        SN     3:13 /usr/local/samba/bin/smbd -D
 4788 ?        SN    19:27 /usr/local/samba/bin/smbd -D
 4814 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 5107 ?        SN    76:44 /usr/local/samba/bin/smbd -D
 5137 ?        SN     2:08 /usr/local/samba/bin/smbd -D
 6070 ?        SN     0:00 /usr/local/samba/bin/smbd -D
 6357 pts/4    S      0:00 grep smb

I picked one process to investigate.  smbstatus shows process 3864 
as follows:

[root@pacman bin]# ./smbstatus  | grep 3864
user         jmorris  users     3864   jmorris  (192.168.4.147) Mon Dec  
8 09:3:40 2003
postmode     jmorris  users     3864   jmorris  (192.168.4.147) Mon Dec  
8 09:4:19 2003
3864   DENY_NONE  0x3         RDWR       NONE             
/export/postmode/postmode/FLEXERRS.DAT   Mon Dec  8:09:43:28 2003
3864   DENY_NONE  0x3         RDWR       NONE             
/export/user/DMPROGS/flex3.1b/IN070S29.flx   Mon Dec  8 09:41:11 2003
3864   DENY_NONE  0x3         RDWR       NONE             
/export/user/DMPROGS/flex3.1b/MENU0001.FLX   Mon Dec  8 09:43:28 2003

The client machine, 192.168.4.147, has been turned off for hours.  
Attaching to the process with gdb shows the call stack:

#0  0x40291812 in select () from /lib/i686/libc.so.6
#1  0xbffff060 in ?? ()
#2  0x0814fa28 in receive_local_message ()
#3  0x08151722 in request_oplock_break ()
#4  0x080975ce in open_mode_check ()
#5  0x08097fd0 in open_file_shared1 ()
#6  0x08097af2 in open_file_shared ()
#7  0x08085593 in reply_open_and_X ()
#8  0x080a2cf7 in switch_message ()
#9  0x080a2d81 in construct_reply ()
#10 0x080a2f28 in process_smb ()
#11 0x080a38d6 in smbd_process ()
#12 0x0806a4a6 in main ()
#13 0x401cca07 in __libc_start_main () from /lib/i686/libc.so.6
(gdb)

strace shows a loop in progress.  I don't know what to make of the 
kill() calls, but the server was restarted once from SWAT.  The strace 
output follows:

--- SIGSTOP (Stopped (signal)) @ 0 (0) ---
--- SIGSTOP (Stopped (signal)) @ 0 (0) ---
select(22, [6 21], NULL, NULL, {32, 0}) = 1 (in [6], left {32, 0})
recvfrom(6, "\3\200\f\16\0\0\1\10\0\0\0\0\0\0?\203G\0\0\0\0\0\0\t\0"..., 
26, 0, {sa_family=AF_INET, sin_port=htons(33000), 
sin_addr=inet_addr("127.0.0.1")}, [16]) = 26
fcntl64(14, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=624, 
len=1}, 0xbffff0f0) = 0
kill(4576, SIG_0)                       = 0
kill(3594, SIG_0)                       = 0
kill(3600, SIG_0)                       = 0
kill(5137, SIG_0)                       = 0
kill(3556, SIG_0)                       = 0
kill(4547, SIG_0)                       = 0
kill(4111, SIG_0)                       = 0
kill(3758, SIG_0)                       = 0
kill(3795, SIG_0)                       = 0
kill(3777, SIG_0)                       = 0
kill(3756, SIG_0)                       = 0
kill(3753, SIG_0)                       = 0
kill(3728, SIG_0)                       = 0
kill(3601, SIG_0)                       = 0
fcntl64(14, F_SETLKW64, {type=F_UNLCK, whence=SEEK_SET, start=624, 
len=1}, 0xbffff0f0) = 0
sendto(6, "\3\0\f\16\0\0\1\10\0\0\0\0\0\0?\203G\0\0\0\0\0\0\t\0\0"..., 
26, 0, {sa_family=AF_INET, sin_port=htons(33000), 
sin_addr=inet_addr("127.0.0.1")}, 16) = 26
time(NULL)                              = 1070938528
time(NULL)                              = 1070938528
select(22, [6 21], NULL, NULL, {32, 0}) = 1 (in [6], left {32, 0})
recvfrom(6, "\3\200\f\16\0\0\1\10\0\0\0\0\0\0?\203G\0\0\0\0\0\0\t\0"..., 
26, 0, {sa_family=AF_INET, sin_port=htons(33000), 
sin_addr=inet_addr("127.0.0.1")}, [16]) = 26
fcntl64(14, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=624, 
len=1}, 0xbffff0f0) = 0
kill(4576, SIG_0)                       = 0
kill(3594, SIG_0)                       = 0
kill(3600, SIG_0)                       = 0
kill(5137, SIG_0)                       = 0
kill(3556, SIG_0)                       = 0
kill(4547, SIG_0)                       = 0
kill(4111, SIG_0)                       = 0
kill(3758, SIG_0)                       = 0
kill(3795, SIG_0)                       = 0
kill(3777, SIG_0)                       = 0
kill(3756, SIG_0)                       = 0
kill(3753, SIG_0)                       = 0
kill(3728, SIG_0)                       = 0
kill(3601, SIG_0)                       = 0
fcntl64(14, F_SETLKW64, {type=F_UNLCK, whence=SEEK_SET, start=624, 
len=1}, 0xbffff0f0) = 0
sendto(6, "\3\0\f\16\0\0\1\10\0\0\0\0\0\0?\203G\0\0\0\0\0\0\t\0\0"..., 
26, 0, {sa_family=AF_INET, sin_port=htons(33000), 
sin_addr=inet_addr("127.0.0.1")}, 16) = 26
time(NULL)                              = 1070938528
time(NULL)                              = 1070938528
select(22, [6 21], NULL, NULL, {32, 0}) = 1 (in [6], left {31, 999000})
recvfrom(6, "\3\200\f\16\0\0\1\10\0\0\0\0\0\0?\203G\0\0\0\0\0\0\t\0"..., 
26, 0, {sa_family=AF_INET, sin_port=htons(33000), 
sin_addr=inet_addr("127.0.0.1")}, [16]) = 26
fcntl64(14, F_SETLKW64, {type=F_WRLCK, whence=SEEK_SET, start=624, 
len=1}, 0xbffff0f0) = 0
kill(4576, SIG_0)                       = 0
kill(3594, SIG_0)                       = 0
kill(3600, SIG_0)                       = 0




More information about the samba-technical mailing list