[distcc] Problems with distcc hanging on large compiles (More debugging info)

Hien D. Ngo hien at moses.xp.com
Tue Aug 27 20:53:00 GMT 2002


Here's what I've got.  Looks like the client completes the compile, sends the data 
back to the server, and tries to close its connection.  The server receives all of 
the DOTO data back, but never gets to the "completed ok" or dcc_exit stage.  The 
server in this case is a RH 7.2 box running 2.4.9, the client is an older RH 6.x box 
running 2.2.16.

This particular problem was tickled using ccache and distcc, but I've seen the same 
problem using just distcc by itself.

Hien

Debug info:

======
Server
======
$ netstat -an | grep 192.168.0.146
tcp        0      0 192.168.0.252:36881     192.168.0.146:4200     ESTABLISHED 
$ lsof -i:36881
COMMAND   PID USER   FD   TYPE  DEVICE SIZE NODE NAME
distcc  23968 ngoh    7u  IPv4 5797561       TCP bldmaster.foo.com:36881-
>build03.foo.com:4200 (ESTABLISHED)
$ lsof -p 23968
COMMAND   PID USER   FD   TYPE  DEVICE     SIZE    NODE NAME
distcc  23968 ngoh  cwd    DIR   104,5     8192 6210700
/local/scratch/ngoh/ver/src/credit/src
distcc  23968 ngoh  rtd    DIR   104,2     4096       2 /
distcc  23968 ngoh  txt    REG   104,5   136632 5603616
/usr/local/utils/bin/distcc
distcc  23968 ngoh  mem    REG   104,2   485171  589888 /lib/ld-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2   436784  589906 /lib/libnsl-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2  5772268  983105
/lib/i686/libc-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2   261460  589922
/lib/libnss_files-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2   355236  589930
/lib/libnss_nisplus-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2    72296  589919
/lib/libnss_dns-2.2.4.so
distcc  23968 ngoh  mem    REG   104,2   261196
589935 /lib/libresolv-2.2.4.so
distcc  23968 ngoh    0r  FIFO     0,0          5411159 pipe
distcc  23968 ngoh    1w   REG   104,5        0 5361153
/local/scratch/ngoh/ccache/tmp.stdout.23964
distcc  23968 ngoh    2w   REG   104,5        0 5361168
/local/scratch/ngoh/ccache/tmp.stderr.23964
distcc  23968 ngoh    3w   REG   104,2 37570049  230248 /tmp/distcc.log
distcc  23968 ngoh    4w   REG   104,2        0  688283
/tmp/distcc_00002493/lock_build01.foo.com_0000000
distcc  23968 ngoh    5w   REG   104,2        0  690210
/tmp/distcc_00002493/lock_build02.foo.com_0000000
distcc  23968 ngoh    6w   REG   104,2        0  690214
/tmp/distcc_00002493/lock_build03.foo.com_0000000
distcc  23968 ngoh    7u  IPv4 5797561              TCP
bldmaster.foo.com:36881->build03.foo.com:4200 (ESTABLISHED)
distcc  23968 ngoh    8w   REG   104,5   616776 5361181
/local/scratch/ngoh/ccache/tmp.hash.23964.o
$ grep 'distcc\[23968\]' /tmp/distcc.log
distcc[23968] (dcc_scan_args) scanning arguments: g++ -fPIC -g -O -Wall -pipe -
pthread -Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii
distcc[23968] (dcc_scan_args) found object 
file "/local/scratch/ngoh/ccache/tmp.hash.23964.o"
distcc[23968] (dcc_scan_args) found input 
file "/local/scratch/ngoh/ccache/tmp.stdout.23964.ii"
distcc[23968] compile from /local/scratch/ngoh/ccache/tmp.stdout.23964.ii 
to /local/scratch/ngoh/ccache/tmp.hash.23964.o
distcc[23968] (dcc_parse_hosts) found tcp token "build01.foo.com"
distcc[23968] (dcc_parse_hosts) found tcp token "build02.foo.com"
distcc[23968] (dcc_parse_hosts) found tcp token "build03.foo.com"
distcc[23968] (dcc_parse_hosts) found tcp token "build04.foo.com"
distcc[23968] (dcc_parse_hosts) found tcp token "build05.foo.com"
distcc[23968] (dcc_try_lock_host) /tmp/distcc_00002493/lock_build01.foo.com_0000000 
already locked
distcc[23968] (dcc_try_lock_host) /tmp/distcc_00002493/lock_build02.foo.com_0000000 
already locked
distcc[23968] (dcc_try_lock_host) 
locked /tmp/distcc_00002493/lock_build03.foo.com_0000000
distcc[23968] (dcc_pick_buildhost) building on build03.foo.com
distcc[23968] exec on build03.foo.com: g++ -fPIC -g -O -Wall -pipe -pthread -Wno-non-
template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii
distcc[23968] (dcc_open_socket_out) client got connection to build03.foo.com port 
4200 on fd7
distcc[23968] (dcc_x_file) send 602449 byte 
file /local/scratch/ngoh/ccache/tmp.stdout.23964.ii with token DOTI
distcc[23968] (dcc_compile_remote) client finished sending request to server
distcc[23968] (dcc_r_result_header) got response header
distcc[23968] (dcc_r_fd) receive 0 bytes SERR file
distcc[23968] (dcc_r_fd) receive 0 bytes SOUT file
distcc[23968] (dcc_r_fd) receive 618052 bytes DOTO file

$ ps auxwww | grep distcc
ngoh     23963  0.0  0.0  2284 1024 pts/3    S    00:21   0:00 /bin/sh -c 
MY_DEPS="../../credit/linux.bld/FlowMatrix.dep ../../credit/linux.bld/FlowMatrix.o" /
usr/local/utils/bin/ccache /usr/local/utils/bin/distcc g++ -fPIC -g -O  -Wall -pipe -
pthread -Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50  -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG     -c -
o ../../credit/linux.bld/FlowMatrix.o FlowMatrix.cpp || { 
rm ../../credit/linux.bld/FlowMatrix.dep && false; }
ngoh     23964  0.0  0.0  1412  404 pts/3    S    00:21   
0:00 /usr/local/utils/bin/ccache /usr/local/utils/bin/distcc g++ -fPIC -g -O -Wall -
pipe -pthread -Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o ../../credit/linux.bld/FlowMatrix.o FlowMatrix.cpp
ngoh     23968  0.0  0.0  1728  748 pts/3    S    00:21   
0:00 /usr/local/utils/bin/distcc g++ -fPIC -g -O -Wall -pipe -pthread -Wno-non-
template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii

======
Client
======
$ netstat -an | grep 192.168.0.252
tcp        0   1277 192.168.0.146:4200     192.168.0.252:36881     FIN_WAIT1   
$ lsof -i:36881
[nothing]
$ grep 'distcc.*\[14606\]' /tmp/distcc.log
distccd[14606] (dcc_log_clientname) connection from bldmaster.foo.com (192.168.0.252)
distccd[14606] (dcc_r_argv) reading 30 arguments from job submission
distccd[14606] (dcc_r_argv) argv[0] = "g++"
distccd[14606] (dcc_r_argv) argv[1] = "-fPIC"
distccd[14606] (dcc_r_argv) argv[2] = "-g"
distccd[14606] (dcc_r_argv) argv[3] = "-O"
distccd[14606] (dcc_r_argv) argv[4] = "-Wall"
distccd[14606] (dcc_r_argv) argv[5] = "-pipe"
distccd[14606] (dcc_r_argv) argv[6] = "-pthread"
distccd[14606] (dcc_r_argv) argv[7] = "-Wno-non-template-friend"
distccd[14606] (dcc_r_argv) argv[8] = "-Wwrite-strings"
distccd[14606] (dcc_r_argv) argv[9] = "-ffor-scope"
distccd[14606] (dcc_r_argv) argv[10] = "-I./shadow/linux"
distccd[14606] (dcc_r_argv) argv[11] = "-I../../credit/linux.bld"
distccd[14606] (dcc_r_argv) argv[12] = "-I."
distccd[14606] (dcc_r_argv) argv[13] = "-I/local/scratch/ngoh/ver/hdr/shadow/linux"
distccd[14606] (dcc_r_argv) argv[14] = "-I/local/scratch/ngoh/ver/hdr"
distccd[14606] (dcc_r_argv) argv[15] = "-I/local/scratch/ngoh/ver/hdr/shadow/linux"
distccd[14606] (dcc_r_argv) argv[16] = "-I/local/scratch/ngoh/ver/hdr"
distccd[14606] (dcc_r_argv) argv[17] = "-DRW_NO_STL"
distccd[14606] (dcc_r_argv) argv[18] = "-ftemplate-depth-50"
distccd[14606] (dcc_r_argv) argv[19] = "-DRW_CENTURY_REQD"
distccd[14606] (dcc_r_argv) argv[20] = "-DRW_MULTI_THREAD"
distccd[14606] (dcc_r_argv) argv[21] = "-D_REENTRANT"
distccd[14606] (dcc_r_argv) argv[22] = "-I/usr/local/RogueWave-7.1.1"
distccd[14606] (dcc_r_argv) argv[23] = "-DMY_RW_CTLIB_=/usr/local/RogueWave-
7.1.1/lib/libsdb12d.so"
distccd[14606] (dcc_r_argv) argv[24] = "-I/usr/local/vni-3.0/CTT2.1/include"
distccd[14606] (dcc_r_argv) argv[25] = "-DWORKAROUND_COMPILER_BUG"
distccd[14606] (dcc_r_argv) argv[26] = "-c"
distccd[14606] (dcc_r_argv) argv[27] = "-o"
distccd[14606] (dcc_r_argv) argv[28] = "/local/scratch/ngoh/ccache/tmp.hash.23964.o"
distccd[14606] (dcc_r_argv) argv[29] 
= "/local/scratch/ngoh/ccache/tmp.stdout.23964.ii"
distccd[14606] (dcc_accept_job) got arguments: g++ -fPIC -g -O -Wall -pipe -pthread -
Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii
distccd[14606] (dcc_scan_args) scanning arguments: g++ -fPIC -g -O -Wall -pipe -
pthread -Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii
distccd[14606] (dcc_scan_args) found object 
file "/local/scratch/ngoh/ccache/tmp.hash.23964.o"
distccd[14606] (dcc_scan_args) found input 
file "/local/scratch/ngoh/ccache/tmp.stdout.23964.ii"
distccd[14606] compile from /local/scratch/ngoh/ccache/tmp.stdout.23964.ii 
to /local/scratch/ngoh/ccache/tmp.hash.23964.o
distccd[14606] input file /local/scratch/ngoh/ccache/tmp.stdout.23964.ii, output 
file /local/scratch/ngoh/ccache/tmp.hash.23964.o
distccd[14606] (dcc_set_input) command before: g++ -fPIC -g -O -Wall -pipe -pthread -
Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /local/scratch/ngoh/ccache/tmp.stdout.2
3964.ii
distccd[14606] (dcc_set_input) changed input 
from "/local/scratch/ngoh/ccache/tmp.stdout.23964.ii" 
to "/tmp/distcc_00002493/server_0014606.ii"
distccd[14606] (dcc_set_input) command after: g++ -fPIC -g -O -Wall -pipe -pthread -
Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /local/scratch/ngoh/ccache/tmp.hash.23964.o /tmp/distcc_00002493/server_0014606.ii
distccd[14606] (dcc_set_output) changed output 
from "/local/scratch/ngoh/ccache/tmp.hash.23964.o" 
to "/tmp/distcc_00002493/server_0014606.out"
distccd[14606] (dcc_set_output) command after: g++ -fPIC -g -O -Wall -pipe -pthread -
Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /tmp/distcc_00002493/server_0014606.out /tmp/distcc_00002493/server_0014606.ii
distccd[14606] (dcc_spawn_child) forking to execute g++ -fPIC -g -O -Wall -pipe -
pthread -Wno-non-template-friend -Wwrite-strings -ffor-scope -I./shadow/linux -
I../../credit/linux.bld -I. -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -I/local/scratch/ngoh/ver/hdr/shadow/linux -
I/local/scratch/ngoh/ver/hdr -DRW_NO_STL -ftemplate-depth-50 -DRW_CENTURY_REQD -
DRW_MULTI_THREAD -D_REENTRANT -I/usr/local/RogueWave-7.1.1 -
DMY_RW_CTLIB_=/usr/local/RogueWave-7.1.1/lib/libsdb12d.so -I/usr/local/vni-
3.0/CTT2.1/include -DWORKAROUND_COMPILER_BUG -c -
o /tmp/distcc_00002493/server_0014606.out /tmp/distcc_00002493/server_0014606.ii
distccd[14606] (dcc_spawn_child) child started as pid14607
distccd[14606] (dcc_r_fd) receive 602449 bytes DOTI file
distccd[14606] (dcc_collect_child) child 14607 terminated with status 0
distccd[14606] (dcc_x_file) send 0 byte file /tmp/distcc_00002493/cc_0014606.err 
with token SERR
distccd[14606] (dcc_x_file) send 0 byte file /tmp/distcc_00002493/cc_0014606.out 
with token SOUT
distccd[14606] (dcc_x_file) send 618052 byte 
file /tmp/distcc_00002493/server_0014606.out with token DOTO
distccd[14606] (dcc_report_rusage) g++ resource usage: 3.210000s user, 0.340000s 
system
distccd[14606] g++ on build03.foo.com completed ok
distccd[14606] input file: 602449 bytes; output file: 618052 bytes
distccd[14606] (dcc_exit) exit: code 0; self: 0.010000 user 0.050000 sys; children: 
3.210000 user 0.340000 sys





More information about the distcc mailing list