repeatable disconnections when rsyncing over ssh

Arkadiusz Miskiewicz arekm at pld-linux.org
Thu Sep 22 07:06:43 GMT 2005


Hi,

I have a problem with rsyncing large data sets (consisting bunch of small 
files) over ssh. 

rsync 2.6.6 on both sides. Linux 2.6.10 on receiving side, 2.6.12.6 on sending 
side.

This strace is from rsyncing over 2Mbit link but I can repeat it over 100Mbps 
lan, too (while testing on lan 2.6.11.3 was on sending and 2.6.12.6 on 
receiving side).

I wonder what can be problem here? Some kernel settings?

The command line is:
rsync -e ssh -avP user at somehost.pl:/somedir .

It fails with
somedir/somefile
  Read from remote somehost.pl: Connection reset by peer
rsync: connection unexpectedly closed (579059629 bytes received so far) 
[receiver]
rsync: writefd_unbuffered failed to write 470 bytes: phase 
"unknown" [generator]: Broken pipe (32)
rsync error: error in rsync protocol data stream (code 12) at io.c(1099)
rsync error: error in rsync protocol data stream (code 12) at io.c(434)

strace on this side shows:
28461 gettimeofday({1127322710, 986521}, NULL) = 0
28461 write(3, "Return-path: <xyz>\nEnvelope-to: cos at abc.pl\nDelivery-date: 
Wed, 21 Sep 2005 18:09:01 +0200\nReceived: from [xx.xx.xx.xx]
(helo=xx.xx.xx)\n\tby xx.xx.xx with esmtp (Ex"..., 262144) = 262144
28461 select(6, [5], [], NULL, {60, 0} <unfinished ...>
28445 <... select resumed> )            = 0 (Timeout)
28445 select(5, [3], [4], NULL, {60, 0} <unfinished ...>
28461 <... select resumed> )            = 0 (Timeout)
28461 select(6, [5], [], NULL, {60, 0} <unfinished ...>
28445 <... select resumed> )            = 0 (Timeout)
28445 select(5, [3], [4], NULL, {60, 0} <unfinished ...>
28461 <... select resumed> )            = 0 (Timeout)
28461 select(6, [5], [], NULL, {60, 0} <unfinished ...>
28445 <... select resumed> )            = 0 (Timeout)
28445 select(5, [3], [4], NULL, {60, 0} <unfinished ...>
28461 <... select resumed> )            = 0 (Timeout)
28461 select(6, [5], [], NULL, {60, 0} <unfinished ...>
[...]
28445 select(5, [3], [4], NULL, {60, 0} <unfinished ...>
28461 <... select resumed> )            = 0 (Timeout)
28461 select(6, [5], [], NULL, {60, 0} <unfinished ...>
28445 <... select resumed> )            = 1 (out [4], left {30, 978000})
28461 <... select resumed> )            = 1 (in [5], left {59, 240000})
28445 --- SIGCHLD (Child exited) @ 0 (0) ---
28461 read(5, "", 4)                    = 0
28461 write(2, "rsync: connection unexpectedly closed (579059629 bytes 
received so far) [receiver]\n", 83) = 8
3
28461 rt_sigaction(SIGUSR1, {SIG_IGN}, {0x80570b0, [USR1], SA_RESTART}, 8) = 0
28461 rt_sigaction(SIGUSR2, {SIG_IGN}, {0x80570e0, [USR2], SA_RESTART}, 8) = 0
28461 waitpid(28446,  <unfinished ...>
28445 waitpid(-1,  <unfinished ...>
28461 <... waitpid resumed> 0xbfff8fd8, WNOHANG) = -1 ECHILD (No child 
processes)
28445 <... waitpid resumed> [{WIFEXITED(s) && WEXITSTATUS(s) == 255}], 
WNOHANG) = 28446
28461 close(3 <unfinished ...>
28445 waitpid(-1,  <unfinished ...>
28461 <... close resumed> )             = 0
28445 <... waitpid resumed> 0xbfffd2f4, WNOHANG) = 0
28461 lstat64("...",  <unfinished ...>
28445 sigreturn( <unfinished ...>
28461 <... lstat64 resumed> {st_mode=S_IFREG|0600, st_size=262144, ...}) = 0
28445 <... sigreturn resumed> )         = ? (mask now [])
28461 lchown32("...", 8, 12 <unfinished ...
>
28445 write(4, "\10\0YK\n\0\0\240\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0ZK\n\0\10\0
[K\n\0\0\240\0\0\0\0\0\0\0\0\0\0\0
\0\0\0\0\0\\K\n\0\0\240\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0]K\n\0\10\0kK\n\0\10\0lK\n\0\0\240\0\0\0\0\0\0\0\0\0\0\
0\0\0\0\0\0mK\n\0\10\0\201K\n\0\f\200\1\0\0\0\274\2\0\0\2\0\0\0\17\0\0\0\272\2\260\26\344\273\202K\n\0\10\0\20
4K\n\0\f\200\1\0\0\0\274\2\0\0\2\0\0\0Q\0\0\0\261\21\335\247\32\363\207K\n\0"..., 
470 <unfinished ...>
28461 <... lchown32 resumed> )          = 0
28445 <... write resumed> )             = -1 EPIPE (Broken pipe)
28461 chmod("...", 0660 <unfinished ...>
28445 --- SIGPIPE (Broken pipe) @ 0 (0) ---
28461 <... chmod resumed> )             = 0
28461 rename("..." <unfinished ...>
28445 write(2, "rsync: writefd_unbuffered failed to write 470 bytes: phase 
\"unknown\" [generator]: Broken pip
e (32)\n", 99) = 99
28445 rt_sigaction(SIGUSR1, {SIG_IGN}, {0x80570b0, [USR1], SA_RESTART}, 8) = 0
28445 rt_sigaction(SIGUSR2, {SIG_IGN}, {0x80570e0, [USR2], SA_RESTART}, 8) = 0
28445 waitpid(28446, 0xbfffd5e8, WNOHANG) = 0
28445 getpid()                          = 28445
28445 kill(28446, SIGUSR1)              = -1 ESRCH (No such process)
28445 kill(28461, SIGUSR1)              = 0
28445 write(2, "rsync error: error in rsync protocol data stream (code 12) at 
io.c(1099)\n", 73) = 73
28461 <... rename resumed> )            = 0
28461 --- SIGUSR1 (User defined signal 1) @ 0 (0) ---
28461 kill(28446, SIGUSR1)              = -1 ESRCH (No such process)
28461 write(2, "rsync error: error in rsync protocol data stream (code 12) at 
io.c(434)\n", 72) = 72
28445 munmap(0xb7dc2000, 4096)          = 0
28445 exit_group(12)                    = ?
28461 munmap(0xb7dc2000, 4096)          = 0
28461 exit_group(12)     


on the other side:


  30731 gettimeofday({1127319163, 408316}, NULL) = 0
30731 select(0, NULL, NULL, NULL, {0, 118261}) = 0 (Timeout)
30731 gettimeofday({1127319163, 527315}, NULL) = 0
30731 select(2, NULL, [1], NULL, {60, 0}) = 1 (out [1], left {60, 0})
30731 write(1, 
"\374\17\0\7YHu5IWPt7YSODINB28eNA0BcJ6HoK66iYiaDg4CPSE7PDIKE4GQnbGw\ntLOPFQf8tYcWDjEN7Z/+BgJoe5
eKs7aT8pz2ZRwDHRoLAx5r+0zV5X/x6ZOSbx5tlId3GeWdmoyG\n6mgAasFe22pkl5PhFGz7bRH0wWFGVuHlYVxqGhQRYnBl9lL6hJH2np7l4/
vk4HH"..., 4096) = 4096
30731 gettimeofday({1127319163, 527704}, NULL) = 0
30731 select(2, NULL, [1], NULL, {60, 0}) = 1 (out [1], left {60, 0})
30731 write(1, 
"\374\17\0\00779EQFOmf8Gpnk+MUFkjo52wXYVJ5Wpuf5pmcQuDuaRxmbQUe\nFRUUQXXhhOrylpX24v0VY2UGAVX0Foe
I6J20hH3i5RIHHgkKBQUPEezQ7oCEh4mCn4HrBgcSDjcP\nDQh+uqWGiqSykfUdDjYwOicxDTMGgYC1vryyvLSNj/0yOTY/Jz40NgKzqqq4pq6
7mDQkIi"..., 4096) = 4096
30731 gettimeofday({1127319163, 528021}, NULL) = 0
30731 select(2, NULL, [1], NULL, {60, 0}) = 1 (out [1], left {60, 0})
30731 write(1, 
"\374\17\0\7CSnnVzmxZ5HT2/3Fn4Ftvl3dlkHFK9d/F9tV8\n9WJP/xRY/2PA5nJU2PrYR+D313D8dGP9z3t0/mRU+mXy
dtT/ZFRdVFjEw1TCwf8W8ZIXzukR5cBP\nZ1XnaVjgY0TX9fVpk/9r5u0Q358bw+kU2/1s3f5aUPF88M8TlOQflukF7ZMazu5y8vPGWPPPkpxL
\nfMMV"..., 4096) = 4096
30731 gettimeofday({1127319163, 528323}, NULL) = 0
30731 select(0, NULL, NULL, NULL, {0, 118213}) = 0 (Timeout)
30731 gettimeofday({1127319163, 647304}, NULL) = 0
30731 select(2, NULL, [1], NULL, {60, 0}) = 1 (out [1], left {60, 0})
30731 write(1, 
"\374\17\0\7Xs1WNuZEpmEAUHBRFH4pXp6JCa\nhISe6MZgEBweGgQEH2h39uKVlpCSk+v9VHZlZH1Ux9vE9+Tg4Ob80nR
4YhQRFGtsYGRyQMT94JWW\nl+rp6e37wER/ZmNvaWxnTtv3/fH3/vrz3E1+Z2Vycnt4Tsfy/vz88vL2zttZd3VS3drDzsDX191S\nTnN/eGV/d
EVX383"..., 4096) = -1 EPIPE (Broken pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 write(2, "rsync: writefd_unbuffered failed to write 4096 bytes: phase 
\"unknown\" [sender]: Broken pipe
(32)\n", 97) = -1 EPIPE (Broken pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 rt_sigaction(SIGUSR1, {SIG_IGN}, {0x80570b0, [USR1], SA_RESTART}, 8) = 0
30731 rt_sigaction(SIGUSR2, {SIG_IGN}, {0x80570e0, [USR2], SA_RESTART}, 8) = 0
30731 write(2, "rsync error: errors with program diagnostics (code 13) at 
log.c(254)\n", 69) = -1 EPIPE (Broke
n pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 rt_sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 rt_sigaction(SIGUSR2, {SIG_IGN}, {SIG_IGN}, 8) = 0
   30731 write(2, "rsync error: errors with program diagnostics (code 13) at 
log.c(254)\n", 69) = -1 EPIPE (Broke
n pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 rt_sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 rt_sigaction(SIGUSR2, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 write(2, "rsync error: errors with program diagnostics (code 13) at 
log.c(254)\n", 69) = -1 EPIPE (Broke
n pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 rt_sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 rt_sigaction(SIGUSR2, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 write(2, "rsync error: errors with program diagnostics (code 13) at 
log.c(254)\n", 69) = -1 EPIPE (Broke
n pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
[...]
 30731 rt_sigaction(SIGUSR1, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 rt_sigaction(SIGUSR2, {SIG_IGN}, {SIG_IGN}, 8) = 0
30731 write(2, "rsync error: errors with program diagnostics (code 13) at 
log.c(254)\n", 69) = -1 EPIPE (Broke
n pipe)
30731 --- SIGPIPE (Broken pipe) @ 0 (0) ---
30731 exit_group(13)                    = ?
       

-- 
Arkadiusz Miśkiewicz                    PLD/Linux Team
http://www.t17.ds.pwr.wroc.pl/~misiek/  http://ftp.pld-linux.org/


More information about the rsync mailing list