[distcc] timeouts in connect and gethostbyname
Oscar Esteban
flesteban at mi.madritel.es
Sun Oct 6 19:09:00 GMT 2002
I enclose a patch that addresses some of the "to-do's" in the code. It makes
it possible to abort a connect() call if it takes too long, and likewise a
gethostbyname() call.
The second part had to change because of the latest patch by Andreas Granig
regarding one hostname -> multiple IPs.
The associated tests do work, but they should be more generic. If anyone has
a better idea for testing the "connect timeout" (Connect_Dead_Host_Case),
please say so.
Also, the gethostbyname test case should be rewritten: it works for me when
I'm not online, but when I am, named immediately returns 'host not found'.
So the test case needs a running DNS server that has no possibility of
contacting other servers. That is not something that can be simulated in a script...
Oscar
Index: src/clinet.c
===================================================================
RCS file: /cvsroot/distcc/src/clinet.c,v
retrieving revision 1.26
diff -w -u -r1.26 clinet.c
--- src/clinet.c 30 Sep 2002 13:01:20 -0000 1.26
+++ src/clinet.c 6 Oct 2002 18:25:06 -0000
@@ -36,11 +36,11 @@
#include <string.h>
#include <fcntl.h>
#include <errno.h>
-#include <signal.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/socket.h>
+#include <sys/time.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
@@ -70,11 +70,66 @@
**/
+/**
+ * Simulate connect behaviour, but adding a timeout. If the connection is not
+ * ended in this time, return ETIMEDOUT
+ **/
+static int dcc_connect_tout (int fd, struct sockaddr *addr, int size, int time_to_wait)
+{
+ int got_an_error = 0;
+ int fcntl_previous;
+
+ /* Keep flags associated with the fd */
+ fcntl_previous = fcntl(fd, F_GETFL, 0);
+
+ /* Set the fd to be non-blocking */
+ fcntl(fd, F_SETFL, fcntl_previous | O_NONBLOCK);
+
+ if (connect(fd, addr, size)) {
+ /* If got out in a hurry, but hasn't yet finished... */
+ if (EINPROGRESS == errno) {
+ struct timeval tv;
+ fd_set wfds;
+ int valret;
+
+ FD_ZERO(&wfds); /* Clear write filedescriptor set */
+ FD_SET(fd, &wfds); /* Add fd to the set */
+ tv.tv_sec = time_to_wait / 1000; /* Set time to wait */
+ tv.tv_usec = 1000 * (time_to_wait % 1000);
+
+ valret = select(fd+1, NULL, &wfds, NULL, &tv);
+ if (valret) {
+ /* We got a result in time */
+
+ int lbuf = sizeof(valret);
+
+ /* Now, let's obtain the real error */
+ getsockopt(fd, SOL_SOCKET, SO_ERROR, &valret, &lbuf);
+ if (valret) {
+ got_an_error = -1;
+
+ errno = valret; /* Do as real connect would have done */
+ }
+ } else {
+ /* Time has passed with no result. */
+ rs_log_error("connect timed out on fd%d\n", fd);
+ /* We'll fake some error code into errno, TIMEOUT seems ok */
+ errno = ETIMEDOUT;
+ got_an_error = -1;
+ }
+ } else {
+ got_an_error = -1;
+ }
+ }
+
+ /* Restore flags saved before */
+ fcntl(fd, F_SETFL, fcntl_previous);
+
+ return got_an_error;
+}
/**
* Open a socket to a tcp remote host with the specified port.
- *
- * @todo Don't try for too long to connect.
**/
int dcc_open_socket_out(const struct in_addr *in, int port, int *p_fd)
{
@@ -96,7 +151,8 @@
sock_out.sin_port = htons((in_port_t) port);
sock_out.sin_family = PF_INET;
- if (connect(fd, (struct sockaddr *) &sock_out, (int) sizeof(sock_out))) {
+ if (dcc_connect_tout(fd, (struct sockaddr *) &sock_out,
+ (int) sizeof(sock_out), CONNECT_TIMEOUT)) {
rs_log_error("failed to connect to %s port %d: %s", inet_ntoa(*in), port,
strerror(errno));
(void) close(fd);
Index: src/clinet.h
===================================================================
RCS file: /cvsroot/distcc/src/clinet.h,v
retrieving revision 1.2
diff -w -u -r1.2 clinet.h
--- src/clinet.h 30 Sep 2002 13:01:20 -0000 1.2
+++ src/clinet.h 6 Oct 2002 18:25:06 -0000
@@ -21,5 +21,7 @@
* USA
*/
+#define CONNECT_TIMEOUT 2000 /* Time given in milliseconds */
+
int dcc_open_socket_out(const struct in_addr *in,
int port, int *fd);
Index: src/where.c
===================================================================
RCS file: /cvsroot/distcc/src/where.c,v
retrieving revision 1.33
diff -w -u -r1.33 where.c
--- src/where.c 30 Sep 2002 13:01:20 -0000 1.33
+++ src/where.c 6 Oct 2002 18:25:06 -0000
@@ -25,6 +25,8 @@
/* His hand is stretched out, and who shall turn it
* back? -- Isaiah 14:27 */
+#define DNS_TIMEOUT 2000 /* Time given in milliseconds, but
+ it'd better be multiple of 1000 */
/**
* @file
@@ -77,6 +79,8 @@
#include <fcntl.h>
#include <errno.h>
#include <time.h>
+#include <setjmp.h>
+#include <signal.h>
#include <sys/stat.h>
#include <sys/file.h>
@@ -92,6 +96,43 @@
#include "tempfile.h"
#include "lock.h"
+/**
+ * alarm handler for implementing gethostbyname timeout
+ **/
+static jmp_buf time_to_wakeup;
+static void dcc_alarm_handler()
+{
+ longjmp(time_to_wakeup, 1);
+}
+
+struct hostent * dcc_gethostbyname(const char *host)
+{
+ void *prev_signal;
+ struct hostent *hp;
+
+ alarm(0); /* Let's be sure we do not get a signal before setjmp */
+
+ prev_signal = signal(SIGALRM, dcc_alarm_handler);
+ if (!setjmp(time_to_wakeup)) {
+ /* we came here from the setjmp, proceed normally */
+ alarm(DNS_TIMEOUT / 1000); /* Program the alarm */
+ hp = gethostbyname(host);
+ alarm(0); /* Finished in time, disconnect the alarm */
+ signal(SIGALRM, prev_signal); /* and restore previous behaviour */
+
+ if (!hp) {
+ rs_log_error("unknown host: \"%s\"", host);
+ }
+ } else {
+ /* we came here from a longjump -> we got the SIGALRM */
+ signal(SIGALRM, prev_signal); /* restore previous behaviour */
+
+ rs_log_error("timeout looking for host: \"%s\"\n", host);
+ hp = NULL;
+ }
+
+ return hp;
+}
/* TODO: Write a test harness for the host selection algorithm.
* Perhaps a really simple simulation of machines taking different
@@ -122,7 +163,7 @@
for (i_try = 0; i_try < 50; i_try++) {
for (h = hostlist; h; h = h->next) {
/* resolve hostname and fetch all proper IPs. */
- hp = gethostbyname(h->hostname);
+ hp = dcc_gethostbyname(h->hostname);
if(!hp)
continue;
Index: test/testdistcc.py
===================================================================
RCS file: /cvsroot/distcc/test/testdistcc.py,v
retrieving revision 1.64
diff -w -u -r1.64 testdistcc.py
--- test/testdistcc.py 30 Sep 2002 05:44:54 -0000 1.64
+++ test/testdistcc.py 6 Oct 2002 18:25:06 -0000
@@ -696,6 +696,79 @@
def tearDown(self):
self.leaveRundir()
+class dcc_connect_timeout(CompileHello_Case):
+ "base for dcc_connect_timeout test cases"
+
+ def setUp(self, host_list):
+ CompileHello_Case.setUp(self)
+
+ os.environ['DISTCC_HOSTS'] = host_list
+ self.distcc_log = 'distcc.log'
+ os.environ['DISTCC_LOG'] = self.distcc_log
+
+ def runTest(self, string_to_match):
+ self.run_cmd("distcc gcc -c -o testtmp.o testtmp.c")
+ msgs = open(self.distcc_log, 'r').read()
+ self.assertReMatch(string_to_match, msgs)
+
+"The case 'got connection' is tested in every other test"
+
+class Connect_Dead_Host_Case(dcc_connect_timeout):
+ """Known host, but it does not answer"""
+
+ def GetHostAddress(self):
+ output = self.run_cmd("/sbin/ifconfig").split('\n')
+
+ skipping = 0
+ for line in output:
+ if re.match(r'^lo\s+', line):
+ skipping = 1
+ continue
+ if skipping:
+ if re.match(r'^$', line):
+ skipping = 0
+ continue
+ "TODO: adapt for IPv6 notation if different"
+ result = re.search(r'addr:(\d+(?:\.\d+)*).*Mask:(\d+(?:\.\d+)*)',
+ line, re.I)
+
+ if result:
+ """Extract address and mask, and make up an address,
+ hoping that it does not exist. Any better idea?"""
+ addr = map(int, result.group(1).split("."))
+ mask = map(int, result.group(2).split("."))
+ for i in xrange(len(addr)):
+ addr[i] ^= (0x85 & ~mask[i])
+
+ return ".".join(map(repr, addr))
+
+ return ""
+
+ def setUp(self):
+ self.host_list = self.GetHostAddress()
+ dcc_connect_timeout.setUp(self, self.host_list)
+
+ def runTest(self):
+ assert self.host_list != "", "Can not find a network interface excluding localhost"
+ dcc_connect_timeout.runTest(self, r'connect timed out')
+
+class Connect_Invalid_Port_Case(dcc_connect_timeout):
+ """Known host, but closed port
+ It should be nice to check for 'connection refused', but due to
+ localization, the message will be different in every case."""
+ def setUp(self):
+ dcc_connect_timeout.setUp(self, 'localhost:9999')
+
+ def runTest(self):
+ dcc_connect_timeout.runTest(self, r'failed to connect')
+
+class Connect_Invalid_Server_Name_Case(dcc_connect_timeout):
+ """Invalid server name"""
+ def setUp(self):
+ dcc_connect_timeout.setUp (self, 'no.such.host.here')
+
+ def runTest(self):
+ dcc_connect_timeout.runTest(self, r'timeout looking for host')
class ImpliedOutput_Case(CompileHello_Case):
"""Test handling absence of -o"""
@@ -883,4 +956,7 @@
ModeBits_Case,
Concurrent_Case,
ThousandFold_Case,
+ Connect_Dead_Host_Case,
+ Connect_Invalid_Port_Case,
+ Connect_Invalid_Server_Name_Case,
BigAssFile_Case])
More information about the distcc
mailing list