[distcc] timeouts in connect and gethostbyname

Oscar Esteban flesteban at mi.madritel.es
Sun Oct 6 19:09:00 GMT 2002


I enclose a patch that addresses some of the "to-do" items in the code. It
makes it possible to abort a connect() that takes too long, and likewise a
gethostbyname() call.
The second part has changed because of the last patch by Andreas Granig,
which handles one hostname resolving to multiple IPs.

The associated tests do work, but should be more generic. If anyone has
a better idea for testing the "connect timeout" (Connect_Dead_Host_Case),
please say so.

Also, the gethostbyname test case should be rewritten: it works for me when
I'm not online, but when I am, named immediately returns 'host not found'.
So the test needs a running DNS server that cannot reach any other
servers. Not something that can be simulated in a script...

	Oscar

Index: src/clinet.c
===================================================================
RCS file: /cvsroot/distcc/src/clinet.c,v
retrieving revision 1.26
diff -w -u -r1.26 clinet.c
--- src/clinet.c	30 Sep 2002 13:01:20 -0000	1.26
+++ src/clinet.c	6 Oct 2002 18:25:06 -0000
@@ -36,11 +36,11 @@
 #include <string.h>
 #include <fcntl.h>
 #include <errno.h>
-#include <signal.h>
 
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/socket.h>
+#include <sys/time.h>
 
 #include <netinet/in.h>
 #include <netinet/tcp.h>
@@ -70,11 +70,66 @@
  **/
 
 
+/**
+ * Behave like connect(), but wait at most time_to_wait milliseconds. Returns 0
+ * on success, or -1 on failure or timeout (errno is ETIMEDOUT on timeout).
+ **/
+static int dcc_connect_tout (int fd, struct sockaddr *addr, int size, int time_to_wait)
+{
+  int got_an_error = 0;
+  int fcntl_previous;
+
+  /* Remember the fd's current status flags so they can be restored later */
+  fcntl_previous = fcntl(fd, F_GETFL, 0);
+
+  /* Set the fd to be non-blocking */
+  fcntl(fd, F_SETFL, fcntl_previous | O_NONBLOCK);
+
+  if (connect(fd, addr, size)) {
+    /* connect() returned at once; EINPROGRESS means it is still pending */
+    if (EINPROGRESS == errno) {
+      struct timeval tv;
+      fd_set wfds;
+      int valret;
+
+      FD_ZERO(&wfds);       /* Clear write filedescriptor set */
+      FD_SET(fd, &wfds);    /* Add fd to the set */
+      tv.tv_sec  = time_to_wait / 1000;       /* Whole seconds of the timeout */
+      tv.tv_usec = 1000 * (time_to_wait % 1000);
+
+      valret = select(fd+1, NULL, &wfds, NULL, &tv);
+      if (valret) {
+        /* Nonzero: fd became writable in time, or select() itself failed */
+
+        int lbuf = sizeof(valret); /* NOTE(review): getsockopt takes socklen_t * */
+
+        /* Fetch the pending socket error; zero means connect succeeded */
+        getsockopt(fd, SOL_SOCKET, SO_ERROR, &valret, &lbuf);
+        if (valret) {
+          got_an_error = -1;
+
+          errno = valret; /* Do as real connect would have done */
+        }
+      } else {
+        /* select() returned 0: time has passed with no result. */
+        rs_log_error("connect timed out on fd%d\n", fd);
+        /* Report the timeout through errno, as a real connect would */
+        errno = ETIMEDOUT;
+        got_an_error = -1;
+      }
+    } else {
+      got_an_error = -1;
+    }
+  }
+
+  /* Restore the flags saved on entry */
+  fcntl(fd, F_SETFL, fcntl_previous);
+
+  return got_an_error;
+}
 
 /**
  * Open a socket to a tcp remote host with the specified port.
- *
- * @todo Don't try for too long to connect. 
  **/
 int dcc_open_socket_out(const struct in_addr *in, int port, int *p_fd)
 {
@@ -96,7 +151,8 @@
     sock_out.sin_port = htons((in_port_t) port);
     sock_out.sin_family = PF_INET;
 
-    if (connect(fd, (struct sockaddr *) &sock_out, (int) sizeof(sock_out))) {
+    if (dcc_connect_tout(fd, (struct sockaddr *) &sock_out,
+                         (int) sizeof(sock_out), CONNECT_TIMEOUT)) {
         rs_log_error("failed to connect to %s port %d: %s", inet_ntoa(*in), port, 
                      strerror(errno));
 	(void) close(fd);
Index: src/clinet.h
===================================================================
RCS file: /cvsroot/distcc/src/clinet.h,v
retrieving revision 1.2
diff -w -u -r1.2 clinet.h
--- src/clinet.h	30 Sep 2002 13:01:20 -0000	1.2
+++ src/clinet.h	6 Oct 2002 18:25:06 -0000
@@ -21,5 +21,7 @@
  * USA
  */
 
+#define CONNECT_TIMEOUT 2000   /* Timeout for connecting to a host, in milliseconds */
+
 int dcc_open_socket_out(const struct in_addr *in,
                         int port, int *fd);
Index: src/where.c
===================================================================
RCS file: /cvsroot/distcc/src/where.c,v
retrieving revision 1.33
diff -w -u -r1.33 where.c
--- src/where.c	30 Sep 2002 13:01:20 -0000	1.33
+++ src/where.c	6 Oct 2002 18:25:06 -0000
@@ -25,6 +25,8 @@
                 /* His hand is stretched out, and who shall turn it
                  * back?  -- Isaiah 14:27 */
 
+#define DNS_TIMEOUT     2000   /* Milliseconds; passed to alarm() as whole
+				  seconds, so keep it a multiple of 1000 */
     
 /**
  * @file
@@ -77,6 +79,8 @@
 #include <fcntl.h>
 #include <errno.h>
 #include <time.h>
+#include <setjmp.h>
+#include <signal.h>
 
 #include <sys/stat.h>
 #include <sys/file.h>
@@ -92,6 +96,43 @@
 #include "tempfile.h"
 #include "lock.h"
 
+/**
+ * SIGALRM handler for the gethostbyname timeout: longjmps to time_to_wakeup
+ **/
+static jmp_buf time_to_wakeup;
+static void dcc_alarm_handler()
+{
+  longjmp(time_to_wakeup, 1);
+}
+
+struct hostent * dcc_gethostbyname(const char *host)
+{
+  void *prev_signal;
+  struct hostent *hp;
+
+  alarm(0); /* Cancel any pending alarm so no SIGALRM arrives before setjmp */
+
+  prev_signal = signal(SIGALRM, dcc_alarm_handler);
+  if (!setjmp(time_to_wakeup)) {
+    /* setjmp returned 0: direct call, proceed with the lookup */
+    alarm(DNS_TIMEOUT / 1000); /* Arm the alarm (whole seconds) */
+    hp = gethostbyname(host);
+    alarm(0);             /* Finished in time, disconnect the alarm */
+    signal(SIGALRM, prev_signal); /* and restore previous behaviour */
+
+    if (!hp) {
+      rs_log_error("unknown host: \"%s\"", host);
+    }
+  } else {
+    /* Nonzero return: dcc_alarm_handler did a longjmp on SIGALRM */
+    signal(SIGALRM, prev_signal); /* restore previous behaviour */
+
+    rs_log_error("timeout looking for host: \"%s\"\n", host);
+    hp = NULL;
+  }
+
+  return hp;
+}
 
 /* TODO: Write a test harness for the host selection algorithm.
  * Perhaps a really simple simulation of machines taking different
@@ -122,7 +163,7 @@
     for (i_try = 0; i_try < 50; i_try++) {
         for (h = hostlist; h; h = h->next) {
             /* resolve hostname and fetch all proper IPs. */
-            hp = gethostbyname(h->hostname);
+            hp = dcc_gethostbyname(h->hostname);
             if(!hp)
                 continue;
 
Index: test/testdistcc.py
===================================================================
RCS file: /cvsroot/distcc/test/testdistcc.py,v
retrieving revision 1.64
diff -w -u -r1.64 testdistcc.py
--- test/testdistcc.py	30 Sep 2002 05:44:54 -0000	1.64
+++ test/testdistcc.py	6 Oct 2002 18:25:06 -0000
@@ -696,6 +696,79 @@
     def tearDown(self):
         self.leaveRundir()
 
+class dcc_connect_timeout(CompileHello_Case):
+    "Base class: run distcc against a given DISTCC_HOSTS and match a regex in its log"
+
+    def setUp(self, host_list):
+        CompileHello_Case.setUp(self)
+
+        os.environ['DISTCC_HOSTS'] = host_list
+        self.distcc_log = 'distcc.log'
+        os.environ['DISTCC_LOG'] = self.distcc_log
+
+    def runTest(self, string_to_match):
+        self.run_cmd("distcc gcc -c -o testtmp.o testtmp.c")
+        msgs = open(self.distcc_log, 'r').read()
+        self.assertReMatch(string_to_match, msgs)            
+
+"The case 'got connection' is tested in every other test"
+
+class Connect_Dead_Host_Case(dcc_connect_timeout):
+    """Made-up address on a local network, hoped not to answer, so connect times out"""
+
+    def GetHostAddress(self):
+        output = self.run_cmd("/sbin/ifconfig").split('\n')
+
+        skipping = 0
+        for line in output:
+            if re.match(r'^lo\s+', line):  # start of the "lo" stanza: skip until blank line
+                skipping = 1
+                continue
+            if skipping:
+                if re.match(r'^$', line):
+                    skipping = 0
+                continue
+            "TODO: adapt for IPv6 notation if different"
+            result = re.search(r'addr:(\d+(?:\.\d+)*).*Mask:(\d+(?:\.\d+)*)',
+                               line, re.I)
+
+            if result:
+                """Extract address and mask, and make up an address,
+                   hoping that it does not exist. Any better idea?"""
+                addr = map(int, result.group(1).split("."))
+                mask = map(int, result.group(2).split("."))
+                for i in xrange(len(addr)):
+                    addr[i] ^= (0x85 & ~mask[i])  # flip only bits outside the netmask
+                        
+                return ".".join(map(repr, addr))
+
+        return ""
+
+    def setUp(self):
+        self.host_list = self.GetHostAddress()
+        dcc_connect_timeout.setUp(self, self.host_list)
+    
+    def runTest(self):
+        assert self.host_list != "", "Can not find a network interface excluding localhost"
+        dcc_connect_timeout.runTest(self, r'connect timed out')
+
+class Connect_Invalid_Port_Case(dcc_connect_timeout):
+    """Known host, but closed port
+       It would be nice to check for 'connection refused', but because of
+       localization, the message will differ from system to system."""
+    def setUp(self):
+        dcc_connect_timeout.setUp(self, 'localhost:9999')
+    
+    def runTest(self):
+        dcc_connect_timeout.runTest(self, r'failed to connect')
+
+class Connect_Invalid_Server_Name_Case(dcc_connect_timeout):
+    """Invalid server name: expects the lookup-timeout message in the log"""
+    def setUp(self):
+        dcc_connect_timeout.setUp (self, 'no.such.host.here')
+    
+    def runTest(self):
+        dcc_connect_timeout.runTest(self, r'timeout looking for host')
         
 class ImpliedOutput_Case(CompileHello_Case):
     """Test handling absence of -o"""
@@ -883,4 +956,7 @@
                              ModeBits_Case,
                              Concurrent_Case,
                              ThousandFold_Case,
+			     Connect_Dead_Host_Case,
+			     Connect_Invalid_Port_Case,
+			     Connect_Invalid_Server_Name_Case,
                              BigAssFile_Case])





More information about the distcc mailing list