[distcc] [PATCH] Adding host list randomization

Josh Hyman josh.hyman at gmail.com
Fri May 28 23:33:00 GMT 2004


In http://lists.samba.org/archive/distcc/2004q2/002250.html, Dan
proposed randomizing the hosts list.  Here's a patch to add that
feature, controlled by a special --randomize option in the hosts list.
On a large shared build cluster, this helps spread the load
without requiring separate hosts lists for each user.
Martin, what do you think?

-- Josh

--- distcc-2.14/man/distcc.1.dist    2004-05-27 17:15:27.000000000 -0700
+++ distcc-2.14/man/distcc.1    2004-05-27 18:07:52.000000000 -0700
@@ -221,7 +221,9 @@
 distcc prefers hosts towards the start of the list, so machines should
 be listed in descending order of speed.  In particular, when only a
 single compilation can be run (such as from a configure script), the
-first machine listed is used.
+first machine listed is used (but see
+.I --randomize
+below).
 .PP
 Placing
 .I localhost
@@ -235,6 +237,14 @@
 client is less than one fifth of the total, then the client should be
 left out of the list.
 .PP
+If you have a large shared build cluster and a single shared hosts file,
+the above rules would cause the first few machines in the hosts
+file to be tried first even though they are likely to be busier than machines
+later in the list.  To avoid this, place the keyword
+.I --randomize
+into the host list.  This will cause the host list to be randomized,
+which should improve performance slightly for large build clusters.
+.PP
 Performance depends on the details of the source and makefiles used
 for the project, and the machine and network speeds.  Experimenting
 with different settings for the host list and -j factor may improve
@@ -245,6 +255,7 @@
 .nf
   DISTCC_HOSTS = HOSTSPEC ...
   HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST
+                        | GLOBAL_OPTION
   LOCAL_HOST = localhost[/LIMIT]
   SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS]
   TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS]
@@ -252,6 +263,7 @@
   HOSTID = HOSTNAME | IPV4
   OPTIONS = ,OPTION[OPTIONS]
   OPTION = lzo
+  GLOBAL_OPTION = --randomize
 .fi
 .PP
 Here are some individual examples of the syntax:
@@ -298,6 +310,9 @@
 .TP
 .B ,lzo
 Enables LZO compression for this TCP or SSH host.
+.TP
+.B --randomize
+Randomize the host list before execution.
 .PP
 Here is an example demonstrating some possibilities:
 .PP
--- distcc-2.14/src/hosts.c.dist    2004-05-25 16:21:27.000000000 -0700
+++ distcc-2.14/src/hosts.c    2004-05-28 10:41:25.000000000 -0700
@@ -4,6 +4,9 @@
  *
  * Copyright (C) 2002, 2003, 2004 by Martin Pool <mbp at samba.org>
  *
+ * dcc_randomize_host_list() and friends:
+ *   Copyright (C) 2004 by Google (Josh Hyman <joshh at google.com>)
+ *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
  * published by the Free Software Foundation; either version 2 of the
@@ -36,6 +39,7 @@
  *
   DISTCC_HOSTS = HOSTSPEC ...
   HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST
+                        | GLOBAL_OPTION
   LOCAL_HOST = localhost[/LIMIT]
   SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS]
   TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS]
@@ -43,6 +47,7 @@
   HOSTID = HOSTNAME | IPV4
   OPTIONS = ,OPTION[OPTIONS]
   OPTION = lzo
+  GLOBAL_OPTION = --randomize
  *
  * Any amount of whitespace may be present between hosts.
  *
@@ -89,6 +94,8 @@
 #include <errno.h>
 #include <time.h>
 #include <ctype.h>
+#include <sys/time.h>
+#include <sys/types.h>

 #include "distcc.h"
 #include "trace.h"
@@ -100,6 +107,18 @@

 const int dcc_default_port = DISTCC_DEFAULT_PORT;

+/* Pairs a host with a random sort key; an array of these is sorted to
+ * shuffle the host list. */
+struct rand_container {
+    struct dcc_hostdef *host;   /* list node being reordered */
+    int rand;                   /* sort key drawn from rand() */
+};
+
+/* Shuffles the host list in place; returns 0 or EXIT_OUT_OF_MEMORY. */
+int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length);
+/* qsort() comparator ordering rand_container entries by their rand key. */
+int dcc_compare_container(const void *a, const void *b);
+

 #ifndef HAVE_STRNDUP
 /**
@@ -348,7 +367,7 @@
                     struct dcc_hostdef **ret_list,
                     int *ret_nhosts)
 {
-    int ret;
+    int ret, flag_randomize = 0;
     struct dcc_hostdef *prev, *curr;

     /* TODO: Check for '/' in places where it might cause trouble with
@@ -390,6 +409,13 @@
         token_start = where;
         token_len = strcspn(where, " #\t\n\f");

+        /* intercept keywords which are not actually hosts */
+        if (!strncmp(token_start, "--randomize", 11)) {
+            flag_randomize = 1;
+            where = token_start + token_len;
+            continue;
+        }
+
         /* Allocate new list item */
         curr = calloc(1, sizeof(struct dcc_hostdef));
         if (!curr) {
@@ -441,6 +467,9 @@
     }

     if (*ret_nhosts) {
+        if (flag_randomize)
+            if ((ret = dcc_randomize_host_list(ret_list, *ret_nhosts)) != 0)
+                return ret;
         return 0;
     } else {
         rs_log_warning("%s contained no hosts; can't distribute work", source_name);
@@ -448,6 +477,68 @@
     }
 }

+/* qsort() comparator for struct rand_container: ascending by rand key.
+ * Uses explicit comparisons; returns -1, 0 or 1. */
+int dcc_compare_container(const void *a, const void *b)
+{
+    const struct rand_container *lhs = a;
+    const struct rand_container *rhs = b;
+
+    if (lhs->rand > rhs->rand)
+        return 1;
+    if (lhs->rand < rhs->rand)
+        return -1;
+    return 0;
+}
+
+/**
+ * Shuffle the @p length hosts linked from *host_list by sorting them on
+ * random keys.  @p length must be >= 1 and match the list, which is walked
+ * unchecked.  Returns 0, or EXIT_OUT_OF_MEMORY if allocation fails.
+ **/
+int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length)
+{
+    int i;
+    unsigned int rand_val;
+    struct dcc_hostdef *curr;
+    struct rand_container *c;
+
+    c = malloc(length * sizeof *c);
+    if (!c) {
+        rs_log_crit("failed to allocate host definition");
+        return EXIT_OUT_OF_MEMORY;
+    }
+
+    /* Always build a seed from time and pid, then mix in microseconds when
+     * available, so no preprocessor branch can leave rand_val unset or
+     * make the srand() call conditional. */
+    rand_val = (unsigned int) time(NULL) ^ (unsigned int) getpid();
+#ifdef HAVE_GETTIMEOFDAY
+    {
+        struct timeval tv;
+        if (gettimeofday(&tv, NULL) == 0)
+            rand_val ^= (unsigned int) tv.tv_usec;
+    }
+#endif
+    srand(rand_val);
+
+    /* create pairs of hosts -> random numbers */
+    curr = *host_list;
+    for (i = 0; i < length; i++) {
+        c[i].host = curr;
+        c[i].rand = rand();
+        curr = curr->next;
+    }
+
+    /* sort by random key, then relink the nodes in the new order */
+    qsort(c, length, sizeof *c, &dcc_compare_container);
+    for (i = 0; i < length; i++)
+        c[i].host->next = (i != length - 1) ? c[i+1].host : NULL;
+
+    *host_list = c[0].host;     /* move the start of the list */
+    free(c);
+    return 0;
+}

 int dcc_free_hostdef(struct dcc_hostdef *host)
 {
--- distcc-2.14/src/distcc.c.dist    2004-05-27 17:11:07.000000000 -0700
+++ distcc-2.14/src/distcc.c    2004-05-27 17:12:38.000000000 -0700
@@ -105,6 +105,7 @@
 "   HOST:PORT                  TCP connection, specified port\n"
 "   @HOST                      SSH connection\n"
 "   USER@HOST                  SSH connection to specified host\n"
+"   --randomize                Randomize the server list before execution\n"
 "\n"
 "distcc distributes compilation jobs across volunteer machines running\n"
 "distccd.  Jobs that cannot be distributed, such as linking or \n"
--- distcc-2.14/NEWS.dist    2004-05-28 11:35:08.000000000 -0700
+++ distcc-2.14/NEWS    2004-05-28 11:40:43.000000000 -0700
@@ -13,6 +13,9 @@
     * distcc can now be built with a build directory separate from the
       source directory.  Patch from Dennis Henriksen.

+    * Added --randomize option to host list to help spread the load
+      across large shared clusters.  From patch by Google
+      (Josh Hyman <joshh at google.com>).

   DOCUMENTATION:



More information about the distcc mailing list