[distcc] [PATCH] Adding host list randomization

Martin Pool mbp at sourcefrog.net
Fri May 28 23:46:48 GMT 2004


On 28 May 2004, Josh Hyman <josh.hyman at gmail.com> wrote:
> In http://lists.samba.org/archive/distcc/2004q2/002250.html, Dan
> proposed randomizing the hosts list.  Here's a patch to add that
> feature, controlled by a special --randomize option in the hosts list.
> On a large shared build cluster, this helps spread the load
> without requiring separate hosts lists for each user.
> Martin, what do you think?

Hi Josh, 

Thank you for the patch.  It looks OK to me, except that I am not sure
if it should be an option specified in that way.  I wonder if it
should always be on?  Or at least, if it should always be on until we
have some kind of automatic weighting of faster machines.

Rather than the qsort I think it is OK to just do a simple shuffle.

Can I just check Google is OK for this to be released under the GPL?

> 
> -- Josh
> 
> --- distcc-2.14/man/distcc.1.dist    2004-05-27 17:15:27.000000000 -0700
> +++ distcc-2.14/man/distcc.1    2004-05-27 18:07:52.000000000 -0700
> @@ -221,7 +221,9 @@
>  distcc prefers hosts towards the start of the list, so machines should
>  be listed in descending order of speed.  In particular, when only a
>  single compilation can be run (such as from a configure script), the
> -first machine listed is used.
> +first machine listed is used (but see
> +.I --randomize
> +below).
>  .PP
>  Placing
>  .I localhost
> @@ -235,6 +237,14 @@
>  client is less than one fifth of the total, then the client should be
>  left out of the list.
>  .PP
> +If you have a large shared build cluster and a single shared hosts file,
> +the above rules would cause the first few machines in the hosts
> +file to be tried first even though they are likely to be busier than machines
> +later in the list.  To avoid this, place the keyword
> +.I --randomize
> +into the host list.  This will cause the host list to be randomized,
> +which should improve performance slightly for large build clusters.
> +.PP
>  Performance depends on the details of the source and makefiles used
>  for the project, and the machine and network speeds.  Experimenting
>  with different settings for the host list and -j factor may improve
> @@ -245,6 +255,7 @@
>  .nf
>    DISTCC_HOSTS = HOSTSPEC ...
>    HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST
> +                        | GLOBAL_OPTION
>    LOCAL_HOST = localhost[/LIMIT]
>    SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS]
>    TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS]
> @@ -252,6 +263,7 @@
>    HOSTID = HOSTNAME | IPV4
>    OPTIONS = ,OPTION[OPTIONS]
>    OPTION = lzo
> +  GLOBAL_OPTION = --randomize
>  .fi
>  .PP
>  Here are some individual examples of the syntax:
> @@ -298,6 +310,9 @@
>  .TP
>  .B ,lzo
>  Enables LZO compression for this TCP or SSH host.
> +.TP
> +.B --randomize
> +Randomize the host list before execution.
>  .PP
>  Here is an example demonstrating some possibilities:
>  .PP
> --- distcc-2.14/src/hosts.c.dist    2004-05-25 16:21:27.000000000 -0700
> +++ distcc-2.14/src/hosts.c    2004-05-28 10:41:25.000000000 -0700
> @@ -4,6 +4,9 @@
>   *
>   * Copyright (C) 2002, 2003, 2004 by Martin Pool <mbp at samba.org>
>   *
> + * dcc_randomize_host_list() and friends:
> + *   Copyright (C) 2004 by Google (Josh Hyman <joshh at google.com>)
> + *
>   * This program is free software; you can redistribute it and/or
>   * modify it under the terms of the GNU General Public License as
>   * published by the Free Software Foundation; either version 2 of the
> @@ -36,6 +39,7 @@
>   *
>    DISTCC_HOSTS = HOSTSPEC ...
>    HOSTSPEC = LOCAL_HOST | SSH_HOST | TCP_HOST | OLDSTYLE_TCP_HOST
> +                        | GLOBAL_OPTION
>    LOCAL_HOST = localhost[/LIMIT]
>    SSH_HOST = [USER]@HOSTID[/LIMIT][:COMMAND][OPTIONS]
>    TCP_HOST = HOSTID[:PORT][/LIMIT][OPTIONS]
> @@ -43,6 +47,7 @@
>    HOSTID = HOSTNAME | IPV4
>    OPTIONS = ,OPTION[OPTIONS]
>    OPTION = lzo
> +  GLOBAL_OPTION = --randomize
>   *
>   * Any amount of whitespace may be present between hosts.
>   *
> @@ -89,6 +94,8 @@
>  #include <errno.h>
>  #include <time.h>
>  #include <ctype.h>
> +#include <sys/time.h>
> +#include <sys/types.h>
> 
>  #include "distcc.h"
>  #include "trace.h"
> @@ -100,6 +107,18 @@
> 
>  const int dcc_default_port = DISTCC_DEFAULT_PORT;
> 
> +/***
> + * A simple container which would hold a host -> rand int pair
> + ***/
> +struct rand_container {
> +    struct dcc_hostdef *host;
> +    int rand;
> +};
> +
> +int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length);
> +
> +int dcc_compare_container(const void *a, const void *b);
> +
> 
>  #ifndef HAVE_STRNDUP
>  /**
> @@ -348,7 +367,7 @@
>                      struct dcc_hostdef **ret_list,
>                      int *ret_nhosts)
>  {
> -    int ret;
> +    int ret, flag_randomize = 0;
>      struct dcc_hostdef *prev, *curr;
> 
>      /* TODO: Check for '/' in places where it might cause trouble with
> @@ -390,6 +409,13 @@
>          token_start = where;
>          token_len = strcspn(where, " #\t\n\f");
> 
> +        /* intercept keywords which are not actually hosts */
> +        if (!strncmp(token_start, "--randomize", 11)) {
> +            flag_randomize = 1;
> +            where = token_start + token_len;
> +            continue;
> +        }
> +
>          /* Allocate new list item */
>          curr = calloc(1, sizeof(struct dcc_hostdef));
>          if (!curr) {
> @@ -441,6 +467,9 @@
>      }
> 
>      if (*ret_nhosts) {
> +        if (flag_randomize)
> +            if ((ret = dcc_randomize_host_list(ret_list, *ret_nhosts)) != 0)
> +                return ret;
>          return 0;
>      } else {
>          rs_log_warning("%s contained no hosts; can't distribute
> work", source_name);
> @@ -448,6 +477,68 @@
>      }
>  }
> 
> +int dcc_compare_container(const void *a, const void *b)
> +{
> +    struct rand_container *i, *j;
> +    i = (struct rand_container *) a;
> +    j = (struct rand_container *) b;
> +
> +    if (i->rand == j->rand)
> +        return 0;
> +    else if (i->rand > j->rand)
> +        return 1;
> +    else
> +        return -1;
> +}
> +
> +int dcc_randomize_host_list(struct dcc_hostdef **host_list, int length)
> +{
> +    int i, ret;
> +    unsigned int rand_val;
> +    struct dcc_hostdef *curr;
> +    struct rand_container *c;
> +    struct timeval tv;
> +
> +    c = malloc(length * sizeof(struct rand_container));
> +    if (!c) {
> +        rs_log_crit("failed to allocate host definition");
> +        return EXIT_OUT_OF_MEMORY;
> +    }
> +
> +#ifdef HAVE_GETTIMEOFDAY
> +    if ((ret = gettimeofday(&tv, NULL)) == 0)
> +        rand_val = (unsigned int) tv.tv_usec;
> +    else
> +#else
> +        rand_val = (unsigned int) time(NULL) ^ (unsigned int) getpid();
> +#endif
> +
> +    /* create pairs of hosts -> random numbers */
> +    srand(rand_val);
> +    curr = *host_list;
> +    for (i = 0; i < length; i++) {
> +        c[i].host = curr;
> +        c[i].rand = rand();
> +        curr = curr->next;
> +    }
> +
> +    /* sort */
> +    qsort(c, length, sizeof(struct rand_container), &dcc_compare_container);
> +
> +    /* reorder the list */
> +    for (i = 0; i < length; i++) {
> +        if (i != length - 1)
> +            c[i].host->next = c[i+1].host;
> +        else
> +            c[i].host->next = NULL;
> +    }
> +
> +    /* move the start of the list */
> +    *host_list = c[0].host;
> +
> +    free(c);
> +    return 0;
> +}
> 
>  int dcc_free_hostdef(struct dcc_hostdef *host)
>  {
> --- distcc-2.14/src/distcc.c.dist    2004-05-27 17:11:07.000000000 -0700
> +++ distcc-2.14/src/distcc.c    2004-05-27 17:12:38.000000000 -0700
> @@ -105,6 +105,7 @@
>  "   HOST:PORT                  TCP connection, specified port\n"
>  "   @HOST                      SSH connection\n"
>  "   USER at HOST                  SSH connection to specified host\n"
> +"   --randomize                Randomize the server list before execution\n"
>  "\n"
>  "distcc distributes compilation jobs across volunteer machines running\n"
>  "distccd.  Jobs that cannot be distributed, such as linking or \n"
> --- distcc-2.14/NEWS.dist    2004-05-28 11:35:08.000000000 -0700
> +++ distcc-2.14/NEWS    2004-05-28 11:40:43.000000000 -0700
> @@ -13,6 +13,9 @@
>      * distcc can now be built with a build directory separate from the
>        source directory.  Patch from Dennis Henriksen.
> 
> +    * Added --randomize option to host list to help spread the load
> +      across large shared clusters.  From patch by Google
> +      (Josh Hyman <joshh at google.com>).
> 
>    DOCUMENTATION:
> __ 
> distcc mailing list            http://distcc.samba.org/
> To unsubscribe or change options: 
> http://lists.samba.org/mailman/listinfo/distcc
--
Martin
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
Url : http://lists.samba.org/archive/distcc/attachments/20040529/20126cbf/attachment.bin


More information about the distcc mailing list