child exit code rsync bug patch finally submitted

Dave Dykstra dwd at drdykstra.us
Thu Jan 9 19:21:09 EST 2003


I am preparing for making 2.5.6pre1 today and decided to submit David
Staples patch with a little rework.  Here's the version I submitted.

- Dave Dykstra


--- main.c~	Thu Aug  1 15:46:59 2002
+++ main.c	Thu Jan  9 12:43:55 2003
@@ -26,6 +26,16 @@
 extern struct stats stats;
 extern int verbose;
 
+/* there's probably never more than at most 2 outstanding child processes,
+ * but set it higher just in case.
+ */
+#define MAXCHILDPROCS 5
+
+struct pid_status {
+	pid_t pid;
+	int   status;
+} pid_stat_table[MAXCHILDPROCS];
+
 static void show_malloc_stats(void);
 
 /****************************************************************************
@@ -33,11 +43,27 @@
 ****************************************************************************/
 void wait_process(pid_t pid, int *status)
 {
-	while (waitpid(pid, status, WNOHANG) == 0) {
+	pid_t waited_pid;
+	int cnt;
+
+	while ((waited_pid = waitpid(pid, status, WNOHANG)) == 0) {
 		msleep(20);
 		io_flush();
 	}
         
+	if ((waited_pid == -1) && (errno == ECHILD)) {
+		/* status of requested child no longer available.
+		 * check to see if it was processed by the sigchld_handler.
+		 */
+		for (cnt = 0;  cnt < MAXCHILDPROCS; cnt++) {
+			if (pid == pid_stat_table[cnt].pid) {
+				*status = pid_stat_table[cnt].status;
+				pid_stat_table[cnt].pid = 0;
+				break;
+			}
+		}
+	}
+
         /* TODO: If the child exited on a signal, then log an
          * appropriate error message.  Perhaps we should also accept a
          * message describing the purpose of the child.  Also indicate
@@ -848,7 +874,24 @@
 
 static RETSIGTYPE sigchld_handler(int UNUSED(val)) {
 #ifdef WNOHANG
-	while (waitpid(-1, NULL, WNOHANG) > 0) ;
+	int cnt, status;
+	pid_t pid;
+	/* An empty waitpid() loop was put here by Tridge and we could never
+	 * get him to explain why he put it in, so rather than taking it 
+	 * out we're instead saving the child exit statuses for later use.
+	 * The waitpid() loop presumably eliminates all possibility of leaving
+	 * zombie children, maybe that's why he did it.
+	 */
+	while ((pid = waitpid(-1, &status, WNOHANG)) > 0) {
+		 /* save the child's exit status */
+		 for (cnt = 0; cnt < MAXCHILDPROCS; cnt++) {
+			  if (pid_stat_table[cnt].pid == 0) {
+				   pid_stat_table[cnt].pid = pid;
+				   pid_stat_table[cnt].status = status;
+				   break;
+			  }
+		 }
+	}
 #endif
 }
 



On Tue, Sep 03, 2002 at 06:29:03PM -0700, jw schultz wrote:
> Subject: Re: [patch] for rsync
> On Tue, Sep 03, 2002 at 05:00:02PM -0700, drstaples at beckman.com wrote:
> > 
> > JW,
> > 
> > I pushed everything to a LINUX box and did the diff again this time with the
> > -bur option.
> > It does look significantly different.  I think I've got below what you are
> > looking for, so I am
> > resending it to rsync at lists.samba.org.
> > 
> > I'm including two additional people as they have asked for the patch information
> > as well.
> > The diff was performed on main.c (2.5.5 version from rsync.samba.org).  I hope
> > everyone
> > knows that I'm my purpose in providing this information, is so that everyone can
> > critique
> > my code (to make it better) and we can eliminate this problem for everyone.
> > 
> > Dave
> 
> Thank you Dave.  I've been abusing/educating him a bit
> offline so be nice everybody.  He has not only diagnosed the
> bug independantly but has come up with a direction to go
> toward a permanent solution.
> 
> Aside from my dislike of a fixed size array
> my comments are below.
> 
> 
> > -------------------------------------------------------------
> > 
> > --- main.c.orig     Tue Sep  3 16:38:23 2002
> > +++ main.c     Tue Sep  3 16:41:08 2002
> > @@ -27,6 +27,11 @@
> > 
> >  extern int verbose;
> > 
> > +struct pid_status {
> > +    pid_t pid;
> > +    int   status;
> > + } pid_stat_table[10];
> 
> The use of a literal here is bad news.
> Should be a #define CHILD_TABLE_SIZE or whatever.
> Then macro can be used in the loops.
> 
> Question.  Is 10 a good size?  Should it be more?
> I've not dug into the child spawning of rsync.
> 
> > +
> >  static void show_malloc_stats(void);
> > 
> >  /****************************************************************************
> > @@ -34,9 +39,27 @@
> >  ****************************************************************************/
> >  void wait_process(pid_t pid, int *status)
> >  {
> > -    while (waitpid(pid, status, WNOHANG) == 0) {
> > +    pid_t waited_pid;
> > +    int   cnt;
> > +
> > +    while ( 1 ) {
> > +         waited_pid = waitpid(pid, status, WNOHANG);
> > +         if ( waited_pid == 0 ) {
> >           msleep(20);
> >           io_flush();
> > +         } else break;
> > +    }
> > +    if (( waited_pid == -1 ) && ( errno == ECHILD )) {
> > +         /* status of requested child no longer available.  Check */
> > +         /* to see if it was processed by the sigchld_handler.    */
> > +         cnt = 0;
> > +         while ( cnt < 10 ) {
> > +              if ( pid == pid_stat_table[cnt].pid ) {
> > +                   *status = pid_stat_table[cnt].status;
> 
> If we do 'pid_stat_table[cnt].pid = 0' here we'll be able to
> reuse the slots. (if necessary)
> 
> > +                   break;
> > +              }
> > +              cnt++;
> > +         }
> >      }
> > 
> >          /* TODO: If the child exited on a signal, then log an
> > @@ -792,7 +815,22 @@
> > 
> >  static RETSIGTYPE sigchld_handler(int UNUSED(val)) {
> >  #ifdef WNOHANG
> > -    while (waitpid(-1, NULL, WNOHANG) > 0) ;
> > +    int cnt = 0;
> > +    pid_t pid = 0;
> > +    int status = 0;
> > +    while ( 1 ) {
> > +         pid = waitpid(-1, &status, WNOHANG);
> > +         cnt = 0;
> > +         while ( cnt < 10 ) {
> > +              if (pid_stat_table[cnt].pid == 0 ) {
> > +                   pid_stat_table[cnt].pid = pid;
> > +                   pid_stat_table[cnt].status = status;
> > +                   break;
> > +              }
> > +              cnt++;
> > +         }
> > +         if ( pid < 1 ) break;
> 
> If we run off the end of the array (cnt == 10) we should kick out a
> warning or we should resize it.
> 
> > +    };
> >  #endif
> >  }



More information about the rsync mailing list