[SCM] CTDB repository - branch 1.2.40 updated - ctdb-1.2.61-17-g2cf0ceb

Mon Apr 22 00:16:22 MDT 2013

The branch, 1.2.40 has been updated
       via  2cf0ceb20c87756cf2e012d67129c0205c6db9aa (commit)
       via  02e349d158cfa5413e391066e098413f981c592b (commit)
       via  905a5a6fdf69a79574fcfff272aaa9b292eac159 (commit)
       via  5e157d829efff1bed94c1cc65a220148c769e559 (commit)
       via  57e520c86cd59776e6e850be6ae02b3345e6ab3a (commit)
       via  e71243dc514752bd19dfd8cdf1eda30809d67494 (commit)
       via  485b32d77972271dd0c5938ab5aa1d657484ae5e (commit)
       via  0f39c9bf47a5d84b1b5e3af19e6e8cd610dbfef6 (commit)
       via  f4c04ee072772450e269c71db3c6c0f4331537a8 (commit)
       via  b90249874c45c291e25515286a81c3cfe1b0ca6f (commit)
       via  764359b2081b7b6aad6af17e9b86b7df0e422416 (commit)
       via  64bbe9f35f58b5f2af23acfa6ded9751839b9ece (commit)
       via  63debc8aecd8ab626d827d508109fefa9503c101 (commit)
       via  dd465bfcf5bc8cffcae3abb58100b68d46ccda49 (commit)
       via  7dc9c1d3edcba5b61bdffb4ed213a216cd8ee809 (commit)
       via  859e72b1f6e7f438fe36e5ae303ef2ee713f5f41 (commit)
       via  4de7ec7931062e640665c91a53182bb34d6f55cc (commit)
      from  f7a21af8adc65c72326c0f955e5e1712467951ad (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40


- Log -----------------------------------------------------------------
commit 2cf0ceb20c87756cf2e012d67129c0205c6db9aa
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Mon Apr 22 14:26:56 2013 +1000

    New version 1.2.62
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit 02e349d158cfa5413e391066e098413f981c592b
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Apr 19 13:29:04 2013 +1000

    ctdbd: Set num_clients statistic from ctdb->num_clients
    
    This fixes the problem of "ctdb statisticsreset" clearing the number of
    clients even when there are active clients.
    
    Values returned in statistics for frozen, recovering, memory_used are based on
    the current state of CTDB and are not maintained as statistics.  This should
    include num_clients as well.
    
    Currently ctdb->num_clients is unused. So use that to track the number of
    clients and fill in statistics field only when requested.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    (cherry picked from commit dc4ca816630ed44b419108da53421331243fb8c7)

commit 905a5a6fdf69a79574fcfff272aaa9b292eac159
Author: Martin Schwenke <martin at meltin.net>
Date:   Mon Apr 22 13:52:04 2013 +1000

    ctdbd: Log PID file creation and removal at NOTICE level
    
    Unexpected removal of this file can have serious consequences, so it
    is best if this is logged at the default level.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    (cherry picked from commit bfed6a8d1771db3401d12b819204736c33acb312)

commit 5e157d829efff1bed94c1cc65a220148c769e559
Author: Martin Schwenke <martin at meltin.net>
Date:   Tue Apr 16 16:10:04 2013 +1000

    scripts: Crash cleanup script should pass a tag to logger
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 57e520c86cd59776e6e850be6ae02b3345e6ab3a
Author: Martin Schwenke <martin at meltin.net>
Date:   Mon Apr 15 15:42:55 2013 +1000

    scripts: ctdb-crash-cleanup.sh uses initscript to see if ctdbd is running
    
    "ctdb ping" (or "ctdb status") can time out.  How many times should we
    try?
    
    Instead, depend on the initscript to implement something sane.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Reviewed-by: Michael Adam <obnox at samba.org>
    (cherry picked from commit 90cb337e5ccf397b69a64298559a428ff508f196)
    
    Conflicts:
    	config/ctdb-crash-cleanup.sh

commit e71243dc514752bd19dfd8cdf1eda30809d67494
Author: Martin Schwenke <martin at meltin.net>
Date:   Mon Apr 15 15:18:12 2013 +1000

    initscript: Use a PID file to implement the "status" option
    
    Using "ctdb ping" and "ctdb status" is fraught with danger.  These
    commands can time out when ctdbd is running, leading callers to believe
    that ctdbd is not running.  Timeouts could be increased but we would
    still have to handle potential timeouts.
    
    Everything else in the world implements the "status" option by
    checking if the relevant process is running.  This change makes CTDB
    do the same thing and uses standard distro functions.
    
    This change is backward compatible in the sense that a missing
    /var/run/ctdb/ directory means that we don't do a PID file check but
    just depend on the distro's checking method.  Therefore, if CTDB was
    started with an older version of this script then "service ctdb
    status" will still work.
    
    This script does not support changing the value of CTDB_VALGRIND
    between calls.  If you start with CTDB_VALGRIND=yes then you need to
    check status with the same setting.  CTDB_VALGRIND is a debug
    variable, so this is acceptable.
    
    This also adds sourcing of /lib/lsb/init-functions to make the Debian
    function status_of_proc() available.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Michael Adam <obnox at samba.org>
    (cherry picked from commit 687e2eace4f48400cf5029914f62b6ddabb85378)
    
    Conflicts:
    	config/ctdb.init

commit 485b32d77972271dd0c5938ab5aa1d657484ae5e
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Apr 19 16:47:32 2013 +1000

    ctdbd: Add --pidfile option
    
    Default is not to create a pid file.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Michael Adam <obnox at samba.org>
    (cherry picked from commit 996e74d3db0c50f91b320af8ab7c43ea6b1136af)
    
    Conflicts:
    	server/ctdb_daemon.c

commit 0f39c9bf47a5d84b1b5e3af19e6e8cd610dbfef6
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Apr 19 15:16:19 2013 +1000

    ctdbd: Change some fork() calls to ctdb_fork()
    
    This guarantees that ctdb_set_child_info() is called.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit f4c04ee072772450e269c71db3c6c0f4331537a8
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Apr 19 14:54:03 2013 +1000

    util: ctdb_fork() should call ctdb_set_child_info()
    
    For now we pass NULL as the child name.  Later we'll give ctdb_fork()
    and friends an extra argument and pass that through.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Michael Adam <obnox at samba.org>
    (backported from commit ba8866d40125bab06391a17d48ff06a4a9f9da89)

commit b90249874c45c291e25515286a81c3cfe1b0ca6f
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Apr 19 14:42:44 2013 +1000

    util: New functions ctdb_set_child_info() and ctdb_is_child_process()
    
    Must be called by all child processes.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Reviewed-by: Michael Adam <obnox at samba.org>
    (backported from commit 59b019a97aad9a731f9080ea5be14d0dbdfe03d6)

commit 764359b2081b7b6aad6af17e9b86b7df0e422416
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Apr 19 14:35:49 2013 +1000

    Logging: Fix breakage when freeing the log ringbuffer
    
    Commit c6e1b84595039edb5c49a5851b440710dc0e2ac1 broke fetching from
    the log ringbuffer.  The solution there is still generally good: there
    is no need to keep the ringbuffer in children created by
    ctdb_fork()... except for those special children that are created to
    fetch data from the ringbuffer!
    
    Introduce a new function ctdb_fork_no_free_ringbuffer() that does
    everything ctdb_fork() needs to do except free the ringbuffer (i.e. it
    is the old ctdb_fork() function).  The new ctdb_fork() function just
    calls that function and then frees the ringbuffer in the child.
    
    This means all callers of ctdb_fork() have the convenience of having
    the ringbuffer freed, apart from the special case in the ringbuffer
    fetching code where we call ctdb_fork_no_free_ringbuffer() instead.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    (backported from commit 00db5fa00474f8a83f1aa3b603fd756cc9b49ff4)

commit 64bbe9f35f58b5f2af23acfa6ded9751839b9ece
Author: Michael Adam <obnox at samba.org>
Date:   Wed Apr 3 12:02:59 2013 +0200

    ctdb_call: don't bump the rsn in ctdb_become_dmaster() any more
    
    This is now done in ctdb_ltdb_store_server(), so this
    extra bump can be spared.
    
    Signed-off-by: Michael Adam <obnox at samba.org>
    Reviewed-By: Amitay Isaacs <amitay at gmail.com>
    (cherry picked from commit cad3107b12e8392f786f9a758ee38cf3a3d58538)

commit 63debc8aecd8ab626d827d508109fefa9503c101
Author: Michael Adam <obnox at samba.org>
Date:   Wed Apr 3 11:40:25 2013 +0200

    Fix a severe recovery bug that can lead to data corruption for SMB clients.
    
    Problem:
    Recovery can under certain circumstances lead to old record copies
    resurrecting: Recovery selects the newest record copy purely by RSN. At
    the end of the recovery, the recovery master is the dmaster for all
    records in all (non-persistent) databases. And the other nodes locally
    hold the complete copy of the databases. The bug is that the recovery
    process does not increment the RSN on the recovery master at the end of
    the recovery. Now clients acting directly on the Recovery master will
    directly change a record's content on the recmaster without migration
    and hence without RSN bump.  So a subsequent recovery can not tell that
    the recmaster's copy is newer than the copies on the other nodes, since
    their RSN is the same. Hence, if the recmaster is not node 0 (or more
    precisely not the active node with the lowest node number), the recovery
    will choose copies from nodes with lower number and stick to these.
    
    Here is how to reproduce:
    
    - assume we have a cluster with at least 2 nodes
    - ensure that the recmaster is not node 0
      (maybe ensure with "onnode 0 ctdb setrecmasterrole off")
      say recmaster is node 1
    - choose a new database name, say "test1.tdb"
      (make sure it is not yet attached as persistent)
    - choose a key name, say "key1"
    - all cluster nodes should be ok and no recovery running
    - now do the following on node 1:
    
    1. dbwrap_tool test1.tdb store key1 uint32 1
    2. dbwrap_tool test1.tdb fetch key1 uint32
       ==> 1
    3. ctdb recover
    4. dbwrap_tool test1.tdb store key1 uint32 2
    5. dbwrap_tool test1.tdb fetch key1 uint32
       ==> 2
    6. ctdb recover
    7. dbwrap_tool test1.tdb fetch key1 uint32
       ==> 1
       ==> BUG
    
    This is a very severe bug, since when applied to Samba's locking.tdb
    database, it means that for SMB clients on clustered Samba there is
    the potential for locking out oneself from previously opened files
    or even worse, data corruption:
    
    Case 1: locking out
    
    - client on recmaster opens file
    - recovery propagates open file handle (entry in locking.tdb) to
      other nodes
    - client closes file
    - client opens the same file
    - recovery resurrects old copy of open file record in locking.tdb
      from lower node
    - client closes file but fails to delete entry in locking.tdb
    - client tries to open same file again but fails, since
      the old record locks it out (since the client is still connected)
    
    Case 2: data corruption
    
    - client1 on recmaster opens file
    - recovery propagates open file info to other nodes
    - client1 closes the file and disconnects
    - client2 opens the same file
    - recovery resurrects old copy of locking.tdb record,
      where client2 has no entry, but client1 has.
    - but client2 believes it still has a handle
    - client3 opens the file and succeeds without
      conflicting with client2
      (the detached entry for client1 is discarded because
       the server does not exist any more).
    => both client2 and client3 believe they have exclusive
      access to the file and writing creates data corruption
    
    Fix:
    
    When storing a record on the dmaster, bump its RSN.
    
    The ctdb_ltdb_store_server() is the central function for storing
    a record to a local tdb from the ctdbd server context.
    So this is also the place where the RSN of the record to be stored
    should be incremented, when storing on the dmaster.
    
    For the case of the record migration, this is currently done in
    ctdb_become_dmaster() in ctdb_call.c, but there are other places
    such as in recovery, where we should bump the RSN, but currently
    don't do it.
    
    So moving the RSN incrementation into ctdb_ltdb_store_server fixes
    the recovery-record-resurrection bug.
    
    Signed-off-by: Michael Adam <obnox at samba.org>
    Reviewed-By: Amitay Isaacs <amitay at gmail.com>
    (cherry picked from commit feb1d40b21a160737aead22e398f3c34ff3be8de)

commit dd465bfcf5bc8cffcae3abb58100b68d46ccda49
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Feb 21 06:54:09 2012 +1100

    READONLY: don't schedule for fast vacuum deletion if any of the readonly record flags are set
    (cherry picked from commit b3307d78fd15f446b423f8cdd1e403f89fbe8ac8)

commit 7dc9c1d3edcba5b61bdffb4ed213a216cd8ee809
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Feb 20 21:13:46 2012 +1100

    ReadOnly: Make sure we don't try to fast-vacuum records that are set for readonly delegation
    (cherry picked from commit 303134cf10a08ce61954d5de9025d9bbcb5f75ef)

commit 859e72b1f6e7f438fe36e5ae303ef2ee713f5f41
Author: Michael Adam <obnox at samba.org>
Date:   Thu Apr 7 12:17:42 2011 +0200

    ctdb_ltdb_store_server: when storing a record that is not to be scheduled for deletion, remove it from the delete queue
    
    Pair-Programmed-With: Stefan Metzmacher <metze at samba.org>
    (cherry picked from commit 489148e465e2b8aed87ea836e3518f43490671ca)

commit 4de7ec7931062e640665c91a53182bb34d6f55cc
Author: Michael Adam <obnox at samba.org>
Date:   Thu Apr 7 12:17:16 2011 +0200

    vacuum: add ctdb_local_remove_from_delete_queue()
    
    Pair-Programmed-With: Stefan Metzmacher <metze at samba.org>
    (cherry picked from commit a5065b42a98c709173503e02d217f97792878625)

-----------------------------------------------------------------------

Summary of changes:
 common/ctdb_logging.c        |    2 +-
 common/ctdb_util.c           |   37 ++++++++++++++++++-
 config/ctdb-crash-cleanup.sh |    6 ++-
 config/ctdb.init             |   85 ++++++++++++++++++++++++++++--------------
 include/ctdb_private.h       |    7 +++
 packaging/RPM/ctdb.spec.in   |    8 +++-
 server/ctdb_call.c           |    2 +-
 server/ctdb_control.c        |    3 +-
 server/ctdb_daemon.c         |   41 ++++++++++++++++++--
 server/ctdb_freeze.c         |    2 +-
 server/ctdb_ltdb_server.c    |   26 ++++++++++---
 server/ctdb_recover.c        |    2 +-
 server/ctdb_recoverd.c       |    2 +-
 server/ctdb_vacuum.c         |   53 ++++++++++++++++++++++++++
 server/ctdbd.c               |    1 +
 15 files changed, 229 insertions(+), 48 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/ctdb_logging.c b/common/ctdb_logging.c
index bcc954a..66eaec3 100644
--- a/common/ctdb_logging.c
+++ b/common/ctdb_logging.c
@@ -164,7 +164,7 @@ int32_t ctdb_control_get_log(struct ctdb_context *ctdb, TDB_DATA addr)
 	/* spawn a child process to marshall the huge log blob and send it back
 	   to the ctdb tool using a MESSAGE
 	*/
-	child = ctdb_fork(ctdb);
+	child = ctdb_fork_no_free_ringbuffer(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork a log collector child\n"));
 		return -1;
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index d8d5ed7..5a29dcf 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -357,16 +357,40 @@ void ctdb_restore_scheduler(struct ctdb_context *ctdb)
 #endif
 }
 
+static bool is_child = false;
+
+void ctdb_set_child_info(TALLOC_CTX *mem_ctx, const char * child_name_fmt, ...)
+{
+	is_child = true;
+	if (child_name_fmt != NULL) {
+		va_list ap;
+		char *t;
+
+		va_start(ap, child_name_fmt);
+		t = talloc_vasprintf(mem_ctx, child_name_fmt, ap);
+		debug_extra = talloc_asprintf(mem_ctx, "%s:", t);
+		talloc_free(t);
+		va_end(ap);
+	}
+}
+
+bool ctdb_is_child_process(void)
+{
+	return is_child;
+}
+
 /*
  * This function forks a child process and drops the realtime 
  * scheduler for the child process.
  */
-pid_t ctdb_fork(struct ctdb_context *ctdb)
+pid_t ctdb_fork_no_free_ringbuffer(struct ctdb_context *ctdb)
 {
 	pid_t pid;
 
 	pid = fork();
 	if (pid == 0) {
+		ctdb_set_child_info(ctdb, NULL);
+
 		/* Close the Unix Domain socket and the TCP socket.
 		 * This ensures that none of the child processes will
 		 * look like the main daemon when it is not running.
@@ -388,12 +412,23 @@ pid_t ctdb_fork(struct ctdb_context *ctdb)
 			ctdb_restore_scheduler(ctdb);
 		}
 		ctdb->can_send_controls = false;
+	}
+	return pid;
+}
+
+pid_t ctdb_fork(struct ctdb_context *ctdb)
+{
+	pid_t pid;
 
+	pid = ctdb_fork_no_free_ringbuffer(ctdb);
+	if (pid == 0) {
 		ctdb_log_ringbuffer_free();
 	}
+
 	return pid;
 }
 
+
 void set_nonblocking(int fd)
 {
 	unsigned v;
diff --git a/config/ctdb-crash-cleanup.sh b/config/ctdb-crash-cleanup.sh
index 420db76..ac33b4d 100755
--- a/config/ctdb-crash-cleanup.sh
+++ b/config/ctdb-crash-cleanup.sh
@@ -17,8 +17,10 @@
 	exit 1
 }
 
+PATH=/sbin:/usr/sbin:/bin:/usr/bin:$PATH
+
 # if ctdb is running, just return
-ctdb status 2>/dev/null && {
+service ctdb status >/dev/null 2>&1 && {
     exit 0
 }
 
@@ -27,7 +29,7 @@ ctdb status 2>/dev/null && {
 	[ -z "$_IP_HELD" ] || {
 		_IFACE=`echo $_IP_HELD | sed -e "s/.*\s//"`
 		_NM=`echo $_IP_HELD | sed -e "s/.*$_IP\///" -e "s/\s.*//"`
-		logger "Removing public address $_IP/$_NM from device $_IFACE"
+		logger -t "ctdbd" "ctdb-crash-cleanup.sh: Removing public address $_IP/$_NM from device $_IFACE"
 		/sbin/ip addr del $_IP/$_NM dev $_IFACE
 	}
 done
diff --git a/config/ctdb.init b/config/ctdb.init
index 3c2412d..71629e6 100755
--- a/config/ctdb.init
+++ b/config/ctdb.init
@@ -6,7 +6,7 @@
 # chkconfig:           - 90 01
 #
 # description:                 Starts and stops the clustered tdb daemon
-# pidfile:             /var/run/ctdbd/ctdbd.pid
+# pidfile:             /var/run/ctdb/ctdbd.pid
 #
 
 ### BEGIN INIT INFO
@@ -32,6 +32,10 @@ fi
     LC_ALL=en_US.UTF-8
 }
 
+if [ -f /lib/lsb/init-functions ] ; then
+    . /lib/lsb/init-functions
+fi
+
 # Avoid using root's TMPDIR
 unset TMPDIR
 
@@ -54,6 +58,7 @@ detect_init_style
 export CTDB_INIT_STYLE
 
 ctdbd=${CTDBD:-/usr/sbin/ctdbd}
+pidfile="/var/run/ctdb/ctdbd.pid"
 
 if [ "$CTDB_VALGRIND" = "yes" ]; then
     init_style="valgrind"
@@ -88,6 +93,9 @@ build_ctdb_options () {
     }
     maybe_set "--reclock"                "$CTDB_RECOVERY_LOCK"
 
+    mkdir -p $(dirname "$pidfile")
+    maybe_set "--pidfile"                "$pidfile"
+
     # build up CTDB_OPTIONS variable from optional parameters
     maybe_set "--logfile"                "$CTDB_LOGFILE"
     maybe_set "--nlist"                  "$CTDB_NODES"
@@ -357,6 +365,8 @@ stop() {
     # make sure all ips are dropped, pfkill -9 might leave them hanging around
     drop_all_public_ips
 
+    rm -f "$pidfile"
+
     case $init_style in
 	suse)
 	    # re-set the return code to the recorded RETVAL in order
@@ -379,30 +389,47 @@ restart() {
     start
 }
 
-status() {
-    echo -n $"Checking for ctdbd service: "
-    ctdb ping >/dev/null 2>&1 || {
-	RETVAL=$?
-	echo -n "  ctdbd not running. "
-	case $init_style in
-	    suse)
-		set_retval $RETVAL
-		rc_status -v
-		;;
-	    redhat)
-		if [ -f /var/lock/subsys/ctdb ]; then
-			echo $"ctdb dead but subsys locked"
-			RETVAL=2
-		else
-			echo $"ctdb is stopped"
-			RETVAL=3
-		fi
-		;;
-	esac
-	return $RETVAL
-    }
-    echo ""
-    ctdb status
+# Given that CTDB_VALGRIND is a debug option we don't support the pid
+# file.  We just do a quick and dirty hack instead.  Otherwise we just
+# end up re-implementing each distro's pidfile support...
+check_status_valgrind ()
+{
+    if pkill -0 -f "valgrind.*${ctdbd}" ; then
+	echo "ctdbd is running under valgrind..."
+	return 0
+    else
+	echo "ctdbd is not running"
+	return 1
+    fi
+}
+
+check_status ()
+{
+    # Backward compatibility.  When we arrange to pass --pidfile to
+    # ctdbd we also create the directory that will contain it.  If
+    # that directory is missing then we don't use the pidfile to check
+    # status.
+    if [ -d $(dirname "$pidfile") ] ; then
+	_pf_opt="-p $pidfile"
+    else
+	_pf_opt=""
+    fi
+
+    case "$init_style" in
+	valgrind)
+	    check_status_valgrind
+	    ;;
+	suse)
+	    checkproc $_pf_opt "$ctdbd"
+	    rc_status -v
+	    ;;
+	redhat)
+	    status $_pf_opt -l "ctdb" "$ctdbd"
+	    ;;
+	debian)
+	    status_of_proc $_pf_opt "$ctdbd" "ctdb"
+	    ;;
+    esac
 }
 
 
@@ -417,14 +444,16 @@ case "$1" in
   	restart
 	;;
     status)
-  	status
+  	check_status
 	;;
     condrestart|try-restart)
-  	ctdb status > /dev/null && restart || :
+  	if check_status >/dev/null ; then
+	    restart
+	fi
 	;;
     cron)
 	# used from cron to auto-restart ctdb
-  	ctdb status > /dev/null || restart
+  	check_status >/dev/null || restart
 	;;
     *)
 	echo $"Usage: $0 {start|stop|restart|reload|force-reload|status|cron|condrestart|try-restart}"
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 0eef0e3..7d67a10 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -1034,6 +1034,8 @@ bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
 void ctdb_set_scheduler(struct ctdb_context *ctdb);
 void ctdb_restore_scheduler(struct ctdb_context *ctdb);
 pid_t ctdb_fork(struct ctdb_context *ctdb);
+pid_t ctdb_fork_no_free_ringbuffer(struct ctdb_context *ctdb);
+bool ctdb_is_child_process(void);
 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
 				 struct ctdb_req_control *c,
 				 TDB_DATA indata, 
@@ -1341,6 +1343,7 @@ int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, T
 
 extern int script_log_level;
 extern bool fast_start;
+extern const char *ctdbd_pidfile;
 
 int32_t ctdb_control_get_event_script_status(struct ctdb_context *ctdb,
 					     uint32_t call_type,
@@ -1441,6 +1444,10 @@ int32_t ctdb_local_schedule_for_deletion(struct ctdb_db_context *ctdb_db,
 					 const struct ctdb_ltdb_header *hdr,
 					 TDB_DATA key);
 
+void ctdb_local_remove_from_delete_queue(struct ctdb_db_context *ctdb_db,
+					 const struct ctdb_ltdb_header *hdr,
+					 const TDB_DATA key);
+
 struct ctdb_ltdb_header *ctdb_header_from_record_handle(struct ctdb_record_handle *h);
 
 /* For unit testing ctdb_transaction.c. */
diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in
index 9394987..432e909 100644
--- a/packaging/RPM/ctdb.spec.in
+++ b/packaging/RPM/ctdb.spec.in
@@ -3,7 +3,7 @@ Name: ctdb
 Summary: Clustered TDB
 Vendor: Samba Team
 Packager: Samba Team <samba at samba.org>
-Version: 1.2.61
+Version: 1.2.62
 Release: 1GITHASH
 Epoch: 0
 License: GNU GPL version 3
@@ -155,6 +155,12 @@ development libraries for ctdb
 
 %changelog
 
+* Mon Apr 22 2013 : Version 1.2.62
+  - Fix a bug where subsequent recoveries can corrupt databases
+  - Fix breakage when freeing log ringbuffer
+  - Add option to create PID file
+  - Check for process existence in service ctdb status instead of ctdb status
+  - Fix ctdb statisticsreset clearing num_clients
 * Fri Apr 05 2013 : Version 1.2.61
   - Free message list header if all message handlers are freed
   - Use tdb_parse_record instead of tdb_fetch to avoid memory leaks
diff --git a/server/ctdb_call.c b/server/ctdb_call.c
index dd18558..1a2c640 100644
--- a/server/ctdb_call.c
+++ b/server/ctdb_call.c
@@ -277,7 +277,7 @@ static void ctdb_become_dmaster(struct ctdb_db_context *ctdb_db,
 	DEBUG(DEBUG_DEBUG,("pnn %u dmaster response %08x\n", ctdb->pnn, ctdb_hash(&key)));
 
 	ZERO_STRUCT(header);
-	header.rsn = rsn + 1;
+	header.rsn = rsn;
 	header.dmaster = ctdb->pnn;
 	header.flags = record_flags;
 
diff --git a/server/ctdb_control.c b/server/ctdb_control.c
index 5968b3e..e01e046 100644
--- a/server/ctdb_control.c
+++ b/server/ctdb_control.c
@@ -101,6 +101,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 		int i;
 		CHECK_CONTROL_DATA_SIZE(0);
 		ctdb->statistics.memory_used = talloc_total_size(NULL);
+		ctdb->statistics.num_clients = ctdb->num_clients;
 		ctdb->statistics.frozen = 0;
 		for (i=1; i<= NUM_DB_PRIORITIES; i++) {
 			if (ctdb->freeze_mode[i] == CTDB_FREEZE_FROZEN) {
@@ -192,7 +193,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb,
 
 	case CTDB_CONTROL_PING:
 		CHECK_CONTROL_DATA_SIZE(0);
-		return ctdb->statistics.num_clients;
+		return ctdb->num_clients;
 
 	case CTDB_CONTROL_SET_DB_READONLY: {
 		uint32_t db_id;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index 09b3d32..300f324 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -37,6 +37,8 @@ struct ctdb_client_pid_list {
 	struct ctdb_client *client;
 };
 
+const char *ctdbd_pidfile = NULL;
+
 static void daemon_incoming_packet(void *, struct ctdb_req_header *);
 
 static void print_exit_message(void)
@@ -245,7 +247,7 @@ static int ctdb_client_destructor(struct ctdb_client *client)
 
 	ctdb_takeover_client_destructor_hook(client);
 	ctdb_reqid_remove(client->ctdb, client->client_id);
-	CTDB_DECREMENT_STAT(client->ctdb, num_clients);
+	client->ctdb->num_clients--;
 
 	if (client->num_persistent_updates != 0) {
 		DEBUG(DEBUG_ERR,(__location__ " Client disconnecting with %u persistent updates in flight. Starting recovery\n", client->num_persistent_updates));
@@ -979,7 +981,7 @@ static void ctdb_accept_client(struct event_context *ev, struct fd_event *fde,
 
 	talloc_set_destructor(client, ctdb_client_destructor);
 	talloc_set_destructor(client_pid, ctdb_clientpid_destructor);
-	CTDB_INCREMENT_STAT(ctdb, num_clients);
+	ctdb->num_clients++;
 }
 
 
@@ -1066,6 +1068,38 @@ static void ctdb_setup_event_callback(struct ctdb_context *ctdb, int status,
 				 tdb_null, NULL, NULL);
 }
 
+static void ctdb_remove_pidfile(void)
+{
+	if (ctdbd_pidfile != NULL && !ctdb_is_child_process()) {
+		if (unlink(ctdbd_pidfile) == 0) {
+			DEBUG(DEBUG_NOTICE, ("Removed PID file %s\n",
+					     ctdbd_pidfile));
+		} else {
+			DEBUG(DEBUG_WARNING, ("Failed to Remove PID file %s\n",
+					      ctdbd_pidfile));
+		}
+	}
+}
+
+static void ctdb_create_pidfile(pid_t pid)
+{
+	if (ctdbd_pidfile != NULL) {
+		FILE *fp;
+
+		fp = fopen(ctdbd_pidfile, "w");
+		if (fp == NULL) {
+			DEBUG(DEBUG_ALERT,
+			      ("Failed to open PID file %s\n", ctdbd_pidfile));
+			exit(11);
+		}
+
+		fprintf(fp, "%d\n", pid);
+		fclose(fp);
+		DEBUG(DEBUG_NOTICE, ("Created PID file %s\n", ctdbd_pidfile));
+		atexit(ctdb_remove_pidfile);
+	}
+}
+
 /*
   start the protocol going as a daemon
 */
@@ -1103,8 +1137,7 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 	block_signal(SIGPIPE);
 
 	ctdb->ctdbd_pid = getpid();
-
-
+	ctdb_create_pidfile(ctdb->ctdbd_pid);
 	DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdb->ctdbd_pid));
 
 	if (ctdb->do_setsched) {
diff --git a/server/ctdb_freeze.c b/server/ctdb_freeze.c
index f422e6d..81c5b56 100644
--- a/server/ctdb_freeze.c
+++ b/server/ctdb_freeze.c
@@ -190,7 +190,7 @@ static struct ctdb_freeze_handle *ctdb_freeze_lock(struct ctdb_context *ctdb, ui
 		return NULL;
 	}
 	
-	h->child = fork();
+	h->child = ctdb_fork(ctdb);
 	if (h->child == -1) {
 		DEBUG(DEBUG_ERR,("Failed to fork child for ctdb_freeze_lock\n"));
 		talloc_free(h);
diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c
index c9cf021..b87e176 100644
--- a/server/ctdb_ltdb_server.c
+++ b/server/ctdb_ltdb_server.c
@@ -90,6 +90,7 @@ static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
 	bool seqnum_suppressed = false;
 	bool keep = false;
 	bool schedule_for_deletion = false;
+	bool remove_from_delete_queue = false;
 	uint32_t lmaster;
 
 	if (ctdb->flags & CTDB_FLAG_TORTURE) {
@@ -121,6 +122,8 @@ static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
 	 */
 	if (data.dsize != 0) {
 		keep = true;
+	} else if (header->flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE)) {
+		keep = true;
 	} else if (ctdb_db->persistent) {
 		keep = true;
 	} else if (header->flags & CTDB_REC_FLAG_AUTOMATIC) {
@@ -161,12 +164,18 @@ static int ctdb_ltdb_store_server(struct ctdb_db_context *ctdb_db,
 		keep = true;
 	}
 
-	if (keep &&
-	    (data.dsize == 0) &&
-	    !ctdb_db->persistent &&
-	    (ctdb_db->ctdb->pnn == header->dmaster))
-	{
-		schedule_for_deletion = true;
+	if (keep) {
+		if (!ctdb_db->persistent &&
+		    (ctdb_db->ctdb->pnn == header->dmaster) &&
+		    !(header->flags & (CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE)))
+		{
+			header->rsn++;
+
+			if (data.dsize == 0) {
+				schedule_for_deletion = true;
+			}
+		}
+		remove_from_delete_queue = !schedule_for_deletion;
 	}
 
 store:
@@ -247,6 +256,7 @@ store:
 			    tdb_errorstr(ctdb_db->ltdb->tdb)));
 
 		schedule_for_deletion = false;
+		remove_from_delete_queue = false;
 	}
 	if (seqnum_suppressed) {
 		tdb_add_flags(ctdb_db->ltdb->tdb, TDB_SEQNUM);
@@ -262,6 +272,10 @@ store:
 		}
 	}
 
+	if (remove_from_delete_queue) {
+		ctdb_local_remove_from_delete_queue(ctdb_db, header, key);
+	}
+
 	return ret;
 }


-- 
CTDB repository