[SCM] CTDB repository - branch 2.5 updated - ctdb-2.5.3-156-g70c7ef0

Thu Sep 25 20:57:02 MDT 2014

The branch, 2.5 has been updated
       via  70c7ef023730d8344ca4afde2c94634dd541101f (commit)
       via  b1758c6a22c16e7798bbb147d5bf8e04b2bf0c55 (commit)
       via  be0e7aba3a569adedc87be74270b079738ad7f13 (commit)
       via  8518cbd6665e6d1b3402876346454930de2a5ed3 (commit)
      from  1d4bd9eb387775dca674112913c1bf23be1c3690 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=2.5


- Log -----------------------------------------------------------------
commit 70c7ef023730d8344ca4afde2c94634dd541101f
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Thu Sep 25 17:55:15 2014 +1000

    daemon: Fix the usage for lock helper
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    Autobuild-User(master): Martin Schwenke <martins at samba.org>
    Autobuild-Date(master): Thu Sep 25 17:16:31 CEST 2014 on sn-devel-104
    
    (Imported from commit 0f92de8463b71a2d7e9acdd27454be7859713436)

commit b1758c6a22c16e7798bbb147d5bf8e04b2bf0c55
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Thu Sep 25 17:17:04 2014 +1000

    recoverd: If obtaining recovery lock fails, try again
    
    When the ctdb daemon starts up, it considers itself the recovery master
    and tries to do the first recovery.  However, it's possible that there
    is already a recovery master and the current node has not yet heard from
    it.  So do not ban ourselves immediately if ctdb_recovery_lock() fails
    when doing the first recovery.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    (Imported from commit 57310f80c9b8146a0978d912f73b0a64fde7697e)

commit be0e7aba3a569adedc87be74270b079738ad7f13
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Thu Sep 25 12:46:22 2014 +1000

    scripts: Fix the regular expression for parsing /proc/locks
    
    The major and minor device numbers are hexadecimal not decimal.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    Autobuild-User(master): Martin Schwenke <martins at samba.org>
    Autobuild-Date(master): Thu Sep 25 07:19:59 CEST 2014 on sn-devel-104
    
    (Imported from commit f1e281cd47d9ebd79e09294606b8fa411ec0fbb4)

commit 8518cbd6665e6d1b3402876346454930de2a5ed3
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Thu Sep 25 12:44:59 2014 +1000

    locking: Reset ttimer before doing an early return
    
    When the timer expires, the timeout handler routine sets lock_ctx->ttimer
    to a newly created timer event.  However, when a node is INACTIVE, the
    timeout handler returns early with lock_ctx->ttimer still set to the
    previous timer event.  That timer event gets freed when the callback
    returns, and lock_ctx->ttimer remains set to an already freed timer event.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    (Imported from commit c64369cba2e5a975d87d518737abbf04c9871a26)

-----------------------------------------------------------------------

Summary of changes:
 config/debug_locks.sh     |    2 +-
 server/ctdb_lock.c        |    1 +
 server/ctdb_lock_helper.c |    4 ++--
 server/ctdb_recoverd.c    |   18 ++++++++++++++++++
 4 files changed, 22 insertions(+), 3 deletions(-)


Changeset truncated at 500 lines:

diff --git a/config/debug_locks.sh b/config/debug_locks.sh
index 54b52ed..33bf3e6 100755
--- a/config/debug_locks.sh
+++ b/config/debug_locks.sh
@@ -28,7 +28,7 @@ loadconfig ctdb
     sed_cmd=$( ls -li "$CTDB_DBDIR"/*.tdb.* "$CTDB_DBDIR_PERSISTENT"/*.tdb.* |
 	   sed -e "s#${CTDB_DBDIR}/\(.*\)#\1#" \
 	       -e "s#${CTDB_DBDIR_PERSISTENT}/\(.*\)#\1#" |
-	   awk '{printf "s#[0-9]*:[0-9]*:%s #%s #\n", $1, $10}' )
+	   awk '{printf "s#[0-9a-f]*:[0-9a-f]*:%s #%s #\n", $1, $10}' )
 
     # Parse /proc/locks and extract following information
     #    pid process_name tdb_name offsets [W]
diff --git a/server/ctdb_lock.c b/server/ctdb_lock.c
index a866835..8292599 100644
--- a/server/ctdb_lock.c
+++ b/server/ctdb_lock.c
@@ -492,6 +492,7 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
 
 	/* If a node stopped/banned, don't spam the logs */
 	if (ctdb->nodes[ctdb->pnn]->flags & NODE_FLAGS_INACTIVE) {
+		lock_ctx->ttimer = NULL;
 		return;
 	}
 	if (lock_ctx->ctdb_db) {
diff --git a/server/ctdb_lock_helper.c b/server/ctdb_lock_helper.c
index 261a048..f164769 100644
--- a/server/ctdb_lock_helper.c
+++ b/server/ctdb_lock_helper.c
@@ -36,9 +36,9 @@ static void send_result(int fd, char result)
 static void usage(void)
 {
 	fprintf(stderr, "\n");
-	fprintf(stderr, "Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-key>\n",
+	fprintf(stderr, "Usage: %s <log-fd> <ctdbd-pid> <output-fd> RECORD <db-path> <db-key>\n",
 		progname);
-	fprintf(stderr, "       %s <ctdbd-pid> <output-fd> DB <db1-path> [<db2-path> ...]\n",
+	fprintf(stderr, "       %s <log-fd> <ctdbd-pid> <output-fd> DB <db1-path> [<db2-path> ...]\n",
 		progname);
 }
 
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 9b553b7..d3c06b4 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1815,6 +1815,16 @@ static int do_recovery(struct ctdb_recoverd *rec,
 		DEBUG(DEBUG_ERR,("Taking out recovery lock from recovery daemon\n"));
 		start_time = timeval_current();
 		if (!ctdb_recovery_lock(ctdb, true)) {
+			if (ctdb->runstate == CTDB_RUNSTATE_FIRST_RECOVERY) {
+				/* If ctdb is trying first recovery, it's
+				 * possible that current node does not know yet
+				 * who the recmaster is.
+				 */
+				DEBUG(DEBUG_ERR, ("Unable to get recovery lock"
+						" - retrying recovery\n"));
+				return -1;
+			}
+
 			DEBUG(DEBUG_ERR,("Unable to get recovery lock - aborting recovery "
 					 "and ban ourself for %u seconds\n",
 					 ctdb->tunable.recovery_ban_period));
@@ -3593,6 +3603,14 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		return;
 	}
 
+	/* get runstate */
+	ret = ctdb_ctrl_get_runstate(ctdb, CONTROL_TIMEOUT(),
+				     CTDB_CURRENT_NODE, &ctdb->runstate);
+	if (ret != 0) {
+		DEBUG(DEBUG_ERR, ("Failed to get runstate - retrying\n"));
+		return;
+	}
+
 	/* get the current recovery lock file from the server */
 	if (update_recovery_lock_file(ctdb) != 0) {
 		DEBUG(DEBUG_ERR,("Failed to update the recovery lock file\n"));


-- 
CTDB repository