[SCM] CTDB repository - branch 2.5 updated - ctdb-2.5-39-ge6df029

Tue Nov 19 19:07:06 MST 2013

The branch, 2.5 has been updated
       via  e6df02968fe0ff26eb5ce5e3409e0f3276a47634 (commit)
       via  2722ad70c71e9858e62c9f2652ac16bee12fe56b (commit)
       via  308715e677954fe798514af10164d4caec47eb12 (commit)
       via  26a58242552ce7f59a38d5b518fbd8afd39818d2 (commit)
       via  ea892229887d2cdaaa0f5e5a7320ce7377a49c20 (commit)
       via  c80b93e62e41dd654f9303f381976ebb70567e81 (commit)
       via  29cab532e4b4144b8e9959492d3cf463c044a9d0 (commit)
       via  d25e23a8e5f9f7e7e4abff465bde56c30279f3ab (commit)
       via  41f7d2a31daf531ede72213d0d3fc0646892d9f4 (commit)
       via  eeeb2923dc1f60148179eef542972fef31bab440 (commit)
       via  46e3e6edb625e1dac9b2cd7448db9e67157625eb (commit)
       via  00ddf2a628da7958c87de6dd29a285cb5c007c18 (commit)
       via  0179b2dbb0700c55203fdb827be2579ddd940fe3 (commit)
       via  ceadee057087e5a3b116722d50bda06c8cf1c847 (commit)
      from  da61dd7c25407e25c19276fa1ebad86ef9944b66 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=2.5


- Log -----------------------------------------------------------------
commit e6df02968fe0ff26eb5ce5e3409e0f3276a47634
Author: Martin Schwenke <martin at meltin.net>
Date:   Tue Nov 19 15:40:08 2013 +1100

    scripts: Add an early exit to statd-callout's notify case
    
    If $statd_state is empty then the loop will run once and print
    spurious errors.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 2722ad70c71e9858e62c9f2652ac16bee12fe56b
Author: Martin Schwenke <martin at meltin.net>
Date:   Tue Nov 19 15:37:58 2013 +1100

    eventscripts: Remove the nfs_statd_update() call from 60.ganesha
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 308715e677954fe798514af10164d4caec47eb12
Author: Martin Schwenke <martin at meltin.net>
Date:   Mon Nov 18 21:04:49 2013 +1100

    tests/integration: Neaten up some of the persistent database tests
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 26a58242552ce7f59a38d5b518fbd8afd39818d2
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Mon Nov 18 15:09:27 2013 +1100

    tools/ctdb: Fix tstore command to generate ltdb header internally
    
    This fixes an alignment discrepancy on 32-bit vs 64-bit platforms.
    
      sizeof(struct ctdb_ltdb_header) = 20  (32-bit)
                                      = 24  (64-bit)
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit ea892229887d2cdaaa0f5e5a7320ce7377a49c20
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Nov 15 15:31:03 2013 +1100

    tests/takeover: Fix bogus test description
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit c80b93e62e41dd654f9303f381976ebb70567e81
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Nov 15 15:23:14 2013 +1100

    tests/simple: Use sleep_for() instead of sleep
    
    Progress...
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>

commit 29cab532e4b4144b8e9959492d3cf463c044a9d0
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Nov 15 15:21:58 2013 +1100

    tests/simple: Update persistent DB tests
    
    * Low level DB checks should ignore the sequence number record.
    
    * A restart is needed after messing with the RecoverPDBBySeqNum
      tunable.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>

commit d25e23a8e5f9f7e7e4abff465bde56c30279f3ab
Author: Martin Schwenke <martin at meltin.net>
Date:   Fri Nov 15 15:20:40 2013 +1100

    recoverd: For persistent databases a sequence number of 0 is valid
    
    Otherwise recovery ends up done by RSN when it is unnecessary.
    
    Signed-off-by: Martin Schwenke <martin at meltin.net>
    Pair-programmed-with: Amitay Isaacs <amitay at gmail.com>

commit 41f7d2a31daf531ede72213d0d3fc0646892d9f4
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Nov 19 15:31:39 2013 +1100

    locking: Use vfork instead of fork to exec helpers
    
    There is a significant overhead using fork() over vfork(), especially
    when the child process execs a helper.  The overhead is in memory space
    and time.
    
        # strace -c ./test_fork 1024 200
        count=1024, size=204800, total=200M
        failed fork=0
        time for fork() = 4879.597000 us
        % time     seconds  usecs/call     calls    errors syscall
        ------ ----------- ----------- --------- --------- ----------------
        100.00    4.543321        3304      1375       375 clone
          0.00    0.000071           0      1033           mmap
          0.00    0.000000           0         1           read
          0.00    0.000000           0         3           write
          0.00    0.000000           0         2           open
          0.00    0.000000           0         2           close
          0.00    0.000000           0         3           fstat
          0.00    0.000000           0         3           mprotect
          0.00    0.000000           0         1           munmap
          0.00    0.000000           0         3           brk
          0.00    0.000000           0         1         1 access
          0.00    0.000000           0         1           execve
          0.00    0.000000           0         1           arch_prctl
        ------ ----------- ----------- --------- --------- ----------------
        100.00    4.543392                  2429       376 total
    
        # strace -c ./test_vfork 1024 200
        count=1024, size=204800, total=200M
        failed fork=0
        time for fork() = 82.041000 us
        % time     seconds  usecs/call     calls    errors syscall
        ------ ----------- ----------- --------- --------- ----------------
         96.47    0.001204           1      1000           vfork
          3.53    0.000044           0      1033           mmap
          0.00    0.000000           0         1           read
          0.00    0.000000           0         3           write
          0.00    0.000000           0         2           open
          0.00    0.000000           0         2           close
          0.00    0.000000           0         3           fstat
          0.00    0.000000           0         3           mprotect
          0.00    0.000000           0         1           munmap
          0.00    0.000000           0         3           brk
          0.00    0.000000           0         1         1 access
          0.00    0.000000           0         1           execve
          0.00    0.000000           0         1           arch_prctl
        ------ ----------- ----------- --------- --------- ----------------
        100.00    0.001248                  2054         1 total
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit eeeb2923dc1f60148179eef542972fef31bab440
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Nov 19 16:13:20 2013 +1100

    common: Refactor code to keep track of child processes
    
    This code can then be used to track child processes created with vfork().
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit 46e3e6edb625e1dac9b2cd7448db9e67157625eb
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Nov 15 18:59:04 2013 +1100

    scripts: Run a single instance of debug_locks.sh at a given time
    
    This prevents spamming of logs if multiple lock requests are waiting
    and keep timing out.
    
    Also, improve the logging format with separators.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit 00ddf2a628da7958c87de6dd29a285cb5c007c18
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Nov 15 18:36:09 2013 +1100

    locking: Update current lock statistics when lock is scheduled
    
    When a child process is created for a lock request, the current locks
    statistics should be updated immediately.  This will provide accurate
    information on number of active lock requests.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit 0179b2dbb0700c55203fdb827be2579ddd940fe3
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Mon Nov 18 15:48:22 2013 +1100

    locking: Do not merge multiple lock requests to avoid unfair scheduling
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

commit ceadee057087e5a3b116722d50bda06c8cf1c847
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Nov 15 15:58:59 2013 +1100

    locking: Implement active lock requests limit per database
    
    This limit was previously a global limit and not per database.  That
    prevented any database freeze lock requests from getting scheduled if
    the global limit was reached.
    
    Only individual record requests should be limited and database freeze
    requests should always get scheduled.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>

-----------------------------------------------------------------------

Summary of changes:
 common/ctdb_fork.c                          |   22 ++-
 config/debug_locks.sh                       |   58 +++++----
 config/events.d/60.ganesha                  |    4 -
 config/statd-callout                        |    1 +
 include/ctdb_private.h                      |    4 +-
 server/ctdb_lock.c                          |   37 ++++--
 server/ctdb_recoverd.c                      |    5 +-
 tests/scripts/integration.bash              |   36 +++++
 tests/simple/70_recoverpdbbyseqnum.sh       |  188 ++++++++++++---------------
 tests/simple/71_ctdb_wipedb.sh              |   51 ++++----
 tests/simple/72_update_record_persistent.sh |   56 +++++----
 tests/simple/73_tunable_NoIPTakeover.sh     |    4 +-
 tests/takeover/lcp2.022.sh                  |    2 +-
 tools/ctdb.c                                |   34 ++++-
 14 files changed, 284 insertions(+), 218 deletions(-)


Changeset truncated at 500 lines:

diff --git a/common/ctdb_fork.c b/common/ctdb_fork.c
index d372ae0..1d7d9aa 100644
--- a/common/ctdb_fork.c
+++ b/common/ctdb_fork.c
@@ -45,6 +45,19 @@ bool ctdb_is_child_process(void)
 	return is_child;
 }
 
+void ctdb_track_child(struct ctdb_context *ctdb, pid_t pid)
+{
+	char *process;
+
+	/* Only CTDB main daemon should track child processes */
+	if (getpid() != ctdb->ctdbd_pid) {
+		return;
+	}
+
+	process = talloc_asprintf(ctdb->child_processes, "process:%d", (int)pid);
+	trbt_insert32(ctdb->child_processes, pid, process);
+}
+
 /*
  * This function forks a child process and drops the realtime 
  * scheduler for the child process.
@@ -52,7 +65,6 @@ bool ctdb_is_child_process(void)
 pid_t ctdb_fork_no_free_ringbuffer(struct ctdb_context *ctdb)
 {
 	pid_t pid;
-	char *process;
 
 	pid = fork();
 	if (pid == -1) {
@@ -87,13 +99,7 @@ pid_t ctdb_fork_no_free_ringbuffer(struct ctdb_context *ctdb)
 		return 0;
 	}
 
-	if (getpid() != ctdb->ctdbd_pid) {
-		return pid;
-	}
-
-	process = talloc_asprintf(ctdb->child_processes, "process:%d", (int)pid);
-	trbt_insert32(ctdb->child_processes, pid, process);
-
+	ctdb_track_child(ctdb, pid);
 	return pid;
 }
 
diff --git a/config/debug_locks.sh b/config/debug_locks.sh
index ce80835..72a8917 100755
--- a/config/debug_locks.sh
+++ b/config/debug_locks.sh
@@ -14,15 +14,20 @@
 
 loadconfig ctdb
 
-# Create sed expression to convert inodes to names
-sed_cmd=$( ls -li "$CTDB_DBDIR"/*.tdb.* "$CTDB_DBDIR_PERSISTENT"/*.tdb.* |
+(
+    flock -n 9 || exit 1
+
+    echo "===== Start of debug locks PID=$$ ====="
+
+    # Create sed expression to convert inodes to names
+    sed_cmd=$( ls -li "$CTDB_DBDIR"/*.tdb.* "$CTDB_DBDIR_PERSISTENT"/*.tdb.* |
 	   sed -e "s#${CTDB_DBDIR}/\(.*\)#\1#" \
 	       -e "s#${CTDB_DBDIR_PERSISTENT}/\(.*\)#\1#" |
 	   awk '{printf "s#[0-9]*:[0-9]*:%s #%s #\n", $1, $10}' )
 
-# Parse /proc/locks and extract following information
-#    pid process_name tdb_name offsets [W]
-out=$( cat /proc/locks |
+    # Parse /proc/locks and extract following information
+    #    pid process_name tdb_name offsets [W]
+    out=$( cat /proc/locks |
     grep -F "POSIX  ADVISORY  WRITE" |
     awk '{ if($2 == "->") { print $6, $7, $8, $9, "W" } else { print $5, $6, $7, $8 } }' |
     while read pid rest ; do
@@ -30,24 +35,29 @@ out=$( cat /proc/locks |
 	echo $pid $pname $rest
     done | sed -e "$sed_cmd" | grep "\.tdb" )
 
-if [ -n "$out" ]; then
-    # Log information about locks
-    echo "$out" | logger -t "ctdbd-lock"
-
-    # Find processes that are waiting for locks
-    dbs=$(echo "$out" | grep "W$" | awk '{print $3}')
-    all_pids=""
-    for db in $dbs ; do
-	pids=$(echo "$out" | grep -v "W$" | grep "$db" | grep -v ctdbd | awk '{print $1}')
-	all_pids="$all_pids $pids"
-    done
-    pids=$(echo $all_pids | sort -u)
-
-    # For each process waiting, log stack trace
-    for pid in $pids ; do
-	gstack $pid | logger -t "ctdbd-lock $pid"
-#	gcore -o /var/log/core-deadlock-ctdb $pid
-    done
-fi
+    if [ -n "$out" ]; then
+	# Log information about locks
+	echo "$out"
+
+	# Find processes that are waiting for locks
+	dbs=$(echo "$out" | grep "W$" | awk '{print $3}')
+	all_pids=""
+	for db in $dbs ; do
+	    pids=$(echo "$out" | grep -v "W$" | grep "$db" | grep -v ctdbd | awk '{print $1}')
+	    all_pids="$all_pids $pids"
+	done
+	pids=$(echo $all_pids | sort -u)
+
+	# For each process waiting, log stack trace
+	for pid in $pids ; do
+	    echo "----- Stack trace for PID=$pid -----"
+	    gstack $pid
+	    # gcore -o /var/log/core-deadlock-ctdb $pid
+	done
+    fi
+
+    echo "===== End of debug locks PID=$$ ====="
+
+) 9>"${CTDB_VARDIR}/debug_locks.lock" | script_log "ctdbd-lock"
 
 exit 0
diff --git a/config/events.d/60.ganesha b/config/events.d/60.ganesha
index 744c5ce..242321a 100755
--- a/config/events.d/60.ganesha
+++ b/config/events.d/60.ganesha
@@ -215,10 +215,6 @@ case "$1" in
 	    grep Path /etc/ganesha/$CTDB_CLUSTER_FILESYSTEM_TYPE.ganesha.exports.conf |
  	    cut -f2 -d\" | ctdb_check_directories
 	} || exit $?
-
-	# once every 60 seconds, update the statd state database for which
-	# clients need notifications
-	nfs_statd_update 60
 	;;
 
      *)
diff --git a/config/statd-callout b/config/statd-callout
index 70665e2..53b408d 100755
--- a/config/statd-callout
+++ b/config/statd-callout
@@ -148,6 +148,7 @@ case "$1" in
 	    awk -v pnn=$pnn 'pnn == $2 { printf "s/^key.*=.*statd-state@\\(%s\\)@\\([^\"]*\\).*/\\1 \\2/p\n", gensub(/\./, "\\\\.", "g", $1) }')
 
 	statd_state=$(ctdb catdb ctdb.tdb | sed -n "$sed_expr" | sort)
+	[ -n "$statd_state" ] || exit 0
 
 	# The following is dangerous if this script times out before
 	# all of the smnotify commands are run.  Revert to individual
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 279fa2f..71c9f13 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -556,7 +556,6 @@ struct ctdb_context {
 	struct trbt_tree *child_processes; 
 
 	/* Used for locking record/db/alldb */
-	int lock_num_current;
 	int lock_num_pending;
 	struct lock_context *lock_current;
 	struct lock_context *lock_pending;
@@ -596,6 +595,8 @@ struct ctdb_db_context {
 	struct trbt_tree *deferred_fetch;
 
 	struct ctdb_db_statistics statistics;
+
+	int lock_num_current;
 };
 
 
@@ -1083,6 +1084,7 @@ void ctdb_set_scheduler(struct ctdb_context *ctdb);
 void ctdb_restore_scheduler(struct ctdb_context *ctdb);
 
 struct tevent_signal *ctdb_init_sigchld(struct ctdb_context *ctdb);
+void ctdb_track_child(struct ctdb_context *ctdb, pid_t pid);
 pid_t ctdb_fork(struct ctdb_context *ctdb);
 pid_t ctdb_fork_no_free_ringbuffer(struct ctdb_context *ctdb);
 void ctdb_set_child_info(TALLOC_CTX *mem_ctx, const char *child_name_fmt, ...);
diff --git a/server/ctdb_lock.c b/server/ctdb_lock.c
index bb66f94..b71fac2 100644
--- a/server/ctdb_lock.c
+++ b/server/ctdb_lock.c
@@ -279,7 +279,9 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
 	if (lock_ctx->child > 0) {
 		ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
 		DLIST_REMOVE(lock_ctx->ctdb->lock_current, lock_ctx);
-		lock_ctx->ctdb->lock_num_current--;
+		if (lock_ctx->ctdb_db) {
+			lock_ctx->ctdb_db->lock_num_current--;
+		}
 		CTDB_DECREMENT_STAT(lock_ctx->ctdb, locks.num_current);
 		if (lock_ctx->type == LOCK_RECORD || lock_ctx->type == LOCK_DB) {
 			CTDB_DECREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
@@ -464,13 +466,11 @@ static void ctdb_lock_handler(struct tevent_context *ev,
 
 	if (locked) {
 		if (lock_ctx->ctdb_db) {
-			CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
 			CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.buckets[id]);
 			CTDB_UPDATE_LATENCY(lock_ctx->ctdb, lock_ctx->ctdb_db,
 					    lock_type_str[lock_ctx->type], locks.latency,
 					    lock_ctx->start_time);
 
-			CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
 			CTDB_UPDATE_DB_LATENCY(lock_ctx->ctdb_db, lock_type_str[lock_ctx->type], locks.latency, t);
 			CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.buckets[id]);
 		}
@@ -528,10 +528,12 @@ static void ctdb_lock_timeout_handler(struct tevent_context *ev,
 		}
 	}
 	if (debug_locks != NULL) {
-		pid = fork();
+		pid = vfork();
 		if (pid == 0) {
 			execl(debug_locks, debug_locks, NULL);
+			_exit(0);
 		}
+		ctdb_track_child(ctdb, pid);
 	} else {
 		DEBUG(DEBUG_WARNING,
 		      (__location__
@@ -741,10 +743,6 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 		CTDB_NO_MEMORY_VOID(ctdb, prog);
 	}
 
-	if (ctdb->lock_num_current >= MAX_LOCK_PROCESSES_PER_DB) {
-		return;
-	}
-
 	if (ctdb->lock_pending == NULL) {
 		return;
 	}
@@ -767,8 +765,11 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 						       lock_ctx->key, lock_ctx->priority,
 						       lock_ctx->type);
 			if (active_ctx == NULL) {
-				/* Found a lock context with lock requests */
-				break;
+				if (lock_ctx->ctdb_db == NULL ||
+				    lock_ctx->ctdb_db->lock_num_current < MAX_LOCK_PROCESSES_PER_DB) {
+					/* Found a lock context with lock requests */
+					break;
+				}
 			}
 
 			/* There is already a child waiting for the
@@ -811,7 +812,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 		return;
 	}
 
-	lock_ctx->child = ctdb_fork(ctdb);
+	lock_ctx->child = vfork();
 
 	if (lock_ctx->child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR, ("Failed to create a child in ctdb_lock_schedule\n"));
@@ -833,6 +834,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 	}
 
 	/* Parent process */
+	ctdb_track_child(ctdb, lock_ctx->child);
 	close(lock_ctx->fd[1]);
 
 	talloc_set_destructor(lock_ctx, ctdb_lock_context_destructor);
@@ -874,7 +876,11 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 	DLIST_REMOVE(ctdb->lock_pending, lock_ctx);
 	ctdb->lock_num_pending--;
 	DLIST_ADD_END(ctdb->lock_current, lock_ctx, NULL);
-	ctdb->lock_num_current++;
+	if (lock_ctx->ctdb_db) {
+		lock_ctx->ctdb_db->lock_num_current++;
+		CTDB_INCREMENT_STAT(lock_ctx->ctdb, locks.num_current);
+		CTDB_INCREMENT_DB_STAT(lock_ctx->ctdb_db, locks.num_current);
+	}
 }
 
 
@@ -890,7 +896,7 @@ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
 					       enum lock_type type,
 					       bool auto_mark)
 {
-	struct lock_context *lock_ctx;
+	struct lock_context *lock_ctx = NULL;
 	struct lock_request *request;
 
 	if (callback == NULL) {
@@ -898,9 +904,14 @@ static struct lock_request *ctdb_lock_internal(struct ctdb_context *ctdb,
 		return NULL;
 	}
 
+#if 0
+	/* Disable this optimization to ensure first-in-first-out fair
+	 * scheduling of lock requests */
+
 	/* get a context for this key - search only the pending contexts,
 	 * current contexts might in the middle of processing callbacks */
 	lock_ctx = find_lock_context(ctdb->lock_pending, ctdb_db, key, priority, type);
+#endif
 
 	/* No existing context, create one */
 	if (lock_ctx == NULL) {
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 6820ec9..70ed87e 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -739,7 +739,8 @@ static void pull_seqnum_cb(struct ctdb_context *ctdb, uint32_t node_pnn, int32_t
 
 	seqnum = *((uint64_t *)outdata.dptr);
 
-	if (seqnum > cb_data->seqnum) {
+	if (seqnum > cb_data->seqnum ||
+	    (cb_data->pnn == -1 && seqnum == 0)) {
 		cb_data->seqnum = seqnum;
 		cb_data->pnn = node_pnn;
 	}
@@ -802,7 +803,7 @@ static int pull_highest_seqnum_pdb(struct ctdb_context *ctdb,
 		return -1;
 	}
 
-	if (cb_data->seqnum == 0 || cb_data->pnn == -1) {
+	if (cb_data->pnn == -1) {
 		DEBUG(DEBUG_NOTICE, ("Failed to find a node with highest sequence numbers for DB 0x%08x\n", dbid));
 		talloc_free(tmp_ctx);
 		return -1;
diff --git a/tests/scripts/integration.bash b/tests/scripts/integration.bash
index 7dbccbc..4f0f68b 100644
--- a/tests/scripts/integration.bash
+++ b/tests/scripts/integration.bash
@@ -967,7 +967,43 @@ nfs_test_cleanup ()
     onnode -q $test_node rmdir "$nfs_test_dir"
 }
 
+#######################################
+
+# $1: pnn, $2: DB name
+db_get_path ()
+{
+    try_command_on_node -v $1 $CTDB getdbstatus "$2" |
+    sed -n -e "s@^path: @@p"
+}
+
+# $1: pnn, $2: DB name
+db_ctdb_cattdb_count_records ()
+{
+    try_command_on_node -v $1 $CTDB cattdb "$2" |
+    grep '^key' | grep -v '__db_sequence_number__' |
+    wc -l
+}
 
+# $1: pnn, $2: DB name, $3: key string, $4: value string, $5: RSN (default 7)
+db_ctdb_tstore ()
+{
+    _tdb=$(db_get_path $1 "$2")
+    _rsn="${5:-7}"
+    try_command_on_node $1 $CTDB tstore "$_tdb" "$3" "$4" "$_rsn"
+}
+
+# $1: pnn, $2: DB name, $3: dbseqnum (must be < 255!!!!!)
+db_ctdb_tstore_dbseqnum ()
+{
+    # "__db_sequence_number__" + trailing 0x00
+    _key='0x5f5f64625f73657175656e63655f6e756d6265725f5f00'
+
+    # Construct 8 byte (uint64_t) database sequence number.  This
+    # probably breaks if $3 > 255
+    _value=$(printf "0x%02x%014x" $3 0)
+
+    db_ctdb_tstore $1 "$2" "$_key" "$_value"
+}
 
 #######################################
 
diff --git a/tests/simple/70_recoverpdbbyseqnum.sh b/tests/simple/70_recoverpdbbyseqnum.sh
index a83dbe0..a4765b1 100755
--- a/tests/simple/70_recoverpdbbyseqnum.sh
+++ b/tests/simple/70_recoverpdbbyseqnum.sh
@@ -48,136 +48,129 @@ set -e
 
 cluster_is_healthy
 
+# Reset configuration
+ctdb_restart_when_done
+
 try_command_on_node 0 "$CTDB listnodes"
 num_nodes=$(echo "$out" | wc -l)
 
-# create a temporary persistent database to test with
-echo create persistent test database persistent_test.tdb
-try_command_on_node 0 $CTDB attach persistent_test.tdb persistent
-
+add_record_per_node ()
+{
+    _i=0
+    while [ $_i -lt $num_nodes ] ; do
+	_k="KEY${_i}"
+	_d="DATA${_i}"
+	echo "Store key(${_k}) data(${_d}) on node ${_i}"
+	db_ctdb_tstore $_i "$test_db" "$_k" "$_d"
+	_i=$(($_i + 1))
+    done
+}
 
-# set RecoverPDBBySeqNum=0
-echo "setting RecoverPDBBySeqNum to 0"
-try_command_on_node all $CTDB setvar RecoverPDBBySeqNum 0
+test_db="persistent_test.tdb"
+echo "Create persistent test database \"$test_db\""
+try_command_on_node 0 $CTDB attach "$test_db" persistent
 
 
+echo "Setting RecoverPDBBySeqNum=0"
+try_command_on_node all $CTDB setvar "RecoverPDBBySeqNum" 0
 
-# 3,
+# 3.
 # If RecoverPDBBySeqNum==0  and no __db_sequence_number__
 # recover record by record
 #
 # wipe database
 echo
-echo test that RecoverPDBBySeqNum==0 and no __db_sequence_number__ blends the database during recovery
-echo wipe the test database
-try_command_on_node 0 $CTDB wipedb persistent_test.tdb
+echo "Test that RecoverPDBBySeqNum=0 and no __db_sequence_number__ blends the database during recovery"
 
-# add one record to node 0   key==ABC  data==ABC
-TDB=`try_command_on_node -v 0 $CTDB getdbmap | grep persistent_test.tdb | sed -e "s/.*path://" -e "s/ .*//"`
-echo "store key(ABC) data(ABC) on node 0"
-try_command_on_node 0 $CTDB tstore $TDB 0x414243 0x070000000000000000000000000000000000000000000000414243
-#
-# add one record to node 1   key==DEF  data==DEF
-TDB=`try_command_on_node -v 1 $CTDB getdbmap | grep persistent_test.tdb | sed -e "s/.*path://" -e "s/ .*//"`
-echo "store key(DEF) data(DEF) on node 1"
-try_command_on_node 1 $CTDB tstore $TDB 0x444546 0x070000000000000000000000000000000000000000000000444546
+echo "Wipe test database"
+try_command_on_node 0 $CTDB wipedb "$test_db"
+
+add_record_per_node
 
 # force a recovery
-echo force a recovery
+echo "Force a recovery"
 try_command_on_node 0 $CTDB recover
 
 # check that we now have both records on node 0
-num_records=$(try_command_on_node -v 0 $CTDB cattdb persistent_test.tdb | grep key | egrep "ABC|DEF" | wc -l)
-[ $num_records != "2" ] && {
-    echo "BAD: we did not end up with the expected two records after the recovery"
+num_records=$(db_ctdb_cattdb_count_records 0 "$test_db")
+if [ $num_records = "$num_nodes" ] ; then
+    echo "OK: databases were blended"
+else
+    echo "BAD: we did not end up with the expected $num_nodes records after the recovery"
     exit 1
-}
-echo "OK. databases were blended"
-
+fi
 
-
-# 4,
+# 4.
 # If RecoverPDBBySeqNum==0  and __db_sequence_number__
 # recover record by record
 #
 # wipe database
 echo
-echo test that RecoverPDBBySeqNum==0 and __db_sequence_number__ blends the database during recovery
-echo wipe the test database
+echo "Test that RecoverPDBBySeqNum=0 and __db_sequence_number__ blends the database during recovery"
+
+echo "Wipe the test database"
 try_command_on_node 0 $CTDB wipedb persistent_test.tdb
 
-echo "add __db_sequence_number__==5 record to all nodes"
-try_command_on_node -v 0 $CTDB nodestatus all | grep pnn | sed -e"s/^pnn://" -e "s/ .*//" | while read PNN; do
-    TDB=`try_command_on_node -v $PNN $CTDB getdbmap | grep persistent_test.tdb | sed -e "s/.*path://" -e "s/ .*//"`
-    try_command_on_node $PNN $CTDB tstore $TDB 0x5f5f64625f73657175656e63655f6e756d6265725f5f00 0x0700000000000000000000000000000000000000000000000500000000000000
+add_record_per_node
+
+echo "Add __db_sequence_number__==5 record to all nodes"
+pnn=0
+while [ $pnn -lt $num_nodes ] ; do
+    db_ctdb_tstore_dbseqnum $pnn "$test_db" 5
+    pnn=$(($pnn + 1))
 done
 
-# add one record to node 0   key==ABC  data==ABC
-TDB=`try_command_on_node -v 0 $CTDB getdbmap | grep persistent_test.tdb | sed -e "s/.*path://" -e "s/ .*//"`
-echo "store key(ABC) data(ABC) on node 0"
-try_command_on_node 0 $CTDB tstore $TDB 0x414243 0x070000000000000000000000000000000000000000000000414243
-echo "add __db_sequence_number__==7 record to node 0"
-try_command_on_node 0 $CTDB tstore $TDB 0x5f5f64625f73657175656e63655f6e756d6265725f5f00 0x0700000000000000000000000000000000000000000000000700000000000000
+echo "Set __db_sequence_number__ to 7 on node 0"
+db_ctdb_tstore_dbseqnum 0 "$test_db" 7
 
-# add one record to node 1   key==DEF  data==DEF


-- 
CTDB repository