[SCM] Samba Shared Repository - branch master updated

Amitay Isaacs amitay at samba.org
Thu Jan 12 07:00:02 UTC 2017


The branch, master has been updated
       via  7794497 ctdb-tests: Add robust mutex test
       via  3a56a16 ctdb-locking: Explicitly unlock record/db in lock helper
       via  5b1076d ctdb-locking: Remove support for locking multiple databases
      from  b4f40e4 s4:tests/sec_descriptor: use more unique oid values

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 7794497bc909fa7b02da9d9ce1fc496a8fa2a9ae
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Fri Dec 2 15:11:20 2016 +1100

    ctdb-tests: Add robust mutex test
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    This demonstrates robust mutex bug on linux/glibc system.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>
    
    Autobuild-User(master): Amitay Isaacs <amitay at samba.org>
    Autobuild-Date(master): Thu Jan 12 07:59:34 CET 2017 on sn-devel-144

commit 3a56a16b06cf6d1cce613ec29f5ea46630902072
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Nov 29 17:20:45 2016 +1100

    ctdb-locking: Explicitly unlock record/db in lock helper
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    Instead of killing lock helper processes with SIGKILL, send SIGTERM so
    lock helper processes can explicitly unlock record/db.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>

commit 5b1076dc61f5e3f006c1b8cef98e7d2d3cc1bfba
Author: Amitay Isaacs <amitay at gmail.com>
Date:   Tue Nov 29 17:13:41 2016 +1100

    ctdb-locking: Remove support for locking multiple databases
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    The code to lock multiple databases has been dropped from ctdb_lock.c.
    
    Signed-off-by: Amitay Isaacs <amitay at gmail.com>
    Reviewed-by: Martin Schwenke <martin at meltin.net>

-----------------------------------------------------------------------

Summary of changes:
 ctdb/server/ctdb_lock.c         |   6 +-
 ctdb/server/ctdb_lock_helper.c  | 194 ++++++++++++++++++++++++-----
 ctdb/tests/src/test_mutex_raw.c | 261 ++++++++++++++++++++++++++++++++++++++++
 ctdb/wscript                    |   8 +-
 4 files changed, 435 insertions(+), 34 deletions(-)
 create mode 100644 ctdb/tests/src/test_mutex_raw.c


Changeset truncated at 500 lines:

diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c
index 0203369..b2c6d15 100644
--- a/ctdb/server/ctdb_lock.c
+++ b/ctdb/server/ctdb_lock.c
@@ -192,7 +192,7 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
 		lock_ctx->request->lctx = NULL;
 	}
 	if (lock_ctx->child > 0) {
-		ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
+		ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
 		if (lock_ctx->type == LOCK_RECORD) {
 			DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
 		} else {
@@ -683,7 +683,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 					    ctdb_lock_timeout_handler,
 					    (void *)lock_ctx);
 	if (lock_ctx->ttimer == NULL) {
-		ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+		ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
 		lock_ctx->child = -1;
 		close(lock_ctx->fd[0]);
 		return;
@@ -698,7 +698,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
 				      (void *)lock_ctx);
 	if (lock_ctx->tfd == NULL) {
 		TALLOC_FREE(lock_ctx->ttimer);
-		ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+		ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
 		lock_ctx->child = -1;
 		close(lock_ctx->fd[0]);
 		return;
diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c
index 34823fb..8b9f5aa 100644
--- a/ctdb/server/ctdb_lock_helper.c
+++ b/ctdb/server/ctdb_lock_helper.c
@@ -20,11 +20,14 @@
 #include "replace.h"
 #include "system/filesys.h"
 #include "system/network.h"
+#include "system/wait.h"
 
 #include <talloc.h>
+#include <tevent.h>
 #include <tdb.h>
 
 #include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
 
 #include "protocol/protocol.h"
 
@@ -33,6 +36,11 @@
 static char *progname = NULL;
 static bool realtime = true;
 
+struct lock_state {
+	struct tdb_context *tdb;
+	TDB_DATA key;
+};
+
 static void set_priority(void)
 {
 	const char *ptr;
@@ -73,7 +81,7 @@ static void usage(void)
 {
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-flags> <db-key>\n", progname);
-	fprintf(stderr, "       %s <ctdbd-pid> <output-fd> DB <db1-path> <db1-flags> [<db2-path> <db2-flags>...]\n", progname);
+	fprintf(stderr, "       %s <ctdbd-pid> <output-fd> DB <db-path> <db-flags>\n", progname);
 }
 
 static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
@@ -93,10 +101,9 @@ static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
 	return buffer;
 }
 
-static int lock_record(const char *dbpath, const char *dbflags, const char *dbkey)
+static int lock_record(const char *dbpath, const char *dbflags,
+		       const char *dbkey, struct lock_state *state)
 {
-	TDB_DATA key;
-	struct tdb_context *tdb;
 	int tdb_flags;
 
 	/* No error checking since CTDB always passes sane values */
@@ -104,23 +111,24 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
 
 	/* Convert hex key to key */
 	if (strcmp(dbkey, "NULL") == 0) {
-		key.dptr = NULL;
-		key.dsize = 0;
+		state->key.dptr = NULL;
+		state->key.dsize = 0;
 	} else {
-		key.dptr = hex_decode_talloc(NULL, dbkey, &key.dsize);
+		state->key.dptr = hex_decode_talloc(NULL, dbkey,
+						    &state->key.dsize);
 	}
 
-	tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
-	if (tdb == NULL) {
+	state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+	if (state->tdb == NULL) {
 		fprintf(stderr, "locking: Error opening database %s\n", dbpath);
 		return 1;
 	}
 
 	set_priority();
 
-	if (tdb_chainlock(tdb, key) < 0) {
+	if (tdb_chainlock(state->tdb, state->key) < 0) {
 		fprintf(stderr, "locking: Error getting record lock (%s)\n",
-			tdb_errorstr(tdb));
+			tdb_errorstr(state->tdb));
 		return 1;
 	}
 
@@ -130,26 +138,25 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
 
 }
 
-
-static int lock_db(const char *dbpath, const char *dbflags)
+static int lock_db(const char *dbpath, const char *dbflags,
+		   struct lock_state *state)
 {
-	struct tdb_context *tdb;
 	int tdb_flags;
 
 	/* No error checking since CTDB always passes sane values */
 	tdb_flags = strtol(dbflags, NULL, 0);
 
-	tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
-	if (tdb == NULL) {
+	state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+	if (state->tdb == NULL) {
 		fprintf(stderr, "locking: Error opening database %s\n", dbpath);
 		return 1;
 	}
 
 	set_priority();
 
-	if (tdb_lockall(tdb) < 0) {
+	if (tdb_lockall(state->tdb) < 0) {
 		fprintf(stderr, "locking: Error getting db lock (%s)\n",
-			tdb_errorstr(tdb));
+			tdb_errorstr(state->tdb));
 		return 1;
 	}
 
@@ -158,13 +165,114 @@ static int lock_db(const char *dbpath, const char *dbflags)
 	return 0;
 }
 
+struct wait_for_parent_state {
+	struct tevent_context *ev;
+	pid_t ppid;
+};
+
+static void wait_for_parent_check(struct tevent_req *subreq);
+
+static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx,
+					       struct tevent_context *ev,
+					       pid_t ppid)
+{
+	struct tevent_req *req, *subreq;
+	struct wait_for_parent_state *state;
+
+	req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	state->ev = ev;
+	state->ppid = ppid;
+
+	if (ppid == 1) {
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	subreq = tevent_wakeup_send(state, ev,
+				    tevent_timeval_current_ofs(5,0));
+	if (tevent_req_nomem(subreq, req)) {
+		return tevent_req_post(req, ev);
+	}
+	tevent_req_set_callback(subreq, wait_for_parent_check, req);
+
+	return req;
+}
+
+static void wait_for_parent_check(struct tevent_req *subreq)
+{
+	struct tevent_req *req = tevent_req_callback_data(
+		subreq, struct tevent_req);
+	struct wait_for_parent_state *state = tevent_req_data(
+		req, struct wait_for_parent_state);
+	bool status;
+
+	status = tevent_wakeup_recv(subreq);
+	TALLOC_FREE(subreq);
+	if (! status) {
+		/* Ignore error */
+		fprintf(stderr, "locking: tevent_wakeup_recv() failed\n");
+	}
+
+	if (kill(state->ppid, 0) == -1 && errno == ESRCH) {
+		tevent_req_done(req);
+		return;
+	}
+
+	subreq = tevent_wakeup_send(state, state->ev,
+				    tevent_timeval_current_ofs(5,0));
+	if (tevent_req_nomem(subreq, req)) {
+		return;
+	}
+	tevent_req_set_callback(subreq, wait_for_parent_check, req);
+}
+
+static bool wait_for_parent_recv(struct tevent_req *req)
+{
+	if (tevent_req_is_unix_error(req, NULL)) {
+		return false;
+	}
+
+	return true;
+}
+
+static void cleanup(struct lock_state *state)
+{
+	if (state->tdb != NULL) {
+		if (state->key.dsize == 0) {
+			tdb_unlockall(state->tdb);
+		} else {
+			tdb_chainunlock(state->tdb, state->key);
+		}
+		tdb_close(state->tdb);
+	}
+}
+
+static void signal_handler(struct tevent_context *ev,
+			   struct tevent_signal *se,
+			   int signum, int count, void *siginfo,
+			   void *private_data)
+{
+	struct lock_state *state = (struct lock_state *)private_data;
+
+	cleanup(state);
+	exit(0);
+}
 
 int main(int argc, char *argv[])
 {
+	struct tevent_context *ev;
+	struct tevent_signal *se;
+	struct tevent_req *req;
+	struct lock_state state = { 0 };
 	int write_fd;
 	char result = 0;
 	int ppid;
 	const char *lock_type;
+	bool status;
 
 	reset_scheduler();
 
@@ -179,6 +287,20 @@ int main(int argc, char *argv[])
 	write_fd = atoi(argv[2]);
 	lock_type = argv[3];
 
+	ev = tevent_context_init(NULL);
+	if (ev == NULL) {
+		fprintf(stderr, "locking: tevent_context_init() failed\n");
+		exit(1);
+	}
+
+	se = tevent_add_signal(ev, ev, SIGTERM, 0,
+			       signal_handler, &state);
+	if (se == NULL) {
+		fprintf(stderr, "locking: tevent_add_signal() failed\n");
+		talloc_free(ev);
+		exit(1);
+	}
+
 	if (strcmp(lock_type, "RECORD") == 0) {
 		if (argc != 7) {
 			fprintf(stderr,
@@ -187,20 +309,17 @@ int main(int argc, char *argv[])
 			usage();
 			exit(1);
 		}
-		result = lock_record(argv[4], argv[5], argv[6]);
+		result = lock_record(argv[4], argv[5], argv[6], &state);
 
 	} else if (strcmp(lock_type, "DB") == 0) {
-		int n;
-
-		/* If there are no databases specified, no need for lock */
-		if (argc > 4) {
-			for (n=4; n+1<argc; n+=2) {
-				result = lock_db(argv[n], argv[n+1]);
-				if (result != 0) {
-					break;
-				}
-			}
+		if (argc != 6) {
+			fprintf(stderr,
+				"locking: Invalid number of arguments (%d)\n",
+				argc);
+			usage();
+			exit(1);
 		}
+		result = lock_db(argv[4], argv[5], &state);
 
 	} else {
 		fprintf(stderr, "locking: Invalid lock-type '%s'\n", lock_type);
@@ -210,6 +329,21 @@ int main(int argc, char *argv[])
 
 	send_result(write_fd, result);
 
-	ctdb_wait_for_process_to_exit(ppid);
+	req = wait_for_parent_send(ev, ev, ppid);
+	if (req == NULL) {
+		fprintf(stderr, "locking: wait_for_parent_send() failed\n");
+		cleanup(&state);
+		exit(1);
+	}
+
+	tevent_req_poll(req, ev);
+
+	status = wait_for_parent_recv(req);
+	if (! status) {
+		fprintf(stderr, "locking: wait_for_parent_recv() failed\n");
+	}
+
+	talloc_free(ev);
+	cleanup(&state);
 	return 0;
 }
diff --git a/ctdb/tests/src/test_mutex_raw.c b/ctdb/tests/src/test_mutex_raw.c
new file mode 100644
index 0000000..8e3cae3
--- /dev/null
+++ b/ctdb/tests/src/test_mutex_raw.c
@@ -0,0 +1,261 @@
+/*
+   Robust mutex test
+
+   Copyright (C) Amitay Isaacs  2016
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * Run this test as follows:
+ *
+ * 1. Running all processes at normal priority
+ *
+ *  $ while true ; do ./bin/test_mutex_raw /tmp/foo 10 0 ; done
+ *
+ * 2. Running all processes at real-time priority
+ *
+ *  # while true ; do ./bin/test_mutex_raw /tmp/foo 10 1 ; done
+ *
+ * The test will block after few iterations.  At this time none of the 
+ * child processes is holding the mutex.
+ *
+ * To check which process is holding a lock:
+ *
+ *  $ ./bin/test_mutex_raw /tmp/foo debug
+ *
+ *  If no pid is printed, then no process is holding the mutex.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sched.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <errno.h>
+#include <stdbool.h>
+
+int pthread_mutex_consistent_np(pthread_mutex_t *);
+
+static void set_realtime(void)
+{
+	struct sched_param p;
+	int ret;
+
+	p.sched_priority = 1;
+
+	ret = sched_setscheduler(0, SCHED_FIFO, &p);
+	if (ret == -1) {
+		fprintf(stderr, "Failed to set scheduler to SCHED_FIFO\n");
+	}
+}
+
+static void high_priority(void)
+{
+	int ret;
+
+	ret = nice(-20);
+	if (ret == -1) {
+		fprintf(stderr, "Failed to set high priority\n");
+	}
+}
+
+static void run_child(const char *filename)
+{
+	pthread_mutex_t *mutex;
+	void *addr;
+	int ret, fd;
+
+	fd = open(filename, O_RDWR, 0600);
+	if (fd == -1) {
+		exit(1);
+	}
+
+	addr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
+		    MAP_SHARED|MAP_FILE, fd, 0);
+	if (addr == NULL) {
+		exit(2);
+	}
+
+	mutex = (pthread_mutex_t *)addr;
+
+again:
+	ret = pthread_mutex_lock(mutex);
+	if (ret == EOWNERDEAD) {
+		ret = pthread_mutex_consistent_np(mutex);
+	} else if (ret == EAGAIN) {
+		goto again;
+	}
+	if (ret != 0) {
+		fprintf(stderr, "pid %u lock failed, ret=%d\n", getpid(), ret);
+		exit(3);
+	}
+
+	fprintf(stderr, "pid %u locked\n", getpid());
+	kill(getpid(), SIGKILL);
+}
+
+#define PRIO_NORMAL	0
+#define PRIO_REALTIME	1
+#define PRIO_NICE_20	2
+
+int main(int argc, const char **argv)
+{
+	pthread_mutexattr_t ma;
+	pthread_mutex_t *mutex;
+	int fd, ret, i;
+	pid_t pid;
+	void *addr;
+	int num_children;
+	int priority = PRIO_NORMAL;
+
+	if (argc < 3 || argc > 4) {
+		fprintf(stderr, "Usage: %s <file> <n> [0|1|2]\n", argv[0]);
+		fprintf(stderr, "       %s <file> debug\n", argv[0]);
+		exit(1);
+	}
+
+	if (argc == 4) {
+		priority = atoi(argv[3]);
+	}
+
+	if (priority == PRIO_REALTIME) {
+		set_realtime();
+	} else if (priority == PRIO_NICE_20) {
+		high_priority();
+	}
+
+	fd = open(argv[1], O_CREAT|O_RDWR, 0600);
+	if (fd == -1) {
+		fprintf(stderr, "open failed\n");
+		exit(1);
+	}
+
+	ret = lseek(fd, 0, SEEK_SET);
+	if (ret != 0) {
+		fprintf(stderr, "lseek failed\n");
+		exit(1);
+	}
+
+	ret = ftruncate(fd, sizeof(pthread_mutex_t));


-- 
Samba Shared Repository



More information about the samba-cvs mailing list