[SCM] Samba Shared Repository - branch master updated
Amitay Isaacs
amitay at samba.org
Thu Jan 12 07:00:02 UTC 2017
The branch, master has been updated
via 7794497 ctdb-tests: Add robust mutex test
via 3a56a16 ctdb-locking: Explicitly unlock record/db in lock helper
via 5b1076d ctdb-locking: Remove support for locking multiple databases
from b4f40e4 s4:tests/sec_descriptor: use more unique oid values
https://git.samba.org/?p=samba.git;a=shortlog;h=master
- Log -----------------------------------------------------------------
commit 7794497bc909fa7b02da9d9ce1fc496a8fa2a9ae
Author: Amitay Isaacs <amitay at gmail.com>
Date: Fri Dec 2 15:11:20 2016 +1100
ctdb-tests: Add robust mutex test
BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
This demonstrates robust mutex bug on linux/glibc system.
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
Reviewed-by: Martin Schwenke <martin at meltin.net>
Autobuild-User(master): Amitay Isaacs <amitay at samba.org>
Autobuild-Date(master): Thu Jan 12 07:59:34 CET 2017 on sn-devel-144
commit 3a56a16b06cf6d1cce613ec29f5ea46630902072
Author: Amitay Isaacs <amitay at gmail.com>
Date: Tue Nov 29 17:20:45 2016 +1100
ctdb-locking: Explicitly unlock record/db in lock helper
BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
Instead of killing lock helper processes with SIGKILL, send SIGTERM so
lock helper processes can explicitly unlock record/db.
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
Reviewed-by: Martin Schwenke <martin at meltin.net>
commit 5b1076dc61f5e3f006c1b8cef98e7d2d3cc1bfba
Author: Amitay Isaacs <amitay at gmail.com>
Date: Tue Nov 29 17:13:41 2016 +1100
ctdb-locking: Remove support for locking multiple databases
BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
The code to lock multiple databases has been dropped from ctdb_lock.c.
Signed-off-by: Amitay Isaacs <amitay at gmail.com>
Reviewed-by: Martin Schwenke <martin at meltin.net>
-----------------------------------------------------------------------
Summary of changes:
ctdb/server/ctdb_lock.c | 6 +-
ctdb/server/ctdb_lock_helper.c | 194 ++++++++++++++++++++++++-----
ctdb/tests/src/test_mutex_raw.c | 261 ++++++++++++++++++++++++++++++++++++++++
ctdb/wscript | 8 +-
4 files changed, 435 insertions(+), 34 deletions(-)
create mode 100644 ctdb/tests/src/test_mutex_raw.c
Changeset truncated at 500 lines:
diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c
index 0203369..b2c6d15 100644
--- a/ctdb/server/ctdb_lock.c
+++ b/ctdb/server/ctdb_lock.c
@@ -192,7 +192,7 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
lock_ctx->request->lctx = NULL;
}
if (lock_ctx->child > 0) {
- ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
if (lock_ctx->type == LOCK_RECORD) {
DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
} else {
@@ -683,7 +683,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
ctdb_lock_timeout_handler,
(void *)lock_ctx);
if (lock_ctx->ttimer == NULL) {
- ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
lock_ctx->child = -1;
close(lock_ctx->fd[0]);
return;
@@ -698,7 +698,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
(void *)lock_ctx);
if (lock_ctx->tfd == NULL) {
TALLOC_FREE(lock_ctx->ttimer);
- ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+ ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
lock_ctx->child = -1;
close(lock_ctx->fd[0]);
return;
diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c
index 34823fb..8b9f5aa 100644
--- a/ctdb/server/ctdb_lock_helper.c
+++ b/ctdb/server/ctdb_lock_helper.c
@@ -20,11 +20,14 @@
#include "replace.h"
#include "system/filesys.h"
#include "system/network.h"
+#include "system/wait.h"
#include <talloc.h>
+#include <tevent.h>
#include <tdb.h>
#include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
#include "protocol/protocol.h"
@@ -33,6 +36,11 @@
static char *progname = NULL;
static bool realtime = true;
+struct lock_state {
+ struct tdb_context *tdb;
+ TDB_DATA key;
+};
+
static void set_priority(void)
{
const char *ptr;
@@ -73,7 +81,7 @@ static void usage(void)
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-flags> <db-key>\n", progname);
- fprintf(stderr, " %s <ctdbd-pid> <output-fd> DB <db1-path> <db1-flags> [<db2-path> <db2-flags>...]\n", progname);
+ fprintf(stderr, " %s <ctdbd-pid> <output-fd> DB <db-path> <db-flags>\n", progname);
}
static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
@@ -93,10 +101,9 @@ static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
return buffer;
}
-static int lock_record(const char *dbpath, const char *dbflags, const char *dbkey)
+static int lock_record(const char *dbpath, const char *dbflags,
+ const char *dbkey, struct lock_state *state)
{
- TDB_DATA key;
- struct tdb_context *tdb;
int tdb_flags;
/* No error checking since CTDB always passes sane values */
@@ -104,23 +111,24 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
/* Convert hex key to key */
if (strcmp(dbkey, "NULL") == 0) {
- key.dptr = NULL;
- key.dsize = 0;
+ state->key.dptr = NULL;
+ state->key.dsize = 0;
} else {
- key.dptr = hex_decode_talloc(NULL, dbkey, &key.dsize);
+ state->key.dptr = hex_decode_talloc(NULL, dbkey,
+ &state->key.dsize);
}
- tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
- if (tdb == NULL) {
+ state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+ if (state->tdb == NULL) {
fprintf(stderr, "locking: Error opening database %s\n", dbpath);
return 1;
}
set_priority();
- if (tdb_chainlock(tdb, key) < 0) {
+ if (tdb_chainlock(state->tdb, state->key) < 0) {
fprintf(stderr, "locking: Error getting record lock (%s)\n",
- tdb_errorstr(tdb));
+ tdb_errorstr(state->tdb));
return 1;
}
@@ -130,26 +138,25 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
}
-
-static int lock_db(const char *dbpath, const char *dbflags)
+static int lock_db(const char *dbpath, const char *dbflags,
+ struct lock_state *state)
{
- struct tdb_context *tdb;
int tdb_flags;
/* No error checking since CTDB always passes sane values */
tdb_flags = strtol(dbflags, NULL, 0);
- tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
- if (tdb == NULL) {
+ state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+ if (state->tdb == NULL) {
fprintf(stderr, "locking: Error opening database %s\n", dbpath);
return 1;
}
set_priority();
- if (tdb_lockall(tdb) < 0) {
+ if (tdb_lockall(state->tdb) < 0) {
fprintf(stderr, "locking: Error getting db lock (%s)\n",
- tdb_errorstr(tdb));
+ tdb_errorstr(state->tdb));
return 1;
}
@@ -158,13 +165,114 @@ static int lock_db(const char *dbpath, const char *dbflags)
return 0;
}
+struct wait_for_parent_state {
+ struct tevent_context *ev;
+ pid_t ppid;
+};
+
+static void wait_for_parent_check(struct tevent_req *subreq);
+
+static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev,
+ pid_t ppid)
+{
+ struct tevent_req *req, *subreq;
+ struct wait_for_parent_state *state;
+
+ req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state);
+ if (req == NULL) {
+ return NULL;
+ }
+
+ state->ev = ev;
+ state->ppid = ppid;
+
+ if (ppid == 1) {
+ tevent_req_done(req);
+ return tevent_req_post(req, ev);
+ }
+
+ subreq = tevent_wakeup_send(state, ev,
+ tevent_timeval_current_ofs(5,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return tevent_req_post(req, ev);
+ }
+ tevent_req_set_callback(subreq, wait_for_parent_check, req);
+
+ return req;
+}
+
+static void wait_for_parent_check(struct tevent_req *subreq)
+{
+ struct tevent_req *req = tevent_req_callback_data(
+ subreq, struct tevent_req);
+ struct wait_for_parent_state *state = tevent_req_data(
+ req, struct wait_for_parent_state);
+ bool status;
+
+ status = tevent_wakeup_recv(subreq);
+ TALLOC_FREE(subreq);
+ if (! status) {
+ /* Ignore error */
+ fprintf(stderr, "locking: tevent_wakeup_recv() failed\n");
+ }
+
+ if (kill(state->ppid, 0) == -1 && errno == ESRCH) {
+ tevent_req_done(req);
+ return;
+ }
+
+ subreq = tevent_wakeup_send(state, state->ev,
+ tevent_timeval_current_ofs(5,0));
+ if (tevent_req_nomem(subreq, req)) {
+ return;
+ }
+ tevent_req_set_callback(subreq, wait_for_parent_check, req);
+}
+
+static bool wait_for_parent_recv(struct tevent_req *req)
+{
+ if (tevent_req_is_unix_error(req, NULL)) {
+ return false;
+ }
+
+ return true;
+}
+
+static void cleanup(struct lock_state *state)
+{
+ if (state->tdb != NULL) {
+ if (state->key.dsize == 0) {
+ tdb_unlockall(state->tdb);
+ } else {
+ tdb_chainunlock(state->tdb, state->key);
+ }
+ tdb_close(state->tdb);
+ }
+}
+
+static void signal_handler(struct tevent_context *ev,
+ struct tevent_signal *se,
+ int signum, int count, void *siginfo,
+ void *private_data)
+{
+ struct lock_state *state = (struct lock_state *)private_data;
+
+ cleanup(state);
+ exit(0);
+}
int main(int argc, char *argv[])
{
+ struct tevent_context *ev;
+ struct tevent_signal *se;
+ struct tevent_req *req;
+ struct lock_state state = { 0 };
int write_fd;
char result = 0;
int ppid;
const char *lock_type;
+ bool status;
reset_scheduler();
@@ -179,6 +287,20 @@ int main(int argc, char *argv[])
write_fd = atoi(argv[2]);
lock_type = argv[3];
+ ev = tevent_context_init(NULL);
+ if (ev == NULL) {
+ fprintf(stderr, "locking: tevent_context_init() failed\n");
+ exit(1);
+ }
+
+ se = tevent_add_signal(ev, ev, SIGTERM, 0,
+ signal_handler, &state);
+ if (se == NULL) {
+ fprintf(stderr, "locking: tevent_add_signal() failed\n");
+ talloc_free(ev);
+ exit(1);
+ }
+
if (strcmp(lock_type, "RECORD") == 0) {
if (argc != 7) {
fprintf(stderr,
@@ -187,20 +309,17 @@ int main(int argc, char *argv[])
usage();
exit(1);
}
- result = lock_record(argv[4], argv[5], argv[6]);
+ result = lock_record(argv[4], argv[5], argv[6], &state);
} else if (strcmp(lock_type, "DB") == 0) {
- int n;
-
- /* If there are no databases specified, no need for lock */
- if (argc > 4) {
- for (n=4; n+1<argc; n+=2) {
- result = lock_db(argv[n], argv[n+1]);
- if (result != 0) {
- break;
- }
- }
+ if (argc != 6) {
+ fprintf(stderr,
+ "locking: Invalid number of arguments (%d)\n",
+ argc);
+ usage();
+ exit(1);
}
+ result = lock_db(argv[4], argv[5], &state);
} else {
fprintf(stderr, "locking: Invalid lock-type '%s'\n", lock_type);
@@ -210,6 +329,21 @@ int main(int argc, char *argv[])
send_result(write_fd, result);
- ctdb_wait_for_process_to_exit(ppid);
+ req = wait_for_parent_send(ev, ev, ppid);
+ if (req == NULL) {
+ fprintf(stderr, "locking: wait_for_parent_send() failed\n");
+ cleanup(&state);
+ exit(1);
+ }
+
+ tevent_req_poll(req, ev);
+
+ status = wait_for_parent_recv(req);
+ if (! status) {
+ fprintf(stderr, "locking: wait_for_parent_recv() failed\n");
+ }
+
+ talloc_free(ev);
+ cleanup(&state);
return 0;
}
diff --git a/ctdb/tests/src/test_mutex_raw.c b/ctdb/tests/src/test_mutex_raw.c
new file mode 100644
index 0000000..8e3cae3
--- /dev/null
+++ b/ctdb/tests/src/test_mutex_raw.c
@@ -0,0 +1,261 @@
+/*
+ Robust mutex test
+
+ Copyright (C) Amitay Isaacs 2016
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * Run this test as follows:
+ *
+ * 1. Running all processes at normal priority
+ *
+ * $ while true ; do ./bin/test_mutex_raw /tmp/foo 10 0 ; done
+ *
+ * 2. Running all processes at real-time priority
+ *
+ * # while true ; do ./bin/test_mutex_raw /tmp/foo 10 1 ; done
+ *
+ * The test will block after few iterations. At this time none of the
+ * child processes is holding the mutex.
+ *
+ * To check which process is holding a lock:
+ *
+ * $ ./bin/test_mutex_raw /tmp/foo debug
+ *
+ * If no pid is printed, then no process is holding the mutex.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sched.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <errno.h>
+#include <stdbool.h>
+
+int pthread_mutex_consistent_np(pthread_mutex_t *);
+
+static void set_realtime(void)
+{
+ struct sched_param p;
+ int ret;
+
+ p.sched_priority = 1;
+
+ ret = sched_setscheduler(0, SCHED_FIFO, &p);
+ if (ret == -1) {
+ fprintf(stderr, "Failed to set scheduler to SCHED_FIFO\n");
+ }
+}
+
+static void high_priority(void)
+{
+ int ret;
+
+ ret = nice(-20);
+ if (ret == -1) {
+ fprintf(stderr, "Failed to set high priority\n");
+ }
+}
+
+static void run_child(const char *filename)
+{
+ pthread_mutex_t *mutex;
+ void *addr;
+ int ret, fd;
+
+ fd = open(filename, O_RDWR, 0600);
+ if (fd == -1) {
+ exit(1);
+ }
+
+ addr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
+ MAP_SHARED|MAP_FILE, fd, 0);
+ if (addr == NULL) {
+ exit(2);
+ }
+
+ mutex = (pthread_mutex_t *)addr;
+
+again:
+ ret = pthread_mutex_lock(mutex);
+ if (ret == EOWNERDEAD) {
+ ret = pthread_mutex_consistent_np(mutex);
+ } else if (ret == EAGAIN) {
+ goto again;
+ }
+ if (ret != 0) {
+ fprintf(stderr, "pid %u lock failed, ret=%d\n", getpid(), ret);
+ exit(3);
+ }
+
+ fprintf(stderr, "pid %u locked\n", getpid());
+ kill(getpid(), SIGKILL);
+}
+
+#define PRIO_NORMAL 0
+#define PRIO_REALTIME 1
+#define PRIO_NICE_20 2
+
+int main(int argc, const char **argv)
+{
+ pthread_mutexattr_t ma;
+ pthread_mutex_t *mutex;
+ int fd, ret, i;
+ pid_t pid;
+ void *addr;
+ int num_children;
+ int priority = PRIO_NORMAL;
+
+ if (argc < 3 || argc > 4) {
+ fprintf(stderr, "Usage: %s <file> <n> [0|1|2]\n", argv[0]);
+ fprintf(stderr, " %s <file> debug\n", argv[0]);
+ exit(1);
+ }
+
+ if (argc == 4) {
+ priority = atoi(argv[3]);
+ }
+
+ if (priority == PRIO_REALTIME) {
+ set_realtime();
+ } else if (priority == PRIO_NICE_20) {
+ high_priority();
+ }
+
+ fd = open(argv[1], O_CREAT|O_RDWR, 0600);
+ if (fd == -1) {
+ fprintf(stderr, "open failed\n");
+ exit(1);
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret != 0) {
+ fprintf(stderr, "lseek failed\n");
+ exit(1);
+ }
+
+ ret = ftruncate(fd, sizeof(pthread_mutex_t));
--
Samba Shared Repository
More information about the samba-cvs
mailing list