[SCM] CTDB repository - branch 1.2-nodeflags updated - ctdb-1.9.1-267-g4c99269

Ronnie Sahlberg sahlberg at samba.org
Mon Jan 10 14:13:52 MST 2011


The branch, 1.2-nodeflags has been updated
       via  4c99269672035c98f8f1b677654d844f716ba94c (commit)
       via  8d1306f88af5c1ba4067cc28d7b52835a612cd4b (commit)
       via  78b42769278cdf4bc4f5cf0ce9bd2f6c3a4c5610 (commit)
       via  4e4c65612c9cf097e00005ad140411431f2debe9 (commit)
      from  e16e75b6fdfc18c992e68fc01bee7758bce34a59 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.2-nodeflags


- Log -----------------------------------------------------------------
commit 4c99269672035c98f8f1b677654d844f716ba94c
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Jan 11 07:37:17 2011 +1100

    LIBCTDB uninitialized inqueue element
    
    From Michael Anderson,
    initialize the inqueue element of the ctdb structure to NULL,
    else it might be used uninitialized and cause a segv.

commit 8d1306f88af5c1ba4067cc28d7b52835a612cd4b
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 16:51:56 2011 +1100

    recoverd: avoid triggering a full recovery if just some IP allocation
    has failed.
    We don't need to rebuild the databases in this situation; we just
    need to try again to sort out the IP address allocations.

commit 78b42769278cdf4bc4f5cf0ce9bd2f6c3a4c5610
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:57:49 2011 +1100

    Add ctdb_fork(), which will fork a child process and drop the real-time
    scheduler for the child.
    
    Use ctdb_fork() from callers where we don't want the child to be running
    at real-time privilege.

commit 4e4c65612c9cf097e00005ad140411431f2debe9
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:35:39 2011 +1100

    Revert scheduling back to use real-time processes
    
    Revert this patch:
    commit 482c302d46e2162d0cf552f8456bc49573ae729d
    
    We may need to use real-time processes for the main daemon and the recovery daemon to handle the cases where systems come under very high loads.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c     |    5 +++
 common/ctdb_logging.c    |    2 +-
 common/ctdb_util.c       |   66 ++++++++++++++++++++++++++++++++++++----------
 include/ctdb_private.h   |    9 ++++--
 libctdb/ctdb.c           |    1 +
 server/ctdb_daemon.c     |    5 +++-
 server/ctdb_lockwait.c   |    2 +-
 server/ctdb_logging.c    |    4 +-
 server/ctdb_monitor.c    |    2 +-
 server/ctdb_persistent.c |    2 +-
 server/ctdb_recoverd.c   |   17 +++++-------
 server/ctdb_traverse.c   |    2 +-
 server/ctdb_vacuum.c     |    2 +-
 server/ctdbd.c           |    3 +-
 server/eventscript.c     |    3 +-
 15 files changed, 86 insertions(+), 39 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 92739f9..707a6ab 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -3786,6 +3786,11 @@ int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...
 	close(ctdb->daemon.sd);
 	ctdb->daemon.sd = -1;
 
+	/* the client does not need to be realtime */
+	if (ctdb->do_setsched) {
+		ctdb_restore_scheduler(ctdb);
+	}
+
 	/* initialise ctdb */
 	ret = ctdb_socket_connect(ctdb);
 	if (ret != 0) {
diff --git a/common/ctdb_logging.c b/common/ctdb_logging.c
index de6e039..dee4dfd 100644
--- a/common/ctdb_logging.c
+++ b/common/ctdb_logging.c
@@ -157,7 +157,7 @@ int32_t ctdb_control_get_log(struct ctdb_context *ctdb, TDB_DATA addr)
 	/* spawn a child process to marshall the huge log blob and send it back
 	   to the ctdb tool using a MESSAGE
 	*/
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork a log collector child\n"));
 		return -1;
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index 749b9c2..d0d9ddb 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -285,31 +285,69 @@ struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, st
 	return r;
 }
 
+
+#if HAVE_SCHED_H
+#include <sched.h>
+#endif
+
 /*
-  if possible, make this task very high priority
+  if possible, make this task real time
  */
-void ctdb_high_priority(struct ctdb_context *ctdb)
+void ctdb_set_scheduler(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(-20) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to renice self: %s\n",
-				     strerror(errno)));
+#if HAVE_SCHED_SETSCHEDULER	
+	struct sched_param p;
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p));
+	}
+	
+	if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		DEBUG(DEBUG_ERR,("Unable to get old scheduler params\n"));
+		return;
+	}
+
+	p = *(struct sched_param *)ctdb->saved_scheduler_param;
+	p.sched_priority = 1;
+
+	if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
+		DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n", 
+			 strerror(errno)));
 	} else {
-		DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n",
-				    getpriority(PRIO_PROCESS, getpid())));
+		DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n"));
+	}
+#endif
+}
+
+/*
+  restore previous scheduler parameters
+ */
+void ctdb_restore_scheduler(struct ctdb_context *ctdb)
+{
+#if HAVE_SCHED_SETSCHEDULER	
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb_fatal(ctdb, "No saved scheduler parameters\n");
 	}
+	if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n");
+	}
+#endif
 }
 
 /*
-  make ourselves slightly nicer: eg. a ctdb child.
+ * This function forks a child process and drops the realtime 
+ * scheduler for the child process.
  */
-void ctdb_reduce_priority(struct ctdb_context *ctdb)
+pid_t ctdb_fork(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(10) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n",
-				     strerror(errno)));
+	pid_t pid;
+
+	pid = fork();
+	if (pid == 0) {
+		if (ctdb->do_setsched) {
+			ctdb_restore_scheduler(ctdb);
+		}
 	}
+	return pid;
 }
 
 void set_nonblocking(int fd)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 8b6a9d3..dd50ed3 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -455,7 +455,9 @@ struct ctdb_context {
 	uint32_t recovery_master;
 	struct ctdb_call_state *pending_calls;
 	struct ctdb_client_ip *client_ip_list;
-	struct trbt_tree *server_ids;	
+	struct trbt_tree *server_ids; 
+	bool do_setsched;
+	void *saved_scheduler_param;
 	const char *event_script_dir;
 	const char *notification_script;
 	const char *default_public_interface;
@@ -989,8 +991,9 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb);
 void ctdb_node_dead(struct ctdb_node *node);
 void ctdb_node_connected(struct ctdb_node *node);
 bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
-void ctdb_high_priority(struct ctdb_context *ctdb);
-void ctdb_reduce_priority(struct ctdb_context *ctdb);
+void ctdb_set_scheduler(struct ctdb_context *ctdb);
+void ctdb_restore_scheduler(struct ctdb_context *ctdb);
+pid_t ctdb_fork(struct ctdb_context *ctdb);
 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
 				 struct ctdb_req_control *c,
 				 TDB_DATA indata, 
diff --git a/libctdb/ctdb.c b/libctdb/ctdb.c
index 2649524..e06c66c 100644
--- a/libctdb/ctdb.c
+++ b/libctdb/ctdb.c
@@ -154,6 +154,7 @@ struct ctdb_connection *ctdb_connect(const char *addr,
 	ctdb->outq = NULL;
 	ctdb->doneq = NULL;
 	ctdb->in = NULL;
+	ctdb->inqueue = NULL;
 	ctdb->message_handlers = NULL;
 	ctdb->next_id = 0;
 	ctdb->broken = false;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index e322453..72c7293 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -742,7 +742,10 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 
 	DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
 
-	ctdb_high_priority(ctdb);
+	if (ctdb->do_setsched) {
+		/* try to set us up as realtime */
+		ctdb_set_scheduler(ctdb);
+	}
 
 	/* ensure the socket is deleted on exit of the daemon */
 	domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 48198fd..1d3a597 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -117,7 +117,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_logging.c b/server/ctdb_logging.c
index 7e5367e..27b990e 100644
--- a/server/ctdb_logging.c
+++ b/server/ctdb_logging.c
@@ -96,7 +96,7 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 		return -1;
 	}
 	
-	ctdb->syslogd_pid = fork();
+	ctdb->syslogd_pid = ctdb_fork(ctdb);
 	if (ctdb->syslogd_pid == (pid_t)-1) {
 		printf("Failed to create syslog child process\n");
 		close(state->fd[0]);
@@ -454,7 +454,7 @@ struct ctdb_log_state *ctdb_fork_with_logging(TALLOC_CTX *mem_ctx,
 		goto free_log;
 	}
 
-	*pid = fork();
+	*pid = ctdb_fork(ctdb);
 
 	/* Child? */
 	if (*pid == 0) {
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 35be474..3d3531b 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -83,7 +83,7 @@ void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
 		return;
 	}
 
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
 		return;
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 9346f7d..f9a2051 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -524,7 +524,7 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 053e8e0..5199fb4 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1683,13 +1683,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
+		rec->need_takeover_run = true;
 		return -1;
 	}
 	rec->need_takeover_run = false;
 	ret = ctdb_takeover_run(ctdb, nodemap);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-		return -1;
+		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+		rec->need_takeover_run = true;
 	}
 	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2124,8 +2125,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	if (ret == 0) {
 		ret = ctdb_takeover_run(ctdb, rec->nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-					 culprit));
+			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
 		}
 	}
@@ -2717,7 +2717,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		return -1;
 	}
 
-	state->child = fork();
+	state->child = ctdb_fork(ctdb);
 	if (state->child == (pid_t)-1) {
 		DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n"));
 		close(state->fd[0]);
@@ -3350,8 +3350,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 					 culprit));
-			ctdb_set_culprit(rec, culprit);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			rec->need_takeover_run = true;
 			return;
 		}
 
@@ -3366,9 +3365,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
 		ret = ctdb_takeover_run(ctdb, nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-			ctdb_set_culprit(rec, ctdb->pnn);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
 			return;
 		}
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index dcb16b2..5ad374f 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -153,7 +153,7 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 		return NULL;
 	}
 
-	h->child = fork();
+	h->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (h->child == (pid_t)-1) {
 		close(h->fd[0]);
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 4104853..4aac302 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -836,7 +836,7 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te,
 		return;
 	}
 
-	child_ctx->child_pid = fork();
+	child_ctx->child_pid = ctdb_fork(ctdb);
 	if (child_ctx->child_pid == (pid_t)-1) {
 		close(child_ctx->fd[0]);
 		close(child_ctx->fd[1]);
diff --git a/server/ctdbd.c b/server/ctdbd.c
index b90dbcd..bddd658 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -132,7 +132,7 @@ int main(int argc, const char *argv[])
 		{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
 		{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
 		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
-		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
+		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
 		{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
 		{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
 		{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,6 +315,7 @@ int main(int argc, const char *argv[])
 	}
 
 	ctdb->valgrinding = options.valgrinding;
+	ctdb->do_setsched = !ctdb->valgrinding;
 
 	if (options.max_persistent_check_errors < 0) {
 		ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/server/eventscript.c b/server/eventscript.c
index 49b5b3a..3ce9629 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -511,9 +511,8 @@ static void debug_timeout(struct ctdb_event_script_state *state)
 	sprintf(buf, "{ pstree -p -a; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
 			" >/tmp/ctdb.event.%s.%d", tbuf, getpid());
 
-	pid = fork();
+	pid = ctdb_fork(state->ctdb);
 	if (pid == 0) {
-		ctdb_reduce_priority(state->ctdb);
 		system(buf);
 		/* Now we can kill the child */
 		kill(state->child, SIGTERM);


-- 
CTDB repository


More information about the samba-cvs mailing list