[SCM] CTDB repository - branch 1.2 updated - ctdb-1.9.1-283-g22c03e9

Ronnie Sahlberg sahlberg at samba.org
Mon Jan 10 14:13:52 MST 2011


The branch, 1.2 has been updated
       via  22c03e96a51324b83eaba871dc06869c1abc0229 (commit)
       via  59733d9a266feae1170974326eca4124a0986349 (commit)
       via  ebbebeec3b511ffb66bca506abc1bb867911ae95 (commit)
       via  2416a7c821652476107afbc77616f482cd0c97eb (commit)
      from  dff5b8356de639b757d02af6ca48f25e1497fe7a (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.2


- Log -----------------------------------------------------------------
commit 22c03e96a51324b83eaba871dc06869c1abc0229
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Jan 11 07:37:17 2011 +1100

    LIBCTDB uninitialized inqueue element
    
    From Michael Anderson,
    initialize the inqueue element of the ctdb structure to NULL,
    else it might be used uninitialized and cause a segv.

commit 59733d9a266feae1170974326eca4124a0986349
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 16:51:56 2011 +1100

    recoverd: avoid triggering a full recovery if just some IP allocation
    has failed.
    We don't need to rebuild the databases in this situation, we just
    need to try again to sort out the IP address allocations.

commit ebbebeec3b511ffb66bca506abc1bb867911ae95
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:57:49 2011 +1100

    Add ctdb_fork() which will fork a child process and drop the real-time
    scheduler for the child.
    
    Use ctdb_fork() from callers where we don't want the child to be running
    at real-time privilege.

commit 2416a7c821652476107afbc77616f482cd0c97eb
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:35:39 2011 +1100

    Revert scheduling back to use real-time processes
    
    Revert this patch:
    commit 482c302d46e2162d0cf552f8456bc49573ae729d
    
    We may need to use real-time processes for the main daemon and the recovery daemon to handle the cases where systems come under very high loads.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c     |    5 +++
 common/ctdb_logging.c    |    2 +-
 common/ctdb_util.c       |   66 ++++++++++++++++++++++++++++++++++++----------
 include/ctdb_private.h   |    9 ++++--
 libctdb/ctdb.c           |    1 +
 server/ctdb_daemon.c     |    5 +++-
 server/ctdb_lockwait.c   |    2 +-
 server/ctdb_logging.c    |    4 +-
 server/ctdb_monitor.c    |    2 +-
 server/ctdb_persistent.c |    2 +-
 server/ctdb_recoverd.c   |   17 +++++-------
 server/ctdb_traverse.c   |    2 +-
 server/ctdb_vacuum.c     |    2 +-
 server/ctdbd.c           |    3 +-
 server/eventscript.c     |    3 +-
 15 files changed, 86 insertions(+), 39 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 1abea12..99ff72d 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -3843,6 +3843,11 @@ int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...
 	close(ctdb->daemon.sd);
 	ctdb->daemon.sd = -1;
 
+	/* the client does not need to be realtime */
+	if (ctdb->do_setsched) {
+		ctdb_restore_scheduler(ctdb);
+	}
+
 	/* initialise ctdb */
 	ret = ctdb_socket_connect(ctdb);
 	if (ret != 0) {
diff --git a/common/ctdb_logging.c b/common/ctdb_logging.c
index de6e039..dee4dfd 100644
--- a/common/ctdb_logging.c
+++ b/common/ctdb_logging.c
@@ -157,7 +157,7 @@ int32_t ctdb_control_get_log(struct ctdb_context *ctdb, TDB_DATA addr)
 	/* spawn a child process to marshall the huge log blob and send it back
 	   to the ctdb tool using a MESSAGE
 	*/
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork a log collector child\n"));
 		return -1;
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index c705249..1ff4c1f 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -285,31 +285,69 @@ struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, st
 	return r;
 }
 
+
+#if HAVE_SCHED_H
+#include <sched.h>
+#endif
+
 /*
-  if possible, make this task very high priority
+  if possible, make this task real time
  */
-void ctdb_high_priority(struct ctdb_context *ctdb)
+void ctdb_set_scheduler(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(-20) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to renice self: %s\n",
-				     strerror(errno)));
+#if HAVE_SCHED_SETSCHEDULER	
+	struct sched_param p;
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p));
+	}
+	
+	if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		DEBUG(DEBUG_ERR,("Unable to get old scheduler params\n"));
+		return;
+	}
+
+	p = *(struct sched_param *)ctdb->saved_scheduler_param;
+	p.sched_priority = 1;
+
+	if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
+		DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n", 
+			 strerror(errno)));
 	} else {
-		DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n",
-				    getpriority(PRIO_PROCESS, getpid())));
+		DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n"));
+	}
+#endif
+}
+
+/*
+  restore previous scheduler parameters
+ */
+void ctdb_restore_scheduler(struct ctdb_context *ctdb)
+{
+#if HAVE_SCHED_SETSCHEDULER	
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb_fatal(ctdb, "No saved scheduler parameters\n");
 	}
+	if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n");
+	}
+#endif
 }
 
 /*
-  make ourselves slightly nicer: eg. a ctdb child.
+ * This function forks a child process and drops the realtime 
+ * scheduler for the child process.
  */
-void ctdb_reduce_priority(struct ctdb_context *ctdb)
+pid_t ctdb_fork(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(10) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n",
-				     strerror(errno)));
+	pid_t pid;
+
+	pid = fork();
+	if (pid == 0) {
+		if (ctdb->do_setsched) {
+			ctdb_restore_scheduler(ctdb);
+		}
 	}
+	return pid;
 }
 
 void set_nonblocking(int fd)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index ec41270..2d9315f 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -454,7 +454,9 @@ struct ctdb_context {
 	uint32_t recovery_master;
 	struct ctdb_call_state *pending_calls;
 	struct ctdb_client_ip *client_ip_list;
-	struct trbt_tree *server_ids;	
+	struct trbt_tree *server_ids; 
+	bool do_setsched;
+	void *saved_scheduler_param;
 	const char *event_script_dir;
 	const char *notification_script;
 	const char *default_public_interface;
@@ -989,8 +991,9 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb);
 void ctdb_node_dead(struct ctdb_node *node);
 void ctdb_node_connected(struct ctdb_node *node);
 bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
-void ctdb_high_priority(struct ctdb_context *ctdb);
-void ctdb_reduce_priority(struct ctdb_context *ctdb);
+void ctdb_set_scheduler(struct ctdb_context *ctdb);
+void ctdb_restore_scheduler(struct ctdb_context *ctdb);
+pid_t ctdb_fork(struct ctdb_context *ctdb);
 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
 				 struct ctdb_req_control *c,
 				 TDB_DATA indata, 
diff --git a/libctdb/ctdb.c b/libctdb/ctdb.c
index 2649524..e06c66c 100644
--- a/libctdb/ctdb.c
+++ b/libctdb/ctdb.c
@@ -154,6 +154,7 @@ struct ctdb_connection *ctdb_connect(const char *addr,
 	ctdb->outq = NULL;
 	ctdb->doneq = NULL;
 	ctdb->in = NULL;
+	ctdb->inqueue = NULL;
 	ctdb->message_handlers = NULL;
 	ctdb->next_id = 0;
 	ctdb->broken = false;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index e322453..72c7293 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -742,7 +742,10 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 
 	DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
 
-	ctdb_high_priority(ctdb);
+	if (ctdb->do_setsched) {
+		/* try to set us up as realtime */
+		ctdb_set_scheduler(ctdb);
+	}
 
 	/* ensure the socket is deleted on exit of the daemon */
 	domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 48198fd..1d3a597 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -117,7 +117,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_logging.c b/server/ctdb_logging.c
index 7e5367e..27b990e 100644
--- a/server/ctdb_logging.c
+++ b/server/ctdb_logging.c
@@ -96,7 +96,7 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 		return -1;
 	}
 	
-	ctdb->syslogd_pid = fork();
+	ctdb->syslogd_pid = ctdb_fork(ctdb);
 	if (ctdb->syslogd_pid == (pid_t)-1) {
 		printf("Failed to create syslog child process\n");
 		close(state->fd[0]);
@@ -454,7 +454,7 @@ struct ctdb_log_state *ctdb_fork_with_logging(TALLOC_CTX *mem_ctx,
 		goto free_log;
 	}
 
-	*pid = fork();
+	*pid = ctdb_fork(ctdb);
 
 	/* Child? */
 	if (*pid == 0) {
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index 7f5da5c..fa642fb 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -83,7 +83,7 @@ void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
 		return;
 	}
 
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
 		return;
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 9346f7d..f9a2051 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -524,7 +524,7 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index 9caa502..b82f0e7 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1624,13 +1624,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
+		rec->need_takeover_run = true;
 		return -1;
 	}
 	rec->need_takeover_run = false;
 	ret = ctdb_takeover_run(ctdb, nodemap);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-		return -1;
+		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+		rec->need_takeover_run = true;
 	}
 	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2045,8 +2046,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	if (ret == 0) {
 		ret = ctdb_takeover_run(ctdb, rec->nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-					 culprit));
+			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
 		}
 	}
@@ -2768,7 +2768,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		return -1;
 	}
 
-	state->child = fork();
+	state->child = ctdb_fork(ctdb);
 	if (state->child == (pid_t)-1) {
 		DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n"));
 		close(state->fd[0]);
@@ -3397,8 +3397,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 					 culprit));
-			ctdb_set_culprit(rec, culprit);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			rec->need_takeover_run = true;
 			return;
 		}
 
@@ -3413,9 +3412,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
 		ret = ctdb_takeover_run(ctdb, nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-			ctdb_set_culprit(rec, ctdb->pnn);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
 			return;
 		}
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index dcb16b2..5ad374f 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -153,7 +153,7 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 		return NULL;
 	}
 
-	h->child = fork();
+	h->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (h->child == (pid_t)-1) {
 		close(h->fd[0]);
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 4104853..4aac302 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -836,7 +836,7 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te,
 		return;
 	}
 
-	child_ctx->child_pid = fork();
+	child_ctx->child_pid = ctdb_fork(ctdb);
 	if (child_ctx->child_pid == (pid_t)-1) {
 		close(child_ctx->fd[0]);
 		close(child_ctx->fd[1]);
diff --git a/server/ctdbd.c b/server/ctdbd.c
index b90dbcd..bddd658 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -132,7 +132,7 @@ int main(int argc, const char *argv[])
 		{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
 		{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
 		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
-		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
+		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
 		{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
 		{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
 		{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,6 +315,7 @@ int main(int argc, const char *argv[])
 	}
 
 	ctdb->valgrinding = options.valgrinding;
+	ctdb->do_setsched = !ctdb->valgrinding;
 
 	if (options.max_persistent_check_errors < 0) {
 		ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/server/eventscript.c b/server/eventscript.c
index ce2fd89..9ba3a5d 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -511,9 +511,8 @@ static void debug_timeout(struct ctdb_event_script_state *state)
 	sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
 			" >/tmp/ctdb.event.%s.%d", tbuf, getpid());
 
-	pid = fork();
+	pid = ctdb_fork(state->ctdb);
 	if (pid == 0) {
-		ctdb_reduce_priority(state->ctdb);
 		system(buf);
 		/* Now we can kill the child */
 		kill(state->child, SIGTERM);


-- 
CTDB repository


More information about the samba-cvs mailing list