[SCM] CTDB repository - branch master updated - ctdb-1.10-60-g775d021

Ronnie Sahlberg sahlberg at samba.org
Mon Jan 10 14:13:52 MST 2011


The branch, master has been updated
       via  775d02180b825ae32d6536eaf2059884d5fed9f4 (commit)
       via  044c398ffea23d36ee033c8ddf07d11028197346 (commit)
       via  58795a4c9e0624e20fa3e0023b65127053edd103 (commit)
       via  08bef9dcab6e4da15fc783f8624e5ed09aa060b5 (commit)
      from  6075e85ba6c0f58fd1ab2ce3b09dd3d6ff491365 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 775d02180b825ae32d6536eaf2059884d5fed9f4
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Tue Jan 11 07:37:17 2011 +1100

    LIBCTDB uninitialized inqueue element
    
    From Michael Anderson:
    initialize the inqueue element of the ctdb structure to NULL;
    otherwise it might be used uninitialized and cause a segv.

commit 044c398ffea23d36ee033c8ddf07d11028197346
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 16:51:56 2011 +1100

    recoverd: avoid triggering a full recovery if only some IP allocation
    has failed.
    We don't need to rebuild the databases in this situation; we just
    need to try again to sort out the IP address allocations.

commit 58795a4c9e0624e20fa3e0023b65127053edd103
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:57:49 2011 +1100

    Add ctdb_fork() which will fork a child process and drop the real-time
    scheduler for the child.
    
    Use ctdb_fork() from callers where we don't want the child to be running
    at real-time privilege.

commit 08bef9dcab6e4da15fc783f8624e5ed09aa060b5
Author: Ronnie Sahlberg <ronniesahlberg at gmail.com>
Date:   Mon Jan 10 13:35:39 2011 +1100

    Revert scheduling back to use real-time processes
    
    Revert this patch:
    commit 482c302d46e2162d0cf552f8456bc49573ae729d
    
    We may need to use real-time processes for the main daemon and the recovery daemon to handle the cases where systems come under very high loads.

-----------------------------------------------------------------------

Summary of changes:
 client/ctdb_client.c     |    5 +++
 common/ctdb_logging.c    |    2 +-
 common/ctdb_util.c       |   66 ++++++++++++++++++++++++++++++++++++----------
 include/ctdb_private.h   |    9 ++++--
 libctdb/ctdb.c           |    1 +
 server/ctdb_daemon.c     |    5 +++-
 server/ctdb_lockwait.c   |    2 +-
 server/ctdb_logging.c    |    4 +-
 server/ctdb_monitor.c    |    2 +-
 server/ctdb_persistent.c |    2 +-
 server/ctdb_recoverd.c   |   17 +++++-------
 server/ctdb_traverse.c   |    2 +-
 server/ctdb_vacuum.c     |    2 +-
 server/ctdbd.c           |    3 +-
 server/eventscript.c     |    3 +-
 15 files changed, 86 insertions(+), 39 deletions(-)


Changeset truncated at 500 lines:

diff --git a/client/ctdb_client.c b/client/ctdb_client.c
index 4c770fd..5a07a85 100644
--- a/client/ctdb_client.c
+++ b/client/ctdb_client.c
@@ -3850,6 +3850,11 @@ int switch_from_server_to_client(struct ctdb_context *ctdb, const char *fmt, ...
 	close(ctdb->daemon.sd);
 	ctdb->daemon.sd = -1;
 
+	/* the client does not need to be realtime */
+	if (ctdb->do_setsched) {
+		ctdb_restore_scheduler(ctdb);
+	}
+
 	/* initialise ctdb */
 	ret = ctdb_socket_connect(ctdb);
 	if (ret != 0) {
diff --git a/common/ctdb_logging.c b/common/ctdb_logging.c
index de6e039..dee4dfd 100644
--- a/common/ctdb_logging.c
+++ b/common/ctdb_logging.c
@@ -157,7 +157,7 @@ int32_t ctdb_control_get_log(struct ctdb_context *ctdb, TDB_DATA addr)
 	/* spawn a child process to marshall the huge log blob and send it back
 	   to the ctdb tool using a MESSAGE
 	*/
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork a log collector child\n"));
 		return -1;
diff --git a/common/ctdb_util.c b/common/ctdb_util.c
index 3e01173..88741e3 100644
--- a/common/ctdb_util.c
+++ b/common/ctdb_util.c
@@ -292,31 +292,69 @@ struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, st
 	return r;
 }
 
+
+#if HAVE_SCHED_H
+#include <sched.h>
+#endif
+
 /*
-  if possible, make this task very high priority
+  if possible, make this task real time
  */
-void ctdb_high_priority(struct ctdb_context *ctdb)
+void ctdb_set_scheduler(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(-20) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to renice self: %s\n",
-				     strerror(errno)));
+#if HAVE_SCHED_SETSCHEDULER	
+	struct sched_param p;
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p));
+	}
+	
+	if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		DEBUG(DEBUG_ERR,("Unable to get old scheduler params\n"));
+		return;
+	}
+
+	p = *(struct sched_param *)ctdb->saved_scheduler_param;
+	p.sched_priority = 1;
+
+	if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
+		DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n", 
+			 strerror(errno)));
 	} else {
-		DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n",
-				    getpriority(PRIO_PROCESS, getpid())));
+		DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n"));
+	}
+#endif
+}
+
+/*
+  restore previous scheduler parameters
+ */
+void ctdb_restore_scheduler(struct ctdb_context *ctdb)
+{
+#if HAVE_SCHED_SETSCHEDULER	
+	if (ctdb->saved_scheduler_param == NULL) {
+		ctdb_fatal(ctdb, "No saved scheduler parameters\n");
 	}
+	if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
+		ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n");
+	}
+#endif
 }
 
 /*
-  make ourselves slightly nicer: eg. a ctdb child.
+ * This function forks a child process and drops the realtime 
+ * scheduler for the child process.
  */
-void ctdb_reduce_priority(struct ctdb_context *ctdb)
+pid_t ctdb_fork(struct ctdb_context *ctdb)
 {
-	errno = 0;
-	if (nice(10) == -1 && errno != 0) {
-		DEBUG(DEBUG_WARNING,("Unable to lower priority: %s\n",
-				     strerror(errno)));
+	pid_t pid;
+
+	pid = fork();
+	if (pid == 0) {
+		if (ctdb->do_setsched) {
+			ctdb_restore_scheduler(ctdb);
+		}
 	}
+	return pid;
 }
 
 void set_nonblocking(int fd)
diff --git a/include/ctdb_private.h b/include/ctdb_private.h
index 243a4dd..f62125c 100644
--- a/include/ctdb_private.h
+++ b/include/ctdb_private.h
@@ -455,7 +455,9 @@ struct ctdb_context {
 	uint32_t recovery_master;
 	struct ctdb_call_state *pending_calls;
 	struct ctdb_client_ip *client_ip_list;
-	struct trbt_tree *server_ids;	
+	struct trbt_tree *server_ids; 
+	bool do_setsched;
+	void *saved_scheduler_param;
 	const char *event_script_dir;
 	const char *notification_script;
 	const char *default_public_interface;
@@ -990,8 +992,9 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb);
 void ctdb_node_dead(struct ctdb_node *node);
 void ctdb_node_connected(struct ctdb_node *node);
 bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
-void ctdb_high_priority(struct ctdb_context *ctdb);
-void ctdb_reduce_priority(struct ctdb_context *ctdb);
+void ctdb_set_scheduler(struct ctdb_context *ctdb);
+void ctdb_restore_scheduler(struct ctdb_context *ctdb);
+pid_t ctdb_fork(struct ctdb_context *ctdb);
 int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, 
 				 struct ctdb_req_control *c,
 				 TDB_DATA indata, 
diff --git a/libctdb/ctdb.c b/libctdb/ctdb.c
index 2649524..e06c66c 100644
--- a/libctdb/ctdb.c
+++ b/libctdb/ctdb.c
@@ -154,6 +154,7 @@ struct ctdb_connection *ctdb_connect(const char *addr,
 	ctdb->outq = NULL;
 	ctdb->doneq = NULL;
 	ctdb->in = NULL;
+	ctdb->inqueue = NULL;
 	ctdb->message_handlers = NULL;
 	ctdb->next_id = 0;
 	ctdb->broken = false;
diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c
index e322453..72c7293 100644
--- a/server/ctdb_daemon.c
+++ b/server/ctdb_daemon.c
@@ -742,7 +742,10 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog,
 
 	DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
 
-	ctdb_high_priority(ctdb);
+	if (ctdb->do_setsched) {
+		/* try to set us up as realtime */
+		ctdb_set_scheduler(ctdb);
+	}
 
 	/* ensure the socket is deleted on exit of the daemon */
 	domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);
diff --git a/server/ctdb_lockwait.c b/server/ctdb_lockwait.c
index 48198fd..1d3a597 100644
--- a/server/ctdb_lockwait.c
+++ b/server/ctdb_lockwait.c
@@ -117,7 +117,7 @@ struct lockwait_handle *ctdb_lockwait(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_logging.c b/server/ctdb_logging.c
index 7e5367e..27b990e 100644
--- a/server/ctdb_logging.c
+++ b/server/ctdb_logging.c
@@ -96,7 +96,7 @@ int start_syslog_daemon(struct ctdb_context *ctdb)
 		return -1;
 	}
 	
-	ctdb->syslogd_pid = fork();
+	ctdb->syslogd_pid = ctdb_fork(ctdb);
 	if (ctdb->syslogd_pid == (pid_t)-1) {
 		printf("Failed to create syslog child process\n");
 		close(state->fd[0]);
@@ -454,7 +454,7 @@ struct ctdb_log_state *ctdb_fork_with_logging(TALLOC_CTX *mem_ctx,
 		goto free_log;
 	}
 
-	*pid = fork();
+	*pid = ctdb_fork(ctdb);
 
 	/* Child? */
 	if (*pid == 0) {
diff --git a/server/ctdb_monitor.c b/server/ctdb_monitor.c
index dff6f42..416e4c5 100644
--- a/server/ctdb_monitor.c
+++ b/server/ctdb_monitor.c
@@ -83,7 +83,7 @@ void ctdb_run_notification_script(struct ctdb_context *ctdb, const char *event)
 		return;
 	}
 
-	child = fork();
+	child = ctdb_fork(ctdb);
 	if (child == (pid_t)-1) {
 		DEBUG(DEBUG_ERR,("Failed to fork() a notification child process\n"));
 		return;
diff --git a/server/ctdb_persistent.c b/server/ctdb_persistent.c
index 9346f7d..f9a2051 100644
--- a/server/ctdb_persistent.c
+++ b/server/ctdb_persistent.c
@@ -524,7 +524,7 @@ struct childwrite_handle *ctdb_childwrite(struct ctdb_db_context *ctdb_db,
 		return NULL;
 	}
 
-	result->child = fork();
+	result->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (result->child == (pid_t)-1) {
 		close(result->fd[0]);
diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c
index e0f01fb..d75370d 100644
--- a/server/ctdb_recoverd.c
+++ b/server/ctdb_recoverd.c
@@ -1626,13 +1626,14 @@ static int do_recovery(struct ctdb_recoverd *rec,
 	if (ret != 0) {
 		DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 				 culprit));
+		rec->need_takeover_run = true;
 		return -1;
 	}
 	rec->need_takeover_run = false;
 	ret = ctdb_takeover_run(ctdb, nodemap);
 	if (ret != 0) {
-		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses\n"));
-		return -1;
+		DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. ctdb_takeover_run() failed.\n"));
+		rec->need_takeover_run = true;
 	}
 	DEBUG(DEBUG_NOTICE, (__location__ " Recovery - takeip finished\n"));
 
@@ -2047,8 +2048,7 @@ static void process_ipreallocate_requests(struct ctdb_context *ctdb, struct ctdb
 	if (ret == 0) {
 		ret = ctdb_takeover_run(ctdb, rec->nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
-					 culprit));
+			DEBUG(DEBUG_ERR,("Failed to reallocate addresses: ctdb_takeover_run() failed.\n"));
 			rec->need_takeover_run = true;
 		}
 	}
@@ -2770,7 +2770,7 @@ static int check_recovery_lock(struct ctdb_context *ctdb)
 		return -1;
 	}
 
-	state->child = fork();
+	state->child = ctdb_fork(ctdb);
 	if (state->child == (pid_t)-1) {
 		DEBUG(DEBUG_CRIT,(__location__ " fork() failed in check_reclock child\n"));
 		close(state->fd[0]);
@@ -3400,8 +3400,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 		if (ret != 0) {
 			DEBUG(DEBUG_ERR,("Failed to read public ips from remote node %d\n",
 					 culprit));
-			ctdb_set_culprit(rec, culprit);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			rec->need_takeover_run = true;
 			return;
 		}
 
@@ -3416,9 +3415,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec,
 
 		ret = ctdb_takeover_run(ctdb, nodemap);
 		if (ret != 0) {
-			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses - starting recovery\n"));
-			ctdb_set_culprit(rec, ctdb->pnn);
-			do_recovery(rec, mem_ctx, pnn, nodemap, vnnmap);
+			DEBUG(DEBUG_ERR, (__location__ " Unable to setup public takeover addresses. Try again later\n"));
 			return;
 		}
 
diff --git a/server/ctdb_traverse.c b/server/ctdb_traverse.c
index dcb16b2..5ad374f 100644
--- a/server/ctdb_traverse.c
+++ b/server/ctdb_traverse.c
@@ -153,7 +153,7 @@ static struct ctdb_traverse_local_handle *ctdb_traverse_local(struct ctdb_db_con
 		return NULL;
 	}
 
-	h->child = fork();
+	h->child = ctdb_fork(ctdb_db->ctdb);
 
 	if (h->child == (pid_t)-1) {
 		close(h->fd[0]);
diff --git a/server/ctdb_vacuum.c b/server/ctdb_vacuum.c
index 4104853..4aac302 100644
--- a/server/ctdb_vacuum.c
+++ b/server/ctdb_vacuum.c
@@ -836,7 +836,7 @@ ctdb_vacuum_event(struct event_context *ev, struct timed_event *te,
 		return;
 	}
 
-	child_ctx->child_pid = fork();
+	child_ctx->child_pid = ctdb_fork(ctdb);
 	if (child_ctx->child_pid == (pid_t)-1) {
 		close(child_ctx->fd[0]);
 		close(child_ctx->fd[1]);
diff --git a/server/ctdbd.c b/server/ctdbd.c
index b90dbcd..bddd658 100644
--- a/server/ctdbd.c
+++ b/server/ctdbd.c
@@ -132,7 +132,7 @@ int main(int argc, const char *argv[])
 		{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
 		{ "dbdir-state", 0, POPT_ARG_STRING, &options.db_dir_state, 0, "directory for internal state tdb files", NULL },
 		{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
-		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
+		{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
 		{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
 		{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
 		{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@@ -315,6 +315,7 @@ int main(int argc, const char *argv[])
 	}
 
 	ctdb->valgrinding = options.valgrinding;
+	ctdb->do_setsched = !ctdb->valgrinding;
 
 	if (options.max_persistent_check_errors < 0) {
 		ctdb->max_persistent_check_errors = 0xFFFFFFFFFFFFFFFFLL;
diff --git a/server/eventscript.c b/server/eventscript.c
index ce2fd89..9ba3a5d 100644
--- a/server/eventscript.c
+++ b/server/eventscript.c
@@ -511,9 +511,8 @@ static void debug_timeout(struct ctdb_event_script_state *state)
 	sprintf(buf, "{ pstree -p; cat /proc/locks; ls -li /var/ctdb/ /var/ctdb/persistent; }"
 			" >/tmp/ctdb.event.%s.%d", tbuf, getpid());
 
-	pid = fork();
+	pid = ctdb_fork(state->ctdb);
 	if (pid == 0) {
-		ctdb_reduce_priority(state->ctdb);
 		system(buf);
 		/* Now we can kill the child */
 		kill(state->child, SIGTERM);


-- 
CTDB repository


More information about the samba-cvs mailing list