[PATCH] Add process restart to pre-fork process model
Gary Lockyer
gary at catalyst.net.nz
Tue Nov 20 04:08:06 UTC 2018
Updated patch set that applies to current master, conflicted with latest
WHATSNEW.
With the patch this time.
Thanks
Gary
On 14/11/18 13:22, Gary Lockyer via samba-technical wrote:
> This patch set modifies the pre-fork process model so that it will
> restart failed processes. Restart attempts are backed of using a linear
> back off strategy up to a configurable maximum delay.
>
> CI results: https://gitlab.com/samba-team/devel/samba/pipelines/36482415
>
> Reviews and comments appreciated
>
> Gary
>
-------------- next part --------------
From 585a9888469070d6a9f5bbeb0786ff95c716b722 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Tue, 18 Sep 2018 08:37:02 +1200
Subject: [PATCH 01/11] source4 smbd test: prefork process restart
Add tests for the restarting of failed/terminated process, by the
pre-fork process model.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
python/samba/tests/prefork_restart.py | 446 ++++++++++++++++++++++++++
selftest/knownfail.d/prefork_restart | 8 +
selftest/target/Samba.pm | 1 +
selftest/target/Samba4.pm | 39 +++
source4/selftest/tests.py | 12 +
5 files changed, 506 insertions(+)
create mode 100644 python/samba/tests/prefork_restart.py
create mode 100644 selftest/knownfail.d/prefork_restart
diff --git a/python/samba/tests/prefork_restart.py b/python/samba/tests/prefork_restart.py
new file mode 100644
index 00000000000..90037d99461
--- /dev/null
+++ b/python/samba/tests/prefork_restart.py
@@ -0,0 +1,446 @@
+# Tests for process restarting in the pre-fork process model
+#
+# Copyright (C) Andrew Bartlett <abartlet at samba.org> 2018
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+from __future__ import print_function
+"""Tests process restarting in the pre-fork process model.
+ NOTE: As this test kills samba processes it won't play nicely with other
+ tests, so needs to be run in it's own environment.
+"""
+
+
+import os
+import signal
+import time
+
+import samba
+from samba.tests import TestCase, delete_force
+from samba.dcerpc import echo, netlogon
+from samba.messaging import Messaging
+from samba.samdb import SamDB
+from samba.credentials import Credentials, DONT_USE_KERBEROS
+from samba.compat import get_string
+from samba.dsdb import (
+ UF_WORKSTATION_TRUST_ACCOUNT,
+ UF_PASSWD_NOTREQD)
+from samba.dcerpc.misc import SEC_CHAN_WKSTA
+from samba.auth import system_session
+
+NUM_WORKERS = 4
+MACHINE_NAME = "PFRS"
+
+
+class PreforkProcessRestartTests(TestCase):
+
+ def setUp(self):
+ super(PreforkProcessRestartTests, self).setUp()
+ lp_ctx = self.get_loadparm()
+ self.msg_ctx = Messaging(lp_ctx=lp_ctx)
+
+ def tearDown(self):
+ super(PreforkProcessRestartTests, self).tearDown()
+
+ def get_process_data(self):
+ services = self.msg_ctx.irpc_all_servers()
+
+ processes = []
+ for service in services:
+ for id in service.ids:
+ processes.append((service.name, id.pid))
+ return processes
+
+ def get_process(self, name):
+ processes = self.get_process_data()
+ for pname, pid in processes:
+ if name == pname:
+ return pid
+ return None
+
+ def get_worker_pids(self, name, workers):
+ pids = []
+ for x in range(workers):
+ process_name = "prefork-worker-{0}-{1}".format(name, x)
+ pids.append(self.get_process(process_name))
+ self.assertIsNotNone(pids[x])
+ return pids
+
+ def wait_for_process(self, name, pid, initial_delay, wait, timeout):
+ time.sleep(initial_delay)
+ delay = initial_delay
+ while delay < timeout:
+ p = self.get_process(name)
+ if p is not None and p != pid:
+ # process has restarted
+ return
+ time.sleep(wait)
+ delay += wait
+ self.fail("Times out after {0} seconds waiting for {1} to restart".
+ format(delay, name))
+
+ def check_for_duplicate_processes(self):
+ processes = self.get_process_data()
+ process_map = {}
+ for name, p in processes:
+ if (name.startswith("prefork-") or
+ name.endswith("_server") or
+ name.endswith("srv")):
+
+ if name in process_map:
+ if p != process_map[name]:
+ self.fail(
+ "Duplicate process for {0}, pids {1} and {2}".
+ format(name, p, process_map[name]))
+
+ def simple_bind(self):
+ creds = self.insta_creds(template=self.get_credentials())
+ creds.set_bind_dn("%s\\%s" % (creds.get_domain(),
+ creds.get_username()))
+
+ self.samdb = SamDB(url="ldaps://%s" % os.environ["SERVER"],
+ lp=self.get_loadparm(),
+ credentials=creds)
+
+ def rpc_echo(self):
+ conn = echo.rpcecho("ncalrpc:", self.get_loadparm())
+ self.assertEquals([1, 2, 3], conn.EchoData([1, 2, 3]))
+
+ def netlogon(self):
+ server = os.environ["SERVER"]
+ host = os.environ["SERVER_IP"]
+ lp = self.get_loadparm()
+
+ credentials = self.get_credentials()
+
+ session = system_session()
+ ldb = SamDB(url="ldap://%s" % host,
+ session_info=session,
+ credentials=credentials,
+ lp=lp)
+ machine_pass = samba.generate_random_password(32, 32)
+ machine_name = MACHINE_NAME
+ machine_dn = "cn=%s,%s" % (machine_name, ldb.domain_dn())
+
+ delete_force(ldb, machine_dn)
+
+ utf16pw = ('"%s"' % get_string(machine_pass)).encode('utf-16-le')
+ ldb.add({
+ "dn": machine_dn,
+ "objectclass": "computer",
+ "sAMAccountName": "%s$" % machine_name,
+ "userAccountControl":
+ str(UF_WORKSTATION_TRUST_ACCOUNT | UF_PASSWD_NOTREQD),
+ "unicodePwd": utf16pw})
+
+ machine_creds = Credentials()
+ machine_creds.guess(lp)
+ machine_creds.set_secure_channel_type(SEC_CHAN_WKSTA)
+ machine_creds.set_kerberos_state(DONT_USE_KERBEROS)
+ machine_creds.set_password(machine_pass)
+ machine_creds.set_username(machine_name + "$")
+ machine_creds.set_workstation(machine_name)
+
+ netlogon.netlogon(
+ "ncacn_ip_tcp:%s[schannel,seal]" % server,
+ lp,
+ machine_creds)
+
+ delete_force(ldb, machine_dn)
+
+ def test_ldap_master_restart(self):
+ # check ldap connection, do a simple bind
+ self.simple_bind()
+
+ # get ldap master process
+ pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("ldap", NUM_WORKERS)
+
+ # kill it
+ os.kill(pid, signal.SIGTERM)
+
+ # wait for the process to restart
+ self.wait_for_process("prefork-master-ldap", pid, 1, 1, 30)
+
+ # get ldap master process
+ new_pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has changed
+ self.assertNotEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("ldap", NUM_WORKERS)
+ for x in range(NUM_WORKERS):
+ self.assertNotEquals(workers[x], new_workers[x])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check ldap connection, another simple bind
+ self.simple_bind()
+
+ def test_ldap_worker_restart(self):
+ # check ldap connection, do a simple bind
+ self.simple_bind()
+
+ # get ldap master process
+ pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("ldap", NUM_WORKERS)
+
+ # kill worker 0
+ os.kill(workers[0], signal.SIGTERM)
+
+ # wait for the process to restart
+ self.wait_for_process("prefork-worker-ldap-0", pid, 1, 1, 30)
+
+ # get ldap master process
+ new_pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has not changed
+ self.assertEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("ldap", NUM_WORKERS)
+ # process 0 should have a new pid the others should be unchanged
+ self.assertNotEquals(workers[0], new_workers[0])
+ self.assertEquals(workers[1], new_workers[1])
+ self.assertEquals(workers[2], new_workers[2])
+ self.assertEquals(workers[3], new_workers[3])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check ldap connection, another simple bind
+ self.simple_bind()
+
+ #
+ # Kill all the ldap worker processes and ensure that they are restarted
+ # correctly
+ #
+ def test_ldap_all_workers_restart(self):
+ # check ldap connection, do a simple bind
+ self.simple_bind()
+
+ # get ldap master process
+ pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("ldap", NUM_WORKERS)
+
+ # kill all the worker processes
+ for x in workers:
+ os.kill(x, signal.SIGTERM)
+
+ # wait for the worker processes to restart
+ for x in range(NUM_WORKERS):
+ self.wait_for_process(
+ "prefork-worker-ldap-{0}".format(x), workers[x], 0, 1, 30)
+
+ # get ldap master process
+ new_pid = self.get_process("prefork-master-ldap")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has not changed
+ self.assertEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("ldap", NUM_WORKERS)
+ for x in range(4):
+ self.assertNotEquals(workers[x], new_workers[x])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check ldap connection, another simple bind
+ self.simple_bind()
+
+ def test_rpc_master_restart(self):
+ # check rpc connection, make a rpc echo request
+ self.rpc_echo()
+
+ # get rpc master process
+ pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("rpc", NUM_WORKERS)
+
+ # kill it
+ os.kill(pid, signal.SIGTERM)
+
+ # wait for the process to restart
+ self.wait_for_process("prefork-master-rpc", pid, 1, 1, 30)
+
+ # get ldap master process
+ new_pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has changed
+ self.assertNotEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("rpc", NUM_WORKERS)
+ for x in range(NUM_WORKERS):
+ self.assertNotEquals(workers[x], new_workers[x])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check rpc connection, another rpc echo request
+ self.rpc_echo()
+
+ def test_rpc_worker_zero_restart(self):
+ # check rpc connection, make a rpc echo request and a netlogon request
+ self.rpc_echo()
+ self.netlogon()
+
+ # get rpc master process
+ pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("rpc", NUM_WORKERS)
+
+ # kill worker 0
+ os.kill(workers[0], signal.SIGTERM)
+
+ # wait for the process to restart
+ self.wait_for_process("prefork-worker-rpc-0", pid, 1, 1, 30)
+
+ # get rpc master process
+ new_pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has not changed
+ self.assertEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("rpc", NUM_WORKERS)
+ # process 0 should have a new pid the others should be unchanged
+ self.assertNotEquals(workers[0], new_workers[0])
+ self.assertEquals(workers[1], new_workers[1])
+ self.assertEquals(workers[2], new_workers[2])
+ self.assertEquals(workers[3], new_workers[3])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check rpc connection, another rpc echo request, and netlogon request
+ self.rpc_echo()
+ self.netlogon()
+
+ def test_rpc_all_workers_restart(self):
+ # check rpc connection, make a rpc echo request, and a netlogon request
+ self.rpc_echo()
+ self.netlogon()
+
+ # get rpc master process
+ pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("rpc", NUM_WORKERS)
+
+ # kill all the worker processes
+ for x in workers:
+ os.kill(x, signal.SIGTERM)
+
+ # wait for the worker processes to restart
+ for x in range(NUM_WORKERS):
+ self.wait_for_process(
+ "prefork-worker-rpc-{0}".format(x), workers[x], 0, 1, 30)
+
+ # get rpc master process
+ new_pid = self.get_process("prefork-master-rpc")
+ self.assertIsNotNone(new_pid)
+
+ # check that the pid has not changed
+ self.assertEquals(pid, new_pid)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("rpc", NUM_WORKERS)
+ for x in range(4):
+ self.assertNotEquals(workers[x], new_workers[x])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ # check rpc connection, another rpc echo request and netlogon
+ self.rpc_echo()
+ self.netlogon()
+
+ def test_master_restart_backoff(self):
+
+ # get kdc master process
+ pid = self.get_process("prefork-master-kdc")
+ self.assertIsNotNone(pid)
+
+ # Get the worker processes
+ workers = self.get_worker_pids("kdc", NUM_WORKERS)
+
+ #
+ # Check that the processes get backed off as expected
+ #
+ # have prefork backoff increment = 5
+ # prefork maximum backoff = 10
+ # Set the expected values to one less than the next ideal value
+ # 0, 5, 10, 10, ...
+ for expected in [4, 9, 14, 14]:
+ process = self.get_process("prefork-master-kdc")
+ os.kill(process, signal.SIGTERM)
+ # wait for the process to restart
+ start = time.time()
+ self.wait_for_process("prefork-master-kdc", process, 0, 1, 30)
+ end = time.time()
+ duration = end - start
+ self.assertTrue(duration < expected)
+
+ # check that the worker processes have restarted
+ new_workers = self.get_worker_pids("kdc", NUM_WORKERS)
+ for x in range(NUM_WORKERS):
+ self.assertNotEquals(workers[x], new_workers[x])
+
+ # check that the previous server entries have been removed.
+ self.check_for_duplicate_processes()
+
+ def test_worker_restart_backoff(self):
+ #
+ # Check that the processes get backed off as expected
+ #
+ # have prefork backoff increment = 5
+ # prefork maximum backoff = 10
+ # Set the expected values to one less than the next ideal value
+ # 0, 5, 10, 10, ...
+ for expected in [4, 9, 14, 14]:
+ process = self.get_process("prefork-worker-kdc-2")
+ self.assertIsNotNone(process)
+ os.kill(process, signal.SIGTERM)
+ # wait for the process to restart
+ start = time.time()
+ self.wait_for_process("prefork-worker-kdc-2", process, 0, 1, 30)
+ end = time.time()
+ duration = end - start
+ self.assertTrue(duration < expected)
+
+ self.check_for_duplicate_processes()
diff --git a/selftest/knownfail.d/prefork_restart b/selftest/knownfail.d/prefork_restart
new file mode 100644
index 00000000000..020199cd9b4
--- /dev/null
+++ b/selftest/knownfail.d/prefork_restart
@@ -0,0 +1,8 @@
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_all_workers_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_master_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_worker_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_master_restart_backoff
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_all_workers_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_master_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_worker_zero_restart
+^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_worker_restart_backoff
diff --git a/selftest/target/Samba.pm b/selftest/target/Samba.pm
index aa6ec9ee931..5f83645476f 100644
--- a/selftest/target/Samba.pm
+++ b/selftest/target/Samba.pm
@@ -424,6 +424,7 @@ sub get_interface($)
$interfaces{"labdc"} = 43;
$interfaces{"offlinebackupdc"} = 44;
$interfaces{"customdc"} = 45;
+ $interfaces{"prfrkrstrt"} = 46;
# update lib/socket_wrapper/socket_wrapper.c
# #define MAX_WRAPPED_INTERFACES 64
diff --git a/selftest/target/Samba4.pm b/selftest/target/Samba4.pm
index 5de0a706f35..e667acce4e3 100755
--- a/selftest/target/Samba4.pm
+++ b/selftest/target/Samba4.pm
@@ -2206,6 +2206,7 @@ sub check_env($$)
renamedc => ["backupfromdc"],
offlinebackupdc => ["backupfromdc"],
labdc => ["backupfromdc"],
+ preforkrestartdc => [],
none => [],
);
@@ -2627,6 +2628,44 @@ sub setup_ad_dc_no_ntlm
return $env;
}
+#
+# ad_dc test environment used solely to test pre-frork process restarts
+# as processes get killed off and restarted it should not be used for other
+# tests.
+sub setup_preforkrestartdc
+{
+ my ($self, $path) = @_;
+
+ # If we didn't build with ADS, pretend this env was never available
+ if (not $self->{target3}->have_ads()) {
+ return "UNKNOWN";
+ }
+
+ my $env = $self->provision_ad_dc(
+ $path,
+ "prfrkrstrt",
+ "PRFRKRSTRTDOM",
+ "prfrkrstrt.samba.example.com",
+ "prefork backoff increment = 5\nprefork maximum backoff=10");
+ unless ($env) {
+ return undef;
+ }
+
+ $env->{NSS_WRAPPER_MODULE_SO_PATH} = undef;
+ $env->{NSS_WRAPPER_MODULE_FN_PREFIX} = undef;
+
+ if (not defined($self->check_or_start($env, "prefork"))) {
+ return undef;
+ }
+
+ my $upn_array = ["$env->{REALM}.upn"];
+ my $spn_array = ["$env->{REALM}.spn"];
+
+ $self->setup_namespaces($env, $upn_array, $spn_array);
+
+ return $env;
+}
+
# Sets up a DC that's solely used to do a domain backup from. We then use the
# backupfrom-DC to create the restore-DC - this proves that the backup/restore
# process will create a Samba DC that will actually start up.
diff --git a/source4/selftest/tests.py b/source4/selftest/tests.py
index c4b7d18444c..60e682495fe 100755
--- a/source4/selftest/tests.py
+++ b/source4/selftest/tests.py
@@ -1232,3 +1232,15 @@ plantestsuite("samba4.dsdb.samdb.ldb_modules.group_audit", "none",
[os.path.join(bindir(), "test_group_audit")])
plantestsuite("samba4.dsdb.samdb.ldb_modules.group_audit.errors", "none",
[os.path.join(bindir(), "test_group_audit_errors")])
+
+# process restart and limit tests, these break the environment so need to run
+# in their own specific environaments
+planoldpythontestsuite("preforkrestartdc:local",
+ "samba.tests.prefork_restart",
+ extra_path=[
+ os.path.join(srcdir(), 'python/samba/tests')],
+ extra_args=['-U"$USERNAME%$PASSWORD"'],
+ environ={'CLIENT_IP': '127.0.0.11',
+ 'SOCKET_WRAPPER_DEFAULT_IFACE': 11},
+ name="samba.tests.prefork_restart",
+ py3_compatible=True)
--
2.17.1
From b2da9787c716b77aab7b03bc031975f1fa60fdc2 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Fri, 31 Aug 2018 11:40:18 +1200
Subject: [PATCH 02/11] source4 smdb prefork: Pass restart information
Pass information about the pre-fork master and worker processes that
will allow them to be restarted.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/smbd/process_prefork.c | 176 ++++++++++++++++++++++++---------
1 file changed, 129 insertions(+), 47 deletions(-)
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index 016385c762c..c8ed942b741 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -37,6 +37,35 @@
NTSTATUS process_model_prefork_init(void);
+/*
+ * State needed to restart the master process or a worker process if they
+ * terminate early.
+ */
+struct master_restart_context {
+ struct loadparm_context *lp_ctx;
+ struct task_server *(*new_task_fn)(struct tevent_context *,
+ struct loadparm_context *lp_ctx,
+ struct server_id,
+ void *,
+ void *);
+ void *private_data;
+};
+
+struct worker_restart_context {
+ unsigned int instance;
+ struct task_server *task;
+ struct tevent_context *ev2;
+};
+
+struct restart_context {
+ struct tfork *t;
+ int from_parent_fd;
+ const struct service_details *service_details;
+ const char *service_name;
+ struct master_restart_context *master;
+ struct worker_restart_context *worker;
+};
+
static void sighup_signal_handler(struct tevent_context *ev,
struct tevent_signal *se,
int signum, int count, void *siginfo,
@@ -110,7 +139,7 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
uint16_t flags,
void *private_data)
{
- struct tfork *t = NULL;
+ struct restart_context *rc = NULL;
int status = 0;
pid_t pid = 0;
@@ -119,11 +148,10 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
/* the child has closed the pipe, assume its dead */
- /* tfork allocates tfork structures with malloc */
- t = (struct tfork*)private_data;
- pid = tfork_child_pid(t);
+ rc = talloc_get_type_abort(private_data, struct restart_context);
+ pid = tfork_child_pid(rc->t);
errno = 0;
- status = tfork_status(&t, false);
+ status = tfork_status(&rc->t, false);
if (status == -1) {
DBG_ERR("Parent %d, Child %d terminated, "
"unable to get status code from tfork\n",
@@ -138,7 +166,8 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
getpid(), pid, status);
}
/* tfork allocates tfork structures with malloc */
- free(t);
+ free(rc->t);
+ TALLOC_FREE(rc);
return;
}
@@ -220,6 +249,72 @@ static void setup_handlers(struct tevent_context *ev, int from_parent_fd) {
}
}
+static void prefork_fork_worker(struct task_server *task,
+ struct tevent_context *ev,
+ struct tevent_context *ev2,
+ const struct service_details *service_details,
+ const char *service_name,
+ int from_parent_fd,
+ struct process_details *pd)
+{
+ struct tfork *w = NULL;
+ pid_t pid;
+
+ w = tfork_create();
+ if (w == NULL) {
+ smb_panic("failure in tfork\n");
+ }
+
+ pid = tfork_child_pid(w);
+ if (pid != 0) {
+ struct tevent_fd *fde = NULL;
+ int fd = tfork_event_fd(w);
+ struct restart_context *rc = NULL;
+
+ rc = talloc_zero(ev, struct restart_context);
+ if (rc == NULL) {
+ smb_panic("OOM allocating restart context\n");
+ }
+ rc->t = w;
+ rc->service_name = service_name;
+ rc->service_details = service_details;
+ rc->from_parent_fd = from_parent_fd;
+ rc->master = NULL;
+ rc->worker = talloc_zero(rc, struct worker_restart_context);
+ if (rc->worker == NULL) {
+ smb_panic("OOM allocating master restart context\n");
+ }
+ rc->worker->ev2 = ev2;
+ rc->worker->instance = pd->instances;
+ rc->worker->task = task;
+
+ fde = tevent_add_fd(
+ ev, ev, fd, TEVENT_FD_READ, prefork_child_pipe_handler, rc);
+ if (fde == NULL) {
+ smb_panic("Failed to add child pipe handler, "
+ "after fork");
+ }
+ tevent_fd_set_auto_close(fde);
+ } else {
+ /*
+ * tfork uses malloc
+ */
+ free(w);
+
+ TALLOC_FREE(ev);
+ setproctitle("task[%s] pre-forked worker(%d)",
+ service_name,
+ pd->instances);
+ prefork_reload_after_fork();
+ setup_handlers(ev2, from_parent_fd);
+ if (service_details->post_fork != NULL) {
+ service_details->post_fork(task, pd);
+ }
+ tevent_loop_wait(ev2);
+ talloc_free(ev2);
+ exit(0);
+ }
+}
/*
* called to create a new server task
*/
@@ -251,12 +346,30 @@ static void prefork_new_task(
if (pid != 0) {
struct tevent_fd *fde = NULL;
int fd = tfork_event_fd(t);
+ struct restart_context *rc = NULL;
/* Register a pipe handler that gets called when the prefork
* master process terminates.
*/
- fde = tevent_add_fd(ev, ev, fd, TEVENT_FD_READ,
- prefork_child_pipe_handler, t);
+ rc = talloc_zero(ev, struct restart_context);
+ if (rc == NULL) {
+ smb_panic("OOM allocating restart context\n");
+ }
+ rc->t = t;
+ rc->service_name = service_name;
+ rc->service_details = service_details;
+ rc->from_parent_fd = from_parent_fd;
+ rc->master = talloc_zero(rc, struct master_restart_context);
+ if (rc->master == NULL) {
+ smb_panic("OOM allocating master restart context\n");
+ }
+
+ rc->master->lp_ctx = lp_ctx;
+ rc->master->new_task_fn = new_task_fn;
+ rc->master->private_data = private_data;
+
+ fde = tevent_add_fd(
+ ev, ev, fd, TEVENT_FD_READ, prefork_child_pipe_handler, rc);
if (fde == NULL) {
smb_panic("Failed to add child pipe handler, "
"after fork");
@@ -330,45 +443,14 @@ static void prefork_new_task(
* We are now free to spawn some worker processes
*/
for (i=0; i < num_children; i++) {
- struct tfork* w = NULL;
-
- w = tfork_create();
- if (w == NULL) {
- smb_panic("failure in tfork\n");
- }
-
- pid = tfork_child_pid(w);
- if (pid != 0) {
- struct tevent_fd *fde = NULL;
- int fd = tfork_event_fd(w);
-
- fde = tevent_add_fd(ev, ev, fd, TEVENT_FD_READ,
- prefork_child_pipe_handler, w);
- if (fde == NULL) {
- smb_panic("Failed to add child pipe handler, "
- "after fork");
- }
- tevent_fd_set_auto_close(fde);
- pd.instances++;
- } else {
- /*
- * tfork uses malloc
- */
- free(w);
-
- TALLOC_FREE(ev);
- setproctitle("task[%s] pre-forked worker(%d)",
- service_name,
- pd.instances);
- prefork_reload_after_fork();
- setup_handlers(ev2, from_parent_fd);
- if (service_details->post_fork != NULL) {
- service_details->post_fork(task, &pd);
- }
- tevent_loop_wait(ev2);
- talloc_free(ev2);
- exit(0);
- }
+ prefork_fork_worker(task,
+ ev,
+ ev2,
+ service_details,
+ service_name,
+ from_parent_fd,
+ &pd);
+ pd.instances++;
}
/* Don't listen on the sockets we just gave to the children */
--
2.17.1
From 8418f7c2ee926a2f6d4e523368041d0fc6f94233 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Mon, 3 Sep 2018 09:34:17 +1200
Subject: [PATCH 03/11] source4 smdb prefork: Restart failed processes
Restart any pre-forked master or worker process that terminated with SIGABRT,
SIGBUS, SIGFPE, SIGILL or SIGSYS
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/smbd/process_prefork.c | 75 ++++++++++++++++++++++++++++++++--
1 file changed, 71 insertions(+), 4 deletions(-)
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index c8ed942b741..35513bc7118 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -36,6 +36,25 @@
#include "lib/util/tfork.h"
NTSTATUS process_model_prefork_init(void);
+static void prefork_new_task(
+ struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *service_name,
+ struct task_server *(*new_task_fn)(struct tevent_context *,
+ struct loadparm_context *lp_ctx,
+ struct server_id,
+ void *,
+ void *),
+ void *private_data,
+ const struct service_details *service_details,
+ int from_parent_fd);
+static void prefork_fork_worker(struct task_server *task,
+ struct tevent_context *ev,
+ struct tevent_context *ev2,
+ const struct service_details *service_details,
+ const char *service_name,
+ int control_pipe[2],
+ struct process_details *pd);
/*
* State needed to restart the master process or a worker process if they
@@ -55,6 +74,7 @@ struct worker_restart_context {
unsigned int instance;
struct task_server *task;
struct tevent_context *ev2;
+ int control_pipe[2];
};
struct restart_context {
@@ -164,6 +184,39 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
status = WTERMSIG(status);
DBG_ERR("Parent %d, Child %d terminated with signal %d\n",
getpid(), pid, status);
+ if (status == SIGABRT || status == SIGBUS || status == SIGFPE ||
+ status == SIGILL || status == SIGSYS) {
+ /*
+ * Lets restart the process.
+ *
+ */
+ tfork_destroy(&rc->t);
+ if (rc->master != NULL) {
+ DBG_ERR("Restarting [%s] pre-fork master\n",
+ rc->service_name);
+ prefork_new_task(ev,
+ rc->master->lp_ctx,
+ rc->service_name,
+ rc->master->new_task_fn,
+ rc->master->private_data,
+ rc->service_details,
+ rc->from_parent_fd);
+ } else if (rc->worker != NULL) {
+ struct process_details pd =
+ initial_process_details;
+ DBG_ERR("Restarting [%s] pre-fork worker(%d)\n",
+ rc->service_name,
+ rc->worker->instance);
+ pd.instances = rc->worker->instance;
+ prefork_fork_worker(rc->worker->task,
+ ev,
+ rc->worker->ev2,
+ rc->service_details,
+ rc->service_name,
+ rc->worker->control_pipe,
+ &pd);
+ }
+ }
}
/* tfork allocates tfork structures with malloc */
free(rc->t);
@@ -254,7 +307,7 @@ static void prefork_fork_worker(struct task_server *task,
struct tevent_context *ev2,
const struct service_details *service_details,
const char *service_name,
- int from_parent_fd,
+ int control_pipe[2],
struct process_details *pd)
{
struct tfork *w = NULL;
@@ -278,7 +331,6 @@ static void prefork_fork_worker(struct task_server *task,
rc->t = w;
rc->service_name = service_name;
rc->service_details = service_details;
- rc->from_parent_fd = from_parent_fd;
rc->master = NULL;
rc->worker = talloc_zero(rc, struct worker_restart_context);
if (rc->worker == NULL) {
@@ -287,6 +339,8 @@ static void prefork_fork_worker(struct task_server *task,
rc->worker->ev2 = ev2;
rc->worker->instance = pd->instances;
rc->worker->task = task;
+ rc->worker->control_pipe[0] = control_pipe[0];
+ rc->worker->control_pipe[1] = control_pipe[1];
fde = tevent_add_fd(
ev, ev, fd, TEVENT_FD_READ, prefork_child_pipe_handler, rc);
@@ -296,6 +350,8 @@ static void prefork_fork_worker(struct task_server *task,
}
tevent_fd_set_auto_close(fde);
} else {
+ close(control_pipe[1]);
+ setup_handlers(ev2, control_pipe[0]);
/*
* tfork uses malloc
*/
@@ -306,7 +362,6 @@ static void prefork_fork_worker(struct task_server *task,
service_name,
pd->instances);
prefork_reload_after_fork();
- setup_handlers(ev2, from_parent_fd);
if (service_details->post_fork != NULL) {
service_details->post_fork(task, pd);
}
@@ -336,12 +391,14 @@ static void prefork_new_task(
struct tevent_context *ev2;
struct task_server *task = NULL;
struct process_details pd = initial_process_details;
+ int control_pipe[2];
t = tfork_create();
if (t == NULL) {
smb_panic("failure in tfork\n");
}
+ DBG_NOTICE("Forking [%s] pre-fork master process\n", service_name);
pid = tfork_child_pid(t);
if (pid != 0) {
struct tevent_fd *fde = NULL;
@@ -439,6 +496,16 @@ static void prefork_new_task(
}
DBG_NOTICE("Forking %d %s worker processes\n",
num_children, service_name);
+ {
+ int ret;
+ ret = pipe(control_pipe);
+ if (ret != 0) {
+ smb_panic("Unable to create worker control pipe\n");
+ }
+ smb_set_close_on_exec(control_pipe[0]);
+ smb_set_close_on_exec(control_pipe[0]);
+ }
+
/*
* We are now free to spawn some worker processes
*/
@@ -448,7 +515,7 @@ static void prefork_new_task(
ev2,
service_details,
service_name,
- from_parent_fd,
+ control_pipe,
&pd);
pd.instances++;
}
--
2.17.1
From d937375fac4d62b07265e67dda15976cca7ac837 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Tue, 4 Sep 2018 10:09:38 +1200
Subject: [PATCH 04/11] source4 smbd process: pass the fatal flag to terminate
Pass the fatal flag supplied to task_server_terminate to the process
task_terminate method. It will be used by the task_terminate methods to
set an appropriate exit code. The process_prefork model will use a non
zero exit code to indicate that the process should be restarted.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/smbd/process_model.h | 3 ++-
source4/smbd/process_prefork.c | 1 +
source4/smbd/process_single.c | 1 +
source4/smbd/process_standard.c | 1 +
source4/smbd/service_stream.c | 3 ++-
source4/smbd/service_task.c | 4 ++--
6 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/source4/smbd/process_model.h b/source4/smbd/process_model.h
index 17d70254cf2..2a226cef8cb 100644
--- a/source4/smbd/process_model.h
+++ b/source4/smbd/process_model.h
@@ -69,7 +69,8 @@ struct model_ops {
void (*terminate)(struct tevent_context *,
struct loadparm_context *lp_ctx,
const char *reason,
- void * process_context);
+ bool fatal,
+ void *process_context);
/* function to set a title for the connection or task */
void (*set_title)(struct tevent_context *, const char *title);
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index 35513bc7118..762f32aa977 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -534,6 +534,7 @@ static void prefork_new_task(
static void prefork_terminate(struct tevent_context *ev,
struct loadparm_context *lp_ctx,
const char *reason,
+ bool fatal,
void *process_context)
{
DBG_DEBUG("called with reason[%s]\n", reason);
diff --git a/source4/smbd/process_single.c b/source4/smbd/process_single.c
index 174c1572812..061f09673dd 100644
--- a/source4/smbd/process_single.c
+++ b/source4/smbd/process_single.c
@@ -124,6 +124,7 @@ static void single_new_task(struct tevent_context *ev,
static void single_terminate(struct tevent_context *ev,
struct loadparm_context *lp_ctx,
const char *reason,
+ bool fatal,
void *process_context)
{
DBG_NOTICE("single_terminate: reason[%s]\n",reason);
diff --git a/source4/smbd/process_standard.c b/source4/smbd/process_standard.c
index 91dfa9753c5..536a6be3007 100644
--- a/source4/smbd/process_standard.c
+++ b/source4/smbd/process_standard.c
@@ -513,6 +513,7 @@ static void standard_new_task(struct tevent_context *ev,
static void standard_terminate(struct tevent_context *ev,
struct loadparm_context *lp_ctx,
const char *reason,
+ bool fatal,
void *process_context)
{
struct process_context *proc_ctx = NULL;
diff --git a/source4/smbd/service_stream.c b/source4/smbd/service_stream.c
index fc996d942e6..336a0cb32aa 100644
--- a/source4/smbd/service_stream.c
+++ b/source4/smbd/service_stream.c
@@ -57,6 +57,7 @@ void stream_terminate_connection(struct stream_connection *srv_conn, const char
const struct model_ops *model_ops = srv_conn->model_ops;
struct loadparm_context *lp_ctx = srv_conn->lp_ctx;
void *process_context = srv_conn->process_context;
+ bool fatal = true;
TALLOC_CTX *frame = NULL;
if (!reason) reason = "unknown reason";
@@ -91,7 +92,7 @@ void stream_terminate_connection(struct stream_connection *srv_conn, const char
srv_conn->event.fde = NULL;
imessaging_cleanup(srv_conn->msg_ctx);
TALLOC_FREE(srv_conn);
- model_ops->terminate(event_ctx, lp_ctx, reason, process_context);
+ model_ops->terminate(event_ctx, lp_ctx, reason, fatal, process_context);
TALLOC_FREE(frame);
}
diff --git a/source4/smbd/service_task.c b/source4/smbd/service_task.c
index 15e480ec043..cb1f4d5ad1f 100644
--- a/source4/smbd/service_task.c
+++ b/source4/smbd/service_task.c
@@ -54,8 +54,8 @@ void task_server_terminate(struct task_server *task, const char *reason, bool fa
imessaging_cleanup(task->msg_ctx);
- model_ops->terminate(event_ctx, task->lp_ctx, reason,
- task->process_context);
+ model_ops->terminate(
+ event_ctx, task->lp_ctx, reason, fatal, task->process_context);
/* don't free this above, it might contain the 'reason' being printed */
talloc_free(task);
}
--
2.17.1
From 35e839bbde165396d7dd4c9b1d93fdb222f90f71 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Tue, 4 Sep 2018 12:12:49 +1200
Subject: [PATCH 05/11] source4 smbd prefork: restart on non zero exit code
Restart any pre-fork master or worker process that exits with a non
zero exit code.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/kdc/kdc-heimdal.c | 3 +-
source4/smbd/process_model.h | 17 ++++--
source4/smbd/process_prefork.c | 97 ++++++++++++++++++++-------------
source4/smbd/process_single.c | 30 +++++++---
source4/smbd/process_standard.c | 32 +++++++----
source4/smbd/service_stream.c | 4 +-
source4/smbd/service_task.c | 2 +-
7 files changed, 118 insertions(+), 67 deletions(-)
diff --git a/source4/kdc/kdc-heimdal.c b/source4/kdc/kdc-heimdal.c
index 67c07c3d81a..b5de5a790d4 100644
--- a/source4/kdc/kdc-heimdal.c
+++ b/source4/kdc/kdc-heimdal.c
@@ -276,7 +276,8 @@ static NTSTATUS kdc_task_init(struct task_server *task)
return NT_STATUS_INVALID_DOMAIN_ROLE;
case ROLE_DOMAIN_PDC:
case ROLE_DOMAIN_BDC:
- task_server_terminate(task, "Cannot start KDC as a 'classic Samba' DC", true);
+ task_server_terminate(
+ task, "Cannot start KDC as a 'classic Samba' DC", false);
return NT_STATUS_INVALID_DOMAIN_ROLE;
case ROLE_ACTIVE_DIRECTORY_DC:
/* Yes, we want a KDC */
diff --git a/source4/smbd/process_model.h b/source4/smbd/process_model.h
index 2a226cef8cb..2892dd43c04 100644
--- a/source4/smbd/process_model.h
+++ b/source4/smbd/process_model.h
@@ -65,12 +65,17 @@ struct model_ops {
const struct service_details*,
const int);
- /* function to terminate a connection or task */
- void (*terminate)(struct tevent_context *,
- struct loadparm_context *lp_ctx,
- const char *reason,
- bool fatal,
- void *process_context);
+ /* function to terminate a task */
+ void (*terminate_task)(struct tevent_context *,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ bool fatal,
+ void *process_context);
+ /* function to terminate a connection */
+ void (*terminate_connection)(struct tevent_context *,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ void *process_context);
/* function to set a title for the connection or task */
void (*set_title)(struct tevent_context *, const char *title);
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index 762f32aa977..37b258d75ed 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -147,6 +147,33 @@ static void prefork_pipe_handler(struct tevent_context *event_ctx,
exit(0);
}
+static void prefork_restart(struct tevent_context *ev,
+ struct restart_context *rc)
+{
+ if (rc->master != NULL) {
+ DBG_ERR("Restarting [%s] pre-fork master\n", rc->service_name);
+ prefork_new_task(ev,
+ rc->master->lp_ctx,
+ rc->service_name,
+ rc->master->new_task_fn,
+ rc->master->private_data,
+ rc->service_details,
+ rc->from_parent_fd);
+ } else if (rc->worker != NULL) {
+ struct process_details pd = initial_process_details;
+ DBG_ERR("Restarting [%s] pre-fork worker(%d)\n",
+ rc->service_name,
+ rc->worker->instance);
+ pd.instances = rc->worker->instance;
+ prefork_fork_worker(rc->worker->task,
+ ev,
+ rc->worker->ev2,
+ rc->service_details,
+ rc->service_name,
+ rc->worker->control_pipe,
+ &pd);
+ }
+}
/*
handle EOF on the child pipe in the parent, so we know when a
process terminates without using SIGCHLD or waiting on all possible pids.
@@ -176,49 +203,26 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
DBG_ERR("Parent %d, Child %d terminated, "
"unable to get status code from tfork\n",
getpid(), pid);
+ prefork_restart(ev, rc);
} else if (WIFEXITED(status)) {
status = WEXITSTATUS(status);
DBG_ERR("Parent %d, Child %d exited with status %d\n",
getpid(), pid, status);
+ if (status != 0) {
+ prefork_restart(ev, rc);
+ }
} else if (WIFSIGNALED(status)) {
status = WTERMSIG(status);
DBG_ERR("Parent %d, Child %d terminated with signal %d\n",
getpid(), pid, status);
if (status == SIGABRT || status == SIGBUS || status == SIGFPE ||
status == SIGILL || status == SIGSYS) {
- /*
- * Lets restart the process.
- *
- */
- tfork_destroy(&rc->t);
- if (rc->master != NULL) {
- DBG_ERR("Restarting [%s] pre-fork master\n",
- rc->service_name);
- prefork_new_task(ev,
- rc->master->lp_ctx,
- rc->service_name,
- rc->master->new_task_fn,
- rc->master->private_data,
- rc->service_details,
- rc->from_parent_fd);
- } else if (rc->worker != NULL) {
- struct process_details pd =
- initial_process_details;
- DBG_ERR("Restarting [%s] pre-fork worker(%d)\n",
- rc->service_name,
- rc->worker->instance);
- pd.instances = rc->worker->instance;
- prefork_fork_worker(rc->worker->task,
- ev,
- rc->worker->ev2,
- rc->service_details,
- rc->service_name,
- rc->worker->control_pipe,
- &pd);
- }
+
+ prefork_restart(ev, rc);
}
}
/* tfork allocates tfork structures with malloc */
+ tfork_destroy(&rc->t);
free(rc->t);
TALLOC_FREE(rc);
return;
@@ -529,15 +533,31 @@ static void prefork_new_task(
}
-
-/* called when a task goes down */
-static void prefork_terminate(struct tevent_context *ev,
- struct loadparm_context *lp_ctx,
- const char *reason,
- bool fatal,
- void *process_context)
+/*
+ * called when a task terminates
+ */
+static void prefork_terminate_task(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ bool fatal,
+ void *process_context)
{
DBG_DEBUG("called with reason[%s]\n", reason);
+ if (fatal == true) {
+ exit(127);
+ } else {
+ exit(0);
+ }
+}
+
+/*
+ * called when a connection completes
+ */
+static void prefork_terminate_connection(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ void *process_context)
+{
}
/* called to set a title of a task or connection */
@@ -550,7 +570,8 @@ static const struct model_ops prefork_ops = {
.model_init = prefork_model_init,
.accept_connection = prefork_accept_connection,
.new_task = prefork_new_task,
- .terminate = prefork_terminate,
+ .terminate_task = prefork_terminate_task,
+ .terminate_connection = prefork_terminate_connection,
.set_title = prefork_set_title,
};
diff --git a/source4/smbd/process_single.c b/source4/smbd/process_single.c
index 061f09673dd..12ed2e30a74 100644
--- a/source4/smbd/process_single.c
+++ b/source4/smbd/process_single.c
@@ -119,17 +119,28 @@ static void single_new_task(struct tevent_context *ev,
}
}
-
-/* called when a task goes down */
-static void single_terminate(struct tevent_context *ev,
- struct loadparm_context *lp_ctx,
- const char *reason,
- bool fatal,
- void *process_context)
+/*
+ * Called when a task goes down
+ */
+static void single_terminate_task(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ bool fatal,
+ void *process_context)
{
DBG_NOTICE("single_terminate: reason[%s]\n",reason);
}
+/*
+ * Called when a connection has ended
+ */
+static void single_terminate_connection(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ void *process_context)
+{
+}
+
/* called to set a title of a task or connection */
static void single_set_title(struct tevent_context *ev, const char *title)
{
@@ -138,9 +149,10 @@ static void single_set_title(struct tevent_context *ev, const char *title)
const struct model_ops single_ops = {
.name = "single",
.model_init = single_model_init,
- .new_task = single_new_task,
+ .new_task = single_new_task,
.accept_connection = single_accept_connection,
- .terminate = single_terminate,
+ .terminate_task = single_terminate_task,
+ .terminate_connection = single_terminate_connection,
.set_title = single_set_title,
};
diff --git a/source4/smbd/process_standard.c b/source4/smbd/process_standard.c
index 536a6be3007..d25538716ba 100644
--- a/source4/smbd/process_standard.c
+++ b/source4/smbd/process_standard.c
@@ -510,15 +510,27 @@ static void standard_new_task(struct tevent_context *ev,
/* called when a task goes down */
-static void standard_terminate(struct tevent_context *ev,
- struct loadparm_context *lp_ctx,
- const char *reason,
- bool fatal,
- void *process_context)
+static void standard_terminate_task(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ bool fatal,
+ void *process_context)
+{
+ if (fatal == true) {
+ exit(127);
+ }
+ exit(0);
+}
+
+/* called when a connection terminates*/
+static void standard_terminate_connection(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *reason,
+ void *process_context)
{
struct process_context *proc_ctx = NULL;
- DBG_DEBUG("process terminating reason[%s]\n", reason);
+ DBG_DEBUG("connection terminating reason[%s]\n", reason);
if (process_context == NULL) {
smb_panic("Panicking process_context is NULL");
}
@@ -547,7 +559,6 @@ static void standard_terminate(struct tevent_context *ev,
/* terminate this process */
exit(0);
}
-
/* called to set a title of a task or connection */
static void standard_set_title(struct tevent_context *ev, const char *title)
{
@@ -562,9 +573,10 @@ static const struct model_ops standard_ops = {
.name = "standard",
.model_init = standard_model_init,
.accept_connection = standard_accept_connection,
- .new_task = standard_new_task,
- .terminate = standard_terminate,
- .set_title = standard_set_title,
+ .new_task = standard_new_task,
+ .terminate_task = standard_terminate_task,
+ .terminate_connection = standard_terminate_connection,
+ .set_title = standard_set_title,
};
/*
diff --git a/source4/smbd/service_stream.c b/source4/smbd/service_stream.c
index 336a0cb32aa..d0ad45aca3a 100644
--- a/source4/smbd/service_stream.c
+++ b/source4/smbd/service_stream.c
@@ -57,7 +57,6 @@ void stream_terminate_connection(struct stream_connection *srv_conn, const char
const struct model_ops *model_ops = srv_conn->model_ops;
struct loadparm_context *lp_ctx = srv_conn->lp_ctx;
void *process_context = srv_conn->process_context;
- bool fatal = true;
TALLOC_CTX *frame = NULL;
if (!reason) reason = "unknown reason";
@@ -92,7 +91,8 @@ void stream_terminate_connection(struct stream_connection *srv_conn, const char
srv_conn->event.fde = NULL;
imessaging_cleanup(srv_conn->msg_ctx);
TALLOC_FREE(srv_conn);
- model_ops->terminate(event_ctx, lp_ctx, reason, fatal, process_context);
+ model_ops->terminate_connection(
+ event_ctx, lp_ctx, reason, process_context);
TALLOC_FREE(frame);
}
diff --git a/source4/smbd/service_task.c b/source4/smbd/service_task.c
index cb1f4d5ad1f..d911027db0a 100644
--- a/source4/smbd/service_task.c
+++ b/source4/smbd/service_task.c
@@ -54,7 +54,7 @@ void task_server_terminate(struct task_server *task, const char *reason, bool fa
imessaging_cleanup(task->msg_ctx);
- model_ops->terminate(
+ model_ops->terminate_task(
event_ctx, task->lp_ctx, reason, fatal, task->process_context);
/* don't free this above, it might contain the 'reason' being printed */
talloc_free(task);
--
2.17.1
From 871ebb10c611ff8fa5a5bbc7c431461b4f54d375 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Wed, 5 Sep 2018 07:31:22 +1200
Subject: [PATCH 06/11] source4 smbd prefork: Add backoff to process restart
Add new smbd.conf variables 'prefork backoff increment' and
'prefork maximum backoff' to control the rate at which failed pre-forked
processes are restarted.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
.../base/preforkbackoffincrement.xml | 25 ++
.../smbdotconf/base/preforkmaximumbackoff.xml | 13 +
lib/param/loadparm.c | 2 +
source3/param/loadparm.c | 2 +
source4/smbd/process_prefork.c | 378 +++++++++++-------
5 files changed, 270 insertions(+), 150 deletions(-)
create mode 100644 docs-xml/smbdotconf/base/preforkbackoffincrement.xml
create mode 100644 docs-xml/smbdotconf/base/preforkmaximumbackoff.xml
diff --git a/docs-xml/smbdotconf/base/preforkbackoffincrement.xml b/docs-xml/smbdotconf/base/preforkbackoffincrement.xml
new file mode 100644
index 00000000000..2cb1cc32fd8
--- /dev/null
+++ b/docs-xml/smbdotconf/base/preforkbackoffincrement.xml
@@ -0,0 +1,25 @@
+<samba:parameter name="prefork backoff increment"
+ context="G"
+ type="integer"
+ xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+ <para>This option specifies the number of seconds added to the delay
+ before a prefork master or worker process is restarted. The
+ restart is initially zero, the prefork backoff increment is
+ added to the delay on each restart up to the value specified by
+ "prefork maximum backoff".
+ </para>
+
+ <para>Additionally the the backoff for an individual service by using
+ "prefork backoff increment: service name"
+ i.e. "prefork backoff increment:ldap = 2" to set the
+ backoff increment to 2.</para>
+
+ <para>If the backoff increment is 2 and the maximum backoff is 5.
+ There will be a zero second delay for the first restart. A two
+ second delay for the second restart. A four second delay for the
+ third and any subsequent restarts</para>
+</description>
+
+<value type="default">10</value>
+</samba:parameter>
diff --git a/docs-xml/smbdotconf/base/preforkmaximumbackoff.xml b/docs-xml/smbdotconf/base/preforkmaximumbackoff.xml
new file mode 100644
index 00000000000..17e530d5db8
--- /dev/null
+++ b/docs-xml/smbdotconf/base/preforkmaximumbackoff.xml
@@ -0,0 +1,13 @@
+<samba:parameter name="prefork maximum backoff"
+ context="G"
+ type="integer"
+ xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+ <para>This option controls the maximum delay before a failed pre-fork
+ process is restarted.
+ </para>
+
+</description>
+
+<value type="default">120</value>
+</samba:parameter>
diff --git a/lib/param/loadparm.c b/lib/param/loadparm.c
index 484c8913009..a7dbc6f8f0b 100644
--- a/lib/param/loadparm.c
+++ b/lib/param/loadparm.c
@@ -2997,6 +2997,8 @@ struct loadparm_context *loadparm_init(TALLOC_CTX *mem_ctx)
"49152-65535");
lpcfg_do_global_parameter(lp_ctx, "prefork children", "4");
+ lpcfg_do_global_parameter(lp_ctx, "prefork backoff increment", "10");
+ lpcfg_do_global_parameter(lp_ctx, "prefork maximum backoff", "120");
lpcfg_do_global_parameter(lp_ctx, "check parent directory delete on close", "no");
diff --git a/source3/param/loadparm.c b/source3/param/loadparm.c
index 495683083cb..994f9b0ce43 100644
--- a/source3/param/loadparm.c
+++ b/source3/param/loadparm.c
@@ -958,6 +958,8 @@ static void init_globals(struct loadparm_context *lp_ctx, bool reinit_globals)
Globals.rpc_low_port = SERVER_TCP_LOW_PORT;
Globals.rpc_high_port = SERVER_TCP_HIGH_PORT;
Globals.prefork_children = 4;
+ Globals.prefork_backoff_increment = 10;
+ Globals.prefork_maximum_backoff = 120;
/* Now put back the settings that were set with lp_set_cmdline() */
apply_lp_set_cmdline();
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index 37b258d75ed..f963b83afd1 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -35,6 +35,8 @@
#include "ldb_wrap.h"
#include "lib/util/tfork.h"
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+
NTSTATUS process_model_prefork_init(void);
static void prefork_new_task(
struct tevent_context *ev,
@@ -51,17 +53,23 @@ static void prefork_new_task(
static void prefork_fork_worker(struct task_server *task,
struct tevent_context *ev,
struct tevent_context *ev2,
+ struct loadparm_context *lp_ctx,
const struct service_details *service_details,
const char *service_name,
int control_pipe[2],
+ unsigned restart_delay,
struct process_details *pd);
+static void prefork_child_pipe_handler(struct tevent_context *ev,
+ struct tevent_fd *fde,
+ uint16_t flags,
+ void *private_data);
+static void setup_handlers(struct tevent_context *ev, int from_parent_fd);
/*
* State needed to restart the master process or a worker process if they
* terminate early.
*/
struct master_restart_context {
- struct loadparm_context *lp_ctx;
struct task_server *(*new_task_fn)(struct tevent_context *,
struct loadparm_context *lp_ctx,
struct server_id,
@@ -78,10 +86,12 @@ struct worker_restart_context {
};
struct restart_context {
+ struct loadparm_context *lp_ctx;
struct tfork *t;
int from_parent_fd;
const struct service_details *service_details;
const char *service_name;
+ unsigned restart_delay;
struct master_restart_context *master;
struct worker_restart_context *worker;
};
@@ -147,18 +157,214 @@ static void prefork_pipe_handler(struct tevent_context *event_ctx,
exit(0);
}
+
+/*
+ * called to create a new server task
+ */
+static void prefork_fork_master(
+ struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ const char *service_name,
+ struct task_server *(*new_task_fn)(struct tevent_context *,
+ struct loadparm_context *lp_ctx,
+ struct server_id,
+ void *,
+ void *),
+ void *private_data,
+ const struct service_details *service_details,
+ unsigned restart_delay,
+ int from_parent_fd)
+{
+ pid_t pid;
+ struct tfork* t = NULL;
+ int i, num_children;
+
+ struct tevent_context *ev2;
+ struct task_server *task = NULL;
+ struct process_details pd = initial_process_details;
+ int control_pipe[2];
+
+ t = tfork_create();
+ if (t == NULL) {
+ smb_panic("failure in tfork\n");
+ }
+
+ DBG_NOTICE("Forking [%s] pre-fork master process\n", service_name);
+ pid = tfork_child_pid(t);
+ if (pid != 0) {
+ struct tevent_fd *fde = NULL;
+ int fd = tfork_event_fd(t);
+ struct restart_context *rc = NULL;
+
+ /* Register a pipe handler that gets called when the prefork
+ * master process terminates.
+ */
+ rc = talloc_zero(ev, struct restart_context);
+ if (rc == NULL) {
+ smb_panic("OOM allocating restart context\n");
+ }
+ rc->t = t;
+ rc->lp_ctx = lp_ctx;
+ rc->service_name = service_name;
+ rc->service_details = service_details;
+ rc->from_parent_fd = from_parent_fd;
+ rc->restart_delay = restart_delay;
+ rc->master = talloc_zero(rc, struct master_restart_context);
+ if (rc->master == NULL) {
+ smb_panic("OOM allocating master restart context\n");
+ }
+
+ rc->master->new_task_fn = new_task_fn;
+ rc->master->private_data = private_data;
+
+ fde = tevent_add_fd(
+ ev, ev, fd, TEVENT_FD_READ, prefork_child_pipe_handler, rc);
+ if (fde == NULL) {
+ smb_panic("Failed to add child pipe handler, "
+ "after fork");
+ }
+ tevent_fd_set_auto_close(fde);
+ return;
+ }
+
+ pid = getpid();
+ setproctitle("task[%s] pre-fork master", service_name);
+
+ /*
+ * this will free all the listening sockets and all state that
+ * is not associated with this new connection
+ */
+ if (tevent_re_initialise(ev) != 0) {
+ smb_panic("Failed to re-initialise tevent after fork");
+ }
+ prefork_reload_after_fork();
+ setup_handlers(ev, from_parent_fd);
+
+ if (service_details->inhibit_pre_fork) {
+ task = new_task_fn(
+ ev, lp_ctx, cluster_id(pid, 0), private_data, NULL);
+ /*
+ * The task does not support pre-fork
+ */
+ if (task != NULL && service_details->post_fork != NULL) {
+ service_details->post_fork(task, &pd);
+ }
+ tevent_loop_wait(ev);
+ TALLOC_FREE(ev);
+ exit(0);
+ }
+
+ /*
+ * This is now the child code. We need a completely new event_context
+ * to work with
+ */
+ ev2 = s4_event_context_init(NULL);
+
+ /* setup this new connection: process will bind to it's sockets etc
+ *
+ * While we can use ev for the child, which has been re-initialised
+ * above we must run the new task under ev2 otherwise the children would
+ * be listening on the sockets. Also we don't want the top level
+ * process accepting and handling requests, it's responsible for
+ * monitoring and controlling the child work processes.
+ */
+ task = new_task_fn(ev2, lp_ctx, cluster_id(pid, 0), private_data, NULL);
+ if (task == NULL) {
+ TALLOC_FREE(ev);
+ TALLOC_FREE(ev2);
+ exit(0);
+ }
+
+ {
+ int default_children;
+ default_children = lpcfg_prefork_children(lp_ctx);
+ num_children = lpcfg_parm_int(lp_ctx, NULL, "prefork children",
+ service_name, default_children);
+ }
+ if (num_children == 0) {
+ DBG_WARNING("Number of pre-fork children for %s is zero, "
+ "NO worker processes will be started for %s\n",
+ service_name, service_name);
+ }
+ DBG_NOTICE("Forking %d %s worker processes\n",
+ num_children, service_name);
+ {
+ int ret;
+ ret = pipe(control_pipe);
+ if (ret != 0) {
+ smb_panic("Unable to create worker control pipe\n");
+ }
+ smb_set_close_on_exec(control_pipe[0]);
+ smb_set_close_on_exec(control_pipe[0]);
+ }
+
+ /*
+ * We are now free to spawn some worker processes
+ */
+ for (i=0; i < num_children; i++) {
+ prefork_fork_worker(task,
+ ev,
+ ev2,
+ lp_ctx,
+ service_details,
+ service_name,
+ control_pipe,
+ 0,
+ &pd);
+ pd.instances++;
+ }
+
+ /* Don't listen on the sockets we just gave to the children */
+ tevent_loop_wait(ev);
+ TALLOC_FREE(ev);
+ /* We need to keep ev2 until we're finished for the messaging to work */
+ TALLOC_FREE(ev2);
+ exit(0);
+
+}
static void prefork_restart(struct tevent_context *ev,
struct restart_context *rc)
{
+ unsigned max_backoff = 10;
+ unsigned backoff = 2;
+ unsigned restart_delay = rc->restart_delay;
+
+ unsigned default_value;
+
+ default_value = lpcfg_prefork_backoff_increment(rc->lp_ctx);
+ backoff = lpcfg_parm_int(rc->lp_ctx,
+ NULL,
+ "prefork backoff increment",
+ rc->service_name,
+ default_value);
+
+ default_value = lpcfg_prefork_maximum_backoff(rc->lp_ctx);
+ max_backoff = lpcfg_parm_int(rc->lp_ctx,
+ NULL,
+ "prefork maximum backoff",
+ rc->service_name,
+ default_value);
+
+ if (restart_delay > 0) {
+ DBG_ERR("Restarting [%s] pre-fork %s in (%d) seconds\n",
+ rc->service_name,
+ (rc->master == NULL) ? "worker" : "master",
+ restart_delay);
+ sleep(restart_delay);
+ }
+ restart_delay += backoff;
+ restart_delay = min(restart_delay, max_backoff);
+
if (rc->master != NULL) {
DBG_ERR("Restarting [%s] pre-fork master\n", rc->service_name);
- prefork_new_task(ev,
- rc->master->lp_ctx,
- rc->service_name,
- rc->master->new_task_fn,
- rc->master->private_data,
- rc->service_details,
- rc->from_parent_fd);
+ prefork_fork_master(ev,
+ rc->lp_ctx,
+ rc->service_name,
+ rc->master->new_task_fn,
+ rc->master->private_data,
+ rc->service_details,
+ restart_delay,
+ rc->from_parent_fd);
} else if (rc->worker != NULL) {
struct process_details pd = initial_process_details;
DBG_ERR("Restarting [%s] pre-fork worker(%d)\n",
@@ -168,9 +374,11 @@ static void prefork_restart(struct tevent_context *ev,
prefork_fork_worker(rc->worker->task,
ev,
rc->worker->ev2,
+ rc->lp_ctx,
rc->service_details,
rc->service_name,
rc->worker->control_pipe,
+ restart_delay,
&pd);
}
}
@@ -309,9 +517,11 @@ static void setup_handlers(struct tevent_context *ev, int from_parent_fd) {
static void prefork_fork_worker(struct task_server *task,
struct tevent_context *ev,
struct tevent_context *ev2,
+ struct loadparm_context *lp_ctx,
const struct service_details *service_details,
const char *service_name,
int control_pipe[2],
+ unsigned restart_delay,
struct process_details *pd)
{
struct tfork *w = NULL;
@@ -333,8 +543,10 @@ static void prefork_fork_worker(struct task_server *task,
smb_panic("OOM allocating restart context\n");
}
rc->t = w;
+ rc->lp_ctx = lp_ctx;
rc->service_name = service_name;
rc->service_details = service_details;
+ rc->restart_delay = restart_delay;
rc->master = NULL;
rc->worker = talloc_zero(rc, struct worker_restart_context);
if (rc->worker == NULL) {
@@ -388,148 +600,14 @@ static void prefork_new_task(
const struct service_details *service_details,
int from_parent_fd)
{
- pid_t pid;
- struct tfork* t = NULL;
- int i, num_children;
-
- struct tevent_context *ev2;
- struct task_server *task = NULL;
- struct process_details pd = initial_process_details;
- int control_pipe[2];
-
- t = tfork_create();
- if (t == NULL) {
- smb_panic("failure in tfork\n");
- }
-
- DBG_NOTICE("Forking [%s] pre-fork master process\n", service_name);
- pid = tfork_child_pid(t);
- if (pid != 0) {
- struct tevent_fd *fde = NULL;
- int fd = tfork_event_fd(t);
- struct restart_context *rc = NULL;
-
- /* Register a pipe handler that gets called when the prefork
- * master process terminates.
- */
- rc = talloc_zero(ev, struct restart_context);
- if (rc == NULL) {
- smb_panic("OOM allocating restart context\n");
- }
- rc->t = t;
- rc->service_name = service_name;
- rc->service_details = service_details;
- rc->from_parent_fd = from_parent_fd;
- rc->master = talloc_zero(rc, struct master_restart_context);
- if (rc->master == NULL) {
- smb_panic("OOM allocating master restart context\n");
- }
-
- rc->master->lp_ctx = lp_ctx;
- rc->master->new_task_fn = new_task_fn;
- rc->master->private_data = private_data;
-
- fde = tevent_add_fd(
- ev, ev, fd, TEVENT_FD_READ, prefork_child_pipe_handler, rc);
- if (fde == NULL) {
- smb_panic("Failed to add child pipe handler, "
- "after fork");
- }
- tevent_fd_set_auto_close(fde);
- return;
- }
-
- pid = getpid();
- setproctitle("task[%s] pre-fork master", service_name);
-
- /*
- * this will free all the listening sockets and all state that
- * is not associated with this new connection
- */
- if (tevent_re_initialise(ev) != 0) {
- smb_panic("Failed to re-initialise tevent after fork");
- }
- prefork_reload_after_fork();
- setup_handlers(ev, from_parent_fd);
-
- if (service_details->inhibit_pre_fork) {
- task = new_task_fn(
- ev, lp_ctx, cluster_id(pid, 0), private_data, NULL);
- /*
- * The task does not support pre-fork
- */
- if (task != NULL && service_details->post_fork != NULL) {
- service_details->post_fork(task, &pd);
- }
- tevent_loop_wait(ev);
- TALLOC_FREE(ev);
- exit(0);
- }
-
- /*
- * This is now the child code. We need a completely new event_context
- * to work with
- */
- ev2 = s4_event_context_init(NULL);
-
- /* setup this new connection: process will bind to it's sockets etc
- *
- * While we can use ev for the child, which has been re-initialised
- * above we must run the new task under ev2 otherwise the children would
- * be listening on the sockets. Also we don't want the top level
- * process accepting and handling requests, it's responsible for
- * monitoring and controlling the child work processes.
- */
- task = new_task_fn(ev2, lp_ctx, cluster_id(pid, 0), private_data, NULL);
- if (task == NULL) {
- TALLOC_FREE(ev);
- TALLOC_FREE(ev2);
- exit(0);
- }
-
- {
- int default_children;
- default_children = lpcfg_prefork_children(lp_ctx);
- num_children = lpcfg_parm_int(lp_ctx, NULL, "prefork children",
- service_name, default_children);
- }
- if (num_children == 0) {
- DBG_WARNING("Number of pre-fork children for %s is zero, "
- "NO worker processes will be started for %s\n",
- service_name, service_name);
- }
- DBG_NOTICE("Forking %d %s worker processes\n",
- num_children, service_name);
- {
- int ret;
- ret = pipe(control_pipe);
- if (ret != 0) {
- smb_panic("Unable to create worker control pipe\n");
- }
- smb_set_close_on_exec(control_pipe[0]);
- smb_set_close_on_exec(control_pipe[0]);
- }
-
- /*
- * We are now free to spawn some worker processes
- */
- for (i=0; i < num_children; i++) {
- prefork_fork_worker(task,
- ev,
- ev2,
- service_details,
- service_name,
- control_pipe,
- &pd);
- pd.instances++;
- }
-
- /* Don't listen on the sockets we just gave to the children */
- tevent_loop_wait(ev);
- TALLOC_FREE(ev);
- /* We need to keep ev2 until we're finished for the messaging to work */
- TALLOC_FREE(ev2);
- exit(0);
+ prefork_fork_master(ev,
+ lp_ctx,
+ service_name,
+ new_task_fn,
+ private_data,
+ service_details,
+ 0,
+ from_parent_fd);
}
--
2.17.1
From 491fab0433abecf843e04c6692c5e89a26837b38 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Fri, 14 Sep 2018 09:43:59 +1200
Subject: [PATCH 07/11] source4 messaging: clean up terminated processes
Now that the smbd pre-fork process model restarts failed processes rather than
terminating, we end up with names registered to defunct processes.
This patch adds a function to clean up all the names registered to a process.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/lib/messaging/messaging.c | 42 +++++++++++++++++++++++++++++++
source4/lib/messaging/messaging.h | 2 ++
2 files changed, 44 insertions(+)
diff --git a/source4/lib/messaging/messaging.c b/source4/lib/messaging/messaging.c
index 935951f3fba..413a19445eb 100644
--- a/source4/lib/messaging/messaging.c
+++ b/source4/lib/messaging/messaging.c
@@ -248,6 +248,48 @@ static void imessaging_dgm_recv(struct tevent_context *ev,
/* Keep a list of imessaging contexts */
static struct imessaging_context *msg_ctxs;
+/*
+ * A process has terminated, clean-up any names it has registered.
+ */
+NTSTATUS imessaging_process_cleanup(
+ struct imessaging_context *msg_ctx,
+ pid_t pid)
+{
+ struct irpc_name_records *names = NULL;
+ int i = 0;
+ int j = 0;
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+
+ if (mem_ctx == NULL) {
+ DBG_ERR("OOM unable to clean up messaging for process (%d)\n",
+ pid);
+ return NT_STATUS_NO_MEMORY;
+ }
+
+ names = irpc_all_servers(msg_ctx, mem_ctx);
+ if (names == NULL) {
+ TALLOC_FREE(mem_ctx);
+ return NT_STATUS_OK;
+ }
+ for (i = 0; i < names->num_records; i++) {
+ for (j = 0; j < names->names[i]->count; j++) {
+ if (names->names[i]->ids[j].pid == pid) {
+ int ret = server_id_db_prune_name(
+ msg_ctx->names,
+ names->names[i]->name,
+ names->names[i]->ids[j]);
+ if (ret != 0 && ret != ENOENT) {
+ TALLOC_FREE(mem_ctx);
+ return map_nt_error_from_unix_common(
+ ret);
+ }
+ }
+ }
+ }
+ TALLOC_FREE(mem_ctx);
+ return NT_STATUS_OK;
+}
+
static int imessaging_context_destructor(struct imessaging_context *msg)
{
DLIST_REMOVE(msg_ctxs, msg);
diff --git a/source4/lib/messaging/messaging.h b/source4/lib/messaging/messaging.h
index 668464dc061..8eb195dfb2e 100644
--- a/source4/lib/messaging/messaging.h
+++ b/source4/lib/messaging/messaging.h
@@ -54,5 +54,7 @@ NTSTATUS imessaging_send_ptr(struct imessaging_context *msg, struct server_id se
uint32_t msg_type, void *ptr);
void imessaging_deregister(struct imessaging_context *msg, uint32_t msg_type, void *private_data);
struct server_id imessaging_get_server_id(struct imessaging_context *msg_ctx);
+NTSTATUS imessaging_process_cleanup(struct imessaging_context *msg_ctx,
+ pid_t pid);
#endif
--
2.17.1
From 6190b4730f91e0d200a8bfad3c4e13850f83bc03 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Fri, 14 Sep 2018 09:45:38 +1200
Subject: [PATCH 08/11] source4 smbd prefork: Cleanup messaging on restart
Clean up names registered in messaging for a terminated process.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
selftest/knownfail.d/prefork_restart | 8 ---
source4/smbd/process_prefork.c | 104 ++++++++++++++++++++++++---
2 files changed, 95 insertions(+), 17 deletions(-)
delete mode 100644 selftest/knownfail.d/prefork_restart
diff --git a/selftest/knownfail.d/prefork_restart b/selftest/knownfail.d/prefork_restart
deleted file mode 100644
index 020199cd9b4..00000000000
--- a/selftest/knownfail.d/prefork_restart
+++ /dev/null
@@ -1,8 +0,0 @@
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_all_workers_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_master_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_ldap_worker_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_master_restart_backoff
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_all_workers_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_master_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_rpc_worker_zero_restart
-^samba.tests.prefork_restart.samba.tests.prefork_restart.PreforkProcessRestartTests.test_worker_restart_backoff
diff --git a/source4/smbd/process_prefork.c b/source4/smbd/process_prefork.c
index f963b83afd1..f892f7f10d8 100644
--- a/source4/smbd/process_prefork.c
+++ b/source4/smbd/process_prefork.c
@@ -34,6 +34,7 @@
#include "param/param.h"
#include "ldb_wrap.h"
#include "lib/util/tfork.h"
+#include "lib/messaging/irpc.h"
#define min(a, b) (((a) < (b)) ? (a) : (b))
@@ -63,7 +64,9 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
struct tevent_fd *fde,
uint16_t flags,
void *private_data);
-static void setup_handlers(struct tevent_context *ev, int from_parent_fd);
+static void setup_handlers(struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ int from_parent_fd);
/*
* State needed to restart the master process or a worker process if they
@@ -120,7 +123,7 @@ static void sigterm_signal_handler(struct tevent_context *ev,
}
#endif
DBG_NOTICE("Exiting pid %d on SIGTERM\n", getpid());
- talloc_free(ev);
+ TALLOC_FREE(ev);
exit(127);
}
@@ -143,6 +146,40 @@ static void prefork_reload_after_fork(void)
}
}
+/*
+ * clean up any messaging associated with the old process.
+ *
+ */
+static void irpc_cleanup(
+ struct loadparm_context *lp_ctx,
+ struct tevent_context *ev,
+ pid_t pid)
+{
+ TALLOC_CTX *mem_ctx = talloc_new(NULL);
+ struct imessaging_context *msg_ctx = NULL;
+ NTSTATUS status = NT_STATUS_OK;
+
+ if (mem_ctx == NULL) {
+ DBG_ERR("OOM cleaning up irpc\n");
+ return;
+ }
+ msg_ctx = imessaging_client_init(mem_ctx, lp_ctx, ev);
+ if (msg_ctx == NULL) {
+ DBG_ERR("Unable to create imessaging_context\n");
+ TALLOC_FREE(mem_ctx);
+ return;
+ }
+ status = imessaging_process_cleanup(msg_ctx, pid);
+ if (!NT_STATUS_IS_OK(status)) {
+ DBG_ERR("imessaging_process_cleanup returned (%s)\n",
+ nt_errstr(status));
+ TALLOC_FREE(mem_ctx);
+ return;
+ }
+
+ TALLOC_FREE(mem_ctx);
+}
+
/*
handle EOF on the parent-to-all-children pipe in the child
*/
@@ -150,10 +187,23 @@ static void prefork_pipe_handler(struct tevent_context *event_ctx,
struct tevent_fd *fde, uint16_t flags,
void *private_data)
{
- /* free the fde which removes the event and stops it firing again */
+ struct loadparm_context *lp_ctx = NULL;
+ pid_t pid;
+
+ /*
+ * free the fde which removes the event and stops it firing again
+ */
TALLOC_FREE(fde);
+
+ /*
+ * Clean up any irpc end points this process had.
+ */
+ pid = getpid();
+ lp_ctx = talloc_get_type_abort(private_data, struct loadparm_context);
+ irpc_cleanup(lp_ctx, event_ctx, pid);
+
DBG_NOTICE("Child %d exiting\n", getpid());
- talloc_free(event_ctx);
+ TALLOC_FREE(event_ctx);
exit(0);
}
@@ -238,7 +288,7 @@ static void prefork_fork_master(
smb_panic("Failed to re-initialise tevent after fork");
}
prefork_reload_after_fork();
- setup_handlers(ev, from_parent_fd);
+ setup_handlers(ev, lp_ctx, from_parent_fd);
if (service_details->inhibit_pre_fork) {
task = new_task_fn(
@@ -272,7 +322,23 @@ static void prefork_fork_master(
if (task == NULL) {
TALLOC_FREE(ev);
TALLOC_FREE(ev2);
- exit(0);
+ exit(127);
+ }
+
+ /*
+ * Register an irpc name that can be used by the samba-tool processes
+ * command
+ */
+ {
+ struct talloc_ctx *ctx = talloc_new(NULL);
+ char *name = NULL;
+ if (ctx == NULL) {
+ DBG_ERR("Out of memory");
+ exit(127);
+ }
+ name = talloc_asprintf(ctx, "prefork-master-%s", service_name);
+ irpc_add_name(task->msg_ctx, name);
+ TALLOC_FREE(ctx);
}
{
@@ -406,6 +472,8 @@ static void prefork_child_pipe_handler(struct tevent_context *ev,
rc = talloc_get_type_abort(private_data, struct restart_context);
pid = tfork_child_pid(rc->t);
errno = 0;
+
+ irpc_cleanup(rc->lp_ctx, ev, pid);
status = tfork_status(&rc->t, false);
if (status == -1) {
DBG_ERR("Parent %d, Child %d terminated, "
@@ -483,12 +551,16 @@ static void prefork_accept_connection(
private_data, process_context);
}
-static void setup_handlers(struct tevent_context *ev, int from_parent_fd) {
+static void setup_handlers(
+ struct tevent_context *ev,
+ struct loadparm_context *lp_ctx,
+ int from_parent_fd)
+{
struct tevent_fd *fde = NULL;
struct tevent_signal *se = NULL;
fde = tevent_add_fd(ev, ev, from_parent_fd, TEVENT_FD_READ,
- prefork_pipe_handler, NULL);
+ prefork_pipe_handler, lp_ctx);
if (fde == NULL) {
smb_panic("Failed to add fd handler after fork");
}
@@ -567,7 +639,7 @@ static void prefork_fork_worker(struct task_server *task,
tevent_fd_set_auto_close(fde);
} else {
close(control_pipe[1]);
- setup_handlers(ev2, control_pipe[0]);
+ setup_handlers(ev2, lp_ctx, control_pipe[0]);
/*
* tfork uses malloc
*/
@@ -581,6 +653,19 @@ static void prefork_fork_worker(struct task_server *task,
if (service_details->post_fork != NULL) {
service_details->post_fork(task, pd);
}
+ {
+ struct talloc_ctx *ctx = talloc_new(NULL);
+ char *name = NULL;
+ if (ctx == NULL) {
+ smb_panic("OOM allocating talloc context\n");
+ }
+ name = talloc_asprintf(ctx,
+ "prefork-worker-%s-%d",
+ service_name,
+ pd->instances);
+ irpc_add_name(task->msg_ctx, name);
+ TALLOC_FREE(ctx);
+ }
tevent_loop_wait(ev2);
talloc_free(ev2);
exit(0);
@@ -621,6 +706,7 @@ static void prefork_terminate_task(struct tevent_context *ev,
void *process_context)
{
DBG_DEBUG("called with reason[%s]\n", reason);
+ TALLOC_FREE(ev);
if (fatal == true) {
exit(127);
} else {
--
2.17.1
From 8ebc53b9e083e5a9e520621dbe828d68281c3bc7 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Tue, 11 Sep 2018 07:38:06 +1200
Subject: [PATCH 09/11] source4 dcerpc_server: remove irpc_add_name
Remove the irpc_add_name from dcesrv_sock_accept, as it results in two
identical names being registered for a process.
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
source4/rpc_server/dcerpc_server.c | 2 --
1 file changed, 2 deletions(-)
diff --git a/source4/rpc_server/dcerpc_server.c b/source4/rpc_server/dcerpc_server.c
index b900623b627..7949c66323a 100644
--- a/source4/rpc_server/dcerpc_server.c
+++ b/source4/rpc_server/dcerpc_server.c
@@ -2723,8 +2723,6 @@ static void dcesrv_sock_accept(struct stream_connection *srv_conn)
srv_conn->private_data = dcesrv_conn;
- irpc_add_name(srv_conn->msg_ctx, "rpc_server");
-
subreq = dcerpc_read_ncacn_packet_send(dcesrv_conn,
dcesrv_conn->event_ctx,
dcesrv_conn->stream);
--
2.17.1
From 376f90f3129bc2094243347ba132d2124c5f4209 Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Fri, 14 Sep 2018 09:38:56 +1200
Subject: [PATCH 10/11] samba-tool processes: display pre-fork masters and
workers
Tag prefork work processes with "(worker 0)", and sort the process list
on server name to get a consistent order.
Service: PID
--------------------------------------
cldap_server 15588
...
ldap_server 15584
ldap_server(worker 0) 15627
ldap_server(worker 1) 15630
ldap_server(worker 2) 15632
ldap_server(worker 3) 15634
nbt_server 15576
notify-daemon 15638
...
samba 0
...
wrepl_server 15580
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
python/samba/netcmd/processes.py | 75 +++++++++++++++++++++++++++++---
1 file changed, 69 insertions(+), 6 deletions(-)
diff --git a/python/samba/netcmd/processes.py b/python/samba/netcmd/processes.py
index d04a548abd7..0406b1859ca 100644
--- a/python/samba/netcmd/processes.py
+++ b/python/samba/netcmd/processes.py
@@ -49,6 +49,42 @@ class cmd_processes(Command):
takes_args = []
+ #
+ # Get details of the samba services currently registered in irpc
+ # The prefork process model registers names in the form:
+ # prefork-master-<service> and prefork-worker-<service>-<instance>
+ #
+ # To allow this routine to identify pre-fork master and worker process
+ #
+ # returns a tuple (filtered, masters, workers)
+ #
+ # filtered - is a list of services with the prefork-* removed
+ # masters - dictionary keyed on service name of prefork master processes
+ # workers - dictionary keyed on service name containing an ordered list
+ # of worker processes.
+ def get_service_data(self, msg_ctx):
+ services = msg_ctx.irpc_all_servers()
+ filtered = []
+ masters = {}
+ workers = {}
+ for service in services:
+ for id in service.ids:
+ if service.name.startswith("prefork-master"):
+ ns = service.name.split("-")
+ name = ns[2] + "_server"
+ masters[name] = service.ids[0].pid
+ elif service.name.startswith("prefork-worker"):
+ ns = service.name.split("-")
+ name = ns[2] + "_server"
+ instance = int(ns[3])
+ pid = service.ids[0].pid
+ if name not in workers:
+ workers[name] = {}
+ workers[name][instance] = (instance, pid)
+ else:
+ filtered.append(service)
+ return (filtered, masters, workers)
+
def run(self, sambaopts, versionopts, section_name=None,
name=None, pid=None):
@@ -72,9 +108,36 @@ class cmd_processes(Command):
if server_id.pid == int(pid):
self.outf.write("%s\n" % name.name)
else:
- names = msg_ctx.irpc_all_servers()
- self.outf.write(" Service: PID \n")
- self.outf.write("-----------------------------\n")
- for name in names:
- for server_id in name.ids:
- self.outf.write("%-16s %6d\n" % (name.name, server_id.pid))
+ seen = {} # Service entries already printed, service names can
+ # be registered multiple times against a process
+ # but we should only display them once.
+ prefork = {} # Services running in the prefork process model
+ # want to ensure that the master process and workers
+ # are grouped to together.
+ (services, masters, workers) = self.get_service_data(msg_ctx)
+ self.outf.write(" Service: PID\n")
+ self.outf.write("--------------------------------------\n")
+
+ for service in sorted(services, key=lambda x: x.name):
+ if service.name in masters:
+ # If this service is running in a pre-forked process we
+ # want to print the master process followed by all the
+ # worker processes
+ pid = masters[service.name]
+ if pid not in prefork:
+ prefork[pid] = True
+ self.outf.write("%-26s %6d\n" %
+ (service.name, pid))
+ if service.name in workers:
+ ws = workers[service.name]
+ for w in ws:
+ (instance, pid) = ws[w]
+ sn = "{0}(worker {1})".format(
+ service.name, instance)
+ self.outf.write("%-26s %6d\n" % (sn, pid))
+ else:
+ for server_id in service.ids:
+ if (service.name, server_id.pid) not in seen:
+ self.outf.write("%-26s %6d\n"
+ % (service.name, server_id.pid))
+ seen[(service.name, server_id.pid)] = True
--
2.17.1
From 2284563b416e083cab935dc043343ccf04c1c25c Mon Sep 17 00:00:00 2001
From: Gary Lockyer <gary at catalyst.net.nz>
Date: Wed, 19 Sep 2018 15:25:02 +1200
Subject: [PATCH 11/11] WHATSNEW: prefork restart
Signed-off-by: Gary Lockyer <gary at catalyst.net.nz>
---
WHATSNEW.txt | 21 +++++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/WHATSNEW.txt b/WHATSNEW.txt
index 98ada34e5c6..b6243c0beb0 100644
--- a/WHATSNEW.txt
+++ b/WHATSNEW.txt
@@ -45,6 +45,7 @@ netlogon prefork
DCERPC now supports pre-forked NETLOGON processes. The netlogon processes are
pre-forked when the prefork process model is selected for samba.
+<<<<<<< HEAD
Offline domain backups
----------------------
@@ -63,6 +64,18 @@ information about how the users are spread across groups in your domain.
The 'samba-tool group list --verbose' command has also been updated to include
the number of users in each group.
+prefork process restart
+-----------------------
+
+The pre-fork process model now restarts failed processes. The delay between
+restart attempts is controlled by the "prefork backoff increment" (default = 10)
+and "prefork maximum backoff" (default = 120) smbd.conf parameters. A linear
+back off strategy is used with "prefork backoff increment" added to the
+delay between restart attempts up until it reaches "prefork maximum backoff".
+
+Using the default sequence the restart delays (in seconds) are:
+ 0, 10, 20, ..., 120, 120, ...
+
REMOVED FEATURES
================
@@ -75,8 +88,12 @@ The samba_backup script has been removed. This has now been replaced by the
smb.conf changes
================
- Parameter Name Description Default
- -------------- ----------- -------
+ Parameter Name Description Default
+ -------------- ----------- -------
+ prefork backoff increment Delay added to process restart 10 (seconds)
+ between attempts.
+ prefork maximum backoff Maximum delay for process between 120 (seconds)
+ process restart attempts
KNOWN ISSUES
--
2.17.1
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: OpenPGP digital signature
URL: <http://lists.samba.org/pipermail/samba-technical/attachments/20181120/b9abf432/signature-0001.sig>
More information about the samba-technical
mailing list