[PATCH] Offline backup could be missing replUpToDateVector

Tim Beale timbeale at catalyst.net.nz
Mon Nov 12 04:11:20 UTC 2018


We noticed that when you take an offline backup of a singleton DC, the
replUpToDateVector can be missing. This means that replication
propagation dampening doesn't kick in for any objects created on the
original DC (i.e. the objects created by the singleton DC *always* get
replicated, *every* time there's a new DRS connection between 2 DCs).

The attached patch-set avoids the problem by flushing the
replUpToDateVector when we restore an offline backup.

CI pass: https://gitlab.com/catalyst-samba/samba/pipelines/35953778

Review appreciated. Thanks.

-------------- next part --------------
From 1b98444d01db648b0833fc27f42178da96aa6732 Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Thu, 8 Nov 2018 12:20:30 +1300
Subject: [PATCH 1/4] tests: Add assertion that replUpToDateVector is present
 after backup

We noticed that offline backups were missing a replUpToDateVector for
the original DC, if the backup was taken on a singleton DC. This patch
adds an assertion to the existing test-cases to highlight the problem.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 python/samba/tests/domain_backup.py | 31 ++++++++++++++++++++++++++-----
 selftest/knownfail.d/domain_backup  |  3 +++
 2 files changed, 29 insertions(+), 5 deletions(-)
 create mode 100644 selftest/knownfail.d/domain_backup

diff --git a/python/samba/tests/domain_backup.py b/python/samba/tests/domain_backup.py
index 320ff1f..e9fcd31 100644
--- a/python/samba/tests/domain_backup.py
+++ b/python/samba/tests/domain_backup.py
@@ -28,6 +28,7 @@ from samba import Ldb, dn_from_dns_name
 from samba.netcmd.fsmo import get_fsmo_roleowner
 import re
 from samba import sites
+from samba.dsdb import _dsdb_load_udv_v2
 
 
 def get_prim_dom(secrets_path, lp):
@@ -64,22 +65,39 @@ class DomainBackupBase(SambaToolCmdTest, TestCaseInTempDir):
         self.backend = backend
         self.base_cmd += ["--backend-store=" + backend]
 
+    def get_expected_partitions(self, samdb):
+        basedn = str(samdb.get_default_basedn())
+        config_dn = "CN=Configuration,%s" % basedn
+        return [basedn, config_dn, "CN=Schema,%s" % config_dn,
+                "DC=DomainDnsZones,%s" % basedn,
+                "DC=ForestDnsZones,%s" % basedn]
+
     def assert_partitions_present(self, samdb):
         """Asserts all expected partitions are present in the backup samdb"""
         res = samdb.search(base="", scope=ldb.SCOPE_BASE,
                            attrs=['namingContexts'])
         actual_ncs = [str(r) for r in res[0].get('namingContexts')]
 
-        basedn = str(samdb.get_default_basedn())
-        config_dn = "CN=Configuration,%s" % basedn
-        expected_ncs = [basedn, config_dn, "CN=Schema,%s" % config_dn,
-                        "DC=DomainDnsZones,%s" % basedn,
-                        "DC=ForestDnsZones,%s" % basedn]
+        expected_ncs = self.get_expected_partitions(samdb)
 
         for nc in expected_ncs:
             self.assertTrue(nc in actual_ncs,
                             "%s not in %s" % (nc, str(actual_ncs)))
 
+    def assert_repl_uptodate_vector(self, samdb):
+        """Asserts an replUpToDateVector entry exists for the original DC"""
+        orig_invoc_id = self.ldb.get_invocation_id()
+        expected_ncs = self.get_expected_partitions(samdb)
+
+        # loop through the partitions and check the upToDateness vector
+        for nc in expected_ncs:
+            found = False
+            for cursor in _dsdb_load_udv_v2(samdb, nc):
+                if orig_invoc_id == str(cursor.source_dsa_invocation_id):
+                    found = True
+                    break
+            self.assertTrue(found, "Couldn't find UDTV for original DC")
+
     def assert_dcs_present(self, samdb, expected_server, expected_count=None):
         """Checks that the expected server is present in the restored DB"""
         search_expr = "(&(objectClass=Server)(serverReference=*))"
@@ -296,6 +314,9 @@ class DomainBackupBase(SambaToolCmdTest, TestCaseInTempDir):
         self.assert_dcs_present(samdb, self.new_server, expected_count=1)
         self.assert_fsmo_roles(samdb, self.new_server, self.server)
         self.assert_secrets(samdb, expect_secrets=expect_secrets)
+
+        # check we still have an uptodateness vector for the original DC
+        self.assert_repl_uptodate_vector(samdb)
         return samdb
 
     def assert_user_secrets(self, samdb, username, expect_secrets):
diff --git a/selftest/knownfail.d/domain_backup b/selftest/knownfail.d/domain_backup
new file mode 100644
index 0000000..0484a92
--- /dev/null
+++ b/selftest/knownfail.d/domain_backup
@@ -0,0 +1,3 @@
+samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore\(ad_dc:local\)
+samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore_into_site\(ad_dc:local\)
+samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore_with_conf\(ad_dc:local\)
-- 
2.7.4


From 2315db83fe74605ff2b07874f76abc2e509cd86c Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Thu, 8 Nov 2018 16:41:52 +1300
Subject: [PATCH 2/4] netcmd: Add backupType marker to backed-up DB

We are starting to hit restore cases that are only applicable to a
particular type of backup. We already had a marker to differentiate
renames, but differentiating offline backups would also be useful.

Note that this raises a slight compatibility issue for backups created
on v4.9, as the marker won't exist. However, it's only offline backups
we will use this marker for (at the moment), and this option doesn't
exist on v4.9, so there's no problem.

Removing the markers has been refactored out into a separate function to
handle the optional presence of the new marker.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 python/samba/netcmd/domain_backup.py | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/python/samba/netcmd/domain_backup.py b/python/samba/netcmd/domain_backup.py
index 8b8ecda..fd3ab32 100644
--- a/python/samba/netcmd/domain_backup.py
+++ b/python/samba/netcmd/domain_backup.py
@@ -261,6 +261,7 @@ class cmd_domain_backup_online(samba.netcmd.Command):
         time_str = get_timestamp()
         add_backup_marker(samdb, "backupDate", time_str)
         add_backup_marker(samdb, "sidForRestore", new_sid)
+        add_backup_marker(samdb, "backupType", "online")
 
         # ensure the admin user always has a password set (same as provision)
         if no_secrets:
@@ -380,6 +381,25 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
 
         return sitename
 
+    def remove_backup_markers(self, samdb):
+        """Remove DB markers added by the backup process"""
+
+        # check what markers we need to remove (this may vary)
+        markers = ['sidForRestore', 'backupRename', 'backupDate', 'backupType']
+        res = samdb.search(base=ldb.Dn(samdb, "@SAMBA_DSDB"),
+                           scope=ldb.SCOPE_BASE,
+                           attrs=markers)
+
+        # remove any markers that exist in the DB
+        m = ldb.Message()
+        m.dn = ldb.Dn(samdb, "@SAMBA_DSDB")
+
+        for attr in markers:
+            if attr in res[0]:
+                m[attr] = ldb.MessageElement([], ldb.FLAG_MOD_DELETE, attr)
+
+        samdb.modify(m)
+
     def run(self, sambaopts=None, credopts=None, backup_file=None,
             targetdir=None, newservername=None, host_ip=None, host_ip6=None,
             site=None):
@@ -553,16 +573,7 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
         self.fix_old_dc_references(samdb)
 
         # Remove DB markers added by the backup process
-        m = ldb.Message()
-        m.dn = ldb.Dn(samdb, "@SAMBA_DSDB")
-        m["backupDate"] = ldb.MessageElement([], ldb.FLAG_MOD_DELETE,
-                                             "backupDate")
-        m["sidForRestore"] = ldb.MessageElement([], ldb.FLAG_MOD_DELETE,
-                                                "sidForRestore")
-        if is_rename:
-            m["backupRename"] = ldb.MessageElement([], ldb.FLAG_MOD_DELETE,
-                                                   "backupRename")
-        samdb.modify(m)
+        self.remove_backup_markers(samdb)
 
         logger.info("Backup file successfully restored to %s" % targetdir)
         logger.info("Please check the smb.conf settings are correct before "
@@ -789,6 +800,7 @@ class cmd_domain_backup_rename(samba.netcmd.Command):
         add_backup_marker(samdb, "backupDate", time_str)
         add_backup_marker(samdb, "sidForRestore", new_sid)
         add_backup_marker(samdb, "backupRename", old_realm)
+        add_backup_marker(samdb, "backupType", "rename")
 
         # fix up the DNS objects that are using the old dnsRoot value
         self.update_dns_root(logger, samdb, old_realm, delete_old_dns)
@@ -991,6 +1003,7 @@ class cmd_domain_backup_offline(samba.netcmd.Command):
         time_str = get_timestamp()
         add_backup_marker(samdb, "backupDate", time_str)
         add_backup_marker(samdb, "sidForRestore", sid)
+        add_backup_marker(samdb, "backupType", "offline")
 
         # Now handle all the LDB and TDB files that are not linked to
         # anything else.  Use transactions for LDBs.
-- 
2.7.4


From d2f973bcfbf5311239bc02bd4feef555ab72307e Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Thu, 8 Nov 2018 17:07:08 +1300
Subject: [PATCH 3/4] netcmd: Small backup refactor to avoid compatiblity
 problems

It will be easy to forget that the backupType marker doesn't exist on
v4.9. However, this seems like a dumb reason not to support v4.9
backup-files. Add a wrapper function to avoid potential problems
cropping up in future.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 python/samba/netcmd/domain_backup.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/python/samba/netcmd/domain_backup.py b/python/samba/netcmd/domain_backup.py
index fd3ab32..4518345 100644
--- a/python/samba/netcmd/domain_backup.py
+++ b/python/samba/netcmd/domain_backup.py
@@ -400,6 +400,23 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
 
         samdb.modify(m)
 
+    def get_backup_type(self, samdb):
+        res = samdb.search(base=ldb.Dn(samdb, "@SAMBA_DSDB"),
+                           scope=ldb.SCOPE_BASE,
+                           attrs=['backupRename', 'backupType'])
+
+        # note that the backupType marker won't exist on backups created on
+        # v4.9. However, we can still infer the type, as only rename and
+        # online backups are supported on v4.9
+        if 'backupType' in res[0]:
+            backup_type = str(res[0]['backupType'])
+        elif 'backupRename' in res[0]:
+            backup_type = "rename"
+        else:
+            backup_type = "online"
+
+        return backup_type
+
     def run(self, sambaopts=None, credopts=None, backup_file=None,
             targetdir=None, newservername=None, host_ip=None, host_ip6=None,
             site=None):
@@ -445,6 +462,7 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
         private_dir = os.path.join(targetdir, 'private')
         samdb_path = os.path.join(private_dir, 'sam.ldb')
         samdb = SamDB(url=samdb_path, session_info=system_session(), lp=lp)
+        backup_type = self.get_backup_type(samdb)
 
         if site is None:
             # There's no great way to work out the correct site to add the
@@ -480,8 +498,7 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
         # Get the SID saved by the backup process and create account
         res = samdb.search(base=ldb.Dn(samdb, "@SAMBA_DSDB"),
                            scope=ldb.SCOPE_BASE,
-                           attrs=['sidForRestore', 'backupRename'])
-        is_rename = True if 'backupRename' in res[0] else False
+                           attrs=['sidForRestore'])
         sid = res[0].get('sidForRestore')[0]
         logger.info('Creating account with SID: ' + str(sid))
         ctx.join_add_objects(specified_sid=dom_sid(str(sid)))
@@ -497,7 +514,7 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
         # if we renamed the backed-up domain, then we need to add the DNS
         # objects for the new realm (we do this in the restore, now that we
         # know the new DC's IP address)
-        if is_rename:
+        if backup_type == "rename":
             self.register_dns_zone(logger, samdb, lp, ctx.ntds_guid,
                                    host_ip, host_ip6, site)
 
-- 
2.7.4


From 506ac203b2b9d3416d97a7bd4c17e3ddc8e6c629 Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Thu, 8 Nov 2018 17:34:26 +1300
Subject: [PATCH 4/4] netcmd: Flush replUpToDateVector when restoring offline
 backup

The replUpToDateVector could be incorrect after an offline backup was
restored. This means replication propagation dampening doesn't work
properly. In the worst case, a singleton DC would have no
replUpToDateVector at all, and so *all* objects created on that DC get
replicated every time a new DRS connection is established between 2 DCs.
This becomes a real problem if you used that singleton DC to create 100K
objects...

This patch flushes the replUpToDateVector when an offline backup gets
restored. We need to do this before we add in the new DC and remove the
old DCs.

Note that this is only a problem for offline backups. The online/rename
backups are received over DRS, and as part of the replication they
receive the latest replUpToDateVector from the DC being backed up.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 python/samba/netcmd/domain_backup.py | 38 ++++++++++++++++++++++++++++++++----
 selftest/knownfail.d/domain_backup   |  3 ---
 2 files changed, 34 insertions(+), 7 deletions(-)
 delete mode 100644 selftest/knownfail.d/domain_backup

diff --git a/python/samba/netcmd/domain_backup.py b/python/samba/netcmd/domain_backup.py
index 4518345..aa51a7d 100644
--- a/python/samba/netcmd/domain_backup.py
+++ b/python/samba/netcmd/domain_backup.py
@@ -33,7 +33,7 @@ from samba.auth import system_session
 from samba.join import DCJoinContext, join_clone, DCCloneAndRenameContext
 from samba.dcerpc.security import dom_sid
 from samba.netcmd import Option, CommandError
-from samba.dcerpc import misc, security
+from samba.dcerpc import misc, security, drsblobs
 from samba import Ldb
 from . fsmo import cmd_fsmo_seize
 from samba.provision import make_smbconf, DEFAULTSITE
@@ -51,6 +51,8 @@ from samba.mdb_util import mdb_copy
 import errno
 from subprocess import CalledProcessError
 from samba import sites
+from samba.dsdb import _dsdb_load_udv_v2
+from samba.ndr import ndr_pack
 
 
 # work out a SID (based on a free RID) to use when the domain gets restored.
@@ -417,6 +419,26 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
 
         return backup_type
 
+    def save_uptodate_vectors(self, samdb, partitions):
+        """Ensures the UTDV used by DRS is correct after an offline backup"""
+        for nc in partitions:
+            # load the replUpToDateVector we *should* have
+            utdv = _dsdb_load_udv_v2(samdb, nc)
+
+            # convert it to NDR format and write it into the DB
+            utdv_blob = drsblobs.replUpToDateVectorBlob()
+            utdv_blob.version = 2
+            utdv_blob.ctr.cursors = utdv
+            utdv_blob.ctr.count = len(utdv)
+            new_value = ndr_pack(utdv_blob)
+
+            m = ldb.Message()
+            m.dn = ldb.Dn(samdb, nc)
+            m["replUpToDateVector"] = ldb.MessageElement(new_value,
+                                                         ldb.FLAG_MOD_REPLACE,
+                                                         "replUpToDateVector")
+            samdb.modify(m)
+
     def run(self, sambaopts=None, credopts=None, backup_file=None,
             targetdir=None, newservername=None, host_ip=None, host_ip6=None,
             site=None):
@@ -471,13 +493,21 @@ class cmd_domain_backup_restore(cmd_fsmo_seize):
             site = self.create_default_site(samdb, logger)
             logger.info("Adding new DC to site '{0}'".format(site))
 
-        # Create account using the join_add_objects function in the join object
-        # We need namingContexts, account control flags, and the sid saved by
-        # the backup process.
+        # read the naming contexts out of the DB
         res = samdb.search(base="", scope=ldb.SCOPE_BASE,
                            attrs=['namingContexts'])
         ncs = [str(r) for r in res[0].get('namingContexts')]
 
+        # for offline backups we need to make sure the upToDateness info
+        # contains the invocation-ID and highest-USN of the DC we backed up.
+        # Otherwise replication propagation dampening won't correctly filter
+        # objects created by that DC
+        if backup_type == "offline":
+            self.save_uptodate_vectors(samdb, ncs)
+
+        # Create account using the join_add_objects function in the join object
+        # We need namingContexts, account control flags, and the sid saved by
+        # the backup process.
         creds = credopts.get_credentials(lp)
         ctx = DCJoinContext(logger, creds=creds, lp=lp, site=site,
                             forced_local_samdb=samdb,
diff --git a/selftest/knownfail.d/domain_backup b/selftest/knownfail.d/domain_backup
deleted file mode 100644
index 0484a92..0000000
--- a/selftest/knownfail.d/domain_backup
+++ /dev/null
@@ -1,3 +0,0 @@
-samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore\(ad_dc:local\)
-samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore_into_site\(ad_dc:local\)
-samba.tests.domain_backup.samba.tests.domain_backup.DomainBackupOffline.test_backup_restore_with_conf\(ad_dc:local\)
-- 
2.7.4



More information about the samba-technical mailing list