[PATCH] Improve DRS performance for large databases

Tim Beale timbeale at catalyst.net.nz
Mon Nov 12 02:45:58 UTC 2018


Attached are some patches that improve DRS performance (link processing)
for large databases.

We usually receive the linked attributes grouped by source object. So if
we're dealing with the same source object as the last link, we don't
need to re-check that the source object is valid. We already made this
change for replmd_process_linked_attribute(), but this patch-set makes
the same change to replmd_store_linked_attributes().

Also, we can cache the recycle-bin state to avoid looking it up multiple
times for every single link we process.

CI pass: https://gitlab.com/catalyst-samba/samba/pipelines/35963856

Review appreciated. Thanks.

-------------- next part --------------
From f7b6341096d93e41bb3029a1a388050c57fcbf54 Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Fri, 26 Oct 2018 15:43:33 +1300
Subject: [PATCH 1/5] replmd: Split up replmd_verify_linked_attribute() into
 src/target checks

Refactor replmd_verify_linked_attribute() so we split out the link
attribute source/target checks. This patch should not alter
functionality.

The source object check has been moved out to where
replmd_verify_linked_attribute() was called.

replmd_verify_linked_attribute() has been renamed, as it's now only
checking the link target.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 source4/dsdb/samdb/ldb_modules/repl_meta_data.c | 74 ++++++++++++++-----------
 1 file changed, 43 insertions(+), 31 deletions(-)

diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index 3c7b8b1..5bf8193 100644
--- a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
+++ b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
@@ -138,8 +138,16 @@ static int replmd_check_upgrade_links(struct ldb_context *ldb,
 				      struct parsed_dn *dns, uint32_t count,
 				      struct ldb_message_element *el,
 				      const char *ldap_oid);
-static int replmd_verify_linked_attribute(struct replmd_replicated_request *ar,
-					  struct la_entry *la);
+static int replmd_verify_link_target(struct replmd_replicated_request *ar,
+				     TALLOC_CTX *mem_ctx,
+				     struct la_entry *la_entry,
+				     struct ldb_message *src_msg,
+				     const struct dsdb_attribute *attr);
+static int replmd_get_la_entry_source(struct ldb_module *module,
+				      struct la_entry *la_entry,
+				      TALLOC_CTX *mem_ctx,
+				      const struct dsdb_attribute **ret_attr,
+				      struct ldb_message **source_msg);
 static int replmd_set_la_val(TALLOC_CTX *mem_ctx, struct ldb_val *v, struct dsdb_dn *dsdb_dn,
 			     struct dsdb_dn *old_dsdb_dn, const struct GUID *invocation_id,
 			     uint64_t usn, uint64_t local_usn, NTTIME nttime,
@@ -6573,6 +6581,9 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		talloc_get_type(ldb_module_get_private(module), struct replmd_private);
 	struct la_group *la_group = NULL;
 	struct ldb_context *ldb;
+	TALLOC_CTX *tmp_ctx = NULL;
+	struct ldb_message *src_msg = NULL;
+	const struct dsdb_attribute *attr = NULL;
 
 	ldb = ldb_module_get_ctx(module);
 
@@ -6582,6 +6593,8 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 	for (i = 0; i < ar->objs->linked_attributes_count; i++) {
 		struct la_entry *la_entry;
 
+		tmp_ctx = talloc_new(ar);
+
 		if (replmd_private->la_ctx == NULL) {
 			replmd_private->la_ctx = talloc_new(replmd_private);
 		}
@@ -6604,8 +6617,26 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		talloc_steal(la_entry->la, la_entry->la->identifier);
 		talloc_steal(la_entry->la, la_entry->la->value.blob);
 
-		ret = replmd_verify_linked_attribute(ar, la_entry);
+		/* verify the source object exists for the link */
+		ret = replmd_get_la_entry_source(module, la_entry, tmp_ctx,
+						 &attr, &src_msg);
 
+		/*
+		 * When we fail to find the source object, the error
+		 * code we pass back here is really important. It flags
+		 * back to the callers to retry this request with
+		 * DRSUAPI_DRS_GET_ANC. This case should never happen
+		 * if we're replicating from a Samba DC, but it is
+		 * needed to talk to a Windows DC
+		 */
+		if (ret == LDB_ERR_NO_SUCH_OBJECT) {
+			WERROR err = WERR_DS_DRA_MISSING_PARENT;
+			ret = replmd_replicated_request_werror(ar, err);
+			break;
+		}
+
+		ret = replmd_verify_link_target(ar, tmp_ctx, la_entry,
+						src_msg, attr);
 		if (ret != LDB_SUCCESS) {
 			break;
 		}
@@ -6624,6 +6655,7 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		}
 		DLIST_ADD(la_group->la_entries, la_entry);
 		replmd_private->total_links++;
+		TALLOC_FREE(tmp_ctx);
 	}
 
 	return ret;
@@ -7531,45 +7563,26 @@ linked_attributes[0]:
 }
 
 /**
- * Verifies the source and target objects are known for a linked attribute
+ * Verifies the target object is known for a linked attribute
  */
-static int replmd_verify_linked_attribute(struct replmd_replicated_request *ar,
-					  struct la_entry *la_entry)
+static int replmd_verify_link_target(struct replmd_replicated_request *ar,
+				     TALLOC_CTX *mem_ctx,
+				     struct la_entry *la_entry,
+				     struct ldb_message *src_msg,
+				     const struct dsdb_attribute *attr)
 {
 	int ret = LDB_SUCCESS;
-	TALLOC_CTX *tmp_ctx = talloc_new(la_entry);
 	struct ldb_module *module = ar->module;
-	struct ldb_message *src_msg;
-	const struct dsdb_attribute *attr;
 	struct dsdb_dn *tgt_dsdb_dn = NULL;
 	struct GUID guid = GUID_zero();
 	bool dummy;
 	WERROR status;
 	struct ldb_context *ldb = ldb_module_get_ctx(module);
 	struct drsuapi_DsReplicaLinkedAttribute *la = la_entry->la;
-	const struct dsdb_schema *schema = dsdb_get_schema(ldb, tmp_ctx);
-
-	ret = replmd_get_la_entry_source(module, la_entry, tmp_ctx, &attr,
-					 &src_msg);
-
-	/*
-	 * When we fail to find the source object, the error code we pass
-	 * back here is really important. It flags back to the callers to
-	 * retry this request with DRSUAPI_DRS_GET_ANC. This case should
-	 * never happen if we're replicating from a Samba DC, but it is
-	 * needed to talk to a Windows DC
-	 */
-	if (ret == LDB_ERR_NO_SUCH_OBJECT) {
-		ret = replmd_replicated_request_werror(ar, WERR_DS_DRA_MISSING_PARENT);
-	}
-
-	if (ret != LDB_SUCCESS) {
-		talloc_free(tmp_ctx);
-		return ret;
-	}
+	const struct dsdb_schema *schema = dsdb_get_schema(ldb, mem_ctx);
 
 	/* the value blob for the attribute holds the target object DN */
-	status = dsdb_dn_la_from_blob(ldb, attr, schema, tmp_ctx,
+	status = dsdb_dn_la_from_blob(ldb, attr, schema, mem_ctx,
 				      la->value.blob, &tgt_dsdb_dn);
 	if (!W_ERROR_IS_OK(status)) {
 		ldb_asprintf_errstring(ldb, "Failed to parsed linked attribute blob for %s on %s - %s\n",
@@ -7601,7 +7614,6 @@ static int replmd_verify_linked_attribute(struct replmd_replicated_request *ar,
 		ret = replmd_replicated_request_werror(ar, WERR_DS_DRA_RECYCLED_TARGET);
 	}
 
-	talloc_free(tmp_ctx);
 	return ret;
 }
 
-- 
2.7.4


From adad9a5a5abf3b122e9ce684ced425118c92ae76 Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Fri, 9 Nov 2018 14:06:16 +1300
Subject: [PATCH 2/5] replmd: Skip redundant source object link checks

We receive the links grouped together by source object. We can save
ourselves some work by not looking up the source object for every single
link (if it's still the same object we're dealing with).

We've already made this change to replmd_process_linked_attribute().
This patch makes the same change to replmd_store_linked_attributes().
(We verify that we know about each link source/target as we receive each
replication chunk. replmd_process_linked_attribute() kicks in later as
the transaction completes).

Note some care is needed to hold onto the tmp_ctx/src_msg across
multiple passes of the for loop.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 source4/dsdb/samdb/ldb_modules/repl_meta_data.c | 53 +++++++++++++++----------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index 5bf8193..208c1b9 100644
--- a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
+++ b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
@@ -6592,8 +6592,7 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 	/* save away the linked attributes for the end of the transaction */
 	for (i = 0; i < ar->objs->linked_attributes_count; i++) {
 		struct la_entry *la_entry;
-
-		tmp_ctx = talloc_new(ar);
+		bool new_srcobj;
 
 		if (replmd_private->la_ctx == NULL) {
 			replmd_private->la_ctx = talloc_new(replmd_private);
@@ -6617,22 +6616,38 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		talloc_steal(la_entry->la, la_entry->la->identifier);
 		talloc_steal(la_entry->la, la_entry->la->value.blob);
 
-		/* verify the source object exists for the link */
-		ret = replmd_get_la_entry_source(module, la_entry, tmp_ctx,
-						 &attr, &src_msg);
-
 		/*
-		 * When we fail to find the source object, the error
-		 * code we pass back here is really important. It flags
-		 * back to the callers to retry this request with
-		 * DRSUAPI_DRS_GET_ANC. This case should never happen
-		 * if we're replicating from a Samba DC, but it is
-		 * needed to talk to a Windows DC
+		 * check if we're still dealing with the same source object
+		 * as the last link
 		 */
-		if (ret == LDB_ERR_NO_SUCH_OBJECT) {
-			WERROR err = WERR_DS_DRA_MISSING_PARENT;
-			ret = replmd_replicated_request_werror(ar, err);
-			break;
+		new_srcobj = (la_group == NULL ||
+			      !la_entry_matches_group(la_entry, la_group));
+
+		if (new_srcobj) {
+
+			/* get a new mem_ctx to lookup the source object */
+			TALLOC_FREE(tmp_ctx);
+			tmp_ctx = talloc_new(ar);
+
+			/* verify the link source exists */
+			ret = replmd_get_la_entry_source(module, la_entry,
+							 tmp_ctx, &attr,
+							 &src_msg);
+
+			/*
+			 * When we fail to find the source object, the error
+			 * code we pass back here is really important. It flags
+			 * back to the callers to retry this request with
+			 * DRSUAPI_DRS_GET_ANC. This case should never happen
+			 * if we're replicating from a Samba DC, but it is
+			 * needed to talk to a Windows DC
+			 */
+			if (ret == LDB_ERR_NO_SUCH_OBJECT) {
+				WERROR err = WERR_DS_DRA_MISSING_PARENT;
+				ret = replmd_replicated_request_werror(ar,
+								       err);
+				break;
+			}
 		}
 
 		ret = replmd_verify_link_target(ar, tmp_ctx, la_entry,
@@ -6642,9 +6657,7 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		}
 
 		/* group the links together by source-object for efficiency */
-		if (la_group == NULL ||
-		    !la_entry_matches_group(la_entry, la_group)) {
-
+		if (new_srcobj) {
 			la_group = talloc_zero(replmd_private->la_ctx,
 					       struct la_group);
 			if (la_group == NULL) {
@@ -6655,9 +6668,9 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		}
 		DLIST_ADD(la_group->la_entries, la_entry);
 		replmd_private->total_links++;
-		TALLOC_FREE(tmp_ctx);
 	}
 
+	TALLOC_FREE(tmp_ctx);
 	return ret;
 }
 
-- 
2.7.4


From 7bf3b28b6ef65ee233549ca9b3704232df3656e2 Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Fri, 9 Nov 2018 14:13:11 +1300
Subject: [PATCH 3/5] replmd: Minor change to replmd_verify_link_target() args

We were passing in the entire src_msg, but all we really need is the
source object's DN (and even then, it's only used in error messages).

Change it so we only pass in what the function actually needs. This
makes it a bit easier to see what src_msg is actually used for.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 source4/dsdb/samdb/ldb_modules/repl_meta_data.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index 208c1b9..dcdf5b7 100644
--- a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
+++ b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
@@ -141,7 +141,7 @@ static int replmd_check_upgrade_links(struct ldb_context *ldb,
 static int replmd_verify_link_target(struct replmd_replicated_request *ar,
 				     TALLOC_CTX *mem_ctx,
 				     struct la_entry *la_entry,
-				     struct ldb_message *src_msg,
+				     struct ldb_dn *src_dn,
 				     const struct dsdb_attribute *attr);
 static int replmd_get_la_entry_source(struct ldb_module *module,
 				      struct la_entry *la_entry,
@@ -6651,7 +6651,7 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		}
 
 		ret = replmd_verify_link_target(ar, tmp_ctx, la_entry,
-						src_msg, attr);
+						src_msg->dn, attr);
 		if (ret != LDB_SUCCESS) {
 			break;
 		}
@@ -7581,7 +7581,7 @@ linked_attributes[0]:
 static int replmd_verify_link_target(struct replmd_replicated_request *ar,
 				     TALLOC_CTX *mem_ctx,
 				     struct la_entry *la_entry,
-				     struct ldb_message *src_msg,
+				     struct ldb_dn *src_dn,
 				     const struct dsdb_attribute *attr)
 {
 	int ret = LDB_SUCCESS;
@@ -7600,7 +7600,7 @@ static int replmd_verify_link_target(struct replmd_replicated_request *ar,
 	if (!W_ERROR_IS_OK(status)) {
 		ldb_asprintf_errstring(ldb, "Failed to parsed linked attribute blob for %s on %s - %s\n",
 				       attr->lDAPDisplayName,
-				       ldb_dn_get_linearized(src_msg->dn),
+				       ldb_dn_get_linearized(src_dn),
 				       win_errstr(status));
 		return LDB_ERR_OPERATIONS_ERROR;
 	}
@@ -7614,8 +7614,7 @@ static int replmd_verify_link_target(struct replmd_replicated_request *ar,
 					  DSDB_REPL_FLAG_TARGETS_UPTODATE)) == 0) {
 
 		ret = replmd_check_target_exists(module, tgt_dsdb_dn, la_entry,
-						 src_msg->dn, false, &guid,
-						 &dummy);
+						 src_dn, false, &guid, &dummy);
 	}
 
 	/*
-- 
2.7.4


From 694f9af3b72ec60dc4ad6278b2dc950eccd54f1b Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Fri, 9 Nov 2018 14:29:14 +1300
Subject: [PATCH 4/5] replmd: Split some code out into create_la_entry() helper
 function

replmd_store_linked_attributes() has gotten in szie and complexity. This
refactors some code out into a separate function to make things a bit
more manageable.

This patch should not alter functionality.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 source4/dsdb/samdb/ldb_modules/repl_meta_data.c | 58 +++++++++++++++++--------
 1 file changed, 41 insertions(+), 17 deletions(-)

diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index dcdf5b7..53b8823 100644
--- a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
+++ b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
@@ -6568,6 +6568,43 @@ static bool la_entry_matches_group(struct la_entry *la_entry,
 }
 
 /**
+ * Creates a new la_entry to store replication info for a single
+ * linked attribute.
+ */
+static struct la_entry *
+create_la_entry(struct replmd_private *replmd_private,
+		struct drsuapi_DsReplicaLinkedAttribute *la,
+		uint32_t dsdb_repl_flags)
+{
+	struct la_entry *la_entry;
+
+	if (replmd_private->la_ctx == NULL) {
+		replmd_private->la_ctx = talloc_new(replmd_private);
+	}
+	la_entry = talloc(replmd_private->la_ctx, struct la_entry);
+	if (la_entry == NULL) {
+		return NULL;
+	}
+	la_entry->la = talloc(la_entry,
+			      struct drsuapi_DsReplicaLinkedAttribute);
+	if (la_entry->la == NULL) {
+		talloc_free(la_entry);
+		return NULL;
+	}
+	*la_entry->la = *la;
+	la_entry->dsdb_repl_flags = dsdb_repl_flags;
+
+	/*
+	 * we need to steal the non-scalars so they stay
+	 * around until the end of the transaction
+	 */
+	talloc_steal(la_entry->la, la_entry->la->identifier);
+	talloc_steal(la_entry->la, la_entry->la->value.blob);
+
+	return la_entry;
+}
+
+/**
  * Stores the linked attributes received in the replication chunk - these get
  * applied at the end of the transaction. We also check that each linked
  * attribute is valid, i.e. source and target objects are known.
@@ -6594,27 +6631,14 @@ static int replmd_store_linked_attributes(struct replmd_replicated_request *ar)
 		struct la_entry *la_entry;
 		bool new_srcobj;
 
-		if (replmd_private->la_ctx == NULL) {
-			replmd_private->la_ctx = talloc_new(replmd_private);
-		}
-		la_entry = talloc(replmd_private->la_ctx, struct la_entry);
+		/* create an entry to store the received link attribute info */
+		la_entry = create_la_entry(replmd_private,
+					   &ar->objs->linked_attributes[i],
+					   ar->objs->dsdb_repl_flags);
 		if (la_entry == NULL) {
 			ldb_oom(ldb);
 			return LDB_ERR_OPERATIONS_ERROR;
 		}
-		la_entry->la = talloc(la_entry, struct drsuapi_DsReplicaLinkedAttribute);
-		if (la_entry->la == NULL) {
-			talloc_free(la_entry);
-			ldb_oom(ldb);
-			return LDB_ERR_OPERATIONS_ERROR;
-		}
-		*la_entry->la = ar->objs->linked_attributes[i];
-		la_entry->dsdb_repl_flags = ar->objs->dsdb_repl_flags;
-
-		/* we need to steal the non-scalars so they stay
-		   around until the end of the transaction */
-		talloc_steal(la_entry->la, la_entry->la->identifier);
-		talloc_steal(la_entry->la, la_entry->la->value.blob);
 
 		/*
 		 * check if we're still dealing with the same source object
-- 
2.7.4


From b8386fd4fe0587e9d13923b0664820e19c3009ca Mon Sep 17 00:00:00 2001
From: Tim Beale <timbeale at catalyst.net.nz>
Date: Fri, 26 Oct 2018 15:04:42 +1300
Subject: [PATCH 5/5] replmd: Cache recycle-bin state to avoid DB lookup

By caching the recycle-bin state we can save ~6% of the join time.

Checking whether the recycle-bin is enabled involves an underlying DSDB
search. We do this ~4 times for each link we replicate (twice for the
link source and target). By caching the recycle-bin's state over the
duration of the replication, we can save 1000s of unnecessary DB
searches.

With 5K users this makes the join time ~5 secs faster.

Signed-off-by: Tim Beale <timbeale at catalyst.net.nz>
---
 source4/dsdb/samdb/ldb_modules/repl_meta_data.c | 34 ++++++++++++++++++++-----
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index 53b8823..cab24be 100644
--- a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
+++ b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
@@ -77,6 +77,8 @@ struct replmd_private {
 	bool sorted_links;
 	uint32_t total_links;
 	uint32_t num_processed;
+	bool recyclebin_enabled;
+	bool recyclebin_state_known;
 };
 
 /*
@@ -174,12 +176,35 @@ enum deletion_state {
 	OBJECT_REMOVED=5
 };
 
+static bool replmd_recyclebin_enabled(struct ldb_module *module)
+{
+	bool enabled = false;
+	struct replmd_private *replmd_private =
+		talloc_get_type_abort(ldb_module_get_private(module),
+				      struct replmd_private);
+
+	/*
+	 * only lookup the recycle-bin state once per replication, then cache
+	 * the result. This can save us 1000s of DB searches
+	 */
+	if (!replmd_private->recyclebin_state_known) {
+		int ret = dsdb_recyclebin_enabled(module, &enabled);
+		if (ret != LDB_SUCCESS) {
+			return false;
+		}
+
+		replmd_private->recyclebin_enabled = enabled;
+		replmd_private->recyclebin_state_known = true;
+	}
+
+	return replmd_private->recyclebin_enabled;
+}
+
 static void replmd_deletion_state(struct ldb_module *module,
 				  const struct ldb_message *msg,
 				  enum deletion_state *current_state,
 				  enum deletion_state *next_state)
 {
-	int ret;
 	bool enabled = false;
 
 	if (msg == NULL) {
@@ -190,10 +215,7 @@ static void replmd_deletion_state(struct ldb_module *module,
 		return;
 	}
 
-	ret = dsdb_recyclebin_enabled(module, &enabled);
-	if (ret != LDB_SUCCESS) {
-		enabled = false;
-	}
+	enabled = replmd_recyclebin_enabled(module);
 
 	if (ldb_msg_check_string_attribute(msg, "isDeleted", "TRUE")) {
 		if (!enabled) {
@@ -334,7 +356,7 @@ static void replmd_txn_cleanup(struct replmd_private *replmd_private)
 	talloc_free(replmd_private->la_ctx);
 	replmd_private->la_list = NULL;
 	replmd_private->la_ctx = NULL;
-
+	replmd_private->recyclebin_state_known = false;
 }
 
 
-- 
2.7.4



More information about the samba-technical mailing list