[SCM] Samba Shared Repository - branch master updated

Douglas Bagnall dbagnall at samba.org
Tue May 17 23:12:01 UTC 2022


The branch, master has been updated
       via  637e7cbdbab lzxpress: compress shortcut if we've reached maximum length
       via  04309bc6824 lzxpress/test: time performance of long boring sequences
      from  0633d8837ce vfs_glusterfs: Fix fdopendir implementation

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 637e7cbdbab6a5229b51954f506e51c677739ce8
Author: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Date:   Sun May 15 12:28:32 2022 +1200

    lzxpress: compress shortcut if we've reached maximum length
    
    A simple degenerate case for our compressor has been a large number of
    repeated bytes that will match the maximum length (~64k) at all 8192
    search positions; 8191 of those searches are in vain because their
    matches are no longer than the first one.
    
    Here we recognise the inevitable and reduce runtime proportionately.
    
    Credit to OSS-Fuzz.
    
    REF: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=47428
    
    Signed-off-by: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
    Reviewed-by: Andrew Bartlett <abartlet at samba.org>
    
    Autobuild-User(master): Douglas Bagnall <dbagnall at samba.org>
    Autobuild-Date(master): Tue May 17 23:11:21 UTC 2022 on sn-devel-184

commit 04309bc68240f55028c7d5108c55625199ad8884
Author: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
Date:   Sun May 15 14:38:55 2022 +1200

    lzxpress/test: time performance of long boring sequences
    
    Compression gets *very* slow when the input contains long runs of
    identical bytes. On this laptop the test takes 18s; with the next
    commit it will take 0.006s.
    
    Signed-off-by: Douglas Bagnall <douglas.bagnall at catalyst.net.nz>
    Reviewed-by: Andrew Bartlett <abartlet at samba.org>

-----------------------------------------------------------------------

Summary of changes:
 lib/compression/lzxpress.c                         |  4 ++
 lib/compression/testsuite.c                        | 69 ++++++++++++++++++++++
 .../__init__.py => selftest/knownfail.d/lzxpress   |  0
 3 files changed, 73 insertions(+)
 copy buildtools/wafsamba/__init__.py => selftest/knownfail.d/lzxpress (100%)


Changeset truncated at 500 lines:

diff --git a/lib/compression/lzxpress.c b/lib/compression/lzxpress.c
index 71b39c1efb3..6b2aeef02f6 100644
--- a/lib/compression/lzxpress.c
+++ b/lib/compression/lzxpress.c
@@ -118,6 +118,10 @@ ssize_t lzxpress_compress(const uint8_t *uncompressed,
 				found = true;
 				best_len = len;
 				best_offset = offset;
+				if (best_len == max_len) {
+					/* We're not going to do better than this */
+					break;
+				}
 			}
 		}
 
diff --git a/lib/compression/testsuite.c b/lib/compression/testsuite.c
index 4de3700c727..708af2bcbf1 100644
--- a/lib/compression/testsuite.c
+++ b/lib/compression/testsuite.c
@@ -340,6 +340,73 @@ static bool test_lzxpress4(struct torture_context *test)
 	return true;
 }
 
+
+static bool test_lzxpress_many_zeros(struct torture_context *test)
+{
+	/*
+	 * Repeated values (zero is convenient but not special) will lead to
+	 * very long substring searches in compression, which can be very slow
+	 * if we're not careful.
+	 *
+	 * This test makes a very loose assertion about how long it should
+	 * take to compress and decompress a million zeros.
+	 *
+	 * Wall clock time *should* be < 0.1 seconds with the fix and around a
+	 * minute without it. We try for CLOCK_THREAD_CPUTIME_ID which should
+	 * filter out some noise on the machine, and set the threshold at 3
+	 * seconds.
+	 */
+	TALLOC_CTX *tmp_ctx = talloc_new(test);
+	const size_t N_ZEROS = 1000000;
+	const uint8_t *zeros = talloc_zero_size(tmp_ctx, N_ZEROS);
+	uint8_t *comp = talloc_zero_size(tmp_ctx, 2048);
+	uint8_t *decomp = talloc_size(tmp_ctx, N_ZEROS * 2);
+	const ssize_t expected_c_size = 93;
+	ssize_t c_size;
+	struct timespec t_start = { .tv_sec = 0 };
+	struct timespec t_end = { .tv_sec = 0 };
+	uint64_t elapsed_ns;
+
+	/* Buffers are set up before the timer starts; bail out on OOM. */
+	torture_assert(test, zeros != NULL && comp != NULL && decomp != NULL,
+		       "failed to allocate test buffers");
+
+	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_start) != 0) {
+		if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_start) != 0) {
+			clock_gettime(CLOCK_REALTIME, &t_start);
+		}
+	}
+
+	c_size = lzxpress_compress(zeros, N_ZEROS,
+				   comp, talloc_get_size(comp));
+	torture_assert_int_equal(test, c_size, expected_c_size,
+				 "fixed lzxpress_compress size");
+
+	c_size = lzxpress_decompress(comp, c_size,
+				     decomp, N_ZEROS * 2);
+
+	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end) != 0) {
+		if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_end) != 0) {
+			clock_gettime(CLOCK_REALTIME, &t_end);
+		}
+	}
+	/* Use 64-bit maths; a negative nsec delta still sums correctly. */
+	elapsed_ns = (uint64_t)(t_end.tv_sec - t_start.tv_sec) * 1000000000ULL;
+	elapsed_ns += t_end.tv_nsec - t_start.tv_nsec;
+	torture_comment(test, "round-trip time: %llu ns\n",
+			(unsigned long long)elapsed_ns);
+	torture_assert(test, elapsed_ns < 3000000000ULL,
+		       "million zeros round trip took > 3 seconds");
+	torture_assert_int_equal(test, c_size, N_ZEROS,
+				 "fixed lzxpress_decompress size");
+	torture_assert_mem_equal(test, decomp, zeros, N_ZEROS,
+				 "fixed lzxpress_decompress data");
+
+	talloc_free(tmp_ctx);
+	return true;
+}
+
+
 static bool test_lzxpress_round_trip(struct torture_context *test)
 {
 	/*
@@ -408,6 +475,8 @@ struct torture_suite *torture_local_compression(TALLOC_CTX *mem_ctx)
 	torture_suite_add_simple_test(suite, "lzxpress2", test_lzxpress2);
 	torture_suite_add_simple_test(suite, "lzxpress3", test_lzxpress3);
 	torture_suite_add_simple_test(suite, "lzxpress4", test_lzxpress4);
+	torture_suite_add_simple_test(suite, "lzxpress_many_zeros",
+				      test_lzxpress_many_zeros);
 	torture_suite_add_simple_test(suite, "lzxpress_round_trip",
 				      test_lzxpress_round_trip);
 	return suite;
diff --git a/buildtools/wafsamba/__init__.py b/selftest/knownfail.d/lzxpress
similarity index 100%
copy from buildtools/wafsamba/__init__.py
copy to selftest/knownfail.d/lzxpress


-- 
Samba Shared Repository



More information about the samba-cvs mailing list