[Samba] High cpu load on LDAP
Douglas Bagnall
douglas.bagnall at catalyst.net.nz
Fri Nov 22 00:06:36 UTC 2024
On 21/11/24 20:10, Heinz Hölzl wrote:
> sorry,
No worries.
The ones where the backtrace starts like:
> gdb1.log-[?2004l
#0 0x00007f17ceb0cd16 in epoll_wait (epfd=22, events=0x7ffc19e2cc8c,
maxevents=1, timeout=3624) at ../sysdeps/unix/sysv/linux/epoll_wait.c:30
are not busy. You caught it at an uninteresting time.
This one:
> --
> gdb4.log:[?2004l
[?2004h(gdb) [7mbt full[27mbt full
> gdb4.log-[?2004l
#0 ldb_unpack_data_flags_v2 (ldb=0x55a9e0be6990, data=0x7fffb7ab2e00,
message=0x55a9e017b670, flags=4) at ../../lib/ldb/common/ldb_pack.c:910
> gdb4.log- attr = 0x0
> gdb4.log- attr_len = 17
> gdb4.log- element = 0x0
> gdb4.log- p = 0x55a9e14659b7 "unixHomeDirectory"
> gdb4.log- q = 0x55a9e1468487 "/home/xxxxxxxxxxxx"
> gdb4.log- end_p = 0x55a9e1468555 ""
> gdb4.log- value_section_p = 0x55a9e14659f7 "top"
> gdb4.log- i = 46
> gdb4.log- j = 1
> gdb4.log- nelem = 46
> gdb4.log- len = 12
> gdb4.log- ldb_val_single_array = 0x55a9e0ad35e0
> gdb4.log- val_len_width = 1 '\001'
> gdb4.log-#1 0x00007f4c19fdd098 in ldb_unpack_data_flags (ldb=0x55a9e0be6990, data=0x7fffb7ab2e00, message=0x55a9e017b670, flags=4) at ../../lib/ldb/common/ldb_pack.c:1075
> gdb4.log- format = 637606248
> gdb4.log-#2 0x00007f4c177c940f in search_func (ldb_kv=0x55a9e03efd40, key=..., val=..., state=0x55a9dfdbfc30) at ../../lib/ldb/ldb_key_value/ldb_kv_search.c:381
> gdb4.log- ldb = 0x55a9e0be6990
> gdb4.log- ac = 0x55a9dfdbfc30
> gdb4.log- msg = 0x55a9e017b670
> gdb4.log- now = {tv_sec = 794002426393788440, tv_usec = 94188097784864}
> gdb4.log- ret = 176408300
> gdb4.log- timeval_cmp = 24
> gdb4.log- matched = false
> gdb4.log-#3 0x00007f4c177ed1ca in ltdb_traverse_fn_wrapper (tdb=0x55a9e01c4c20, tdb_key=..., tdb_data=..., ctx=0x7fffb7ab3010) at ../../lib/ldb/ldb_tdb/ldb_tdb.c:262
> gdb4.log- kv_ctx = 0x7fffb7ab3010
> gdb4.log- key = {data = 0x55a9e1465500 "GUID=\246\037{\233}\363H@\274'\035Z\225\347rph\031\001&1", length = 21}
> gdb4.log- data = {data = 0x55a9e1465515 "h\031\001&1", length = 12352}
> gdb4.log-#4 0x00007f4c19638fce in tdb_traverse_internal (tdb=0x55a9e01c4c20, fn=0x7f4c177ed13d <ltdb_traverse_fn_wrapper>, private_data=0x7fffb7ab3010, tl=0x7fffb7ab2fc0) at ../../lib/tdb/common/traverse.c:222
> gdb4.log- full_len = 12373
> gdb4.log- nread = 0
> gdb4.log- key = {dptr = 0x55a9e1465500 "GUID=\246\037{\233}\363H@\274'\035Z\225\347rph\031\001&1", dsize = 21}
> gdb4.log- dbuf = {dptr = 0x55a9e1465515 "h\031\001&1", dsize = 12352}
> gdb4.log- rec = {next = 184868096, rec_len = 15040, key_len = 21, data_len = 12352, full_hash = 3390810639, magic = 637606297}
> gdb4.log- ret = 0
> gdb4.log- count = 27126
> gdb4.log- off = 176408300
> gdb4.log- recbuf_len = 61918
> gdb4.log-#5 0x00007f4c19639107 in tdb_traverse_read (tdb=0x55a9e01c4c20, fn=0x7f4c177ed13d <ltdb_traverse_fn_wrapper>, private_data=0x7fffb7ab3010) at ../../lib/tdb/common/traverse.c:254
> gdb4.log- tl = {next = 0x0, off = 176408300, list = 639, lock_rw = 0}
> gdb4.log- ret = 21929
> gdb4.log-#6 0x00007f4c177ed27b in ltdb_traverse_fn (ldb_kv=0x55a9e03efd40, fn=0x7f4c177c92dd <search_func>, ctx=0x55a9dfdbfc30) at ../../lib/ldb/ldb_tdb/ldb_tdb.c:275
> gdb4.log- kv_ctx = {kv_traverse_fn = 0x7f4c177c92dd <search_func>, ctx = 0x55a9dfdbfc30, ldb_kv = 0x55a9e03efd40, parser = 0x0, parser_ret = 0}
> gdb4.log-#7 0x00007f4c177c9783 in ldb_kv_search_full (ctx=0x55a9dfdbfc30) at ../../lib/ldb/ldb_key_value/ldb_kv_search.c:504
> gdb4.log- data = 0x55a9e03efd40
> gdb4.log- ldb_kv = 0x55a9e03efd40
> gdb4.log- ret = 1
> gdb4.log-#8 0x00007f4c177c9ef9 in ldb_kv_search (ctx=0x55a9dfdbfc30) at ../../lib/ldb/ldb_key_value/ldb_kv_search.c:785
> gdb4.log- match_count = 0
> gdb4.log- ldb = 0x55a9e0be6990
is an unindexed search, which according to my theory is the problem. The
"count = 27126" shows how far the list it was at this point.
See, it has to traverse the entire database and unpack every object to
see if it has the matching attributes.
This one is an indexed search:
> gdb6.log:[?2004l
[?2004h(gdb) [7mbt full[27mbt full
> gdb6.log-[?2004l
#0 0x00007f03d7ee1d66 in __strcasecmp_l_avx () at
../sysdeps/x86_64/multiarch/strcmp-sse42.S:231
> gdb6.log-No locals.
> gdb6.log-#1 0x00007f03d84c6d9f in ldb_msg_find_element (msg=0x560b51be9850, attr_name=0x560b51f1d480 "userAccountControl") at ../../lib/ldb/common/ldb_msg.c:52
> gdb6.log- i = 15
> gdb6.log-#2 0x00007f03d84d1fcd in ldb_match_bitmask (ldb=0x560b51eedb80, oid=0x7f03d84e834d "1.2.840.113556.1.4.803", msg=0x560b51be9850, attribute_to_match=0x560b51f1d480 "userAccountControl", value_to_match=0x560b514b53b0, matched=0x7ffce8c31470) at ../../lib/ldb/common/ldb_match.c:480
> gdb6.log- i = 1374608256
> gdb6.log- el = 0x7ffce8c31300
> gdb6.log-#3 0x00007f03d84d221a in ldb_match_extended (ldb=0x560b51eedb80, msg=0x560b51be9850, tree=0x560b514b5390, scope=LDB_SCOPE_SUBTREE, matched=0x7ffce8c31470) at ../../lib/ldb/common/ldb_match.c:569
> gdb6.log- rule = 0x560b51740ad0
> gdb6.log-#4 0x00007f03d84d2584 in ldb_match_message (ldb=0x560b51eedb80, msg=0x560b51be9850, tree=0x560b514b5390, scope=LDB_SCOPE_SUBTREE, matched=0x7ffce8c31470) at ../../lib/ldb/common/ldb_match.c:673
> gdb6.log- i = 0
> gdb6.log- ret = 0
> gdb6.log-#5 0x00007f03d84d2367 in ldb_match_message (ldb=0x560b51eedb80, msg=0x560b51be9850, tree=0x560b51386660, scope=LDB_SCOPE_SUBTREE, matched=0x7ffce8c31470) at ../../lib/ldb/common/ldb_match.c:632
> gdb6.log- i = 1
> gdb6.log- ret = 0
> gdb6.log-#6 0x00007f03d84d2367 in ldb_match_message (ldb=0x560b51eedb80, msg=0x560b51be9850, tree=0x560b51dd8e80, scope=LDB_SCOPE_SUBTREE, matched=0x7ffce8c31470) at ../../lib/ldb/common/ldb_match.c:632
> gdb6.log- i = 0
> gdb6.log- ret = -1783986687
> gdb6.log-#7 0x00007f03d5cc66bf in ldb_kv_index_filter (ldb_kv=0x560b50ad6db0, dn_list=0x560b50a34740, ac=0x560b51c0c060, match_count=0x7ffce8c31530, scope_one_truncation=KEY_NOT_TRUNCATED) at ../../lib/ldb/ldb_key_value/ldb_kv_index.c:2456
> gdb6.log- ret = 0
> gdb6.log- matched = false
> gdb6.log- ldb = 0x560b51eedb80
> gdb6.log- msg = 0x560b51be9850
> gdb6.log- i = 21521
> gdb6.log- num_keys = 30926
> gdb6.log- previous_guid_key = "GUID=\377\377`B\222e\272A\233\274\275R\356\350\260k"
> gdb6.log- keys = 0x560b523dbfd0
> gdb6.log-#8 0x00007f03d5cc6cd5 in ldb_kv_search_indexed (ac=0x560b51c0c060, match_count=0x7ffce8c31530) at ../../lib/ldb/ldb_key_value/ldb_kv_index.c:2693
> gdb6.log- ldb = 0x560b51eedb80
> gdb6.log- ldb_kv = 0x560b50ad6db0
> gdb6.log- dn_list = 0x560b50a34740
> gdb6.log- ret = 0
> gdb6.log- index_scope = LDB_SCOPE_SUBTREE
> gdb6.log- scope_one_truncation = KEY_NOT_TRUNCATED
> gdb6.log-#9 0x00007f03d5cc1d9b in ldb_kv_search (ctx=0x560b51c0c060) at ../../lib/ldb/ldb_key_value/ldb_kv_search.c:739
> gdb6.log- match_count = 0
> gdb6.log- ldb = 0x560b51eedb80
> gdb6.log- module = 0x560b519088f0
> gdb6.log- req = 0x560b5122f630
> gdb6.log- data = 0x560b50ad6db0
> gdb6.log- ldb_kv = 0x560b50ad6db0
> gdb6.log- ret = 0
and my theory is you will not find many of these if you repeat this more
times. Mind you, this search is finding and filtering 30k records
("num_keys = 30926"), so it is not nothing.
A shortcut to sample in gdb would be to cycle like this:
c <enter>
[wait a short random time]
control-C
bt full 12
c <enter> [...]
Ignore the ones with "epoll_wait". I think there will be more with
"ldb_kv_search_full" than with "ldb_kv_search_indexed".
It also might be best not to post the 'bt full' results to the list, as
there might be personal information in there.
Douglas
More information about the samba
mailing list