[SCM] Samba Shared Repository - branch master updated

Ralph Böhme slow at samba.org
Tue Jul 26 14:33:01 UTC 2022


The branch, master has been updated
       via  d5c7e2e2738 s3:dbwrap_watch: call dbwrap_watched_trigger_wakeup() outside of the low level record lock
       via  9d999116632 s3:dbwrap_watch: only notify the first waiter
       via  6e701d02ee2 s3:smbXsrv_session: only change the dbwrap_watch instance when the record has changed
       via  98269bd5f31 s3:smbXsrv_session: introduce smb2srv_session_close_previous_cleanup()
       via  67af3586d98 s3:smbXsrv_client: only change the dbwrap_watch instance when the record has changed
       via  e33143099b3 s3:g_lock: try to keep the watch instance during g_lock_watch_data()
       via  20f3fd02191 s3:g_lock: remember an unique_lock_epoch similar to unique_data_epoch
       via  2e92267920d s3:g_lock: avoid a lot of unused overhead using the new dbwrap_watch features
       via  52720516998 s3:g_lock: always call g_lock_cleanup_shared() before getting stuck on lck.num_shared != 0
       via  b865bb28abe s3:g_lock: avoid calling g_lock_store() from g_lock_cleanup_dead()
       via  f62beaa2c25 s3:dbwrap_watch: allow callers of dbwrap_watched_watch_send/recv() to manage the watcher instances
       via  50163da3096 s3:dbwrap_watch: remove a watcher via db_watched_record_fini()
       via  2eb6a209493 s3:dbwrap_watch: use dbwrap_watched_record_storev() to add a new watcher
       via  044e018e9a2 s3:dbwrap_watch: let dbwrap_watched_delete() call dbwrap_watched_record_storev(num_dbufs=0)
       via  cc9c8b8e7e0 s3:dbwrap_watch: filter out records with empty payload during traverse
       via  1fb9db8c994 s3:dbwrap_watch: prepare dbwrap_watched_record_storev() to store watchers if requested
       via  908eea12024 s3:dbwrap_watch: define/use DBWRAP_MAX_WATCHERS
       via  2129d352ae0 s3:dbwrap_watch: remove unused dbwrap_watched_do_locked_state.status
       via  8908af56955 s3:dbwrap_watch: let dbwrap_watched_watch_recv() use tevent_req_received()
       via  1c84980d7c3 s3:dbwrap_watch: don't use talloc_tos() for messaging_filtered_read_recv()
       via  39cdcec49c6 s3:dbwrap_watch: move db_record and db_watched_record to dbwrap_watched_do_locked()
       via  6b173bf1569 s3:dbwrap_watch: split out a dbwrap_watched_watch_add_instance() helper
       via  5021abff88c s3:dbwrap_watch: remove dbwrap_watched_record_wakeup_fn() indirection
       via  6e45da1a38d s3:dbwrap_watch: also the fetch_locked case only needs to wake waiters just once
       via  726f468ccdb s3:dbwrap_watch: split out db_watched_record_fini() from db_watched_record_destructor()
       via  eb89748ee4a s3:dbwrap_watch: split out a db_watched_record_init() helper function
       via  095fafbe0c9 s3:dbwrap_watch: remove unused dbwrap_watched_do_locked_{storev,delete}()
       via  c0febbd3e19 s3:dbwrap_watch: move the do_locked optimization to dbwrap_watched_record_wakeup()
       via  2342489f526 s3:dbwrap_watch: add db_record_get_watched_record() helper
       via  b3f6668f93b s3:dbwrap_watch: use backend.{rec,initial_value} instead of subrec[_value]
       via  cb012e45c91 s3:dbwrap_watch: only pass struct db_watched_record to dbwrap_watched_record_*() functions
       via  6702b3b0da7 s3:dbwrap_watch: use dbwrap_record_get_key() to access the key
       via  7226d0b3656 s3:dbwrap_watch: move 'wrec' from dbwrap_watched_do_locked_state to dbwrap_watched_do_locked_fn
       via  9356b1701c4 s3:dbwrap_watch: use struct db_watched_record as rec->private_data for do_locked too
       via  420a595c1ba s3:dbwrap_watch: use dbwrap_record_get_db(rec) instead of state->db
       via  cdf1c37a90c s3:dbwrap_watch: move wakeup_value to struct db_watched_record
       via  77db4b666f0 s3:dbwrap_watch: rename struct dbwrap_watched_record variables to 'wrec'
       via  5af37ae6970 s3:dbwrap_watch: s/dbwrap_watched_subrec/dbwrap_watched_record
       via  3f88b700a9e s3:dbwrap_watch: s/db_watched_subrec/db_watched_record
       via  f26b22cc8e7 s3:dbwrap_watch: use value_valid = false during dbwrap_watched_do_locked_fn()
       via  e06413c2ba5 s3:dbwrap_watch: let dbwrap_watched_watch_state_destructor() use DBG_WARNING()
      from  0d4cb5a641e smbd: split out smbd_check_access_rights_fname and call it before SMB_VFS_FGET_NT_ACL

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit d5c7e2e27385a4730ff8674650700bd25b77f975
Author: Stefan Metzmacher <metze at samba.org>
Date:   Thu Jun 30 10:39:18 2022 +0000

    s3:dbwrap_watch: call dbwrap_watched_trigger_wakeup() outside of the low level record lock
    
    This gives a nice speed up, as it's unlikely for the waiters to hit
    contention.
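
    The new ordering is visible in db_watched_record_destructor() in the
    diff below; as a simplified sketch (error handling omitted):

      static int db_watched_record_destructor(struct db_watched_record *wrec)
      {
          struct db_watched_ctx *ctx = talloc_get_type_abort(
              wrec->rec->db->private_data, struct db_watched_ctx);

          /* storev/delete still happens under the backend record lock */
          db_watched_record_fini(wrec);

          /* freeing the backend record drops the low level lock ... */
          TALLOC_FREE(wrec->backend.rec);

          /* ... and only then is the remembered waiter messaged */
          dbwrap_watched_trigger_wakeup(ctx->msg, &wrec->wakeup.watcher);
          return 0;
      }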
    
    The following test with 256 connections all looping with open/close
    on the same inode (share root) is improved drastically:
    
      smbtorture //127.0.0.1/m -Uroot%test smb2.create.bench-path-contention-shared \
         --option='torture:bench_path=' \
         --option="torture:timelimit=60" \
         --option="torture:nprocs=256"
    
    From something like this:
    
       open[num/s=8800,avslat=0.021445,minlat=0.000095,maxlat=0.179786]
       close[num/s=8800,avslat=0.021658,minlat=0.000044,maxlat=0.179819]
    
    to:
    
       open[num/s=10223,avslat=0.017922,minlat=0.000083,maxlat=0.106759]
       close[num/s=10223,avslat=0.017694,minlat=0.000040,maxlat=0.107345]
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>
    
    Autobuild-User(master): Ralph Böhme <slow at samba.org>
    Autobuild-Date(master): Tue Jul 26 14:32:35 UTC 2022 on sn-devel-184

commit 9d9991166322477781f20372ffd7c19d1632276c
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: only notify the first waiter
    
    In case of a highly contended record we will have a lot of watchers,
    which will all race to get g_lock_lock() to finish.
    
    If g_lock_unlock() wakes them all, e.g. 250 of them, we get a thundering
    herd, where 249 will only find that one of them was able to get the lock
    and have to re-add their watcher entry (most likely in a different order).
    
    With this commit we only wake the first watcher and let it remove
    itself once it no longer wants to monitor the record content
    (at that time it will wake the new first watcher).
    
    It means the woken watcher doesn't have to race with all the others,
    and the order of watchers is kept, so we most likely get a fair
    latency distribution for all watchers.
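
    A minimal sketch of the wakeup side (field names follow the patch;
    dbwrap_watcher_get() is assumed to be the parsing counterpart of the
    existing dbwrap_watcher_put()):

      /* remember only the head of the watcher queue for a wakeup message */
      if (wrec->watchers.count != 0 && !wrec->watchers.alerted) {
          dbwrap_watcher_get(&wrec->wakeup.watcher, wrec->watchers.first);
          wrec->watchers.alerted = true;
      }

      /* later, once the record lock has been dropped */
      dbwrap_watched_trigger_wakeup(msg_ctx, &wrec->wakeup.watcher);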
    
    The following test with 256 connections all looping with open/close
    on the same inode (share root) is improved drastically:
    
      smbtorture //127.0.0.1/m -Uroot%test smb2.create.bench-path-contention-shared \
         --option='torture:bench_path=' \
         --option="torture:timelimit=60" \
         --option="torture:nprocs=256"
    
    From something like this:
    
       open[num/s=80,avslat=2.793862,minlat=0.004097,maxlat=46.597053]
       close[num/s=80,avslat=2.387326,minlat=0.023875,maxlat=50.878165]
    
    to:
    
       open[num/s=8800,avslat=0.021445,minlat=0.000095,maxlat=0.179786]
       close[num/s=8800,avslat=0.021658,minlat=0.000044,maxlat=0.179819]
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 6e701d02ee2d0fc304157395c451d3b972128cfc
Author: Stefan Metzmacher <metze at samba.org>
Date:   Tue Jul 5 16:05:15 2022 +0200

    s3:smbXsrv_session: only change the dbwrap_watch instance when the record has changed
    
    This will become important in the following commits when the
    dbwrap_watch layer will only wake up one watcher at a time
    and each woken watcher will wake up the next one.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 98269bd5f31a2521b756e0a20fba82e9122582f7
Author: Stefan Metzmacher <metze at samba.org>
Date:   Mon Jul 25 22:28:27 2022 +0200

    s3:smbXsrv_session: introduce smb2srv_session_close_previous_cleanup()
    
    This makes sure we cleanup the locked record in all cases.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 67af3586d989be9d6a8fe7e7789250451b03f2bb
Author: Stefan Metzmacher <metze at samba.org>
Date:   Tue Jul 5 16:04:09 2022 +0200

    s3:smbXsrv_client: only change the dbwrap_watch instance when the record has changed
    
    This will become important in the following commits when the
    dbwrap_watch layer will only wake up one watcher at a time
    and each woken watcher will wake up the next one.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit e33143099b37a4d79a1cd45ca43a5a5d9d63b261
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 16:16:38 2022 +0000

    s3:g_lock: try to keep the watch instance during g_lock_watch_data()
    
    Unless the unique_lock_epoch changes via g_lock_lock()/g_lock_unlock()
    we try to keep our existing watch instance alive while waiting
    for unique_data_epoch to change.
    
    This will become important in the following commits when the
    dbwrap_watch layer will only wake up one watcher at a time
    and each woken watcher will wake up the next one. Without this
    commit we would trigger an endless loop as none of the watchers
    will ever change unique_data_epoch.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 20f3fd021911a118228815cb32d436069b0273d1
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 16:16:38 2022 +0000

    s3:g_lock: remember an unique_lock_epoch similar to unique_data_epoch
    
    It changes with every lock and unlock.
    
    This will be needed in the future in order to differentiate between
    lock changes and data changes.
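
    A sketch of the idea (assuming the new epoch is bumped the same way as
    the existing unique_data_epoch, e.g. via generate_unique_u64()):

      /* in the g_lock_lock()/g_lock_unlock() store paths */
      lck.unique_lock_epoch = generate_unique_u64(lck.unique_lock_epoch);

      /* while data writes keep bumping the existing field */
      lck.unique_data_epoch = generate_unique_u64(lck.unique_data_epoch);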
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 2e92267920ddee7b96f32e243eb7b21124ed554f
Author: Stefan Metzmacher <metze at samba.org>
Date:   Mon Jun 27 13:40:55 2022 +0000

    s3:g_lock: avoid a lot of unused overhead using the new dbwrap_watch features
    
    The key points are:
    
    1. We keep our position in the watcher queue until we got what
       we were waiting for. It means the order is now fair and stable.
    
    2. We only wake up others during g_lock_unlock() and only if
       we detect that a pending exclusive lock is able to make progress.
       (Note: read lock holders are never waiters on their own)
    
    This reduced the contention on locking.tdb records drastically,
    as waiters are no longer woken 3 times (where the first 2 times were completely useless).
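
    For point 2 the unlock path conceptually looks like this (hedged sketch;
    'exclusive_waiter_can_progress' is an illustrative name, the skip helper
    is the dbwrap_watched_watch_skip_alerting() added in this series):

      if (exclusive_waiter_can_progress) {
          /* leave alerting enabled, the first watcher will be woken */
      } else {
          /* nobody can make progress, so don't wake anyone */
          dbwrap_watched_watch_skip_alerting(rec);
      }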
    
    The following test with 256 connections all looping with open/close
    on the same inode (share root) is improved drastically:
    
      smbtorture //127.0.0.1/m -Uroot%test smb2.create.bench-path-contention-shared \
         --option='torture:bench_path=' \
         --option="torture:timelimit=60" \
         --option="torture:nprocs=256"
    
    From something like this:
    
       open[num/s=50,avslat=6.455775,minlat=0.000157,maxlat=55.683846]
       close[num/s=50,avslat=4.563605,minlat=0.000128,maxlat=53.585839]
    
    to:
    
       open[num/s=80,avslat=2.793862,minlat=0.004097,maxlat=46.597053]
       close[num/s=80,avslat=2.387326,minlat=0.023875,maxlat=50.878165]
    
    Note the real effect of this commit will only be revealed together
    with a following commit that only wakes one waiter at a time.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 52720516998091206399e9b2387d8ee8265e4660
Author: Stefan Metzmacher <metze at samba.org>
Date:   Thu Jun 30 16:42:54 2022 +0200

    s3:g_lock: always call g_lock_cleanup_shared() before getting stuck on lck.num_shared != 0
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit b865bb28abedd18d5db0ae3ea31908c75ee1129b
Author: Stefan Metzmacher <metze at samba.org>
Date:   Mon Jun 27 13:39:18 2022 +0000

    s3:g_lock: avoid calling g_lock_store() from g_lock_cleanup_dead()
    
    This matches the behavior of g_lock_cleanup_shared(), which also
    only operates on the in memory struct g_lock.
    
    We do a g_lock_store() later during g_lock_trylock() anyway
    when we make any progress.
    
    In the case where we are a pending exclusive lock holder,
    we now force a g_lock_store() if g_lock_cleanup_dead()
    removed the dead blocker.
    
    This will be useful for the following changes...
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit f62beaa2c25043a1b3dde408ce7d8f7327ac3e13
Author: Stefan Metzmacher <metze at samba.org>
Date:   Thu Jun 30 15:53:47 2022 +0200

    s3:dbwrap_watch: allow callers of dbwrap_watched_watch_send/recv() to manage the watcher instances
    
    The destructor triggered by dbwrap_watched_watch_recv() will
    remove the watcher instance via a dedicated dbwrap_do_locked(),
    just calling dbwrap_watched_watch_remove_instance() inside.
    
    But the typical caller triggers a dbwrap_do_locked() again after
    dbwrap_watched_watch_recv() returned, which means we call
    dbwrap_do_locked() twice.
    
    We now allow dbwrap_watched_watch_recv() to return the existing
    instance id (if it still exists) and to remove the destructor.
    That way the caller can pass the given instance id to
    dbwrap_watched_watch_remove_instance() from within its own dbwrap_do_locked(),
    when it decides to leave the queue, because it's happy with the new
    state of the record. In order to get the best performance
    dbwrap_watched_watch_remove_instance() should be called before any
    dbwrap_record_storev() or dbwrap_record_delete(),
    because that will only trigger a single low level storev/delete.
    
    If the caller finds out that the state of the record doesn't meet the
    expectations and the caller wants to continue watching the
    record (from its current position, most likely the first one),
    dbwrap_watched_watch_remove_instance() can be skipped and the
    instance id can be passed to dbwrap_watched_watch_send() again,
    in order to resume waiting on the existing instance.
    Until now the watcher instance was always removed (most likely from
    the first position) and re-added (to the last position), which may
    cause unfair latencies.
    
    In order to reduce the overhead of adding a new watcher instance
    the caller can call dbwrap_watched_watch_add_instance() before
    any dbwrap_record_storev() or dbwrap_record_delete(), which
    will only result in a single low level storev/delete.
    The returned instance id is then passed to dbwrap_watched_watch_send(),
    within the same dbwrap_do_locked() run.
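
    A rough caller pattern for the new API (hedged sketch: the argument
    order and the 'happy_with_new_state' check are illustrative only, the
    real prototypes live in dbwrap_watch.h):

      uint64_t instance = 0;

      status = dbwrap_watched_watch_recv(req, &instance, NULL, NULL);
      /* ... later, inside the caller's own dbwrap_do_locked() ... */

      if (happy_with_new_state) {
          /* leave the queue before storing: a single low level store */
          dbwrap_watched_watch_remove_instance(rec, instance);
          status = dbwrap_record_storev(rec, dbufs, num_dbufs, 0);
      } else {
          /* keep the queue position, resume waiting on the same instance */
          subreq = dbwrap_watched_watch_send(state, ev, rec,
                                             instance, blocker);
      }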
    
    It also adds a way to avoid alerting any watchers during
    the current dbwrap_do_locked() run.
    
    Layers above may only want to wake up watchers
    in specific situations, while it's useless to wake
    them in others.
    
    This will soon be used to add more fairness to the g_lock code.
    
    Note that this commit only prepares the api for the above to be useful,
    the instance returned by dbwrap_watched_watch_recv() is most likely 0,
    which means the watcher entry was already removed, but that will change
    in the following commits.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 50163da3096d71c08e9c3ff13dd3ecb252a60f4e
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: remove a watcher via db_watched_record_fini()
    
    The new dbwrap_watched_watch_remove_instance() will just remove ourselves
    from the in memory array and let db_watched_record_fini() call
    dbwrap_watched_record_storev() in order to write the modified version
    into the low level backend record.
    
    For now there's no change in behavior, but it allows us to change it
    soon....
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 2eb6a209493e228e433a565154c7c4cd076c0a4a
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: use dbwrap_watched_record_storev() to add a new watcher
    
    It means we only have one code path storing the low level record
    and that's dbwrap_watched_record_storev on the main record.
    
    It avoids the nested dbwrap_do_locked() and only uses
    dbwrap_parse_record() and talloc_memdup() when needed.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 044e018e9a243466bdbf075304a2715f36c0199a
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: let dbwrap_watched_delete() call dbwrap_watched_record_storev(num_dbufs=0)
    
    dbwrap_watched_record_storev() will handle the high level storev and
    delete; it will find out whether we can remove the record as there's no value
    and also no watchers to be stored.
    
    This is no real change for now as dbwrap_watched_record_wakeup() will
    always exit with wrec->watchers.count = 0, but that will change in the next
    commits.
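
    I.e. roughly (sketch following the description above):

      static NTSTATUS dbwrap_watched_delete(struct db_record *rec)
      {
          struct db_watched_record *wrec = db_record_get_watched_record(rec);

          /*
           * A delete is just a store of zero data buffers;
           * dbwrap_watched_record_storev() decides whether the backend
           * record can go away or still has watchers to keep.
           */
          return dbwrap_watched_record_storev(wrec, NULL, 0, 0);
      }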
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit cc9c8b8e7e0e2ab8512fb443b89780a11bc966ae
Author: Stefan Metzmacher <metze at samba.org>
Date:   Mon Jul 25 22:19:13 2022 +0200

    s3:dbwrap_watch: filter out records with empty payload during traverse
    
    We will soon have records with just a number of watchers, but without
    payload. These records should not be visible during traverse.
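
    Sketch of the filter in the traverse wrapper (simplified; 'state' and
    'user_rec' stand in for the existing traverse forwarding state):

      ok = dbwrap_watch_rec_parse(dbwrap_record_get_value(rec),
                                  NULL, NULL, &user_value);
      if (!ok) {
          return 0;
      }
      if (user_value.dsize == 0) {
          /* only watchers, no payload: hide it from the traverse caller */
          return 0;
      }
      user_rec.value = user_value;
      return state->fn(&user_rec, state->private_data);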
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 1fb9db8c994b60720ea82167b5a91808293640e0
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: prepare dbwrap_watched_record_storev() to store watchers if requested
    
    It will also delete the low level record in case no watchers
    should be stored and no data buffers are given.
    
    This is no real change for now as dbwrap_watched_record_wakeup() will
    always exit with wrec->watchers.count = 0, but that will change in the next
    commits.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 908eea1202459c8ffdf8e43eb310cf8690693824
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: define/use DBWRAP_MAX_WATCHERS
    
    dbwrap backends are unlikely to be able to store
    UINT32_MAX*DBWRAP_WATCHER_BUF_LENGTH bytes in a single record,
    and most likely not even in the whole database!
    
    DBWRAP_MAX_WATCHERS = INT32_MAX/DBWRAP_WATCHER_BUF_LENGTH should be
    enough and makes further changes easier as we don't need to care
    about size_t overflows.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 2129d352ae0d90867ebe8101529817a12223d5ee
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: remove unused dbwrap_watched_do_locked_state.status
    
    This is never set...
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 8908af569551def4e06c9aa179cf77e0beaa2638
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: let dbwrap_watched_watch_recv() use tevent_req_received()
    
    At the end of dbwrap_watched_watch_recv() all temporary state should
    be destroyed, which also means dbwrap_watched_watch_state_destructor() was
    triggered.
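
    I.e. the standard tevent receive pattern (generic sketch, the out
    parameters of the real function are omitted here):

      NTSTATUS dbwrap_watched_watch_recv(struct tevent_req *req)
      {
          NTSTATUS status;

          if (tevent_req_is_nterror(req, &status)) {
              tevent_req_received(req);
              return status;
          }
          /* copy results out of the request state before this point */
          tevent_req_received(req); /* destroys all temporary state */
          return NT_STATUS_OK;
      }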
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 1c84980d7c3ca33c61c8137902af2fc7a0988824
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: don't use talloc_tos() for messaging_filtered_read_recv()
    
    Async functions always have their 'state' context for temporary memory.
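
    I.e. (sketch; the result is allocated on the memory context passed to
    the recv function, so it now lives and dies with the async state):

      /* before: the result hangs off the global talloc stackframe */
      ret = messaging_filtered_read_recv(subreq, talloc_tos(), &rec);

      /* after: the result is owned by 'state' and freed together with it */
      ret = messaging_filtered_read_recv(subreq, state, &rec);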
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 39cdcec49c6578430d91c450645de1522fe26c2a
Author: Stefan Metzmacher <metze at samba.org>
Date:   Thu Jun 30 19:30:39 2022 +0200

    s3:dbwrap_watch: move db_record and db_watched_record to dbwrap_watched_do_locked()
    
    This will help in the next commits.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 6b173bf1569b4c099223a090f021465eee04747e
Author: Stefan Metzmacher <metze at samba.org>
Date:   Thu Jun 30 14:05:43 2022 +0200

    s3:dbwrap_watch: split out a dbwrap_watched_watch_add_instance() helper
    
    This will be used in other places soon.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 5021abff88ccaeb8b7a2b274e4ddc9ea046b2c34
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 15:48:54 2022 +0000

    s3:dbwrap_watch: remove dbwrap_watched_record_wakeup_fn() indirection
    
    This reduces quite some complexity and will make further changes
    (which will follow soon) easier.
    
    Review with git show --patience
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 6e45da1a38d31c18e38397f354596f6c4654c8b1
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 15:33:30 2022 +0000

    s3:dbwrap_watch: also the fetch_locked case only needs to wake waiters just once
    
    This is no change in behavior, because:
    
    - The first dbwrap_do_locked(dbwrap_watched_record_wakeup_fn) is
      called at the start of dbwrap_watched_record_{storev,delete}().
      That means the nested dbwrap_do_locked() will pass the
      exact same (unchanged) value to dbwrap_watched_record_wakeup_fn.
    
    - After the first change we have either removed the whole backend
      record in dbwrap_watched_record_delete() or dbwrap_watched_record_storev()
      removed all watchers and stored num_watchers = 0.
    
    - With that any further updates will have no watchers in the backend
      record, so dbwrap_do_locked(dbwrap_watched_record_wakeup_fn) will
      never do anything useful. It only burns cpu time and may cause memory
      fragmentation.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 726f468ccdb432c35dadef1aa4af6a041cbf46f2
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 10:58:21 2022 +0200

    s3:dbwrap_watch: split out db_watched_record_fini() from db_watched_record_destructor()
    
    That makes it easier to understand that db_watched_record_init() and
    db_watched_record_fini() wrap any caller activity on the record,
    either during do_locked or between fetch_locked and the related
    destructor.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit eb89748ee4a9c58da8064b41d0253e00c30c213f
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 15:07:43 2022 +0000

    s3:dbwrap_watch: split out a db_watched_record_init() helper function
    
    The code to construct a struct db_watched_record is mostly common
    between dbwrap_watched_fetch_locked() and dbwrap_watched_do_locked_fn().
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 095fafbe0c901180b30253b9adb6b2de3bf827f2
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 14:45:58 2022 +0000

    s3:dbwrap_watch: remove unused dbwrap_watched_do_locked_{storev,delete}()
    
    dbwrap_watched_do_locked_{storev,delete}() are now exactly the
    same as dbwrap_watched_{storev,delete}().
    
    We only need to know if dbwrap_watched_record_wakeup() is called from
    within dbwrap_watched_do_locked_fn().
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit c0febbd3e19a1c40c48d93f38f9223a7feab8f30
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 14:38:50 2022 +0000

    s3:dbwrap_watch: move the do_locked optimization to dbwrap_watched_record_wakeup()
    
    Both dbwrap_watched_record_storev() and dbwrap_watched_record_delete()
    call dbwrap_watched_record_wakeup() as their first action.
    
    So the behavior stays the same, but dbwrap_watched_do_locked_storev()
    and dbwrap_watched_do_locked_delete() are now trivial and we
    have the wakeup logic isolated in dbwrap_watched_record_wakeup() only.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 2342489f526a43fe2d56f3859b6df0e26538870d
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 13:41:12 2022 +0000

    s3:dbwrap_watch: add db_record_get_watched_record() helper
    
    This allows safe casting of rec->private_data to get
    struct db_watched_record, and that works for both fetch_locked and do_locked.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit b3f6668f93b73a5980dc418c6b1dacde330f7497
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 11:16:37 2022 +0000

    s3:dbwrap_watch: use backend.{rec,initial_value} instead of subrec[_value]
    
    This makes it much clearer to me what it actually is.
    
    Keeping the initial_value with struct db_watched_record will also
    simplify further changes.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit cb012e45c91b4b7d1572864b7e60de36c96a38af
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 11:05:40 2022 +0000

    s3:dbwrap_watch: only pass struct db_watched_record to dbwrap_watched_record_*() functions
    
    We get to the main 'struct db_record' via wrec->rec where needed.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 6702b3b0da729b824455a5efc7b50ebc03f835e8
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 11:59:21 2022 +0000

    s3:dbwrap_watch: use dbwrap_record_get_key() to access the key
    
    We should avoid taking shortcuts if not needed.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 7226d0b365625ab0e5ff0ffe8b8fe3924ae1a569
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 13:00:06 2022 +0000

    s3:dbwrap_watch: move 'wrec' from dbwrap_watched_do_locked_state to dbwrap_watched_do_locked_fn
    
    We can use a local variable in dbwrap_watched_do_locked_fn(),
    as 'wrec' should have the same lifetime as 'rec'.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 9356b1701c43c9ec2a71252a31e09555a1f5a3ea
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 12:54:40 2022 +0000

    s3:dbwrap_watch: use struct db_watched_record as rec->private_data for do_locked too
    
    There's no real reason to pass struct dbwrap_watched_do_locked_state
    anymore. The only difference is that we can't use
    talloc_get_type_abort().
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 420a595c1baa6062fcb7f672cf032287b4521a76
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 12:51:49 2022 +0000

    s3:dbwrap_watch: use dbwrap_record_get_db(rec) instead of state->db
    
    We should try to avoid using dbwrap_watched_do_locked_state in low
    level code.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit cdf1c37a90c24cc32953865a0bb909fd71fbbf05
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 12:49:36 2022 +0000

    s3:dbwrap_watch: move wakeup_value to struct db_watched_record
    
    For the do_locked case they have the same scope, but having
    it on db_watched_record will simplify further changes.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 77db4b666f0eb6668b6e0bc3d445048980542bfd
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 10:23:21 2022 +0000

    s3:dbwrap_watch: rename struct dbwrap_watched_record variables to 'wrec'
    
    This makes it much easier to understand...
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 5af37ae6970674573bb6f32c6c0f347718b8a2a9
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 10:23:21 2022 +0000

    s3:dbwrap_watch: s/dbwrap_watched_subrec/dbwrap_watched_record
    
    These functions operate on struct db_watched_record.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit 3f88b700a9e73b2371cba3bb867c4c625bc36c7c
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 10:23:21 2022 +0000

    s3:dbwrap_watch: s/db_watched_subrec/db_watched_record
    
    struct db_watched_record is the private data of
    the struct db_record produced by the struct db_context that
    uses struct db_watched_ctx.
    
    db_watched_subrec had nothing really to do with the
    sub record we got back from db_watched_ctx->backend.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit f26b22cc8e784af1dc64e0aa9a2c948385e2b667
Author: Stefan Metzmacher <metze at samba.org>
Date:   Fri Jun 24 09:57:05 2022 +0000

    s3:dbwrap_watch: use value_valid = false during dbwrap_watched_do_locked_fn()
    
    This matches db_tdb_do_locked() and the fetch_locked based fallback in
    dbwrap_do_locked().
    
    Calling dbwrap_record_get_value() is not allowed from within
    dbwrap_do_locked()!
    
    Now that rec.value is only internal, use it to remember the initial
    payload value. This will simplify further code changes as it
    matches the fetch_locked case.
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=15125
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Jeremy Allison <jra at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

commit e06413c2ba5a27607a03d7c009c66c1e7a187105
Author: Stefan Metzmacher <metze at samba.org>
Date:   Sun Jun 26 12:57:06 2022 +0000

    s3:dbwrap_watch: let dbwrap_watched_watch_state_destructor() use DBG_WARNING()
    
    When we need to ignore an error from dbwrap_do_locked() within
    dbwrap_watched_watch_state_destructor(), we'd better print it
    with log level 1 instead of 10.
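
    I.e. (sketch; 'remove_instance_fn' is an illustrative name for the
    do_locked callback used by the destructor):

      status = dbwrap_do_locked(db, key, remove_instance_fn, &state);
      if (!NT_STATUS_IS_OK(status)) {
          /*
           * Nothing we can do about it in a destructor, but be loud:
           * DBG_WARNING logs at level 1, DBG_DEBUG only at level 10.
           */
          DBG_WARNING("dbwrap_do_locked failed: %s\n", nt_errstr(status));
      }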
    
    Signed-off-by: Stefan Metzmacher <metze at samba.org>
    Reviewed-by: Ralph Boehme <slow at samba.org>

-----------------------------------------------------------------------

Summary of changes:
 source3/lib/dbwrap/dbwrap_watch.c   | 919 ++++++++++++++++++++----------------
 source3/lib/dbwrap/dbwrap_watch.h   |   5 +
 source3/lib/g_lock.c                | 223 +++++++--
 source3/smbd/smbXsrv_client.c       |  56 ++-
 source3/smbd/smbXsrv_session.c      |  76 ++-
 source3/torture/test_dbwrap_watch.c |  13 +-
 6 files changed, 835 insertions(+), 457 deletions(-)


Changeset truncated at 500 lines:

diff --git a/source3/lib/dbwrap/dbwrap_watch.c b/source3/lib/dbwrap/dbwrap_watch.c
index 17a52de37cc..e5b11aa124e 100644
--- a/source3/lib/dbwrap/dbwrap_watch.c
+++ b/source3/lib/dbwrap/dbwrap_watch.c
@@ -25,6 +25,7 @@
 #include "dbwrap_open.h"
 #include "lib/util/util_tdb.h"
 #include "lib/util/tevent_ntstatus.h"
+#include "serverid.h"
 #include "server_id_watch.h"
 #include "lib/dbwrap/dbwrap_private.h"
 
@@ -41,6 +42,7 @@ struct dbwrap_watcher {
 };
 
 #define DBWRAP_WATCHER_BUF_LENGTH (SERVER_ID_BUF_LENGTH + sizeof(uint64_t))
+#define DBWRAP_MAX_WATCHERS (INT32_MAX/DBWRAP_WATCHER_BUF_LENGTH)
 
 /*
  * Watched records contain a header of:
@@ -138,269 +140,324 @@ struct db_watched_ctx {
 	struct messaging_context *msg;
 };
 
-struct db_watched_subrec {
-	struct db_record *subrec;
+struct db_watched_record {
+	struct db_record *rec;
+	struct server_id self;
+	struct {
+		struct db_record *rec;
+		TDB_DATA initial_value;
+		bool initial_valid;
+	} backend;
+	bool force_fini_store;
 	struct dbwrap_watcher added;
+	bool removed_first;
+	struct {
+		/*
+		 * The is the number of watcher records
+		 * parsed from backend.initial_value
+		 */
+		size_t count;
+		/*
+		 * This is the pointer to
+		 * the optentially first watcher record
+		 * parsed from backend.initial_value
+		 *
+		 * The pointer actually points to memory
+		 * in backend.initial_value.
+		 *
+		 * Note it might be NULL, if count is 0.
+		 */
+		uint8_t *first;
+		/*
+		 * This remembers if we already
+		 * notified the watchers.
+		 *
+		 * As we only need to do that once during:
+		 *   do_locked
+		 * or:
+		 *   between rec = fetch_locked
+		 *   and
+		 *   TALLOC_FREE(rec)
+		 */
+		bool alerted;
+	} watchers;
+	struct {
+		struct dbwrap_watcher watcher;
+	} wakeup;
 };
 
-static NTSTATUS dbwrap_watched_subrec_storev(
-	struct db_record *rec, struct db_watched_subrec *subrec,
+static struct db_watched_record *db_record_get_watched_record(struct db_record *rec)
+{
+	/*
+	 * we can't use wrec = talloc_get_type_abort() here!
+	 * because wrec is likely a stack variable in
+	 * dbwrap_watched_do_locked_fn()
+	 *
+	 * In order to have a least some protection
+	 * we verify the cross reference pointers
+	 * between rec and wrec
+	 */
+	struct db_watched_record *wrec =
+		(struct db_watched_record *)rec->private_data;
+	SMB_ASSERT(wrec->rec == rec);
+	return wrec;
+}
+
+static NTSTATUS dbwrap_watched_record_storev(
+	struct db_watched_record *wrec,
 	const TDB_DATA *dbufs, int num_dbufs, int flags);
-static NTSTATUS dbwrap_watched_subrec_delete(
-	struct db_record *rec, struct db_watched_subrec *subrec);
 static NTSTATUS dbwrap_watched_storev(struct db_record *rec,
 				      const TDB_DATA *dbufs, int num_dbufs,
 				      int flags);
 static NTSTATUS dbwrap_watched_delete(struct db_record *rec);
-static void dbwrap_watched_subrec_wakeup(
-	struct db_record *rec, struct db_watched_subrec *subrec);
-static int db_watched_subrec_destructor(struct db_watched_subrec *s);
+static void dbwrap_watched_trigger_wakeup(struct messaging_context *msg_ctx,
+					  struct dbwrap_watcher *watcher);
+static int db_watched_record_destructor(struct db_watched_record *wrec);
+
+static void db_watched_record_init(struct db_context *db,
+				   struct messaging_context *msg_ctx,
+				   struct db_record *rec,
+				   struct db_watched_record *wrec,
+				   struct db_record *backend_rec,
+				   TDB_DATA backend_value)
+{
+	bool ok;
+
+	*rec = (struct db_record) {
+		.db = db,
+		.key = dbwrap_record_get_key(backend_rec),
+		.storev = dbwrap_watched_storev,
+		.delete_rec = dbwrap_watched_delete,
+		.private_data = wrec,
+	};
+
+	*wrec = (struct db_watched_record) {
+		.rec = rec,
+		.self = messaging_server_id(msg_ctx),
+		.backend = {
+			.rec = backend_rec,
+			.initial_value = backend_value,
+			.initial_valid = true,
+		},
+	};
+
+	ok = dbwrap_watch_rec_parse(backend_value,
+				    &wrec->watchers.first,
+				    &wrec->watchers.count,
+				    &rec->value);
+	if (!ok) {
+		dbwrap_watch_log_invalid_record(rec->db, rec->key, backend_value);
+		/* wipe invalid data */
+		rec->value = (TDB_DATA) { .dptr = NULL, .dsize = 0 };
+	}
+}
 
 static struct db_record *dbwrap_watched_fetch_locked(
 	struct db_context *db, TALLOC_CTX *mem_ctx, TDB_DATA key)
 {
 	struct db_watched_ctx *ctx = talloc_get_type_abort(
 		db->private_data, struct db_watched_ctx);
-	struct db_record *rec;
-	struct db_watched_subrec *subrec;
-	TDB_DATA subrec_value;
-	bool ok;
+	struct db_record *rec = NULL;
+	struct db_watched_record *wrec = NULL;
+	struct db_record *backend_rec = NULL;
+	TDB_DATA backend_value = { .dptr = NULL, };
 
 	rec = talloc_zero(mem_ctx, struct db_record);
 	if (rec == NULL) {
 		return NULL;
 	}
-	subrec = talloc_zero(rec, struct db_watched_subrec);
-	if (subrec == NULL) {
+	wrec = talloc_zero(rec, struct db_watched_record);
+	if (wrec == NULL) {
 		TALLOC_FREE(rec);
 		return NULL;
 	}
-	talloc_set_destructor(subrec, db_watched_subrec_destructor);
-	rec->private_data = subrec;
 
-	subrec->subrec = dbwrap_fetch_locked(ctx->backend, subrec, key);
-	if (subrec->subrec == NULL) {
+	backend_rec = dbwrap_fetch_locked(ctx->backend, wrec, key);
+	if (backend_rec == NULL) {
 		TALLOC_FREE(rec);
 		return NULL;
 	}
+	backend_value = dbwrap_record_get_value(backend_rec);
 
-	rec->db = db;
-	rec->key = dbwrap_record_get_key(subrec->subrec);
-	rec->storev = dbwrap_watched_storev;
-	rec->delete_rec = dbwrap_watched_delete;
-
-	subrec_value = dbwrap_record_get_value(subrec->subrec);
-
-	ok = dbwrap_watch_rec_parse(subrec_value, NULL, NULL, &rec->value);
-	if (!ok) {
-		dbwrap_watch_log_invalid_record(db, rec->key, subrec_value);
-		/* wipe invalid data */
-		rec->value = (TDB_DATA) { .dptr = NULL, .dsize = 0 };
-	}
+	db_watched_record_init(db, ctx->msg,
+			       rec, wrec,
+			       backend_rec, backend_value);
 	rec->value_valid = true;
+	talloc_set_destructor(wrec, db_watched_record_destructor);
 
 	return rec;
 }
 
-struct dbwrap_watched_add_watcher_state {
-	struct dbwrap_watcher w;
-	NTSTATUS status;
+struct db_watched_record_fini_state {
+	struct db_watched_record *wrec;
+	TALLOC_CTX *frame;
+	TDB_DATA dbufs[2];
+	int num_dbufs;
+	bool ok;
 };
 
-static void dbwrap_watched_add_watcher(
-	struct db_record *rec,
-	TDB_DATA value,
-	void *private_data)
+static void db_watched_record_fini_fetcher(TDB_DATA key,
+					   TDB_DATA backend_value,
+					   void *private_data)
 {
-	struct dbwrap_watched_add_watcher_state *state = private_data;
-	size_t num_watchers = 0;
+	struct db_watched_record_fini_state *state =
+		(struct db_watched_record_fini_state *)private_data;
+	struct db_watched_record *wrec = state->wrec;
+	struct db_record *rec = wrec->rec;
+	TDB_DATA value = {};
 	bool ok;
+	size_t copy_size;
 
-	uint8_t num_watchers_buf[4];
-	uint8_t add_buf[DBWRAP_WATCHER_BUF_LENGTH];
-
-	TDB_DATA dbufs[4] = {
-		{
-			.dptr = num_watchers_buf,
-			.dsize = sizeof(num_watchers_buf),
-		},
-		{ 0 },		/* filled in with existing watchers */
-		{
-			.dptr = add_buf,
-			.dsize = sizeof(add_buf),
-		},
-		{ 0 },		/* filled in with existing data */
-	};
-
-	dbwrap_watcher_put(add_buf, &state->w);
+	/*
+	 * We're within dbwrap_parse_record()
+	 * and backend_value directly points into
+	 * the mmap'ed tdb, so we need to copy the
+	 * parts we require.
+	 */
 
-	ok = dbwrap_watch_rec_parse(
-		value, &dbufs[1].dptr, &num_watchers, &dbufs[3]);
+	ok = dbwrap_watch_rec_parse(backend_value, NULL, NULL, &value);
 	if (!ok) {
 		struct db_context *db = dbwrap_record_get_db(rec);
-		TDB_DATA key = dbwrap_record_get_key(rec);
 
-		dbwrap_watch_log_invalid_record(db, key, value);
+		dbwrap_watch_log_invalid_record(db, key, backend_value);
 
 		/* wipe invalid data */
-		num_watchers = 0;
-		dbufs[3] = (TDB_DATA) { .dptr = NULL, .dsize = 0 };
+		value = (TDB_DATA) { .dptr = NULL, .dsize = 0 };
 	}
 
-	dbufs[1].dsize = num_watchers * DBWRAP_WATCHER_BUF_LENGTH;
-
-	if (num_watchers >= UINT32_MAX) {
-		DBG_DEBUG("Can't handle %zu watchers\n",
-			  num_watchers+1);
-		state->status = NT_STATUS_INSUFFICIENT_RESOURCES;
-		return;
+	copy_size = MIN(rec->value.dsize, value.dsize);
+	if (copy_size != 0) {
+		/*
+		 * First reuse the buffer we already had
+		 * as much as we can.
+		 */
+		memcpy(rec->value.dptr, value.dptr, copy_size);
+		state->dbufs[state->num_dbufs++] = rec->value;
+		value.dsize -= copy_size;
+		value.dptr += copy_size;
 	}
 
-	num_watchers += 1;
-	SIVAL(num_watchers_buf, 0, num_watchers);
+	if (value.dsize != 0) {
+		uint8_t *p = NULL;
+
+		/*
+		 * There's still new data left
+		 * allocate it on callers stackframe
+		 */
+		p = talloc_memdup(state->frame, value.dptr, value.dsize);
+		if (p == NULL) {
+			DBG_WARNING("failed to allocate %zu bytes\n",
+				    value.dsize);
+			return;
+		}
+
+		state->dbufs[state->num_dbufs++] = (TDB_DATA) {
+			.dptr = p, .dsize = value.dsize,
+		};
+	}
 
-	state->status = dbwrap_record_storev(rec, dbufs, ARRAY_SIZE(dbufs), 0);
+	state->ok = true;
 }
 
-static int db_watched_subrec_destructor(struct db_watched_subrec *s)
+static void db_watched_record_fini(struct db_watched_record *wrec)
 {
-	struct dbwrap_watched_add_watcher_state state = { .w = s->added };
-	struct db_context *backend = dbwrap_record_get_db(s->subrec);
+	struct db_watched_record_fini_state state = { .wrec = wrec, };
+	struct db_context *backend = dbwrap_record_get_db(wrec->backend.rec);
+	struct db_record *rec = wrec->rec;
+	TDB_DATA key = dbwrap_record_get_key(wrec->backend.rec);
 	NTSTATUS status;
 
-	if (s->added.pid.pid == 0) {
-		return 0;
-	}
-
-	status = dbwrap_do_locked(
-		backend, s->subrec->key, dbwrap_watched_add_watcher, &state);
-	if (!NT_STATUS_IS_OK(status)) {
-		DBG_WARNING("dbwrap_do_locked failed: %s\n",
-			    nt_errstr(status));
-		return 0;
-	}
-	if (!NT_STATUS_IS_OK(state.status)) {
-		DBG_WARNING("dbwrap_watched_add_watcher failed: %s\n",
-			    nt_errstr(state.status));
-		return 0;
+	if (!wrec->force_fini_store) {
+		return;
 	}
-	return 0;
-}
 
-struct dbwrap_watched_subrec_wakeup_state {
-	struct messaging_context *msg_ctx;
-};
-static void dbwrap_watched_subrec_wakeup_fn(
-	struct db_record *rec,
-	TDB_DATA value,
-	void *private_data);
+	if (wrec->backend.initial_valid) {
+		if (rec->value.dsize != 0) {
+			state.dbufs[state.num_dbufs++] = rec->value;
+		}
+	} else {
+		/*
+		 * We need to fetch the current
+		 * value from the backend again,
+		 * which may need to allocate memory
+		 * on the provided stackframe.
+		 */
 
-struct dbwrap_watched_do_locked_state {
-	struct db_context *db;
-	void (*fn)(struct db_record *rec,
-		   TDB_DATA value,
-		   void *private_data);
-	void *private_data;
+		state.frame = talloc_stackframe();
 
-	struct db_watched_subrec subrec;
+		status = dbwrap_parse_record(backend, key,
+				db_watched_record_fini_fetcher, &state);
+		if (!NT_STATUS_IS_OK(status)) {
+			DBG_WARNING("dbwrap_parse_record failed: %s\n",
+				    nt_errstr(status));
+			TALLOC_FREE(state.frame);
+			return;
+		}
+		if (!state.ok) {
+			TALLOC_FREE(state.frame);
+			return;
+		}
+	}
 
 	/*
-	 * This contains the initial value we got
-	 * passed to dbwrap_watched_do_locked_fn()
-	 *
-	 * It's only used in order to pass it
-	 * to dbwrap_watched_subrec_wakeup_fn()
-	 * in dbwrap_watched_do_locked_{storev,delete}()
-	 *
-	 * It gets cleared after the first call to
-	 * dbwrap_watched_subrec_wakeup_fn() as we
-	 * only need to wakeup once per dbwrap_do_locked().
+	 * We don't want to wake up others just because
+	 * we added ourself as new watcher. But if we
+	 * removed outself from the first position
+	 * we need to alert the next one.
 	 */
-	TDB_DATA wakeup_value;
-
-	NTSTATUS status;
-};
-
-static NTSTATUS dbwrap_watched_do_locked_storev(
-	struct db_record *rec, const TDB_DATA *dbufs, int num_dbufs,
-	int flags)
-{
-	struct dbwrap_watched_do_locked_state *state = rec->private_data;
-	struct db_watched_subrec *subrec = &state->subrec;
-	struct db_watched_ctx *ctx = talloc_get_type_abort(
-		state->db->private_data, struct db_watched_ctx);
-	struct dbwrap_watched_subrec_wakeup_state wakeup_state = {
-		.msg_ctx = ctx->msg,
-	};
-	NTSTATUS status;
+	if (!wrec->removed_first) {
+		dbwrap_watched_watch_skip_alerting(rec);
+	}
 
-	/*
-	 * Wakeup only needs to happen once.
-	 * so we clear state->wakeup_value after the first run
-	 */
-	dbwrap_watched_subrec_wakeup_fn(rec, state->wakeup_value, &wakeup_state);
-	state->wakeup_value = (TDB_DATA) { .dsize = 0, };
+	status = dbwrap_watched_record_storev(wrec, state.dbufs, state.num_dbufs, 0);
+	TALLOC_FREE(state.frame);
+	if (!NT_STATUS_IS_OK(status)) {
+		DBG_WARNING("dbwrap_watched_record_storev failed: %s\n",
+			    nt_errstr(status));
+		return;
+	}
 
-	status = dbwrap_watched_subrec_storev(rec, subrec, dbufs, num_dbufs,
-					      flags);
-	return status;
+	return;
 }
 
-static NTSTATUS dbwrap_watched_do_locked_delete(struct db_record *rec)
+static int db_watched_record_destructor(struct db_watched_record *wrec)
 {
-	struct dbwrap_watched_do_locked_state *state = rec->private_data;
-	struct db_watched_subrec *subrec = &state->subrec;
+	struct db_record *rec = wrec->rec;
 	struct db_watched_ctx *ctx = talloc_get_type_abort(
-		state->db->private_data, struct db_watched_ctx);
-	struct dbwrap_watched_subrec_wakeup_state wakeup_state = {
-		.msg_ctx = ctx->msg,
-	};
-	NTSTATUS status;
+		rec->db->private_data, struct db_watched_ctx);
 
-	/*
-	 * Wakeup only needs to happen once.
-	 * so we clear state->wakeup_value after the first run
-	 */
-	dbwrap_watched_subrec_wakeup_fn(rec, state->wakeup_value, &wakeup_state);
-	state->wakeup_value = (TDB_DATA) { .dsize = 0, };
-
-	status = dbwrap_watched_subrec_delete(rec, subrec);
-	return status;
+	db_watched_record_fini(wrec);
+	TALLOC_FREE(wrec->backend.rec);
+	dbwrap_watched_trigger_wakeup(ctx->msg, &wrec->wakeup.watcher);
+	return 0;
 }
 
+struct dbwrap_watched_do_locked_state {
+	struct db_context *db;
+	struct messaging_context *msg_ctx;
+	struct db_watched_record *wrec;
+	struct db_record *rec;
+	void (*fn)(struct db_record *rec,
+		   TDB_DATA value,
+		   void *private_data);
+	void *private_data;
+};
+
 static void dbwrap_watched_do_locked_fn(
-	struct db_record *subrec,
-	TDB_DATA subrec_value,
+	struct db_record *backend_rec,
+	TDB_DATA backend_value,
 	void *private_data)
 {
 	struct dbwrap_watched_do_locked_state *state =
 		(struct dbwrap_watched_do_locked_state *)private_data;
-	TDB_DATA value = {0};
-	struct db_record rec = {
-		.db = state->db,


-- 
Samba Shared Repository


