winbind cache issue for NDR entries

Shilpa K shilpa.krishnareddy at gmail.com
Sat Jan 30 02:38:53 UTC 2021


Thanks Jeremy. The patch you provided works; I only had to make a slight
modification to it. Below is the updated patch.

Thanks,
Shilpa

diff --git a/source3/winbindd/winbindd_cache.c
b/source3/winbindd/winbindd_cache.c
index 0e51ae5..6ee733a 100644
--- a/source3/winbindd/winbindd_cache.c
+++ b/source3/winbindd/winbindd_cache.c
@@ -132,7 +132,7 @@ static struct winbind_cache *get_cache(struct
winbindd_domain *domain)
                domain->backend = &sam_passdb_methods;
        }

-       if ( !domain->initialized ) {
+       if ( !domain->initialized && !domain->online ) {
                /* We do not need a connection to an RW DC for cache
operation */
                init_dc_connection(domain, false);
        }
@@ -509,6 +509,40 @@ static bool store_cache_seqnum( struct winbindd_domain
*domain )
                                   domain->last_seq_check);
 }

+void force_refresh_domain_sequence_number(struct winbindd_domain *domain)
+{
+       NTSTATUS status;
+
+       /* domain->backend could be NULL when this function is called
+        * from set_domain_online; make sure to reinitialize the backend. */
+        get_cache(domain);
+
+       if (winbindd_can_contact_domain(domain)) {
+               status = domain->backend->sequence_number(domain,
+                                                 &domain->sequence_number);
+       } else {
+               /* just use the current time */
+               status = NT_STATUS_OK;
+               domain->sequence_number = time(NULL);
+       }
+
+       if (!NT_STATUS_IS_OK(status)) {
+               DEBUG(10, ("failed with %s\n", nt_errstr(status)));
+               domain->sequence_number = DOM_SEQUENCE_NONE;
+       }
+
+       domain->last_status = status;
+       domain->last_seq_check = time(NULL);
+
+       /* save the new sequence number in the cache */
+       store_cache_seqnum(domain);
+
+       DEBUG(10, ("%s seq number is now %"PRIu32"\n",
+                  domain->name, domain->sequence_number));
+
+       return;
+}
+
 /*
   refresh the domain sequence number. If force is true
   then always refresh it, no matter how recently we fetched it
@@ -554,34 +588,7 @@ static void refresh_sequence_number(struct
winbindd_domain *domain, bool force)
                goto done;
        }

-       /* important! make sure that we know if this is a native
-          mode domain or not.  And that we can contact it. */
-
-       if ( winbindd_can_contact_domain( domain ) ) {
-               status = domain->backend->sequence_number(domain,
-
&domain->sequence_number);
-       } else {
-               /* just use the current time */
-               status = NT_STATUS_OK;
-               domain->sequence_number = time(NULL);
-       }
-
-
-       /* the above call could have set our domain->backend to NULL when
-        * coming from offline to online mode, make sure to reinitialize the
-        * backend - Guenther */
-       get_cache( domain );
-
-       if (!NT_STATUS_IS_OK(status)) {
-               DEBUG(10,("refresh_sequence_number: failed with %s\n",
nt_errstr(status)));
-               domain->sequence_number = DOM_SEQUENCE_NONE;
-       }
-
-       domain->last_status = status;
-       domain->last_seq_check = time(NULL);
-
-       /* save the new sequence number in the cache */
-       store_cache_seqnum( domain );
+       force_refresh_domain_sequence_number(domain);

 done:
        DEBUG(10, ("refresh_sequence_number: %s seq number is now %d\n",
@@ -5084,7 +5091,8 @@ bool wcache_fetch_ndr(TALLOC_CTX *mem_ctx, struct
winbindd_domain *domain,
                        goto fail;
                }
                entry_seqnum = IVAL(data.dptr, 0);
-               if (entry_seqnum != dom_seqnum) {
+
+               if (wcache_server_down(domain) || (entry_seqnum !=
dom_seqnum)) {
                        DEBUG(10, ("Entry has wrong sequence number: %d\n",
                                   (int)entry_seqnum));
                        goto fail;


diff --git a/source3/winbindd/winbindd_cm.c b/source3/winbindd/winbindd_cm.c
index e0466da..faf1a62 100644
--- a/source3/winbindd/winbindd_cm.c
+++ b/source3/winbindd/winbindd_cm.c
@@ -552,6 +552,7 @@ static void set_domain_online(struct winbindd_domain
*domain)
                             MSG_WINBIND_FAILED_TO_GO_ONLINE, NULL);

        domain->online = True;
+       force_refresh_domain_sequence_number(domain);

        /* Send a message to the parent that the domain is online. */
        if (parent_pid > 1 && !domain->internal) {

diff --git a/source3/winbindd/winbindd_proto.h
b/source3/winbindd/winbindd_proto.h
index 55d5337..cc61ac0 100644
--- a/source3/winbindd/winbindd_proto.h
+++ b/source3/winbindd/winbindd_proto.h
@@ -56,7 +56,7 @@ NTSTATUS rpc_lookup_sids(TALLOC_CTX *mem_ctx,
                         struct lsa_TransNameArray **pnames);

 /* The following definitions come from winbindd/winbindd_cache.c  */
-
+void force_refresh_domain_sequence_number(struct winbindd_domain *domain);
 NTSTATUS wcache_cached_creds_exist(struct winbindd_domain *domain, const
struct dom_sid *sid);
 NTSTATUS wcache_get_creds(struct winbindd_domain *domain,
                          TALLOC_CTX *mem_ctx,


On Sat, Jan 30, 2021 at 2:35 AM Jeremy Allison <jra at samba.org> wrote:

> On Fri, Jan 29, 2021 at 07:39:40PM +0530, Shilpa K via samba-technical
> wrote:
> >Hello,
> >
> >We had a customer report that users were not able to log in for about
> >30 minutes, and the problem cleared itself after about 30 minutes. They
> >are using Samba as a member server in a domain which has 2 way trust with
> >another domain (say ABC.COM). Upon investigation, we found that there
> was a
> >problem with trusted domain DCs for a very short duration as per the event
> >log on the DC of the primary domain. This problem seems to have been
> >cleared away after a short duration. Around the same time, a user
> belonging
> >to a trusted domain mapped a Samba share and encountered a problem. At this
> >time, it looks like an NDR cache entry for the trusted domain group "Domain Users"
> >was added in winbindd_cache.tdb to indicate that there was a lookup
> problem
> >and the status NT_STATUS_TRUSTED_DOMAIN_FAILURE was stored as part of this
> >entry. Once the issue with trusted domain DC was cleared and the domain
> was
> >back online, when users tried to login, PAM_AUTH was successful for the
> >users but getpwnam failed while looking up SID for "Domain Users". This
> >failure was returned from the entry in the winbindd_cache.tdb as
> >wcache_fetch_ndr() succeeded for this entry. Due to this, users belonging
> >to the trusted domain were not able to login. Once the cache was expired,
> >getpwnam succeeded for trusted domain users and the shares could be
> mapped.
> >In order to resolve this issue, should we not refresh the sequence number
> >when the domain goes online? Btw, we are using "winbind cache time =
> 1800".
>
> Yep, looks like we should add a call to force a refresh of the
> sequence number in the cache here:
>
> source3/winbindd/winbindd_cm.c:set_domain_online()
>
>   538
>   539         domain->online = True;
>   540
>
> Add a force_refresh_domain_sequence_number(domain) call above.
>
> Here is a (raw, untested) patch that implements this.
>
> Any chance you can test this for me ?
>
> Jeremy.
>


More information about the samba-technical mailing list