Patchwork [BUG:2061] nfs: opendir/closedir for every readdir

login
register
Submitter Anand Avati
Date 2010-11-15 11:04:00
Message ID <20101115110400.GA20991@dev.gluster.com>
Download mbox | patch
Permalink /patch/5699/
State Accepted
Headers show

Comments

Anand Avati - 2010-11-15 11:04:00
Revert "nfs3: Unref & unbind dir fd with inode lock on EOF"

This reverts commit 4e6fb304ce41acbaf7c9ba67c06bf443e65082e8.

The above commit (which unbinds fds at EOF) does not fix the original
bug (1619) because a readdir from a second app could have already
started before the readdir_cbk of the first app's readdir reaches
NFS code. Hence the race still exists.

Performing extra unrefs when EOF is received is not a reliable way
of detecting that a client has performed a closedir (and to close
the fd ourselves). Neither is interpreting a 0 cookies a new opendir.
Clients can always use telldir/seekdir and hit EOFs twice.

Due to the way NFS3 protocol is designed, it is just not possible
for the server to reliably detect opendirs/closedirs performed by
the client and map the corresponding readdirs to the same dir fd
on the server side.

The only reliable way of fixing this is to perform opendir/closedir
at the cost of performance. Any optimization towards keeping dir fds
open attempting to map them with application's opendir/closedir will
either result in fd leaks or extra fd unrefs.

Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com>
---
 libglusterfs/src/fd.c                 |   23 -----------------------
 libglusterfs/src/fd.h                 |    2 --
 xlators/nfs/server/src/nfs3-helpers.c |    2 +-
 xlators/nfs/server/src/nfs3.c         |   17 ++++-------------
 4 files changed, 5 insertions(+), 39 deletions(-)
Shehjar Tikoo - 2010-11-15 11:23:42
Reviewed OK.

Anand V. Avati wrote:
> Revert "nfs3: Unref & unbind dir fd with inode lock on EOF"
> 
> This reverts commit 4e6fb304ce41acbaf7c9ba67c06bf443e65082e8.
> 
> The above commit (which unbinds fds at EOF) does not fix the original
> bug (1619) because a readdir from a second app could have already
> started before the readdir_cbk of the first app's readdir reaches
> NFS code. Hence the race still exists.
> 
> Performing extra unrefs when EOF is received is not a reliable way
> of detecting that a client has performed a closedir (and to close
> the fd ourselves). Neither is interpreting a 0 cookies a new opendir.
> Clients can always use telldir/seekdir and hit EOFs twice.
> 
> Due to the way NFS3 protocol is designed, it is just not possible
> for the server to reliably detect opendirs/closedirs performed by
> the client and map the corresponding readdirs to the same dir fd
> on the server side.
> 
> The only reliable way of fixing this is to perform opendir/closedir
> at the cost of performance. Any optimization towards keeping dir fds
> open attempting to map them with application's opendir/closedir will
> either result in fd leaks or extra fd unrefs.
> 
> Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com>
> ---
>  libglusterfs/src/fd.c                 |   23 -----------------------
>  libglusterfs/src/fd.h                 |    2 --
>  xlators/nfs/server/src/nfs3-helpers.c |    2 +-
>  xlators/nfs/server/src/nfs3.c         |   17 ++++-------------
>  4 files changed, 5 insertions(+), 39 deletions(-)
> 
> diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
> index e4e75d8..def1525 100644
> --- a/libglusterfs/src/fd.c
> +++ b/libglusterfs/src/fd.c
> @@ -500,29 +500,6 @@ fd_bind (fd_t *fd)
>  }
>  
>  
> -void
> -fd_unref_unbind (fd_t *fd)
> -{
> -        GF_ASSERT (fd->refcount);
> -
> -        LOCK (&fd->inode->lock);
> -        {
> -	        --fd->refcount;
> -                /* Better know what you're doing with this function
> -                 * because it does not do what fd_destroy does when
> -                 * refcount goes to 0.
> -                 * Make sure you only call this when you know there are
> -                 * pending refs on the fd.
> -                 */
> -                GF_ASSERT (fd->refcount);
> -		list_del_init (&fd->inode_list);
> -        }
> -        UNLOCK (&fd->inode->lock);
> -
> -        return;
> -}
> -
> -
>  fd_t *
>  fd_create (inode_t *inode, pid_t pid)
>  {
> diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
> index d3efeb9..d19b072 100644
> --- a/libglusterfs/src/fd.h
> +++ b/libglusterfs/src/fd.h
> @@ -169,6 +169,4 @@ _fd_ref (fd_t *fd);
>  void
>  fd_ctx_dump (fd_t *fd, char *prefix);
>  
> -extern void
> -fd_unref_unbind (fd_t *fd);
>  #endif /* _FD_H */
> diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
> index cff029d..8e4ca37 100644
> --- a/xlators/nfs/server/src/nfs3-helpers.c
> +++ b/xlators/nfs/server/src/nfs3-helpers.c
> @@ -1775,7 +1775,7 @@ nfs3_dir_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
>                  goto err;
>          }
>  
> -        cs->fd = fd_ref (fd);
> +        cs->fd = fd;     /* Gets unrefd when the call state is wiped. */
>          nfs3_set_inode_opened (cs->nfsx, cs->resolvedloc.inode);
>          gf_log (GF_NFS3, GF_LOG_TRACE, "FD_REF: %d", fd->refcount);
>          nfs3_call_resume (cs);
> diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
> index 7fed84d..ce59275 100644
> --- a/xlators/nfs/server/src/nfs3.c
> +++ b/xlators/nfs/server/src/nfs3.c
> @@ -3889,17 +3889,6 @@ nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
>  
>          stat = NFS3_OK;
>  nfs3err:
> -
> -        /* On end-of-directory, unref the fd to have it removed from the cache
> -         * and also unbind it from the fd so that any subsequent request on the
> -         * on the directory do not get this fd when fd_lookup is called in
> -         * dir open resume path.
> -         */
> -        if (is_eof) {
> -                gf_log (GF_NFS3, GF_LOG_TRACE, "EOF REF: %d", cs->fd->refcount);
> -                fd_unref_unbind (cs->fd);
> -        }
> -
>          if (cs->maxcount == 0) {
>                  nfs3_log_readdir_res (nfs_rpcsvc_request_xid (cs->req), stat,
>                                        op_errno, (uintptr_t)cs->fd,
> @@ -3917,7 +3906,10 @@ nfs3err:
>                                       cs->maxcount, is_eof);
>          }
>  
> -        gf_log (GF_NFS3, GF_LOG_TRACE, "CS WIPE REF: %d", cs->fd->refcount);
> +        if (is_eof) {
> +                /* do nothing */
> +        }
> +
>          nfs3_call_state_wipe (cs);
>          return 0;
>  }
> @@ -3968,7 +3960,6 @@ err:
>           * so that next time the dir is read, we'll get any changed directory
>           * entries.
>           */
> -        fd_unref_unbind (cs->fd);
>          nfs3_call_state_wipe (cs);
>  ret:
>          return 0;
Harshavardhana - 2010-11-15 19:34:35
wouldn't this cause performance impact over large directory traversals?
small files?

On Mon, Nov 15, 2010 at 3:23 AM, Shehjar Tikoo <shehjart@gluster.com> wrote:

> Reviewed OK.
>
>
> Anand V. Avati wrote:
>
>> Revert "nfs3: Unref & unbind dir fd with inode lock on EOF"
>>
>> This reverts commit 4e6fb304ce41acbaf7c9ba67c06bf443e65082e8.
>>
>> The above commit (which unbinds fds at EOF) does not fix the original
>> bug (1619) because a readdir from a second app could have already
>> started before the readdir_cbk of the first app's readdir reaches
>> NFS code. Hence the race still exists.
>>
>> Performing extra unrefs when EOF is received is not a reliable way
>> of detecting that a client has performed a closedir (and to close
>> the fd ourselves). Neither is interpreting a 0 cookies a new opendir.
>> Clients can always use telldir/seekdir and hit EOFs twice.
>>
>> Due to the way NFS3 protocol is designed, it is just not possible
>> for the server to reliably detect opendirs/closedirs performed by
>> the client and map the corresponding readdirs to the same dir fd
>> on the server side.
>>
>> The only reliable way of fixing this is to perform opendir/closedir
>> at the cost of performance. Any optimization towards keeping dir fds
>> open attempting to map them with application's opendir/closedir will
>> either result in fd leaks or extra fd unrefs.
>>
>> Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com>
>> ---
>>  libglusterfs/src/fd.c                 |   23 -----------------------
>>  libglusterfs/src/fd.h                 |    2 --
>>  xlators/nfs/server/src/nfs3-helpers.c |    2 +-
>>  xlators/nfs/server/src/nfs3.c         |   17 ++++-------------
>>  4 files changed, 5 insertions(+), 39 deletions(-)
>>
>> diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
>> index e4e75d8..def1525 100644
>> --- a/libglusterfs/src/fd.c
>> +++ b/libglusterfs/src/fd.c
>> @@ -500,29 +500,6 @@ fd_bind (fd_t *fd)
>>  }
>>  -void
>> -fd_unref_unbind (fd_t *fd)
>> -{
>> -        GF_ASSERT (fd->refcount);
>> -
>> -        LOCK (&fd->inode->lock);
>> -        {
>> -               --fd->refcount;
>> -                /* Better know what you're doing with this function
>> -                 * because it does not do what fd_destroy does when
>> -                 * refcount goes to 0.
>> -                 * Make sure you only call this when you know there are
>> -                 * pending refs on the fd.
>> -                 */
>> -                GF_ASSERT (fd->refcount);
>> -               list_del_init (&fd->inode_list);
>> -        }
>> -        UNLOCK (&fd->inode->lock);
>> -
>> -        return;
>> -}
>> -
>> -
>>  fd_t *
>>  fd_create (inode_t *inode, pid_t pid)
>>  {
>> diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
>> index d3efeb9..d19b072 100644
>> --- a/libglusterfs/src/fd.h
>> +++ b/libglusterfs/src/fd.h
>> @@ -169,6 +169,4 @@ _fd_ref (fd_t *fd);
>>  void
>>  fd_ctx_dump (fd_t *fd, char *prefix);
>>  -extern void
>> -fd_unref_unbind (fd_t *fd);
>>  #endif /* _FD_H */
>> diff --git a/xlators/nfs/server/src/nfs3-helpers.c
>> b/xlators/nfs/server/src/nfs3-helpers.c
>> index cff029d..8e4ca37 100644
>> --- a/xlators/nfs/server/src/nfs3-helpers.c
>> +++ b/xlators/nfs/server/src/nfs3-helpers.c
>> @@ -1775,7 +1775,7 @@ nfs3_dir_open_cbk (call_frame_t *frame, void
>> *cookie, xlator_t *this,
>>                 goto err;
>>         }
>>  -        cs->fd = fd_ref (fd);
>> +        cs->fd = fd;     /* Gets unrefd when the call state is wiped. */
>>         nfs3_set_inode_opened (cs->nfsx, cs->resolvedloc.inode);
>>         gf_log (GF_NFS3, GF_LOG_TRACE, "FD_REF: %d", fd->refcount);
>>         nfs3_call_resume (cs);
>> diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
>> index 7fed84d..ce59275 100644
>> --- a/xlators/nfs/server/src/nfs3.c
>> +++ b/xlators/nfs/server/src/nfs3.c
>> @@ -3889,17 +3889,6 @@ nfs3svc_readdir_fstat_cbk (call_frame_t *frame,
>> void *cookie, xlator_t *this,
>>          stat = NFS3_OK;
>>  nfs3err:
>> -
>> -        /* On end-of-directory, unref the fd to have it removed from the
>> cache
>> -         * and also unbind it from the fd so that any subsequent request
>> on the
>> -         * on the directory do not get this fd when fd_lookup is called
>> in
>> -         * dir open resume path.
>> -         */
>> -        if (is_eof) {
>> -                gf_log (GF_NFS3, GF_LOG_TRACE, "EOF REF: %d",
>> cs->fd->refcount);
>> -                fd_unref_unbind (cs->fd);
>> -        }
>> -
>>         if (cs->maxcount == 0) {
>>                 nfs3_log_readdir_res (nfs_rpcsvc_request_xid (cs->req),
>> stat,
>>                                       op_errno, (uintptr_t)cs->fd,
>> @@ -3917,7 +3906,10 @@ nfs3err:
>>                                      cs->maxcount, is_eof);
>>         }
>>  -        gf_log (GF_NFS3, GF_LOG_TRACE, "CS WIPE REF: %d",
>> cs->fd->refcount);
>> +        if (is_eof) {
>> +                /* do nothing */
>> +        }
>> +
>>         nfs3_call_state_wipe (cs);
>>         return 0;
>>  }
>> @@ -3968,7 +3960,6 @@ err:
>>          * so that next time the dir is read, we'll get any changed
>> directory
>>          * entries.
>>          */
>> -        fd_unref_unbind (cs->fd);
>>         nfs3_call_state_wipe (cs);
>>  ret:
>>         return 0;
>>
>
> _______________________________________________
> glusterfs mailing list
> glusterfs@dev.gluster.com
> http://dev.gluster.com/cgi-bin/mailman/listinfo/glusterfs
>
Shehjar Tikoo - 2010-11-16 05:04:02
Harshavardhana wrote:
> wouldn't this cause performance impact over large directory traversals? 
> small files? 

It does. But consider it a quick fix if you will. The crash is pretty 
consistent. I can come back to it later.

> 
> On Mon, Nov 15, 2010 at 3:23 AM, Shehjar Tikoo <shehjart@gluster.com 
> <mailto:shehjart@gluster.com>> wrote:
> 
>     Reviewed OK.
> 
> 
>     Anand V. Avati wrote:
> 
>         Revert "nfs3: Unref & unbind dir fd with inode lock on EOF"
> 
>         This reverts commit 4e6fb304ce41acbaf7c9ba67c06bf443e65082e8.
> 
>         The above commit (which unbinds fds at EOF) does not fix the
>         original
>         bug (1619) because a readdir from a second app could have already
>         started before the readdir_cbk of the first app's readdir reaches
>         NFS code. Hence the race still exists.
> 
>         Performing extra unrefs when EOF is received is not a reliable way
>         of detecting that a client has performed a closedir (and to close
>         the fd ourselves). Neither is interpreting a 0 cookies a new
>         opendir.
>         Clients can always use telldir/seekdir and hit EOFs twice.
> 
>         Due to the way NFS3 protocol is designed, it is just not possible
>         for the server to reliably detect opendirs/closedirs performed by
>         the client and map the corresponding readdirs to the same dir fd
>         on the server side.
> 
>         The only reliable way of fixing this is to perform opendir/closedir
>         at the cost of performance. Any optimization towards keeping dir fds
>         open attempting to map them with application's opendir/closedir will
>         either result in fd leaks or extra fd unrefs.
> 
>         Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com
>         <mailto:avati@blackhole.gluster.com>>
>         ---
>          libglusterfs/src/fd.c                 |   23
>         -----------------------
>          libglusterfs/src/fd.h                 |    2 --
>          xlators/nfs/server/src/nfs3-helpers.c |    2 +-
>          xlators/nfs/server/src/nfs3.c         |   17 ++++-------------
>          4 files changed, 5 insertions(+), 39 deletions(-)
> 
>         diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
>         index e4e75d8..def1525 100644
>         --- a/libglusterfs/src/fd.c
>         +++ b/libglusterfs/src/fd.c
>         @@ -500,29 +500,6 @@ fd_bind (fd_t *fd)
>          }
>          -void
>         -fd_unref_unbind (fd_t *fd)
>         -{
>         -        GF_ASSERT (fd->refcount);
>         -
>         -        LOCK (&fd->inode->lock);
>         -        {
>         -               --fd->refcount;
>         -                /* Better know what you're doing with this function
>         -                 * because it does not do what fd_destroy does when
>         -                 * refcount goes to 0.
>         -                 * Make sure you only call this when you know
>         there are
>         -                 * pending refs on the fd.
>         -                 */
>         -                GF_ASSERT (fd->refcount);
>         -               list_del_init (&fd->inode_list);
>         -        }
>         -        UNLOCK (&fd->inode->lock);
>         -
>         -        return;
>         -}
>         -
>         -
>          fd_t *
>          fd_create (inode_t *inode, pid_t pid)
>          {
>         diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
>         index d3efeb9..d19b072 100644
>         --- a/libglusterfs/src/fd.h
>         +++ b/libglusterfs/src/fd.h
>         @@ -169,6 +169,4 @@ _fd_ref (fd_t *fd);
>          void
>          fd_ctx_dump (fd_t *fd, char *prefix);
>          -extern void
>         -fd_unref_unbind (fd_t *fd);
>          #endif /* _FD_H */
>         diff --git a/xlators/nfs/server/src/nfs3-helpers.c
>         b/xlators/nfs/server/src/nfs3-helpers.c
>         index cff029d..8e4ca37 100644
>         --- a/xlators/nfs/server/src/nfs3-helpers.c
>         +++ b/xlators/nfs/server/src/nfs3-helpers.c
>         @@ -1775,7 +1775,7 @@ nfs3_dir_open_cbk (call_frame_t *frame,
>         void *cookie, xlator_t *this,
>                         goto err;
>                 }
>          -        cs->fd = fd_ref (fd);
>         +        cs->fd = fd;     /* Gets unrefd when the call state is
>         wiped. */
>                 nfs3_set_inode_opened (cs->nfsx, cs->resolvedloc.inode);
>                 gf_log (GF_NFS3, GF_LOG_TRACE, "FD_REF: %d", fd->refcount);
>                 nfs3_call_resume (cs);
>         diff --git a/xlators/nfs/server/src/nfs3.c
>         b/xlators/nfs/server/src/nfs3.c
>         index 7fed84d..ce59275 100644
>         --- a/xlators/nfs/server/src/nfs3.c
>         +++ b/xlators/nfs/server/src/nfs3.c
>         @@ -3889,17 +3889,6 @@ nfs3svc_readdir_fstat_cbk (call_frame_t
>         *frame, void *cookie, xlator_t *this,
>                  stat = NFS3_OK;
>          nfs3err:
>         -
>         -        /* On end-of-directory, unref the fd to have it removed
>         from the cache
>         -         * and also unbind it from the fd so that any
>         subsequent request on the
>         -         * on the directory do not get this fd when fd_lookup
>         is called in
>         -         * dir open resume path.
>         -         */
>         -        if (is_eof) {
>         -                gf_log (GF_NFS3, GF_LOG_TRACE, "EOF REF: %d",
>         cs->fd->refcount);
>         -                fd_unref_unbind (cs->fd);
>         -        }
>         -
>                 if (cs->maxcount == 0) {
>                         nfs3_log_readdir_res (nfs_rpcsvc_request_xid
>         (cs->req), stat,
>                                               op_errno, (uintptr_t)cs->fd,
>         @@ -3917,7 +3906,10 @@ nfs3err:
>                                              cs->maxcount, is_eof);
>                 }
>          -        gf_log (GF_NFS3, GF_LOG_TRACE, "CS WIPE REF: %d",
>         cs->fd->refcount);
>         +        if (is_eof) {
>         +                /* do nothing */
>         +        }
>         +
>                 nfs3_call_state_wipe (cs);
>                 return 0;
>          }
>         @@ -3968,7 +3960,6 @@ err:
>                  * so that next time the dir is read, we'll get any
>         changed directory
>                  * entries.
>                  */
>         -        fd_unref_unbind (cs->fd);
>                 nfs3_call_state_wipe (cs);
>          ret:
>                 return 0;
> 
> 
>     _______________________________________________
>     glusterfs mailing list
>     glusterfs@dev.gluster.com <mailto:glusterfs@dev.gluster.com>
>     http://dev.gluster.com/cgi-bin/mailman/listinfo/glusterfs
> 
>

Patch

diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index e4e75d8..def1525 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -500,29 +500,6 @@  fd_bind (fd_t *fd)
 }
 
 
-void
-fd_unref_unbind (fd_t *fd)
-{
-        GF_ASSERT (fd->refcount);
-
-        LOCK (&fd->inode->lock);
-        {
-	        --fd->refcount;
-                /* Better know what you're doing with this function
-                 * because it does not do what fd_destroy does when
-                 * refcount goes to 0.
-                 * Make sure you only call this when you know there are
-                 * pending refs on the fd.
-                 */
-                GF_ASSERT (fd->refcount);
-		list_del_init (&fd->inode_list);
-        }
-        UNLOCK (&fd->inode->lock);
-
-        return;
-}
-
-
 fd_t *
 fd_create (inode_t *inode, pid_t pid)
 {
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
index d3efeb9..d19b072 100644
--- a/libglusterfs/src/fd.h
+++ b/libglusterfs/src/fd.h
@@ -169,6 +169,4 @@  _fd_ref (fd_t *fd);
 void
 fd_ctx_dump (fd_t *fd, char *prefix);
 
-extern void
-fd_unref_unbind (fd_t *fd);
 #endif /* _FD_H */
diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c
index cff029d..8e4ca37 100644
--- a/xlators/nfs/server/src/nfs3-helpers.c
+++ b/xlators/nfs/server/src/nfs3-helpers.c
@@ -1775,7 +1775,7 @@  nfs3_dir_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                 goto err;
         }
 
-        cs->fd = fd_ref (fd);
+        cs->fd = fd;     /* Gets unrefd when the call state is wiped. */
         nfs3_set_inode_opened (cs->nfsx, cs->resolvedloc.inode);
         gf_log (GF_NFS3, GF_LOG_TRACE, "FD_REF: %d", fd->refcount);
         nfs3_call_resume (cs);
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c
index 7fed84d..ce59275 100644
--- a/xlators/nfs/server/src/nfs3.c
+++ b/xlators/nfs/server/src/nfs3.c
@@ -3889,17 +3889,6 @@  nfs3svc_readdir_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         stat = NFS3_OK;
 nfs3err:
-
-        /* On end-of-directory, unref the fd to have it removed from the cache
-         * and also unbind it from the fd so that any subsequent request on the
-         * on the directory do not get this fd when fd_lookup is called in
-         * dir open resume path.
-         */
-        if (is_eof) {
-                gf_log (GF_NFS3, GF_LOG_TRACE, "EOF REF: %d", cs->fd->refcount);
-                fd_unref_unbind (cs->fd);
-        }
-
         if (cs->maxcount == 0) {
                 nfs3_log_readdir_res (nfs_rpcsvc_request_xid (cs->req), stat,
                                       op_errno, (uintptr_t)cs->fd,
@@ -3917,7 +3906,10 @@  nfs3err:
                                      cs->maxcount, is_eof);
         }
 
-        gf_log (GF_NFS3, GF_LOG_TRACE, "CS WIPE REF: %d", cs->fd->refcount);
+        if (is_eof) {
+                /* do nothing */
+        }
+
         nfs3_call_state_wipe (cs);
         return 0;
 }
@@ -3968,7 +3960,6 @@  err:
          * so that next time the dir is read, we'll get any changed directory
          * entries.
          */
-        fd_unref_unbind (cs->fd);
         nfs3_call_state_wipe (cs);
 ret:
         return 0;