You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
fs: provide rcu-walk aware permission i_ops
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
This commit is contained in:
@@ -47,8 +47,8 @@ ata *);
|
||||
void * (*follow_link) (struct dentry *, struct nameidata *);
|
||||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, struct nameidata *);
|
||||
int (*check_acl)(struct inode *, int);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
@@ -76,7 +76,7 @@ follow_link: no
|
||||
put_link: no
|
||||
truncate: yes (see below)
|
||||
setattr: yes
|
||||
permission: no
|
||||
permission: no (may not block if called in rcu-walk mode)
|
||||
check_acl: no
|
||||
getattr: no
|
||||
setxattr: yes
|
||||
|
||||
@@ -316,11 +316,9 @@ The detailed design for rcu-walk is like this:
|
||||
|
||||
The cases where rcu-walk cannot continue are:
|
||||
* NULL dentry (ie. any uncached path element)
|
||||
* parent with d_inode->i_op->permission or ACLs
|
||||
* Following links
|
||||
|
||||
In future patches, permission checks become rcu-walk aware. It may be possible
|
||||
eventually to make following links rcu-walk aware.
|
||||
It may be possible eventually to make following links rcu-walk aware.
|
||||
|
||||
Uncached path elements will always require dropping to ref-walk mode, at the
|
||||
very least because i_mutex needs to be grabbed, and objects allocated.
|
||||
@@ -336,9 +334,49 @@ or stored into. The result is massive improvements in performance and
|
||||
scalability of path resolution.
|
||||
|
||||
|
||||
Interesting statistics
|
||||
======================
|
||||
|
||||
The following table gives rcu lookup statistics for a few simple workloads
|
||||
(2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to
|
||||
drop rcu that fail due to d_seq failure and require the entire path lookup
|
||||
again. Other cases are successful rcu-drops that are required before the final
|
||||
element, nodentry for missing dentry, revalidate for filesystem revalidate
|
||||
routine requiring rcu drop, permission for permission check requiring drop,
|
||||
and link for symlink traversal requiring drop.
|
||||
|
||||
rcu-lookups restart nodentry link revalidate permission
|
||||
bootup 47121 0 4624 1010 10283 7852
|
||||
dbench 25386793 0 6778659(26.7%) 55 549 1156
|
||||
kbuild 2696672 10 64442(2.3%) 108764(4.0%) 1 1590
|
||||
git diff 39605 0 28 2 0 106
|
||||
vfstest 24185492 4945 708725(2.9%) 1076136(4.4%) 0 2651
|
||||
|
||||
What this shows is that failed rcu-walk lookups, ie. ones that are restarted
|
||||
entirely with ref-walk, are quite rare. Even the "vfstest" case which
|
||||
specifically has concurrent renames/mkdir/rmdir/creat/unlink/etc to exercise
|
||||
such races is not showing a huge amount of restarts.
|
||||
|
||||
Dropping from rcu-walk to ref-walk means that we have encountered a dentry where
|
||||
the reference count needs to be taken for some reason. This is either because
|
||||
we have reached the target of the path walk, or because we have encountered a
|
||||
condition that can't be resolved in rcu-walk mode. Ideally, we drop rcu-walk
|
||||
only when we have reached the target dentry, so the other statistics show where
|
||||
this does not happen.
|
||||
|
||||
Note that a graceful drop from rcu-walk mode due to something such as the
|
||||
dentry not existing (which can be common) is not necessarily a failure of
|
||||
rcu-walk scheme, because some elements of the path may have been walked in
|
||||
rcu-walk mode. The further we get from common path elements (such as cwd or
|
||||
root), the less contended the dentry is likely to be. The closer we are to
|
||||
common path elements, the more likely they will exist in dentry cache.
|
||||
|
||||
|
||||
Papers and other documentation on dcache locking
|
||||
================================================
|
||||
|
||||
1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
|
||||
|
||||
2. http://lse.sourceforge.net/locking/dcache/dcache.html
|
||||
|
||||
|
||||
|
||||
@@ -379,4 +379,9 @@ where possible.
|
||||
the filesystem provides it), which requires dropping out of rcu-walk mode. This
|
||||
may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
|
||||
returned if the filesystem cannot handle rcu-walk. See
|
||||
Documentation/filesystems/vfs.txt for more details.
|
||||
|
||||
permission and check_acl are inode permission checks that are called
|
||||
on many or all directory inodes on the way down a path walk (to check for
|
||||
exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU). See
|
||||
Documentation/filesystems/vfs.txt for more details.
|
||||
|
||||
@@ -325,7 +325,8 @@ struct inode_operations {
|
||||
void * (*follow_link) (struct dentry *, struct nameidata *);
|
||||
void (*put_link) (struct dentry *, struct nameidata *, void *);
|
||||
void (*truncate) (struct inode *);
|
||||
int (*permission) (struct inode *, int, struct nameidata *);
|
||||
int (*permission) (struct inode *, int, unsigned int);
|
||||
int (*check_acl)(struct inode *, int, unsigned int);
|
||||
int (*setattr) (struct dentry *, struct iattr *);
|
||||
int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
|
||||
int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
|
||||
@@ -414,6 +415,13 @@ otherwise noted.
|
||||
permission: called by the VFS to check for access rights on a POSIX-like
|
||||
filesystem.
|
||||
|
||||
May be called in rcu-walk mode (flags & IPERM_FLAG_RCU). If in rcu-walk
|
||||
mode, the filesystem must check the permission without blocking or
|
||||
storing to the inode.
|
||||
|
||||
If a situation is encountered that rcu-walk cannot handle, return
|
||||
-ECHILD and it will be called again in ref-walk mode.
|
||||
|
||||
setattr: called by the VFS to set attributes for a file. This method
|
||||
is called by chmod(2) and related system calls.
|
||||
|
||||
|
||||
@@ -407,11 +407,14 @@ smb_file_release(struct inode *inode, struct file * file)
|
||||
* privileges, so we need our own check for this.
|
||||
*/
|
||||
static int
|
||||
smb_file_permission(struct inode *inode, int mask)
|
||||
smb_file_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
int mode = inode->i_mode;
|
||||
int error = 0;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
VERBOSE("mode=%x, mask=%x\n", mode, mask);
|
||||
|
||||
/* Look at user permissions */
|
||||
|
||||
+4
-1
@@ -91,11 +91,14 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
|
||||
return acl;
|
||||
}
|
||||
|
||||
int v9fs_check_acl(struct inode *inode, int mask)
|
||||
int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
struct v9fs_session_info *v9ses;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
v9ses = v9fs_inode2v9ses(inode);
|
||||
if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
|
||||
/*
|
||||
|
||||
+1
-1
@@ -16,7 +16,7 @@
|
||||
|
||||
#ifdef CONFIG_9P_FS_POSIX_ACL
|
||||
extern int v9fs_get_acl(struct inode *, struct p9_fid *);
|
||||
extern int v9fs_check_acl(struct inode *inode, int mask);
|
||||
extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
|
||||
extern int v9fs_acl_chmod(struct dentry *);
|
||||
extern int v9fs_set_create_acl(struct dentry *,
|
||||
struct posix_acl *, struct posix_acl *);
|
||||
|
||||
+1
-1
@@ -624,7 +624,7 @@ extern void afs_clear_permits(struct afs_vnode *);
|
||||
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
|
||||
extern void afs_zap_permits(struct rcu_head *);
|
||||
extern struct key *afs_request_key(struct afs_cell *);
|
||||
extern int afs_permission(struct inode *, int);
|
||||
extern int afs_permission(struct inode *, int, unsigned int);
|
||||
|
||||
/*
|
||||
* server.c
|
||||
|
||||
+5
-2
@@ -285,13 +285,16 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
|
||||
* - AFS ACLs are attached to directories only, and a file is controlled by its
|
||||
* parent directory's ACL
|
||||
*/
|
||||
int afs_permission(struct inode *inode, int mask)
|
||||
int afs_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct afs_vnode *vnode = AFS_FS_I(inode);
|
||||
afs_access_t uninitialized_var(access);
|
||||
struct key *key;
|
||||
int ret;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
_enter("{{%x:%u},%lx},%x,",
|
||||
vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
|
||||
|
||||
@@ -347,7 +350,7 @@ int afs_permission(struct inode *inode, int mask)
|
||||
}
|
||||
|
||||
key_put(key);
|
||||
ret = generic_permission(inode, mask, NULL);
|
||||
ret = generic_permission(inode, mask, flags, NULL);
|
||||
_leave(" = %d", ret);
|
||||
return ret;
|
||||
|
||||
|
||||
+4
-1
@@ -229,8 +229,11 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int bad_inode_permission(struct inode *inode, int mask)
|
||||
static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
|
||||
+4
-2
@@ -185,13 +185,15 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_check_acl(struct inode *inode, int mask)
|
||||
int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
int error = -EAGAIN;
|
||||
|
||||
acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
if (acl) {
|
||||
|
||||
+1
-1
@@ -2544,7 +2544,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait);
|
||||
|
||||
/* acl.c */
|
||||
#ifdef CONFIG_BTRFS_FS_POSIX_ACL
|
||||
int btrfs_check_acl(struct inode *inode, int mask);
|
||||
int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags);
|
||||
#else
|
||||
#define btrfs_check_acl NULL
|
||||
#endif
|
||||
|
||||
+5
-2
@@ -7211,11 +7211,14 @@ static int btrfs_set_page_dirty(struct page *page)
|
||||
return __set_page_dirty_nobuffers(page);
|
||||
}
|
||||
|
||||
static int btrfs_permission(struct inode *inode, int mask)
|
||||
static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
|
||||
return -EACCES;
|
||||
return generic_permission(inode, mask, btrfs_check_acl);
|
||||
return generic_permission(inode, mask, flags, btrfs_check_acl);
|
||||
}
|
||||
|
||||
static const struct inode_operations btrfs_dir_inode_operations = {
|
||||
|
||||
+8
-3
@@ -1781,12 +1781,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
|
||||
* Check inode permissions. We verify we have a valid value for
|
||||
* the AUTH cap, then call the generic handler.
|
||||
*/
|
||||
int ceph_permission(struct inode *inode, int mask)
|
||||
int ceph_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
int err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
|
||||
int err;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
|
||||
|
||||
if (!err)
|
||||
err = generic_permission(inode, mask, NULL);
|
||||
err = generic_permission(inode, mask, flags, NULL);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
+1
-1
@@ -665,7 +665,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
|
||||
extern void ceph_queue_writeback(struct inode *inode);
|
||||
|
||||
extern int ceph_do_getattr(struct inode *inode, int mask);
|
||||
extern int ceph_permission(struct inode *inode, int mask);
|
||||
extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
|
||||
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
|
||||
+5
-2
@@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cifs_permission(struct inode *inode, int mask)
|
||||
static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct cifs_sb_info *cifs_sb;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
cifs_sb = CIFS_SB(inode->i_sb);
|
||||
|
||||
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
|
||||
@@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask)
|
||||
on the client (above and beyond ACL on servers) for
|
||||
servers which do not support setting and viewing mode bits,
|
||||
so allowing client to check permissions is useful */
|
||||
return generic_permission(inode, mask, NULL);
|
||||
return generic_permission(inode, mask, flags, NULL);
|
||||
}
|
||||
|
||||
static struct kmem_cache *cifs_inode_cachep;
|
||||
|
||||
+4
-1
@@ -135,10 +135,13 @@ exit:
|
||||
}
|
||||
|
||||
|
||||
int coda_permission(struct inode *inode, int mask)
|
||||
int coda_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
|
||||
|
||||
if (!mask)
|
||||
|
||||
+4
-2
@@ -24,7 +24,7 @@
|
||||
#include <linux/coda_psdev.h>
|
||||
|
||||
/* pioctl ops */
|
||||
static int coda_ioctl_permission(struct inode *inode, int mask);
|
||||
static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
|
||||
static long coda_pioctl(struct file *filp, unsigned int cmd,
|
||||
unsigned long user_data);
|
||||
|
||||
@@ -41,8 +41,10 @@ const struct file_operations coda_ioctl_operations = {
|
||||
};
|
||||
|
||||
/* the coda pioctl inode ops */
|
||||
static int coda_ioctl_permission(struct inode *inode, int mask)
|
||||
static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
return (mask & MAY_EXEC) ? -EACCES : 0;
|
||||
}
|
||||
|
||||
|
||||
+3
-1
@@ -980,8 +980,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
|
||||
}
|
||||
|
||||
static int
|
||||
ecryptfs_permission(struct inode *inode, int mask)
|
||||
ecryptfs_permission(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
return inode_permission(ecryptfs_inode_to_lower(inode), mask);
|
||||
}
|
||||
|
||||
|
||||
+6
-2
@@ -232,10 +232,14 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
|
||||
}
|
||||
|
||||
int
|
||||
ext2_check_acl(struct inode *inode, int mask)
|
||||
ext2_check_acl(struct inode *inode, int mask, unsigned int flags)
|
||||
{
|
||||
struct posix_acl *acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
struct posix_acl *acl;
|
||||
|
||||
if (flags & IPERM_FLAG_RCU)
|
||||
return -ECHILD;
|
||||
|
||||
acl = ext2_get_acl(inode, ACL_TYPE_ACCESS);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
if (acl) {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user