Merge tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Add page -> folio conversions (Joanne Koong, Josef Bacik)

 - Allow max size of fuse requests to be configurable with a sysctl
   (Joanne Koong)

 - Allow FOPEN_DIRECT_IO to take advantage of async code path (yangyun)

 - Fix large kernel reads (like a module load) in virtio_fs (Hou Tao)

 - Fix attribute inconsistency when readdirplus (and, in corner cases,
   plain lookup) races with inode eviction (Zhang Tianci)

 - Fix a WARN_ON triggered by virtio_fs (Asahi Lina)

* tag 'fuse-update-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (30 commits)
  virtiofs: dax: remove ->writepages() callback
  fuse: check attributes staleness on fuse_iget()
  fuse: remove pages for requests and exclusively use folios
  fuse: convert direct io to use folios
  mm/writeback: add folio_mark_dirty_lock()
  fuse: convert writebacks to use folios
  fuse: convert retrieves to use folios
  fuse: convert ioctls to use folios
  fuse: convert writes (non-writeback) to use folios
  fuse: convert reads to use folios
  fuse: convert readdir to use folios
  fuse: convert readlink to use folios
  fuse: convert cuse to use folios
  fuse: add support in virtio for requests using folios
  fuse: support folios in struct fuse_args_pages and fuse_copy_pages()
  fuse: convert fuse_notify_store to use folios
  fuse: convert fuse_retrieve to use folios
  fuse: use the folio based vmstat helpers
  fuse: convert fuse_writepage_need_send to take a folio
  fuse: convert fuse_do_readpage to use folios
  ...
This commit is contained in:
Linus Torvalds
2024-11-26 12:41:27 -08:00
16 changed files with 579 additions and 375 deletions

View File

@@ -337,3 +337,13 @@ Each "watch" costs roughly 90 bytes on a 32-bit kernel, and roughly 160 bytes
on a 64-bit one.
The current default value for ``max_user_watches`` is 4% of the
available low memory, divided by the "watch" cost in bytes.
5. /proc/sys/fs/fuse - Configuration options for FUSE filesystems
=====================================================================
This directory contains the following configuration options for FUSE
filesystems:
``/proc/sys/fs/fuse/max_pages_limit`` is a read/write file for
setting/getting the maximum number of pages that can be used for servicing
requests in FUSE.

View File

@@ -14,5 +14,6 @@ fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o
fuse-y += iomode.o
fuse-$(CONFIG_FUSE_DAX) += dax.o
fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
fuse-$(CONFIG_SYSCTL) += sysctl.o
virtiofs-y := virtio_fs.o

View File

@@ -303,8 +303,8 @@ struct cuse_init_args {
struct fuse_args_pages ap;
struct cuse_init_in in;
struct cuse_init_out out;
struct page *page;
struct fuse_page_desc desc;
struct folio *folio;
struct fuse_folio_desc desc;
};
/**
@@ -326,7 +326,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
struct fuse_args_pages *ap = &ia->ap;
struct cuse_conn *cc = fc_to_cc(fc), *pos;
struct cuse_init_out *arg = &ia->out;
struct page *page = ap->pages[0];
struct folio *folio = ap->folios[0];
struct cuse_devinfo devinfo = { };
struct device *dev;
struct cdev *cdev;
@@ -343,7 +343,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
/* parse init reply */
cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
rc = cuse_parse_devinfo(page_address(page), ap->args.out_args[1].size,
rc = cuse_parse_devinfo(folio_address(folio), ap->args.out_args[1].size,
&devinfo);
if (rc)
goto err;
@@ -411,7 +411,7 @@ static void cuse_process_init_reply(struct fuse_mount *fm,
kobject_uevent(&dev->kobj, KOBJ_ADD);
out:
kfree(ia);
__free_page(page);
folio_put(folio);
return;
err_cdev:
@@ -429,7 +429,7 @@ err:
static int cuse_send_init(struct cuse_conn *cc)
{
int rc;
struct page *page;
struct folio *folio;
struct fuse_mount *fm = &cc->fm;
struct cuse_init_args *ia;
struct fuse_args_pages *ap;
@@ -437,13 +437,14 @@ static int cuse_send_init(struct cuse_conn *cc)
BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
rc = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0);
if (!folio)
goto err;
ia = kzalloc(sizeof(*ia), GFP_KERNEL);
if (!ia)
goto err_free_page;
goto err_free_folio;
ap = &ia->ap;
ia->in.major = FUSE_KERNEL_VERSION;
@@ -459,18 +460,18 @@ static int cuse_send_init(struct cuse_conn *cc)
ap->args.out_args[1].size = CUSE_INIT_INFO_MAX;
ap->args.out_argvar = true;
ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &ia->page;
ap->num_folios = 1;
ap->folios = &ia->folio;
ap->descs = &ia->desc;
ia->page = page;
ia->folio = folio;
ia->desc.length = ap->args.out_args[1].size;
ap->args.end = cuse_process_init_reply;
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
if (rc) {
kfree(ia);
err_free_page:
__free_page(page);
err_free_folio:
folio_put(folio);
}
err:
return rc;

View File

@@ -774,16 +774,6 @@ out:
return ret;
}
static int fuse_dax_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc);
}
static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order,
bool write)
{
@@ -1323,7 +1313,6 @@ bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi)
}
static const struct address_space_operations fuse_dax_file_aops = {
.writepages = fuse_dax_writepages,
.direct_IO = noop_direct_IO,
.dirty_folio = noop_dirty_folio,
};

View File

@@ -1028,17 +1028,27 @@ static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
struct fuse_req *req = cs->req;
struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args);
for (i = 0; i < ap->num_pages && (nbytes || zeroing); i++) {
for (i = 0; i < ap->num_folios && (nbytes || zeroing); i++) {
int err;
unsigned int offset = ap->descs[i].offset;
unsigned int count = min(nbytes, ap->descs[i].length);
struct page *orig, *pagep;
err = fuse_copy_page(cs, &ap->pages[i], offset, count, zeroing);
orig = pagep = &ap->folios[i]->page;
err = fuse_copy_page(cs, &pagep, offset, count, zeroing);
if (err)
return err;
nbytes -= count;
/*
* fuse_copy_page may have moved a page from a pipe instead of
* copying into our given page, so update the folios if it was
* replaced.
*/
if (pagep != orig)
ap->folios[i] = page_folio(pagep);
}
return 0;
}
@@ -1654,24 +1664,25 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
num = outarg.size;
while (num) {
struct folio *folio;
struct page *page;
unsigned int this_num;
err = -ENOMEM;
page = find_or_create_page(mapping, index,
mapping_gfp_mask(mapping));
if (!page)
folio = filemap_grab_folio(mapping, index);
err = PTR_ERR(folio);
if (IS_ERR(folio))
goto out_iput;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
page = &folio->page;
this_num = min_t(unsigned, num, folio_size(folio) - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
if (!PageUptodate(page) && !err && offset == 0 &&
(this_num == PAGE_SIZE || file_size == end)) {
zero_user_segment(page, this_num, PAGE_SIZE);
SetPageUptodate(page);
if (!folio_test_uptodate(folio) && !err && offset == 0 &&
(this_num == folio_size(folio) || file_size == end)) {
folio_zero_segment(folio, this_num, folio_size(folio));
folio_mark_uptodate(folio);
}
unlock_page(page);
put_page(page);
folio_unlock(folio);
folio_put(folio);
if (err)
goto out_iput;
@@ -1703,7 +1714,7 @@ static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_retrieve_args *ra =
container_of(args, typeof(*ra), ap.args);
release_pages(ra->ap.pages, ra->ap.num_pages);
release_pages(ra->ap.folios, ra->ap.num_folios);
kfree(ra);
}
@@ -1717,7 +1728,7 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
unsigned int num;
unsigned int offset;
size_t total_len = 0;
unsigned int num_pages;
unsigned int num_pages, cur_pages = 0;
struct fuse_conn *fc = fm->fc;
struct fuse_retrieve_args *ra;
size_t args_size = sizeof(*ra);
@@ -1736,15 +1747,15 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
num_pages = (num + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
num_pages = min(num_pages, fc->max_pages);
args_size += num_pages * (sizeof(ap->pages[0]) + sizeof(ap->descs[0]));
args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0]));
ra = kzalloc(args_size, GFP_KERNEL);
if (!ra)
return -ENOMEM;
ap = &ra->ap;
ap->pages = (void *) (ra + 1);
ap->descs = (void *) (ap->pages + num_pages);
ap->folios = (void *) (ra + 1);
ap->descs = (void *) (ap->folios + num_pages);
args = &ap->args;
args->nodeid = outarg->nodeid;
@@ -1755,19 +1766,20 @@ static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode,
index = outarg->offset >> PAGE_SHIFT;
while (num && ap->num_pages < num_pages) {
struct page *page;
while (num && cur_pages < num_pages) {
struct folio *folio;
unsigned int this_num;
page = find_get_page(mapping, index);
if (!page)
folio = filemap_get_folio(mapping, index);
if (IS_ERR(folio))
break;
this_num = min_t(unsigned, num, PAGE_SIZE - offset);
ap->pages[ap->num_pages] = page;
ap->descs[ap->num_pages].offset = offset;
ap->descs[ap->num_pages].length = this_num;
ap->num_pages++;
ap->folios[ap->num_folios] = folio;
ap->descs[ap->num_folios].offset = offset;
ap->descs[ap->num_folios].length = this_num;
ap->num_folios++;
cur_pages++;
offset = 0;
num -= this_num;

View File

@@ -366,7 +366,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
struct fuse_mount *fm = get_fuse_mount_super(sb);
FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
u64 attr_version, evict_ctr;
int err;
*inode = NULL;
@@ -381,6 +381,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out;
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_lookup_init(fm->fc, &args, nodeid, name, outarg);
err = fuse_simple_request(fm, &args);
@@ -398,7 +399,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, ATTR_TIMEOUT(outarg),
attr_version);
attr_version, evict_ctr);
err = -ENOMEM;
if (!*inode) {
fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1);
@@ -691,7 +692,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir,
ff->nodeid = outentry.nodeid;
ff->open_flags = outopenp->open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, ATTR_TIMEOUT(&outentry), 0);
&outentry.attr, ATTR_TIMEOUT(&outentry), 0, 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
@@ -822,7 +823,7 @@ static int create_new_entry(struct mnt_idmap *idmap, struct fuse_mount *fm,
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
&outarg.attr, ATTR_TIMEOUT(&outarg), 0);
&outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0);
if (!inode) {
fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
@@ -1585,13 +1586,13 @@ static int fuse_permission(struct mnt_idmap *idmap,
return err;
}
static int fuse_readlink_page(struct inode *inode, struct page *page)
static int fuse_readlink_page(struct inode *inode, struct folio *folio)
{
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_folio_desc desc = { .length = PAGE_SIZE - 1 };
struct fuse_args_pages ap = {
.num_pages = 1,
.pages = &page,
.num_folios = 1,
.folios = &folio,
.descs = &desc,
};
char *link;
@@ -1614,7 +1615,7 @@ static int fuse_readlink_page(struct inode *inode, struct page *page)
if (WARN_ON(res >= PAGE_SIZE))
return -EIO;
link = page_address(page);
link = folio_address(folio);
link[res] = '\0';
return 0;
@@ -1624,7 +1625,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
struct delayed_call *callback)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct page *page;
struct folio *folio;
int err;
err = -EIO;
@@ -1638,20 +1639,20 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
if (!dentry)
goto out_err;
page = alloc_page(GFP_KERNEL);
folio = folio_alloc(GFP_KERNEL, 0);
err = -ENOMEM;
if (!page)
if (!folio)
goto out_err;
err = fuse_readlink_page(inode, page);
err = fuse_readlink_page(inode, folio);
if (err) {
__free_page(page);
folio_put(folio);
goto out_err;
}
set_delayed_call(callback, page_put_link, page);
set_delayed_call(callback, page_put_link, &folio->page);
return page_address(page);
return folio_address(folio);
out_err:
return ERR_PTR(err);
@@ -2028,7 +2029,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
fuse_change_attributes_common(inode, &outarg.attr, NULL,
ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode));
fuse_get_cache_mask(inode), 0);
oldsize = inode->i_size;
/* see the comment in fuse_change_attributes() */
if (!is_wb || is_truncate)
@@ -2231,7 +2232,7 @@ void fuse_init_dir(struct inode *inode)
static int fuse_symlink_read_folio(struct file *null, struct folio *folio)
{
int err = fuse_readlink_page(folio->mapping->host, &folio->page);
int err = fuse_readlink_page(folio->mapping->host, folio);
if (!err)
folio_mark_uptodate(folio);

File diff suppressed because it is too large Load Diff

View File

@@ -35,9 +35,6 @@
/** Default max number of pages that can be used in a single read request */
#define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32
/** Maximum of max_pages received in init_out */
#define FUSE_MAX_MAX_PAGES 256
/** Bias for fi->writectr, meaning new writepages must not be sent */
#define FUSE_NOWRITE INT_MIN
@@ -47,6 +44,9 @@
/** Number of dentries for each connection in the control filesystem */
#define FUSE_CTL_NUM_DENTRIES 5
/** Maximum of max_pages received in init_out */
extern unsigned int fuse_max_pages_limit;
/** List of active connections */
extern struct list_head fuse_conn_list;
@@ -285,8 +285,8 @@ struct fuse_arg {
void *value;
};
/** FUSE page descriptor */
struct fuse_page_desc {
/** FUSE folio descriptor */
struct fuse_folio_desc {
unsigned int length;
unsigned int offset;
};
@@ -309,16 +309,19 @@ struct fuse_args {
bool may_block:1;
bool is_ext:1;
bool is_pinned:1;
bool invalidate_vmap:1;
struct fuse_in_arg in_args[3];
struct fuse_arg out_args[2];
void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error);
/* Used for kvec iter backed by vmalloc address */
void *vmap_base;
};
struct fuse_args_pages {
struct fuse_args args;
struct page **pages;
struct fuse_page_desc *descs;
unsigned int num_pages;
struct folio **folios;
struct fuse_folio_desc *descs;
unsigned int num_folios;
};
struct fuse_release_args {
@@ -857,6 +860,9 @@ struct fuse_conn {
/** Passthrough support for read/write IO */
unsigned int passthrough:1;
/* Use pages instead of pointer for kernel I/O */
unsigned int use_pages_for_kvec_io:1;
/** Maximum stack depth for passthrough backing files */
int max_stack_depth;
@@ -884,6 +890,9 @@ struct fuse_conn {
/** Version counter for attribute changes */
atomic64_t attr_version;
/** Version counter for evict inode */
atomic64_t evict_ctr;
/** Called on final put */
void (*release)(struct fuse_conn *);
@@ -978,6 +987,11 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
return atomic64_read(&fc->attr_version);
}
static inline u64 fuse_get_evict_ctr(struct fuse_conn *fc)
{
return atomic64_read(&fc->evict_ctr);
}
static inline bool fuse_stale_inode(const struct inode *inode, int generation,
struct fuse_attr *attr)
{
@@ -995,25 +1009,25 @@ static inline bool fuse_is_bad(struct inode *inode)
return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
}
static inline struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
struct fuse_page_desc **desc)
static inline struct folio **fuse_folios_alloc(unsigned int nfolios, gfp_t flags,
struct fuse_folio_desc **desc)
{
struct page **pages;
struct folio **folios;
pages = kzalloc(npages * (sizeof(struct page *) +
sizeof(struct fuse_page_desc)), flags);
*desc = (void *) (pages + npages);
folios = kzalloc(nfolios * (sizeof(struct folio *) +
sizeof(struct fuse_folio_desc)), flags);
*desc = (void *) (folios + nfolios);
return pages;
return folios;
}
static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
unsigned int index,
unsigned int nr_pages)
static inline void fuse_folio_descs_length_init(struct fuse_folio_desc *descs,
unsigned int index,
unsigned int nr_folios)
{
int i;
for (i = index; i < index + nr_pages; i++)
for (i = index; i < index + nr_folios; i++)
descs[i].length = PAGE_SIZE - descs[i].offset;
}
@@ -1037,7 +1051,8 @@ extern const struct dentry_operations fuse_root_dentry_operations;
*/
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
u64 attr_valid, u64 attr_version,
u64 evict_ctr);
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode);
@@ -1062,7 +1077,7 @@ struct fuse_io_args {
struct {
struct fuse_write_in in;
struct fuse_write_out out;
bool page_locked;
bool folio_locked;
} write;
};
struct fuse_args_pages ap;
@@ -1127,7 +1142,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask);
u64 attr_valid, u32 cache_mask,
u64 evict_ctr);
u32 fuse_get_cache_mask(struct inode *inode);
@@ -1480,4 +1496,12 @@ ssize_t fuse_passthrough_splice_write(struct pipe_inode_info *pipe,
size_t len, unsigned int flags);
ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma);
#ifdef CONFIG_SYSCTL
extern int fuse_sysctl_register(void);
extern void fuse_sysctl_unregister(void);
#else
#define fuse_sysctl_register() (0)
#define fuse_sysctl_unregister() do { } while (0)
#endif /* CONFIG_SYSCTL */
#endif /* _FS_FUSE_I_H */

View File

@@ -35,6 +35,8 @@ DEFINE_MUTEX(fuse_mutex);
static int set_global_limit(const char *val, const struct kernel_param *kp);
unsigned int fuse_max_pages_limit = 256;
unsigned max_user_bgreq;
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
&max_user_bgreq, 0644);
@@ -173,6 +175,14 @@ static void fuse_evict_inode(struct inode *inode)
fuse_cleanup_submount_lookup(fc, fi->submount_lookup);
fi->submount_lookup = NULL;
}
/*
* Evict of non-deleted inode may race with outstanding
* LOOKUP/READDIRPLUS requests and result in inconsistency when
* the request finishes. Deal with that here by bumping a
* counter that can be compared to the starting value.
*/
if (inode->i_nlink > 0)
atomic64_inc(&fc->evict_ctr);
}
if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
WARN_ON(fi->iocachectr != 0);
@@ -206,17 +216,30 @@ static ino_t fuse_squash_ino(u64 ino64)
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask)
u64 attr_valid, u32 cache_mask,
u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
lockdep_assert_held(&fi->lock);
/*
* Clear basic stats from invalid mask.
*
* Don't do this if this is coming from a fuse_iget() call and there
* might have been a racing evict which would've invalidated the result
* if the attr_version would've been preserved.
*
* !evict_ctr -> this is create
* fi->attr_version != 0 -> this is not a new inode
* evict_ctr == fuse_get_evict_ctr() -> no evicts while during request
*/
if (!evict_ctr || fi->attr_version || evict_ctr == fuse_get_evict_ctr(fc))
set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
fi->attr_version = atomic64_inc_return(&fc->attr_version);
fi->i_time = attr_valid;
/* Clear basic stats from invalid mask */
set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
@@ -295,9 +318,9 @@ u32 fuse_get_cache_mask(struct inode *inode)
return STATX_MTIME | STATX_CTIME | STATX_SIZE;
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx,
u64 attr_valid, u64 attr_version)
static void fuse_change_attributes_i(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx, u64 attr_valid,
u64 attr_version, u64 evict_ctr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -331,7 +354,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
}
old_mtime = inode_get_mtime(inode);
fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask,
evict_ctr);
oldsize = inode->i_size;
/*
@@ -372,6 +396,13 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fuse_dax_dontcache(inode, attr->flags);
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct fuse_statx *sx, u64 attr_valid,
u64 attr_version)
{
fuse_change_attributes_i(inode, attr, sx, attr_valid, attr_version, 0);
}
static void fuse_init_submount_lookup(struct fuse_submount_lookup *sl,
u64 nodeid)
{
@@ -426,7 +457,8 @@ static int fuse_inode_set(struct inode *inode, void *_nodeidp)
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version)
u64 attr_valid, u64 attr_version,
u64 evict_ctr)
{
struct inode *inode;
struct fuse_inode *fi;
@@ -487,8 +519,8 @@ retry:
fi->nlookup++;
spin_unlock(&fi->lock);
done:
fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
fuse_change_attributes_i(inode, attr, NULL, attr_valid, attr_version,
evict_ctr);
return inode;
}
@@ -940,11 +972,12 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
fc->initialized = 0;
fc->connected = 1;
atomic64_set(&fc->attr_version, 1);
atomic64_set(&fc->evict_ctr, 1);
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
fc->user_ns = get_user_ns(user_ns);
fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
fc->max_pages_limit = fuse_max_pages_limit;
if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH))
fuse_backing_files_init(fc);
@@ -1001,7 +1034,7 @@ static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
attr.mode = mode;
attr.ino = FUSE_ROOT_ID;
attr.nlink = 1;
return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0);
return fuse_iget(sb, FUSE_ROOT_ID, 0, &attr, 0, 0, 0);
}
struct fuse_inode_handle {
@@ -1610,7 +1643,8 @@ static int fuse_fill_super_submount(struct super_block *sb,
return -ENOMEM;
fuse_fill_attr_from_inode(&root_attr, parent_fi);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0,
fuse_get_evict_ctr(fm->fc));
/*
* This inode is just a duplicate, so it is not looked up and
* its nlookup should not be incremented. fuse_iget() does
@@ -2063,8 +2097,14 @@ static int __init fuse_fs_init(void)
if (err)
goto out3;
err = fuse_sysctl_register();
if (err)
goto out4;
return 0;
out4:
unregister_filesystem(&fuse_fs_type);
out3:
unregister_fuseblk();
out2:
@@ -2075,6 +2115,7 @@ static int __init fuse_fs_init(void)
static void fuse_fs_cleanup(void)
{
fuse_sysctl_unregister();
unregister_filesystem(&fuse_fs_type);
unregister_fuseblk();

View File

@@ -10,6 +10,8 @@
#include <linux/fileattr.h>
#include <linux/fsverity.h>
#define FUSE_VERITY_ENABLE_ARG_MAX_PAGES 256
static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
struct fuse_ioctl_out *outarg)
{
@@ -140,7 +142,7 @@ static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov,
{
struct fsverity_enable_arg enable;
struct fsverity_enable_arg __user *uarg = (void __user *)arg;
const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE;
const __u32 max_buffer_len = FUSE_VERITY_ENABLE_ARG_MAX_PAGES * PAGE_SIZE;
if (copy_from_user(&enable, uarg, sizeof(enable)))
return -EFAULT;
@@ -249,12 +251,12 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
err = -ENOMEM;
ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
ap.folios = fuse_folios_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs);
iov_page = (struct iovec *) __get_free_page(GFP_KERNEL);
if (!ap.pages || !iov_page)
if (!ap.folios || !iov_page)
goto out;
fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages);
fuse_folio_descs_length_init(ap.descs, 0, fm->fc->max_pages);
/*
* If restricted, initialize IO parameters as encoded in @cmd.
@@ -304,14 +306,13 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -ENOMEM;
if (max_pages > fm->fc->max_pages)
goto out;
while (ap.num_pages < max_pages) {
ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
if (!ap.pages[ap.num_pages])
while (ap.num_folios < max_pages) {
ap.folios[ap.num_folios] = folio_alloc(GFP_KERNEL | __GFP_HIGHMEM, 0);
if (!ap.folios[ap.num_folios])
goto out;
ap.num_pages++;
ap.num_folios++;
}
/* okay, let's send it to the client */
ap.args.opcode = FUSE_IOCTL;
ap.args.nodeid = ff->nodeid;
@@ -325,8 +326,8 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
c = copy_folio_from_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
@@ -364,7 +365,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
goto out;
vaddr = kmap_local_page(ap.pages[0]);
vaddr = kmap_local_folio(ap.folios[0], 0);
err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr,
transferred, in_iovs + out_iovs,
(flags & FUSE_IOCTL_COMPAT) != 0);
@@ -392,17 +393,17 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
err = -EFAULT;
iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) {
c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii);
for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_folios); i++) {
c = copy_folio_to_iter(ap.folios[i], 0, PAGE_SIZE, &ii);
if (c != PAGE_SIZE && iov_iter_count(&ii))
goto out;
}
err = 0;
out:
free_page((unsigned long) iov_page);
while (ap.num_pages)
__free_page(ap.pages[--ap.num_pages]);
kfree(ap.pages);
while (ap.num_folios)
folio_put(ap.folios[--ap.num_folios]);
kfree(ap.folios);
return err ? err : outarg.result;
}

View File

@@ -149,7 +149,7 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
static int fuse_direntplus_link(struct file *file,
struct fuse_direntplus *direntplus,
u64 attr_version)
u64 attr_version, u64 evict_ctr)
{
struct fuse_entry_out *o = &direntplus->entry_out;
struct fuse_dirent *dirent = &direntplus->dirent;
@@ -233,7 +233,7 @@ retry:
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
&o->attr, ATTR_TIMEOUT(o),
attr_version);
attr_version, evict_ctr);
if (!inode)
inode = ERR_PTR(-ENOMEM);
@@ -284,7 +284,8 @@ static void fuse_force_forget(struct file *file, u64 nodeid)
}
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
struct dir_context *ctx, u64 attr_version)
struct dir_context *ctx, u64 attr_version,
u64 evict_ctr)
{
struct fuse_direntplus *direntplus;
struct fuse_dirent *dirent;
@@ -319,7 +320,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
buf += reclen;
nbytes -= reclen;
ret = fuse_direntplus_link(file, direntplus, attr_version);
ret = fuse_direntplus_link(file, direntplus, attr_version, evict_ctr);
if (ret)
fuse_force_forget(file, direntplus->entry_out.nodeid);
}
@@ -331,26 +332,27 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
int plus;
ssize_t res;
struct page *page;
struct folio *folio;
struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
struct fuse_io_args ia = {};
struct fuse_args_pages *ap = &ia.ap;
struct fuse_page_desc desc = { .length = PAGE_SIZE };
u64 attr_version = 0;
struct fuse_folio_desc desc = { .length = PAGE_SIZE };
u64 attr_version = 0, evict_ctr = 0;
bool locked;
page = alloc_page(GFP_KERNEL);
if (!page)
folio = folio_alloc(GFP_KERNEL, 0);
if (!folio)
return -ENOMEM;
plus = fuse_use_readdirplus(inode, ctx);
ap->args.out_pages = true;
ap->num_pages = 1;
ap->pages = &page;
ap->num_folios = 1;
ap->folios = &folio;
ap->descs = &desc;
if (plus) {
attr_version = fuse_get_attr_version(fm->fc);
evict_ctr = fuse_get_evict_ctr(fm->fc);
fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
FUSE_READDIRPLUS);
} else {
@@ -367,15 +369,16 @@ static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
if (ff->open_flags & FOPEN_CACHE_DIR)
fuse_readdir_cache_end(file, ctx->pos);
} else if (plus) {
res = parse_dirplusfile(page_address(page), res,
file, ctx, attr_version);
res = parse_dirplusfile(folio_address(folio), res,
file, ctx, attr_version,
evict_ctr);
} else {
res = parse_dirfile(page_address(page), res, file,
res = parse_dirfile(folio_address(folio), res, file,
ctx);
}
}
__free_page(page);
folio_put(folio);
fuse_invalidate_atime(inode);
return res;
}

40
fs/fuse/sysctl.c Normal file
View File

@@ -0,0 +1,40 @@
// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/fuse/fuse_sysctl.c
*
* Sysctl interface to fuse parameters
*/
#include <linux/sysctl.h>
#include "fuse_i.h"
static struct ctl_table_header *fuse_table_header;
/* Bound by fuse_init_out max_pages, which is a u16 */
static unsigned int sysctl_fuse_max_pages_limit = 65535;
static struct ctl_table fuse_sysctl_table[] = {
{
.procname = "max_pages_limit",
.data = &fuse_max_pages_limit,
.maxlen = sizeof(fuse_max_pages_limit),
.mode = 0644,
.proc_handler = proc_douintvec_minmax,
.extra1 = SYSCTL_ONE,
.extra2 = &sysctl_fuse_max_pages_limit,
},
};
int fuse_sysctl_register(void)
{
fuse_table_header = register_sysctl("fs/fuse", fuse_sysctl_table);
if (!fuse_table_header)
return -ENOMEM;
return 0;
}
void fuse_sysctl_unregister(void)
{
unregister_sysctl_table(fuse_table_header);
fuse_table_header = NULL;
}

View File

@@ -97,7 +97,8 @@ struct virtio_fs_req_work {
};
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight);
struct fuse_req *req, bool in_flight,
gfp_t gfp);
static const struct constant_table dax_param_enums[] = {
{"always", FUSE_DAX_ALWAYS },
@@ -575,6 +576,8 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
/* Dispatch pending requests */
while (1) {
unsigned int flags;
spin_lock(&fsvq->lock);
req = list_first_entry_or_null(&fsvq->queued_reqs,
struct fuse_req, list);
@@ -585,7 +588,9 @@ static void virtio_fs_request_dispatch_work(struct work_struct *work)
list_del_init(&req->list);
spin_unlock(&fsvq->lock);
ret = virtio_fs_enqueue_req(fsvq, req, true);
flags = memalloc_nofs_save();
ret = virtio_fs_enqueue_req(fsvq, req, true, GFP_KERNEL);
memalloc_nofs_restore(flags);
if (ret < 0) {
if (ret == -ENOSPC) {
spin_lock(&fsvq->lock);
@@ -686,7 +691,7 @@ static void virtio_fs_hiprio_dispatch_work(struct work_struct *work)
}
/* Allocate and copy args into req->argbuf */
static int copy_args_to_argbuf(struct fuse_req *req)
static int copy_args_to_argbuf(struct fuse_req *req, gfp_t gfp)
{
struct fuse_args *args = req->args;
unsigned int offset = 0;
@@ -700,7 +705,7 @@ static int copy_args_to_argbuf(struct fuse_req *req)
len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) +
fuse_len_args(num_out, args->out_args);
req->argbuf = kmalloc(len, GFP_ATOMIC);
req->argbuf = kmalloc(len, gfp);
if (!req->argbuf)
return -ENOMEM;
@@ -760,7 +765,7 @@ static void virtio_fs_request_complete(struct fuse_req *req,
struct fuse_args *args;
struct fuse_args_pages *ap;
unsigned int len, i, thislen;
struct page *page;
struct folio *folio;
/*
* TODO verify that server properly follows FUSE protocol
@@ -772,12 +777,12 @@ static void virtio_fs_request_complete(struct fuse_req *req,
if (args->out_pages && args->page_zeroing) {
len = args->out_args[args->out_numargs - 1].size;
ap = container_of(args, typeof(*ap), args);
for (i = 0; i < ap->num_pages; i++) {
for (i = 0; i < ap->num_folios; i++) {
thislen = ap->descs[i].length;
if (len < thislen) {
WARN_ON(ap->descs[i].offset);
page = ap->pages[i];
zero_user_segment(page, len, thislen);
folio = ap->folios[i];
folio_zero_segment(folio, len, thislen);
len = 0;
} else {
len -= thislen;
@@ -1267,15 +1272,15 @@ static void virtio_fs_send_interrupt(struct fuse_iqueue *fiq, struct fuse_req *r
}
/* Count number of scatter-gather elements required */
static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs,
unsigned int num_pages,
unsigned int total_len)
static unsigned int sg_count_fuse_folios(struct fuse_folio_desc *folio_descs,
unsigned int num_folios,
unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
for (i = 0; i < num_pages && total_len; i++) {
this_len = min(page_descs[i].length, total_len);
for (i = 0; i < num_folios && total_len; i++) {
this_len = min(folio_descs[i].length, total_len);
total_len -= this_len;
}
@@ -1294,8 +1299,8 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->in_pages) {
size = args->in_args[args->in_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
size);
}
if (!test_bit(FR_ISREPLY, &req->flags))
@@ -1308,27 +1313,27 @@ static unsigned int sg_count_fuse_req(struct fuse_req *req)
if (args->out_pages) {
size = args->out_args[args->out_numargs - 1].size;
total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages,
size);
total_sgs += sg_count_fuse_folios(ap->descs, ap->num_folios,
size);
}
return total_sgs;
}
/* Add pages to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_pages(struct scatterlist *sg,
struct page **pages,
struct fuse_page_desc *page_descs,
unsigned int num_pages,
unsigned int total_len)
/* Add folios to scatter-gather list and return number of elements used */
static unsigned int sg_init_fuse_folios(struct scatterlist *sg,
struct folio **folios,
struct fuse_folio_desc *folio_descs,
unsigned int num_folios,
unsigned int total_len)
{
unsigned int i;
unsigned int this_len;
for (i = 0; i < num_pages && total_len; i++) {
for (i = 0; i < num_folios && total_len; i++) {
sg_init_table(&sg[i], 1);
this_len = min(page_descs[i].length, total_len);
sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset);
this_len = min(folio_descs[i].length, total_len);
sg_set_folio(&sg[i], folios[i], this_len, folio_descs[i].offset);
total_len -= this_len;
}
@@ -1353,10 +1358,10 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
sg_init_one(&sg[total_sgs++], argbuf, len);
if (argpages)
total_sgs += sg_init_fuse_pages(&sg[total_sgs],
ap->pages, ap->descs,
ap->num_pages,
args[numargs - 1].size);
total_sgs += sg_init_fuse_folios(&sg[total_sgs],
ap->folios, ap->descs,
ap->num_folios,
args[numargs - 1].size);
if (len_used)
*len_used = len;
@@ -1366,7 +1371,8 @@ static unsigned int sg_init_fuse_args(struct scatterlist *sg,
/* Add a request to a virtqueue and kick the device */
static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
struct fuse_req *req, bool in_flight)
struct fuse_req *req, bool in_flight,
gfp_t gfp)
{
/* requests need at least 4 elements */
struct scatterlist *stack_sgs[6];
@@ -1387,8 +1393,8 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
/* Does the sglist fit on the stack? */
total_sgs = sg_count_fuse_req(req);
if (total_sgs > ARRAY_SIZE(stack_sgs)) {
sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC);
sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC);
sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), gfp);
sg = kmalloc_array(total_sgs, sizeof(sg[0]), gfp);
if (!sgs || !sg) {
ret = -ENOMEM;
goto out;
@@ -1396,7 +1402,7 @@ static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq,
}
/* Use a bounce buffer since stack args cannot be mapped */
ret = copy_args_to_argbuf(req);
ret = copy_args_to_argbuf(req, gfp);
if (ret < 0)
goto out;
@@ -1490,7 +1496,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req)
queue_id);
fsvq = &fs->vqs[queue_id];
ret = virtio_fs_enqueue_req(fsvq, req, false);
ret = virtio_fs_enqueue_req(fsvq, req, false, GFP_ATOMIC);
if (ret < 0) {
if (ret == -ENOSPC) {
/*
@@ -1691,6 +1697,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
fc->delete_stale = true;
fc->auto_submounts = true;
fc->sync_fs = true;
fc->use_pages_for_kvec_io = true;
/* Tell FUSE to split requests that exceed the virtqueue's size */
fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit,

View File

@@ -2550,6 +2550,7 @@ struct kvec;
struct page *get_dump_page(unsigned long addr);
bool folio_mark_dirty(struct folio *folio);
bool folio_mark_dirty_lock(struct folio *folio);
bool set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);

View File

@@ -52,6 +52,12 @@ bool set_page_dirty(struct page *page)
}
EXPORT_SYMBOL(set_page_dirty);
int set_page_dirty_lock(struct page *page)
{
return folio_mark_dirty_lock(page_folio(page));
}
EXPORT_SYMBOL(set_page_dirty_lock);
bool clear_page_dirty_for_io(struct page *page)
{
return folio_clear_dirty_for_io(page_folio(page));

View File

@@ -2925,25 +2925,25 @@ bool folio_mark_dirty(struct folio *folio)
EXPORT_SYMBOL(folio_mark_dirty);
/*
* set_page_dirty() is racy if the caller has no reference against
* page->mapping->host, and if the page is unlocked. This is because another
* CPU could truncate the page off the mapping and then free the mapping.
* folio_mark_dirty() is racy if the caller has no reference against
* folio->mapping->host, and if the folio is unlocked. This is because another
* CPU could truncate the folio off the mapping and then free the mapping.
*
* Usually, the page _is_ locked, or the caller is a user-space process which
* Usually, the folio _is_ locked, or the caller is a user-space process which
* holds a reference on the inode by having an open file.
*
* In other cases, the page should be locked before running set_page_dirty().
* In other cases, the folio should be locked before running folio_mark_dirty().
*/
int set_page_dirty_lock(struct page *page)
bool folio_mark_dirty_lock(struct folio *folio)
{
int ret;
bool ret;
lock_page(page);
ret = set_page_dirty(page);
unlock_page(page);
folio_lock(folio);
ret = folio_mark_dirty(folio);
folio_unlock(folio);
return ret;
}
EXPORT_SYMBOL(set_page_dirty_lock);
EXPORT_SYMBOL(folio_mark_dirty_lock);
/*
* This cancels just the dirty bit on the kernel page itself, it does NOT