mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'hole_punch_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fs hole punching vs cache filling race fixes from Jan Kara: "Fix races leading to possible data corruption or stale data exposure in multiple filesystems when hole punching races with operations such as readahead. This is the series I was sending for the last merge window but with your objection fixed - now filemap_fault() has been modified to take invalidate_lock only when we need to create new page in the page cache and / or bring it uptodate" * tag 'hole_punch_for_v5.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: filesystems/locking: fix Malformed table warning cifs: Fix race between hole punch and page fault ceph: Fix race between hole punch and page fault fuse: Convert to using invalidate_lock f2fs: Convert to using invalidate_lock zonefs: Convert to using invalidate_lock xfs: Convert double locking of MMAPLOCK to use VFS helpers xfs: Convert to use invalidate_lock xfs: Refactor xfs_isilocked() ext2: Convert to using invalidate_lock ext4: Convert to use mapping->invalidate_lock mm: Add functions to lock invalidate_lock for two mappings mm: Protect operations adding pages to page cache with invalidate_lock documentation: Sync file_operations members with reality mm: Fix comments mentioning i_mutex
This commit is contained in:
@@ -271,19 +271,19 @@ prototypes::
|
||||
locking rules:
|
||||
All except set_page_dirty and freepage may block
|
||||
|
||||
====================== ======================== =========
|
||||
ops PageLocked(page) i_rwsem
|
||||
====================== ======================== =========
|
||||
====================== ======================== ========= ===============
|
||||
ops PageLocked(page) i_rwsem invalidate_lock
|
||||
====================== ======================== ========= ===============
|
||||
writepage: yes, unlocks (see below)
|
||||
readpage: yes, unlocks
|
||||
readpage: yes, unlocks shared
|
||||
writepages:
|
||||
set_page_dirty no
|
||||
readahead: yes, unlocks
|
||||
readpages: no
|
||||
readahead: yes, unlocks shared
|
||||
readpages: no shared
|
||||
write_begin: locks the page exclusive
|
||||
write_end: yes, unlocks exclusive
|
||||
bmap:
|
||||
invalidatepage: yes
|
||||
invalidatepage: yes exclusive
|
||||
releasepage: yes
|
||||
freepage: yes
|
||||
direct_IO:
|
||||
@@ -295,7 +295,7 @@ is_partially_uptodate: yes
|
||||
error_remove_page: yes
|
||||
swap_activate: no
|
||||
swap_deactivate: no
|
||||
====================== ======================== =========
|
||||
====================== ======================== ========= ===============
|
||||
|
||||
->write_begin(), ->write_end() and ->readpage() may be called from
|
||||
the request handler (/dev/loop).
|
||||
@@ -378,7 +378,10 @@ keep it that way and don't breed new callers.
|
||||
->invalidatepage() is called when the filesystem must attempt to drop
|
||||
some or all of the buffers from the page when it is being truncated. It
|
||||
returns zero on success. If ->invalidatepage is zero, the kernel uses
|
||||
block_invalidatepage() instead.
|
||||
block_invalidatepage() instead. The filesystem must exclusively acquire
|
||||
invalidate_lock before invalidating page cache in truncate / hole punch path
|
||||
(and thus calling into ->invalidatepage) to block races between page cache
|
||||
invalidation and page cache filling functions (fault, read, ...).
|
||||
|
||||
->releasepage() is called when the kernel is about to try to drop the
|
||||
buffers from the page in preparation for freeing it. It returns zero to
|
||||
@@ -506,6 +509,7 @@ prototypes::
|
||||
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
|
||||
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
|
||||
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
|
||||
int (*iopoll) (struct kiocb *kiocb, bool spin);
|
||||
int (*iterate) (struct file *, struct dir_context *);
|
||||
int (*iterate_shared) (struct file *, struct dir_context *);
|
||||
__poll_t (*poll) (struct file *, struct poll_table_struct *);
|
||||
@@ -518,12 +522,6 @@ prototypes::
|
||||
int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
|
||||
int (*fasync) (int, struct file *, int);
|
||||
int (*lock) (struct file *, int, struct file_lock *);
|
||||
ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
|
||||
loff_t *);
|
||||
ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
|
||||
loff_t *);
|
||||
ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
|
||||
void __user *);
|
||||
ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
|
||||
loff_t *, int);
|
||||
unsigned long (*get_unmapped_area)(struct file *, unsigned long,
|
||||
@@ -536,6 +534,14 @@ prototypes::
|
||||
size_t, unsigned int);
|
||||
int (*setlease)(struct file *, long, struct file_lock **, void **);
|
||||
long (*fallocate)(struct file *, int, loff_t, loff_t);
|
||||
void (*show_fdinfo)(struct seq_file *m, struct file *f);
|
||||
unsigned (*mmap_capabilities)(struct file *);
|
||||
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
|
||||
loff_t, size_t, unsigned int);
|
||||
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out,
|
||||
loff_t len, unsigned int remap_flags);
|
||||
int (*fadvise)(struct file *, loff_t, loff_t, int);
|
||||
|
||||
locking rules:
|
||||
All may block.
|
||||
@@ -570,6 +576,25 @@ in sys_read() and friends.
|
||||
the lease within the individual filesystem to record the result of the
|
||||
operation
|
||||
|
||||
->fallocate implementation must be really careful to maintain page cache
|
||||
consistency when punching holes or performing other operations that invalidate
|
||||
page cache contents. Usually the filesystem needs to call
|
||||
truncate_inode_pages_range() to invalidate relevant range of the page cache.
|
||||
However the filesystem usually also needs to update its internal (and on disk)
|
||||
view of file offset -> disk block mapping. Until this update is finished, the
|
||||
filesystem needs to block page faults and reads from reloading now-stale page
|
||||
cache contents from the disk. Since VFS acquires mapping->invalidate_lock in
|
||||
shared mode when loading pages from disk (filemap_fault(), filemap_read(),
|
||||
readahead paths), the fallocate implementation must take the invalidate_lock to
|
||||
prevent reloading.
|
||||
|
||||
->copy_file_range and ->remap_file_range implementations need to serialize
|
||||
against modifications of file data while the operation is running. For
|
||||
blocking changes through write(2) and similar operations inode->i_rwsem can be
|
||||
used. To block changes to file contents via a memory mapping during the
|
||||
operation, the filesystem must take mapping->invalidate_lock to coordinate
|
||||
with ->page_mkwrite.
|
||||
|
||||
dquot_operations
|
||||
================
|
||||
|
||||
@@ -627,11 +652,11 @@ pfn_mkwrite: yes
|
||||
access: yes
|
||||
============= ========= ===========================
|
||||
|
||||
->fault() is called when a previously not present pte is about
|
||||
to be faulted in. The filesystem must find and return the page associated
|
||||
with the passed in "pgoff" in the vm_fault structure. If it is possible that
|
||||
the page may be truncated and/or invalidated, then the filesystem must lock
|
||||
the page, then ensure it is not already truncated (the page lock will block
|
||||
->fault() is called when a previously not present pte is about to be faulted
|
||||
in. The filesystem must find and return the page associated with the passed in
|
||||
"pgoff" in the vm_fault structure. If it is possible that the page may be
|
||||
truncated and/or invalidated, then the filesystem must lock invalidate_lock,
|
||||
then ensure the page is not already truncated (invalidate_lock will block
|
||||
subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
|
||||
locked. The VM will unlock the page.
|
||||
|
||||
@@ -644,12 +669,14 @@ page table entry. Pointer to entry associated with the page is passed in
|
||||
"pte" field in vm_fault structure. Pointers to entries for other offsets
|
||||
should be calculated relative to "pte".
|
||||
|
||||
->page_mkwrite() is called when a previously read-only pte is
|
||||
about to become writeable. The filesystem again must ensure that there are
|
||||
no truncate/invalidate races, and then return with the page locked. If
|
||||
the page has been truncated, the filesystem should not look up a new page
|
||||
like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
|
||||
will cause the VM to retry the fault.
|
||||
->page_mkwrite() is called when a previously read-only pte is about to become
|
||||
writeable. The filesystem again must ensure that there are no
|
||||
truncate/invalidate races or races with operations such as ->remap_file_range
|
||||
or ->copy_file_range, and then return with the page locked. Usually
|
||||
mapping->invalidate_lock is suitable for proper serialization. If the page has
|
||||
been truncated, the filesystem should not look up a new page like the ->fault()
|
||||
handler, but simply return with VM_FAULT_NOPAGE, which will cause the VM to
|
||||
retry the fault.
|
||||
|
||||
->pfn_mkwrite() is the same as page_mkwrite but when the pte is
|
||||
VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
|
||||
|
||||
@@ -1395,9 +1395,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
} else {
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct page *page = find_or_create_page(mapping, 0,
|
||||
mapping_gfp_constraint(mapping,
|
||||
~__GFP_FS));
|
||||
struct page *page;
|
||||
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
page = find_or_create_page(mapping, 0,
|
||||
mapping_gfp_constraint(mapping, ~__GFP_FS));
|
||||
if (!page) {
|
||||
ret = VM_FAULT_OOM;
|
||||
goto out_inline;
|
||||
@@ -1418,6 +1420,7 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
|
||||
vmf->page = page;
|
||||
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
|
||||
out_inline:
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
dout("filemap_fault %p %llu read inline data ret %x\n",
|
||||
inode, off, ret);
|
||||
}
|
||||
|
||||
@@ -2088,6 +2088,7 @@ static long ceph_fallocate(struct file *file, int mode,
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
ceph_zero_pagecache_range(inode, offset, length);
|
||||
ret = ceph_zero_objects(inode, offset, length);
|
||||
|
||||
@@ -2100,6 +2101,7 @@ static long ceph_fallocate(struct file *file, int mode,
|
||||
if (dirty)
|
||||
__mark_inode_dirty(inode, dirty);
|
||||
}
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
ceph_put_cap_refs(ci, got);
|
||||
unlock:
|
||||
|
||||
@@ -3590,6 +3590,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
|
||||
return rc;
|
||||
}
|
||||
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
/*
|
||||
* We implement the punch hole through ioctl, so we need remove the page
|
||||
* caches first, otherwise the data may be inconsistent with the server.
|
||||
@@ -3607,6 +3608,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon,
|
||||
sizeof(struct file_zero_data_information),
|
||||
CIFSMaxBufSize, NULL, NULL);
|
||||
free_xid(xid);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
@@ -667,9 +667,6 @@ struct ext2_inode_info {
|
||||
struct rw_semaphore xattr_sem;
|
||||
#endif
|
||||
rwlock_t i_meta_lock;
|
||||
#ifdef CONFIG_FS_DAX
|
||||
struct rw_semaphore dax_sem;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* truncate_mutex is for serialising ext2_truncate() against
|
||||
@@ -685,14 +682,6 @@ struct ext2_inode_info {
|
||||
#endif
|
||||
};
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
#define dax_sem_down_write(ext2_inode) down_write(&(ext2_inode)->dax_sem)
|
||||
#define dax_sem_up_write(ext2_inode) up_write(&(ext2_inode)->dax_sem)
|
||||
#else
|
||||
#define dax_sem_down_write(ext2_inode)
|
||||
#define dax_sem_up_write(ext2_inode)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Inode dynamic state flags
|
||||
*/
|
||||
|
||||
@@ -81,7 +81,7 @@ out_unlock:
|
||||
*
|
||||
* mmap_lock (MM)
|
||||
* sb_start_pagefault (vfs, freeze)
|
||||
* ext2_inode_info->dax_sem
|
||||
* address_space->invalidate_lock
|
||||
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
|
||||
* ext2_inode_info->truncate_mutex
|
||||
*
|
||||
@@ -91,7 +91,6 @@ out_unlock:
|
||||
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
struct ext2_inode_info *ei = EXT2_I(inode);
|
||||
vm_fault_t ret;
|
||||
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
|
||||
(vmf->vma->vm_flags & VM_SHARED);
|
||||
@@ -100,11 +99,11 @@ static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
}
|
||||
down_read(&ei->dax_sem);
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
|
||||
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
|
||||
|
||||
up_read(&ei->dax_sem);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
if (write)
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
return ret;
|
||||
|
||||
@@ -1178,7 +1178,7 @@ static void ext2_free_branches(struct inode *inode, __le32 *p, __le32 *q, int de
|
||||
ext2_free_data(inode, p, q);
|
||||
}
|
||||
|
||||
/* dax_sem must be held when calling this function */
|
||||
/* mapping->invalidate_lock must be held when calling this function */
|
||||
static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
|
||||
{
|
||||
__le32 *i_data = EXT2_I(inode)->i_data;
|
||||
@@ -1195,7 +1195,7 @@ static void __ext2_truncate_blocks(struct inode *inode, loff_t offset)
|
||||
iblock = (offset + blocksize-1) >> EXT2_BLOCK_SIZE_BITS(inode->i_sb);
|
||||
|
||||
#ifdef CONFIG_FS_DAX
|
||||
WARN_ON(!rwsem_is_locked(&ei->dax_sem));
|
||||
WARN_ON(!rwsem_is_locked(&inode->i_mapping->invalidate_lock));
|
||||
#endif
|
||||
|
||||
n = ext2_block_to_path(inode, iblock, offsets, NULL);
|
||||
@@ -1277,9 +1277,9 @@ static void ext2_truncate_blocks(struct inode *inode, loff_t offset)
|
||||
if (ext2_inode_is_fast_symlink(inode))
|
||||
return;
|
||||
|
||||
dax_sem_down_write(EXT2_I(inode));
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
__ext2_truncate_blocks(inode, offset);
|
||||
dax_sem_up_write(EXT2_I(inode));
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
}
|
||||
|
||||
static int ext2_setsize(struct inode *inode, loff_t newsize)
|
||||
@@ -1309,10 +1309,10 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
dax_sem_down_write(EXT2_I(inode));
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
truncate_setsize(inode, newsize);
|
||||
__ext2_truncate_blocks(inode, newsize);
|
||||
dax_sem_up_write(EXT2_I(inode));
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
inode->i_mtime = inode->i_ctime = current_time(inode);
|
||||
if (inode_needs_sync(inode)) {
|
||||
|
||||
@@ -206,9 +206,6 @@ static void init_once(void *foo)
|
||||
init_rwsem(&ei->xattr_sem);
|
||||
#endif
|
||||
mutex_init(&ei->truncate_mutex);
|
||||
#ifdef CONFIG_FS_DAX
|
||||
init_rwsem(&ei->dax_sem);
|
||||
#endif
|
||||
inode_init_once(&ei->vfs_inode);
|
||||
}
|
||||
|
||||
|
||||
@@ -1086,15 +1086,6 @@ struct ext4_inode_info {
|
||||
* by other means, so we have i_data_sem.
|
||||
*/
|
||||
struct rw_semaphore i_data_sem;
|
||||
/*
|
||||
* i_mmap_sem is for serializing page faults with truncate / punch hole
|
||||
* operations. We have to make sure that new page cannot be faulted in
|
||||
* a section of the inode that is being punched. We cannot easily use
|
||||
* i_data_sem for this since we need protection for the whole punch
|
||||
* operation and i_data_sem ranks below transaction start so we have
|
||||
* to occasionally drop it.
|
||||
*/
|
||||
struct rw_semaphore i_mmap_sem;
|
||||
struct inode vfs_inode;
|
||||
struct jbd2_inode *jinode;
|
||||
|
||||
@@ -2972,7 +2963,6 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
|
||||
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
|
||||
loff_t lstart, loff_t lend);
|
||||
extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
|
||||
extern vm_fault_t ext4_filemap_fault(struct vm_fault *vmf);
|
||||
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
|
||||
extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
|
||||
extern void ext4_da_release_space(struct inode *inode, int to_free);
|
||||
|
||||
@@ -4474,6 +4474,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
|
||||
loff_t len, int mode)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct address_space *mapping = file->f_mapping;
|
||||
handle_t *handle = NULL;
|
||||
unsigned int max_blocks;
|
||||
loff_t new_size = 0;
|
||||
@@ -4560,17 +4561,17 @@ static long ext4_zero_range(struct file *file, loff_t offset,
|
||||
* Prevent page faults from reinstantiating pages we have
|
||||
* released from page cache.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
ret = ext4_break_layouts(inode);
|
||||
if (ret) {
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
goto out_mutex;
|
||||
}
|
||||
|
||||
ret = ext4_update_disksize_before_punch(inode, offset, len);
|
||||
if (ret) {
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
goto out_mutex;
|
||||
}
|
||||
/* Now release the pages and zero block aligned part of pages */
|
||||
@@ -4579,7 +4580,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
|
||||
|
||||
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
|
||||
flags);
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
if (ret)
|
||||
goto out_mutex;
|
||||
}
|
||||
@@ -5221,6 +5222,7 @@ out:
|
||||
static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
ext4_lblk_t punch_start, punch_stop;
|
||||
handle_t *handle;
|
||||
unsigned int credits;
|
||||
@@ -5274,7 +5276,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
* Prevent page faults from reinstantiating pages we have released from
|
||||
* page cache.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
ret = ext4_break_layouts(inode);
|
||||
if (ret)
|
||||
@@ -5289,15 +5291,15 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
* Write tail of the last page before removed range since it will get
|
||||
* removed from the page cache below.
|
||||
*/
|
||||
ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
|
||||
ret = filemap_write_and_wait_range(mapping, ioffset, offset);
|
||||
if (ret)
|
||||
goto out_mmap;
|
||||
/*
|
||||
* Write data that will be shifted to preserve them when discarding
|
||||
* page cache below. We are also protected from pages becoming dirty
|
||||
* by i_mmap_sem.
|
||||
* by i_rwsem and invalidate_lock.
|
||||
*/
|
||||
ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
|
||||
ret = filemap_write_and_wait_range(mapping, offset + len,
|
||||
LLONG_MAX);
|
||||
if (ret)
|
||||
goto out_mmap;
|
||||
@@ -5350,7 +5352,7 @@ out_stop:
|
||||
ext4_journal_stop(handle);
|
||||
ext4_fc_stop_ineligible(sb);
|
||||
out_mmap:
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
out_mutex:
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
@@ -5367,6 +5369,7 @@ out_mutex:
|
||||
static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
handle_t *handle;
|
||||
struct ext4_ext_path *path;
|
||||
struct ext4_extent *extent;
|
||||
@@ -5425,7 +5428,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
* Prevent page faults from reinstantiating pages we have released from
|
||||
* page cache.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
ret = ext4_break_layouts(inode);
|
||||
if (ret)
|
||||
@@ -5526,7 +5529,7 @@ out_stop:
|
||||
ext4_journal_stop(handle);
|
||||
ext4_fc_stop_ineligible(sb);
|
||||
out_mmap:
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
out_mutex:
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
|
||||
@@ -704,22 +704,23 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf,
|
||||
*/
|
||||
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
|
||||
(vmf->vma->vm_flags & VM_SHARED);
|
||||
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
||||
pfn_t pfn;
|
||||
|
||||
if (write) {
|
||||
sb_start_pagefault(sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
retry:
|
||||
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
|
||||
EXT4_DATA_TRANS_BLOCKS(sb));
|
||||
if (IS_ERR(handle)) {
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
sb_end_pagefault(sb);
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
} else {
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
}
|
||||
result = dax_iomap_fault(vmf, pe_size, &pfn, &error, &ext4_iomap_ops);
|
||||
if (write) {
|
||||
@@ -731,10 +732,10 @@ retry:
|
||||
/* Handling synchronous page fault? */
|
||||
if (result & VM_FAULT_NEEDDSYNC)
|
||||
result = dax_finish_sync_fault(vmf, pe_size, pfn);
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
sb_end_pagefault(sb);
|
||||
} else {
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -756,7 +757,7 @@ static const struct vm_operations_struct ext4_dax_vm_ops = {
|
||||
#endif
|
||||
|
||||
static const struct vm_operations_struct ext4_file_vm_ops = {
|
||||
.fault = ext4_filemap_fault,
|
||||
.fault = filemap_fault,
|
||||
.map_pages = filemap_map_pages,
|
||||
.page_mkwrite = ext4_page_mkwrite,
|
||||
};
|
||||
|
||||
@@ -3950,20 +3950,19 @@ int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void ext4_wait_dax_page(struct ext4_inode_info *ei)
|
||||
static void ext4_wait_dax_page(struct inode *inode)
|
||||
{
|
||||
up_write(&ei->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
schedule();
|
||||
down_write(&ei->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
}
|
||||
|
||||
int ext4_break_layouts(struct inode *inode)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct page *page;
|
||||
int error;
|
||||
|
||||
if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem)))
|
||||
if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
|
||||
return -EINVAL;
|
||||
|
||||
do {
|
||||
@@ -3974,7 +3973,7 @@ int ext4_break_layouts(struct inode *inode)
|
||||
error = ___wait_var_event(&page->_refcount,
|
||||
atomic_read(&page->_refcount) == 1,
|
||||
TASK_INTERRUPTIBLE, 0, 0,
|
||||
ext4_wait_dax_page(ei));
|
||||
ext4_wait_dax_page(inode));
|
||||
} while (error == 0);
|
||||
|
||||
return error;
|
||||
@@ -4005,9 +4004,9 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
|
||||
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
|
||||
if (ext4_has_inline_data(inode)) {
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
ret = ext4_convert_inline_data(inode);
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@@ -4058,7 +4057,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
|
||||
* Prevent page faults from reinstantiating pages we have released from
|
||||
* page cache.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
ret = ext4_break_layouts(inode);
|
||||
if (ret)
|
||||
@@ -4131,7 +4130,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
|
||||
out_stop:
|
||||
ext4_journal_stop(handle);
|
||||
out_dio:
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
out_mutex:
|
||||
inode_unlock(inode);
|
||||
return ret;
|
||||
@@ -5426,11 +5425,11 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
inode_dio_wait(inode);
|
||||
}
|
||||
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
rc = ext4_break_layouts(inode);
|
||||
if (rc) {
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
goto err_out;
|
||||
}
|
||||
|
||||
@@ -5506,7 +5505,7 @@ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
error = rc;
|
||||
}
|
||||
out_mmap_sem:
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
}
|
||||
|
||||
if (!error) {
|
||||
@@ -5983,10 +5982,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
||||
* data (and journalled aops don't know how to handle these cases).
|
||||
*/
|
||||
if (val) {
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
err = filemap_write_and_wait(inode->i_mapping);
|
||||
if (err < 0) {
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
@@ -6019,7 +6018,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
||||
|
||||
if (val)
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
/* Finally we can mark the inode as dirty. */
|
||||
|
||||
@@ -6063,7 +6062,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vma->vm_file);
|
||||
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
|
||||
err = ext4_convert_inline_data(inode);
|
||||
if (err)
|
||||
@@ -6176,7 +6175,7 @@ retry_alloc:
|
||||
out_ret:
|
||||
ret = block_page_mkwrite_return(err);
|
||||
out:
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
return ret;
|
||||
out_error:
|
||||
@@ -6184,15 +6183,3 @@ out_error:
|
||||
ext4_journal_stop(handle);
|
||||
goto out;
|
||||
}
|
||||
|
||||
vm_fault_t ext4_filemap_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
vm_fault_t ret;
|
||||
|
||||
down_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
ret = filemap_fault(vmf);
|
||||
up_read(&EXT4_I(inode)->i_mmap_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -148,7 +148,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
|
||||
goto journal_err_out;
|
||||
}
|
||||
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
err = filemap_write_and_wait(inode->i_mapping);
|
||||
if (err)
|
||||
goto err_out;
|
||||
@@ -256,7 +256,7 @@ err_out1:
|
||||
ext4_double_up_write_data_sem(inode, inode_bl);
|
||||
|
||||
err_out:
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
journal_err_out:
|
||||
unlock_two_nondirectories(inode, inode_bl);
|
||||
iput(inode_bl);
|
||||
|
||||
@@ -90,12 +90,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
|
||||
/*
|
||||
* Lock ordering
|
||||
*
|
||||
* Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
|
||||
* i_mmap_rwsem (inode->i_mmap_rwsem)!
|
||||
*
|
||||
* page fault path:
|
||||
* mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
|
||||
* page lock -> i_data_sem (rw)
|
||||
* mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
|
||||
* -> page lock -> i_data_sem (rw)
|
||||
*
|
||||
* buffered write path:
|
||||
* sb_start_write -> i_mutex -> mmap_lock
|
||||
@@ -103,8 +100,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
|
||||
* i_data_sem (rw)
|
||||
*
|
||||
* truncate:
|
||||
* sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
|
||||
* sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
|
||||
* sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
|
||||
* page lock
|
||||
* sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
|
||||
* i_data_sem (rw)
|
||||
*
|
||||
* direct IO:
|
||||
@@ -1360,7 +1358,6 @@ static void init_once(void *foo)
|
||||
INIT_LIST_HEAD(&ei->i_orphan);
|
||||
init_rwsem(&ei->xattr_sem);
|
||||
init_rwsem(&ei->i_data_sem);
|
||||
init_rwsem(&ei->i_mmap_sem);
|
||||
inode_init_once(&ei->vfs_inode);
|
||||
ext4_fc_init_inode(&ei->vfs_inode);
|
||||
}
|
||||
|
||||
@@ -11,14 +11,16 @@
|
||||
*/
|
||||
static inline void ext4_truncate_failed_write(struct inode *inode)
|
||||
{
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
|
||||
/*
|
||||
* We don't need to call ext4_break_layouts() because the blocks we
|
||||
* are truncating were never visible to userspace.
|
||||
*/
|
||||
down_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
truncate_inode_pages(inode->i_mapping, inode->i_size);
|
||||
filemap_invalidate_lock(mapping);
|
||||
truncate_inode_pages(mapping, inode->i_size);
|
||||
ext4_truncate(inode);
|
||||
up_write(&EXT4_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -3187,12 +3187,12 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
|
||||
/* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
|
||||
if (to > i_size && !f2fs_verity_in_progress(inode)) {
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
truncate_pagecache(inode, i_size);
|
||||
f2fs_truncate_blocks(inode, i_size, true);
|
||||
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
}
|
||||
}
|
||||
@@ -3852,7 +3852,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
|
||||
int ret = 0;
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
set_inode_flag(inode, FI_ALIGNED_WRITE);
|
||||
|
||||
@@ -3894,7 +3894,7 @@ done:
|
||||
clear_inode_flag(inode, FI_DO_DEFRAG);
|
||||
clear_inode_flag(inode, FI_ALIGNED_WRITE);
|
||||
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -754,7 +754,6 @@ struct f2fs_inode_info {
|
||||
|
||||
/* avoid racing between foreground op and gc */
|
||||
struct rw_semaphore i_gc_rwsem[2];
|
||||
struct rw_semaphore i_mmap_sem;
|
||||
struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */
|
||||
|
||||
int i_extra_isize; /* size of extra space located in i_addr */
|
||||
|
||||
@@ -38,10 +38,7 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
|
||||
struct inode *inode = file_inode(vmf->vma->vm_file);
|
||||
vm_fault_t ret;
|
||||
|
||||
down_read(&F2FS_I(inode)->i_mmap_sem);
|
||||
ret = filemap_fault(vmf);
|
||||
up_read(&F2FS_I(inode)->i_mmap_sem);
|
||||
|
||||
if (!ret)
|
||||
f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO,
|
||||
F2FS_BLKSIZE);
|
||||
@@ -101,7 +98,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
|
||||
f2fs_bug_on(sbi, f2fs_has_inline_data(inode));
|
||||
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
down_read(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
lock_page(page);
|
||||
if (unlikely(page->mapping != inode->i_mapping ||
|
||||
page_offset(page) > i_size_read(inode) ||
|
||||
@@ -159,7 +156,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
|
||||
|
||||
trace_f2fs_vm_page_mkwrite(page, DATA);
|
||||
out_sem:
|
||||
up_read(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
|
||||
sb_end_pagefault(inode->i_sb);
|
||||
err:
|
||||
@@ -940,7 +937,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
}
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
truncate_setsize(inode, attr->ia_size);
|
||||
|
||||
@@ -950,7 +947,7 @@ int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
|
||||
* do not trim all blocks after i_size if target size is
|
||||
* larger than i_size.
|
||||
*/
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -1095,7 +1092,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
blk_end = (loff_t)pg_end << PAGE_SHIFT;
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
truncate_inode_pages_range(mapping, blk_start,
|
||||
blk_end - 1);
|
||||
@@ -1104,7 +1101,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
|
||||
ret = f2fs_truncate_hole(inode, pg_start, pg_end);
|
||||
f2fs_unlock_op(sbi);
|
||||
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
}
|
||||
}
|
||||
@@ -1339,7 +1336,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
|
||||
|
||||
/* avoid gc operation during block exchange */
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
f2fs_lock_op(sbi);
|
||||
f2fs_drop_extent_tree(inode);
|
||||
@@ -1347,7 +1344,7 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len)
|
||||
ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true);
|
||||
f2fs_unlock_op(sbi);
|
||||
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
return ret;
|
||||
}
|
||||
@@ -1378,13 +1375,13 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
return ret;
|
||||
|
||||
/* write out all moved pages, if possible */
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
|
||||
truncate_pagecache(inode, offset);
|
||||
|
||||
new_size = i_size_read(inode) - len;
|
||||
ret = f2fs_truncate_blocks(inode, new_size, true);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
if (!ret)
|
||||
f2fs_i_size_write(inode, new_size);
|
||||
return ret;
|
||||
@@ -1484,7 +1481,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
|
||||
pgoff_t end;
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
truncate_pagecache_range(inode,
|
||||
(loff_t)index << PAGE_SHIFT,
|
||||
@@ -1496,7 +1493,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
|
||||
ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE);
|
||||
if (ret) {
|
||||
f2fs_unlock_op(sbi);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
goto out;
|
||||
}
|
||||
@@ -1508,7 +1505,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
|
||||
f2fs_put_dnode(&dn);
|
||||
|
||||
f2fs_unlock_op(sbi);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
|
||||
f2fs_balance_fs(sbi, dn.node_changed);
|
||||
@@ -1543,6 +1540,7 @@ out:
|
||||
static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
{
|
||||
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
pgoff_t nr, pg_start, pg_end, delta, idx;
|
||||
loff_t new_size;
|
||||
int ret = 0;
|
||||
@@ -1565,14 +1563,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
|
||||
f2fs_balance_fs(sbi, true);
|
||||
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* write out all dirty pages from offset */
|
||||
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
|
||||
ret = filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -1583,7 +1581,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
|
||||
/* avoid gc operation during block exchange */
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
truncate_pagecache(inode, offset);
|
||||
|
||||
while (!ret && idx > pg_start) {
|
||||
@@ -1599,14 +1597,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
|
||||
idx + delta, nr, false);
|
||||
f2fs_unlock_op(sbi);
|
||||
}
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
|
||||
/* write out all moved pages, if possible */
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
|
||||
filemap_invalidate_lock(mapping);
|
||||
filemap_write_and_wait_range(mapping, offset, LLONG_MAX);
|
||||
truncate_pagecache(inode, offset);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
|
||||
if (!ret)
|
||||
f2fs_i_size_write(inode, new_size);
|
||||
@@ -3440,7 +3438,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
|
||||
goto out;
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
|
||||
|
||||
@@ -3476,7 +3474,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
|
||||
}
|
||||
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
out:
|
||||
inode_unlock(inode);
|
||||
|
||||
@@ -3593,7 +3591,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
|
||||
}
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
|
||||
|
||||
@@ -3629,7 +3627,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
|
||||
}
|
||||
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
|
||||
if (ret >= 0) {
|
||||
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
|
||||
@@ -3748,7 +3746,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
|
||||
goto err;
|
||||
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(mapping);
|
||||
|
||||
ret = filemap_write_and_wait_range(mapping, range.start,
|
||||
to_end ? LLONG_MAX : end_addr - 1);
|
||||
@@ -3835,7 +3833,7 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg)
|
||||
ret = f2fs_secure_erase(prev_bdev, inode, prev_index,
|
||||
prev_block, len, range.flags);
|
||||
out:
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
err:
|
||||
inode_unlock(inode);
|
||||
@@ -4313,9 +4311,9 @@ write:
|
||||
/* if we couldn't write data, we should deallocate blocks. */
|
||||
if (preallocated && i_size_read(inode) < target_size) {
|
||||
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
down_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
f2fs_truncate(inode);
|
||||
up_write(&F2FS_I(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
|
||||
}
|
||||
|
||||
|
||||
@@ -1289,7 +1289,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
|
||||
mutex_init(&fi->inmem_lock);
|
||||
init_rwsem(&fi->i_gc_rwsem[READ]);
|
||||
init_rwsem(&fi->i_gc_rwsem[WRITE]);
|
||||
init_rwsem(&fi->i_mmap_sem);
|
||||
init_rwsem(&fi->i_xattr_sem);
|
||||
|
||||
/* Will be used by directory only */
|
||||
|
||||
@@ -444,12 +444,12 @@ static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos,
|
||||
/*
|
||||
* Can't do inline reclaim in fault path. We call
|
||||
* dax_layout_busy_page() before we free a range. And
|
||||
* fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it.
|
||||
* In fault path we enter with fi->i_mmap_sem held and can't drop
|
||||
* it. Also in fault path we hold fi->i_mmap_sem shared and not
|
||||
* exclusive, so that creates further issues with fuse_wait_dax_page().
|
||||
* Hence return -EAGAIN and fuse_dax_fault() will wait for a memory
|
||||
* range to become free and retry.
|
||||
* fuse_wait_dax_page() drops mapping->invalidate_lock and requires it.
|
||||
* In fault path we enter with mapping->invalidate_lock held and can't
|
||||
* drop it. Also in fault path we hold mapping->invalidate_lock shared
|
||||
* and not exclusive, so that creates further issues with
|
||||
* fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault()
|
||||
* will wait for a memory range to become free and retry.
|
||||
*/
|
||||
if (flags & IOMAP_FAULT) {
|
||||
alloc_dmap = alloc_dax_mapping(fcd);
|
||||
@@ -513,7 +513,7 @@ static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos,
|
||||
down_write(&fi->dax->sem);
|
||||
node = interval_tree_iter_first(&fi->dax->tree, idx, idx);
|
||||
|
||||
/* We are holding either inode lock or i_mmap_sem, and that should
|
||||
/* We are holding either inode lock or invalidate_lock, and that should
|
||||
* ensure that dmap can't be truncated. We are holding a reference
|
||||
* on dmap and that should make sure it can't be reclaimed. So dmap
|
||||
* should still be there in tree despite the fact we dropped and
|
||||
@@ -660,14 +660,12 @@ static const struct iomap_ops fuse_iomap_ops = {
|
||||
|
||||
static void fuse_wait_dax_page(struct inode *inode)
|
||||
{
|
||||
struct fuse_inode *fi = get_fuse_inode(inode);
|
||||
|
||||
up_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
schedule();
|
||||
down_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
}
|
||||
|
||||
/* Should be called with fi->i_mmap_sem lock held exclusively */
|
||||
/* Should be called with mapping->invalidate_lock held exclusively */
|
||||
static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
|
||||
loff_t start, loff_t end)
|
||||
{
|
||||
@@ -813,18 +811,18 @@ retry:
|
||||
* we do not want any read/write/mmap to make progress and try
|
||||
* to populate page cache or access memory we are trying to free.
|
||||
*/
|
||||
down_read(&get_fuse_inode(inode)->i_mmap_sem);
|
||||
filemap_invalidate_lock_shared(inode->i_mapping);
|
||||
ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops);
|
||||
if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) {
|
||||
error = 0;
|
||||
retry = true;
|
||||
up_read(&get_fuse_inode(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (ret & VM_FAULT_NEEDDSYNC)
|
||||
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
|
||||
up_read(&get_fuse_inode(inode)->i_mmap_sem);
|
||||
filemap_invalidate_unlock_shared(inode->i_mapping);
|
||||
|
||||
if (write)
|
||||
sb_end_pagefault(sb);
|
||||
@@ -960,7 +958,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
|
||||
int ret;
|
||||
struct interval_tree_node *node;
|
||||
|
||||
down_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
|
||||
/* Lookup a dmap and corresponding file offset to reclaim. */
|
||||
down_read(&fi->dax->sem);
|
||||
@@ -1021,7 +1019,7 @@ inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode,
|
||||
out_write_dmap_sem:
|
||||
up_write(&fi->dax->sem);
|
||||
out_mmap_sem:
|
||||
up_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
return dmap;
|
||||
}
|
||||
|
||||
@@ -1050,10 +1048,10 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
|
||||
* had a reference or some other temporary failure,
|
||||
* Try again. We want to give up inline reclaim only
|
||||
* if there is no range assigned to this node. Otherwise
|
||||
* if a deadlock is possible if we sleep with fi->i_mmap_sem
|
||||
* held and worker to free memory can't make progress due
|
||||
* to unavailability of fi->i_mmap_sem lock. So sleep
|
||||
* only if fi->dax->nr=0
|
||||
* if a deadlock is possible if we sleep with
|
||||
* mapping->invalidate_lock held and worker to free memory
|
||||
* can't make progress due to unavailability of
|
||||
* mapping->invalidate_lock. So sleep only if fi->dax->nr=0
|
||||
*/
|
||||
if (retry)
|
||||
continue;
|
||||
@@ -1061,8 +1059,8 @@ alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode)
|
||||
* There are no mappings which can be reclaimed. Wait for one.
|
||||
* We are not holding fi->dax->sem. So it is possible
|
||||
* that range gets added now. But as we are not holding
|
||||
* fi->i_mmap_sem, worker should still be able to free up
|
||||
* a range and wake us up.
|
||||
* mapping->invalidate_lock, worker should still be able to
|
||||
* free up a range and wake us up.
|
||||
*/
|
||||
if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) {
|
||||
if (wait_event_killable_exclusive(fcd->range_waitq,
|
||||
@@ -1108,7 +1106,7 @@ static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd,
|
||||
/*
|
||||
* Free a range of memory.
|
||||
* Locking:
|
||||
* 1. Take fi->i_mmap_sem to block dax faults.
|
||||
* 1. Take mapping->invalidate_lock to block dax faults.
|
||||
* 2. Take fi->dax->sem to protect interval tree and also to make sure
|
||||
* read/write can not reuse a dmap which we might be freeing.
|
||||
*/
|
||||
@@ -1122,7 +1120,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
|
||||
loff_t dmap_start = start_idx << FUSE_DAX_SHIFT;
|
||||
loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1;
|
||||
|
||||
down_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_lock(inode->i_mapping);
|
||||
ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end);
|
||||
if (ret) {
|
||||
pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n",
|
||||
@@ -1134,7 +1132,7 @@ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd,
|
||||
ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx);
|
||||
up_write(&fi->dax->sem);
|
||||
out_mmap_sem:
|
||||
up_write(&fi->i_mmap_sem);
|
||||
filemap_invalidate_unlock(inode->i_mapping);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user