You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "The big new feature added this time is supporting online resizing using the meta_bg feature. This allows us to resize file systems which are greater than 16TB. In addition, the speed of online resizing has been improved in general. We also fix a number of races, some of which could lead to deadlocks, in ext4's Asynchronous I/O and online defrag support, thanks to good work by Dmitry Monakhov. There are also a large number of more minor bug fixes and cleanups from a number of other ext4 contributors, quite a few of whom have submitted fixes for the first time." * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (69 commits) ext4: fix ext4_flush_completed_IO wait semantics ext4: fix mtime update in nodelalloc mode ext4: fix ext_remove_space for punch_hole case ext4: punch_hole should wait for DIO writers ext4: serialize truncate with owerwrite DIO workers ext4: endless truncate due to nonlocked dio readers ext4: serialize unlocked dio reads with truncate ext4: serialize dio nonlocked reads with defrag workers ext4: completed_io locking cleanup ext4: fix unwritten counter leakage ext4: give i_aiodio_unwritten a more appropriate name ext4: ext4_inode_info diet ext4: convert to use leXX_add_cpu() ext4: ext4_bread usage audit fs: reserve fallocate flag codepoint ext4: remove redundant offset check in mext_check_arguments() ext4: don't clear orphan list on ro mount with errors jbd2: fix assertion failure in commit code due to lacking transaction credits ext4: release donor reference when EXT4_IOC_MOVE_EXT ioctl fails ext4: enable FITRIM ioctl on bigalloc file system ...
This commit is contained in:
@@ -96,3 +96,16 @@ Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
The maximum number of megabytes the writeback code will
|
||||
try to write out before moving on to another inode.
|
||||
|
||||
What: /sys/fs/ext4/<disk>/extent_max_zeroout_kb
|
||||
Date: August 2012
|
||||
Contact: "Theodore Ts'o" <tytso@mit.edu>
|
||||
Description:
|
||||
The maximum number of kilobytes which will be zeroed
|
||||
out in preference to creating a new uninitialized
|
||||
extent when manipulating an inode's extent tree. Note
|
||||
that using a larger value will increase the
|
||||
variability of time necessary to complete a random
|
||||
write operation (since a 4k random write might turn
|
||||
into a much larger write due to the zeroout
|
||||
operation).
|
||||
|
||||
@@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified
|
||||
Because of the restrictions this option comprises
|
||||
it is off by default (e.g. dioread_lock).
|
||||
|
||||
max_dir_size_kb=n This limits the size of directories so that any
|
||||
attempt to expand them beyond the specified
|
||||
limit in kilobytes will cause an ENOSPC error.
|
||||
This is useful in memory constrained
|
||||
environments, where a very large directory can
|
||||
cause severe performance problems or even
|
||||
provoke the Out Of Memory killer. (For example,
|
||||
if there is only 512mb memory available, a 176mb
|
||||
directory may seriously cramp the system's style.)
|
||||
|
||||
i_version Enable 64-bit inode version support. This option is
|
||||
off by default.
|
||||
|
||||
|
||||
+7
-6
@@ -2312,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
loff_t size;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Update file times before taking page lock. We may end up failing the
|
||||
* fault so this update may be superfluous but who really cares...
|
||||
*/
|
||||
file_update_time(vma->vm_file);
|
||||
|
||||
lock_page(page);
|
||||
size = i_size_read(inode);
|
||||
if ((page->mapping != inode->i_mapping) ||
|
||||
@@ -2355,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
|
||||
struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
|
||||
|
||||
sb_start_pagefault(sb);
|
||||
|
||||
/*
|
||||
* Update file times before taking page lock. We may end up failing the
|
||||
* fault so this update may be superfluous but who really cares...
|
||||
*/
|
||||
file_update_time(vma->vm_file);
|
||||
|
||||
ret = __block_page_mkwrite(vma, vmf, get_block);
|
||||
sb_end_pagefault(sb);
|
||||
return block_page_mkwrite_return(ret);
|
||||
|
||||
+42
-7
@@ -186,7 +186,6 @@ struct mpage_da_data {
|
||||
#define EXT4_IO_END_ERROR 0x0002
|
||||
#define EXT4_IO_END_QUEUED 0x0004
|
||||
#define EXT4_IO_END_DIRECT 0x0008
|
||||
#define EXT4_IO_END_IN_FSYNC 0x0010
|
||||
|
||||
struct ext4_io_page {
|
||||
struct page *p_page;
|
||||
@@ -912,9 +911,7 @@ struct ext4_inode_info {
|
||||
struct list_head i_completed_io_list;
|
||||
spinlock_t i_completed_io_lock;
|
||||
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
|
||||
/* current io_end structure for async DIO write*/
|
||||
ext4_io_end_t *cur_aio_dio;
|
||||
atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
|
||||
atomic_t i_unwritten; /* Nr. of inflight conversions pending */
|
||||
|
||||
spinlock_t i_block_reservation_lock;
|
||||
|
||||
@@ -1233,6 +1230,7 @@ struct ext4_sb_info {
|
||||
spinlock_t s_md_lock;
|
||||
unsigned short *s_mb_offsets;
|
||||
unsigned int *s_mb_maxs;
|
||||
unsigned int s_group_info_size;
|
||||
|
||||
/* tunables */
|
||||
unsigned long s_stripe;
|
||||
@@ -1243,6 +1241,7 @@ struct ext4_sb_info {
|
||||
unsigned int s_mb_order2_reqs;
|
||||
unsigned int s_mb_group_prealloc;
|
||||
unsigned int s_max_writeback_mb_bump;
|
||||
unsigned int s_max_dir_size_kb;
|
||||
/* where last allocation was done - for stream allocation */
|
||||
unsigned long s_mb_last_group;
|
||||
unsigned long s_mb_last_start;
|
||||
@@ -1270,8 +1269,12 @@ struct ext4_sb_info {
|
||||
unsigned long s_sectors_written_start;
|
||||
u64 s_kbytes_written;
|
||||
|
||||
/* the size of zero-out chunk */
|
||||
unsigned int s_extent_max_zeroout_kb;
|
||||
|
||||
unsigned int s_log_groups_per_flex;
|
||||
struct flex_groups *s_flex_groups;
|
||||
ext4_group_t s_flex_groups_allocated;
|
||||
|
||||
/* workqueue for dio unwritten */
|
||||
struct workqueue_struct *dio_unwritten_wq;
|
||||
@@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
|
||||
{
|
||||
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
|
||||
io_end->flag |= EXT4_IO_END_UNWRITTEN;
|
||||
atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
|
||||
atomic_inc(&EXT4_I(inode)->i_unwritten);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return inode->i_private, used here as the inode's ext4_io_end_t pointer. */
static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
|
||||
{
|
||||
return inode->i_private;
|
||||
}
|
||||
|
||||
/*
 * Store io in inode->i_private, the slot ext4_inode_aio() reads back.
 * ext4_ext_direct_IO() passes NULL to clear it.
 */
static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
|
||||
{
|
||||
inode->i_private = io;
|
||||
}
|
||||
|
||||
/*
|
||||
* Inode dynamic state flags
|
||||
*/
|
||||
@@ -1345,6 +1358,8 @@ enum {
|
||||
EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
|
||||
EXT4_STATE_NEWENTRY, /* File just added to dir */
|
||||
EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
|
||||
EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
|
||||
nolocking */
|
||||
};
|
||||
|
||||
#define EXT4_INODE_BIT_FNS(name, field, offset) \
|
||||
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
|
||||
|
||||
/* fsync.c */
|
||||
extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
|
||||
extern int ext4_flush_completed_IO(struct inode *);
|
||||
extern int ext4_flush_unwritten_io(struct inode *);
|
||||
|
||||
/* hash.c */
|
||||
extern int ext4fs_dirhash(const char *name, int len, struct
|
||||
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void);
|
||||
extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
|
||||
struct buffer_head *bh, ext4_fsblk_t block,
|
||||
unsigned long count, int flags);
|
||||
extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
|
||||
ext4_group_t ngroups);
|
||||
extern int ext4_mb_add_groupinfo(struct super_block *sb,
|
||||
ext4_group_t i, struct ext4_group_desc *desc);
|
||||
extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
|
||||
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb,
|
||||
extern void *ext4_kvmalloc(size_t size, gfp_t flags);
|
||||
extern void *ext4_kvzalloc(size_t size, gfp_t flags);
|
||||
extern void ext4_kvfree(void *ptr);
|
||||
extern int ext4_alloc_flex_bg_array(struct super_block *sb,
|
||||
ext4_group_t ngroup);
|
||||
extern __printf(4, 5)
|
||||
void __ext4_error(struct super_block *, const char *, unsigned int,
|
||||
const char *, ...);
|
||||
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
|
||||
extern const struct inode_operations ext4_file_inode_operations;
|
||||
extern const struct file_operations ext4_file_operations;
|
||||
extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
|
||||
extern void ext4_unwritten_wait(struct inode *inode);
|
||||
|
||||
/* namei.c */
|
||||
extern const struct inode_operations ext4_dir_inode_operations;
|
||||
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
|
||||
|
||||
/* page-io.c */
|
||||
extern int __init ext4_init_pageio(void);
|
||||
extern void ext4_add_complete_io(ext4_io_end_t *io_end);
|
||||
extern void ext4_exit_pageio(void);
|
||||
extern void ext4_ioend_wait(struct inode *);
|
||||
extern void ext4_free_io_end(ext4_io_end_t *io);
|
||||
extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
|
||||
extern int ext4_end_io_nolock(ext4_io_end_t *io);
|
||||
extern void ext4_io_submit(struct ext4_io_submit *io);
|
||||
extern int ext4_bio_write_page(struct ext4_io_submit *io,
|
||||
struct page *page,
|
||||
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
|
||||
set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable DIO read nolock optimization, so new dioreaders will be forced
|
||||
* to grab i_mutex
* Sets EXT4_STATE_DIOREAD_LOCK on the inode and then issues smp_mb().
* ext4_ind_direct_IO() tests this flag after incrementing i_dio_count and
* jumps to its "locked" branch when the flag is set.
* Undone by ext4_inode_resume_unlocked_dio().
|
||||
*/
|
||||
static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
|
||||
{
|
||||
ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
|
||||
smp_mb();
|
||||
}
|
||||
/*
 * Counterpart of ext4_inode_block_unlocked_dio(): issue smp_mb() and then
 * clear EXT4_STATE_DIOREAD_LOCK on the inode.
 */
static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
|
||||
{
|
||||
smp_mb();
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
|
||||
}
|
||||
|
||||
/*
 * True when b lies within the closed interval [first, first + len - 1].
 * Arguments may be evaluated more than once; avoid side effects in them.
 */
#define in_range(b, first, len)	(!((b) < (first) || (b) > (first) + (len) - 1))
|
||||
|
||||
/* For ioend & aio unwritten conversion wait queues */
|
||||
|
||||
+151
-107
File diff suppressed because it is too large
Load Diff
+3
-3
@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ext4_aiodio_wait(struct inode *inode)
|
||||
void ext4_unwritten_wait(struct inode *inode)
|
||||
{
|
||||
wait_queue_head_t *wq = ext4_ioend_wq(inode);
|
||||
|
||||
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
|
||||
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
"performance will be poor.",
|
||||
inode->i_ino, current->comm);
|
||||
mutex_lock(ext4_aio_mutex(inode));
|
||||
ext4_aiodio_wait(inode);
|
||||
ext4_unwritten_wait(inode);
|
||||
}
|
||||
|
||||
BUG_ON(iocb->ki_pos != pos);
|
||||
|
||||
+7
-85
@@ -34,87 +34,6 @@
|
||||
|
||||
#include <trace/events/ext4.h>
|
||||
|
||||
/*
 * Debug helper: when EXT4FS_DEBUG is defined, log every io_end queued on the
 * inode's i_completed_io_list together with the addresses of its list
 * neighbours. Without EXT4FS_DEBUG the body is empty.
 */
static void dump_completed_IO(struct inode * inode)
|
||||
{
|
||||
#ifdef EXT4FS_DEBUG
|
||||
struct list_head *cur, *before, *after;
|
||||
ext4_io_end_t *io, *io0, *io1;
|
||||
unsigned long flags;
|
||||
|
||||
/* Emptiness is checked before i_completed_io_lock is taken. */
if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
|
||||
ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
|
||||
return;
|
||||
}
|
||||
|
||||
ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
|
||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
|
||||
cur = &io->list;
|
||||
before = cur->prev;
|
||||
/*
 * NOTE(review): for the first/last entry, prev/next is presumably the
 * list head itself, so io0/io1 would not be real io_end structures;
 * they are only printed as addresses, never dereferenced.
 */
io0 = container_of(before, ext4_io_end_t, list);
|
||||
after = cur->next;
|
||||
io1 = container_of(after, ext4_io_end_t, list);
|
||||
|
||||
ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
|
||||
io, inode->i_ino, io0, io1);
|
||||
}
|
||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is called from ext4_sync_file().
|
||||
*
|
||||
* When IO is completed, the work to convert unwritten extents to
|
||||
* written is queued on a workqueue but may not get scheduled
|
||||
* immediately. When fsync is called, we need to ensure the
|
||||
* conversion is complete before fsync returns.
|
||||
* The inode keeps track of a list of pending/completed IO that
|
||||
* might need the conversion. This function walks through
|
||||
* the list and converts the related unwritten extents for completed IO
|
||||
* to written.
|
||||
* Returns 0 on success. If any conversion fails, returns the negative
* error code from the most recent failing ext4_end_io_nolock() call.
|
||||
*/
|
||||
int ext4_flush_completed_IO(struct inode *inode)
|
||||
{
|
||||
ext4_io_end_t *io;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
unsigned long flags;
|
||||
int ret = 0;
|
||||
int ret2 = 0;
|
||||
|
||||
dump_completed_IO(inode);
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
while (!list_empty(&ei->i_completed_io_list)){
|
||||
/* Pop the head entry while holding i_completed_io_lock. */
io = list_entry(ei->i_completed_io_list.next,
|
||||
ext4_io_end_t, list);
|
||||
list_del_init(&io->list);
|
||||
io->flag |= EXT4_IO_END_IN_FSYNC;
|
||||
/*
|
||||
* Call ext4_end_io_nolock() to convert completed
|
||||
* IO to written.
|
||||
*
|
||||
* When ext4_sync_file() is called, run_queue() may already be
|
||||
* about to flush the work corresponding to this io structure.
|
||||
* It will be upset if it finds that the io structure related
|
||||
* to the work about to be scheduled has been freed.
|
||||
*
|
||||
* Thus we need to keep the io structure valid here after the
|
||||
* conversion has finished. The io structure has a flag to
|
||||
* avoid double conversion from both fsync and the background
|
||||
* workqueue.
|
||||
*/
|
||||
/* The lock is dropped around the conversion and retaken afterwards. */
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
ret = ext4_end_io_nolock(io);
|
||||
if (ret < 0)
|
||||
ret2 = ret;
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
io->flag &= ~EXT4_IO_END_IN_FSYNC;
|
||||
}
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
return (ret2 < 0) ? ret2 : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're not journaling and this is a just-created file, we have to
|
||||
* sync our parent directory (if it was freshly created) since
|
||||
@@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
|
||||
int ret;
|
||||
int ret, err;
|
||||
tid_t commit_tid;
|
||||
bool needs_barrier = false;
|
||||
|
||||
@@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
if (inode->i_sb->s_flags & MS_RDONLY)
|
||||
goto out;
|
||||
|
||||
ret = ext4_flush_completed_IO(inode);
|
||||
ret = ext4_flush_unwritten_io(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
needs_barrier = true;
|
||||
jbd2_log_start_commit(journal, commit_tid);
|
||||
ret = jbd2_log_wait_commit(journal, commit_tid);
|
||||
if (needs_barrier)
|
||||
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
if (needs_barrier) {
|
||||
err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
|
||||
if (!ret)
|
||||
ret = err;
|
||||
}
|
||||
out:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
trace_ext4_sync_file_exit(inode, ret);
|
||||
|
||||
@@ -697,6 +697,15 @@ got_group:
|
||||
if (!gdp)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* Check free inodes count before loading bitmap.
|
||||
*/
|
||||
if (ext4_free_inodes_count(sb, gdp) == 0) {
|
||||
if (++group == ngroups)
|
||||
group = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
brelse(inode_bitmap_bh);
|
||||
inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
|
||||
if (!inode_bitmap_bh)
|
||||
|
||||
+16
-2
@@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
|
||||
|
||||
retry:
|
||||
if (rw == READ && ext4_should_dioread_nolock(inode)) {
|
||||
if (unlikely(!list_empty(&ei->i_completed_io_list))) {
|
||||
if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ext4_flush_completed_IO(inode);
|
||||
ext4_flush_unwritten_io(inode);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
/*
|
||||
* Nolock dioread optimization may be dynamically disabled
|
||||
* via ext4_inode_block_unlocked_dio(). Check inode's state
|
||||
* while holding extra i_dio_count ref.
|
||||
*/
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
smp_mb();
|
||||
if (unlikely(ext4_test_inode_state(inode,
|
||||
EXT4_STATE_DIOREAD_LOCK))) {
|
||||
inode_dio_done(inode);
|
||||
goto locked;
|
||||
}
|
||||
ret = __blockdev_direct_IO(rw, iocb, inode,
|
||||
inode->i_sb->s_bdev, iov,
|
||||
offset, nr_segs,
|
||||
ext4_get_block, NULL, NULL, 0);
|
||||
inode_dio_done(inode);
|
||||
} else {
|
||||
locked:
|
||||
ret = blockdev_direct_IO(rw, iocb, inode, iov,
|
||||
offset, nr_segs, ext4_get_block);
|
||||
|
||||
|
||||
+42
-41
@@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
|
||||
err = ext4_map_blocks(handle, inode, &map,
|
||||
create ? EXT4_GET_BLOCKS_CREATE : 0);
|
||||
|
||||
/* ensure we send some value back into *errp */
|
||||
*errp = 0;
|
||||
|
||||
if (err < 0)
|
||||
*errp = err;
|
||||
if (err <= 0)
|
||||
return NULL;
|
||||
*errp = 0;
|
||||
|
||||
bh = sb_getblk(inode->i_sb, map.m_pblk);
|
||||
if (!bh) {
|
||||
@@ -1954,9 +1956,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
|
||||
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
|
||||
|
||||
/*
|
||||
* Note that we don't need to start a transaction unless we're journaling data
|
||||
* because we should have holes filled from ext4_page_mkwrite(). We even don't
|
||||
@@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb)
|
||||
free_blocks = EXT4_C2B(sbi,
|
||||
percpu_counter_read_positive(&sbi->s_freeclusters_counter));
|
||||
dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
|
||||
/*
|
||||
* Start pushing delalloc when 1/2 of free blocks are dirty.
|
||||
*/
|
||||
if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
|
||||
!writeback_in_progress(sb->s_bdi) &&
|
||||
down_read_trylock(&sb->s_umount)) {
|
||||
writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
|
||||
up_read(&sb->s_umount);
|
||||
}
|
||||
|
||||
if (2 * free_blocks < 3 * dirty_blocks ||
|
||||
free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
|
||||
/*
|
||||
@@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb)
|
||||
*/
|
||||
return 1;
|
||||
}
|
||||
/*
|
||||
* Even if we don't switch but are nearing capacity,
|
||||
* start pushing delalloc when 1/2 of free blocks are dirty.
|
||||
*/
|
||||
if (free_blocks < 2 * dirty_blocks)
|
||||
writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
|
||||
{
|
||||
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
|
||||
ext4_io_end_t *io_end = iocb->private;
|
||||
struct workqueue_struct *wq;
|
||||
unsigned long flags;
|
||||
struct ext4_inode_info *ei;
|
||||
|
||||
/* if not async direct IO or dio with 0 bytes write, just return */
|
||||
if (!io_end || !size)
|
||||
@@ -2910,24 +2909,14 @@ out:
|
||||
io_end->iocb = iocb;
|
||||
io_end->result = ret;
|
||||
}
|
||||
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
|
||||
|
||||
/* Add the io_end to per-inode completed aio dio list*/
|
||||
ei = EXT4_I(io_end->inode);
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
list_add_tail(&io_end->list, &ei->i_completed_io_list);
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
|
||||
/* queue the work to convert unwritten extents to written */
|
||||
queue_work(wq, &io_end->work);
|
||||
ext4_add_complete_io(io_end);
|
||||
}
|
||||
|
||||
static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
ext4_io_end_t *io_end = bh->b_private;
|
||||
struct workqueue_struct *wq;
|
||||
struct inode *inode;
|
||||
unsigned long flags;
|
||||
|
||||
if (!test_clear_buffer_uninit(bh) || !io_end)
|
||||
goto out;
|
||||
@@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
|
||||
*/
|
||||
inode = io_end->inode;
|
||||
ext4_set_io_unwritten_flag(inode, io_end);
|
||||
|
||||
/* Add the io_end to per-inode completed io list*/
|
||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
|
||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
|
||||
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
|
||||
/* queue the work to convert unwritten extents to written */
|
||||
queue_work(wq, &io_end->work);
|
||||
ext4_add_complete_io(io_end);
|
||||
out:
|
||||
bh->b_private = NULL;
|
||||
bh->b_end_io = NULL;
|
||||
@@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
overwrite = *((int *)iocb->private);
|
||||
|
||||
if (overwrite) {
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
down_read(&EXT4_I(inode)->i_data_sem);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
@@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
* hook to the iocb.
|
||||
*/
|
||||
iocb->private = NULL;
|
||||
EXT4_I(inode)->cur_aio_dio = NULL;
|
||||
ext4_inode_aio_set(inode, NULL);
|
||||
if (!is_sync_kiocb(iocb)) {
|
||||
ext4_io_end_t *io_end =
|
||||
ext4_init_io_end(inode, GFP_NOFS);
|
||||
@@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
* is a unwritten extents needs to be converted
|
||||
* when IO is completed.
|
||||
*/
|
||||
EXT4_I(inode)->cur_aio_dio = iocb->private;
|
||||
ext4_inode_aio_set(inode, io_end);
|
||||
}
|
||||
|
||||
if (overwrite)
|
||||
@@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
NULL,
|
||||
DIO_LOCKING);
|
||||
if (iocb->private)
|
||||
EXT4_I(inode)->cur_aio_dio = NULL;
|
||||
ext4_inode_aio_set(inode, NULL);
|
||||
/*
|
||||
* The io_end structure takes a reference to the inode,
|
||||
* that structure needs to be destroyed and the
|
||||
@@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
|
||||
retake_lock:
|
||||
/* retake i_mutex if we did an overwrite dio */
|
||||
if (overwrite) {
|
||||
inode_dio_done(inode);
|
||||
up_read(&EXT4_I(inode)->i_data_sem);
|
||||
mutex_lock(&inode->i_mutex);
|
||||
}
|
||||
@@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle,
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
struct buffer_head *bh = iloc->bh;
|
||||
int err = 0, rc, block;
|
||||
int need_datasync = 0;
|
||||
uid_t i_uid;
|
||||
gid_t i_gid;
|
||||
|
||||
@@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle,
|
||||
raw_inode->i_file_acl_high =
|
||||
cpu_to_le16(ei->i_file_acl >> 32);
|
||||
raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
|
||||
ext4_isize_set(raw_inode, ei->i_disksize);
|
||||
if (ei->i_disksize != ext4_isize(raw_inode)) {
|
||||
ext4_isize_set(raw_inode, ei->i_disksize);
|
||||
need_datasync = 1;
|
||||
}
|
||||
if (ei->i_disksize > 0x7fffffffULL) {
|
||||
struct super_block *sb = inode->i_sb;
|
||||
if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
||||
@@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle,
|
||||
err = rc;
|
||||
ext4_clear_inode_state(inode, EXT4_STATE_NEW);
|
||||
|
||||
ext4_update_inode_fsync_trans(handle, inode, 0);
|
||||
ext4_update_inode_fsync_trans(handle, inode, need_datasync);
|
||||
out_brelse:
|
||||
brelse(bh);
|
||||
ext4_std_error(inode->i_sb, err);
|
||||
@@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
}
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
inode_dio_wait(inode);
|
||||
|
||||
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
|
||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||||
@@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
}
|
||||
|
||||
if (attr->ia_valid & ATTR_SIZE) {
|
||||
if (attr->ia_size != i_size_read(inode))
|
||||
if (attr->ia_size != i_size_read(inode)) {
|
||||
truncate_setsize(inode, attr->ia_size);
|
||||
/* Inode size will be reduced, wait for dio in flight.
|
||||
* Temporarily disable dioread_nolock to prevent
|
||||
* livelock. */
|
||||
if (orphan) {
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
inode_dio_wait(inode);
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
}
|
||||
}
|
||||
ext4_truncate(inode);
|
||||
}
|
||||
|
||||
@@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Wait for all existing dio workers */
|
||||
ext4_inode_block_unlocked_dio(inode);
|
||||
inode_dio_wait(inode);
|
||||
|
||||
jbd2_journal_lock_updates(journal);
|
||||
|
||||
/*
|
||||
@@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
||||
ext4_set_aops(inode);
|
||||
|
||||
jbd2_journal_unlock_updates(journal);
|
||||
ext4_inode_resume_unlocked_dio(inode);
|
||||
|
||||
/* Finally we can mark the inode as dirty. */
|
||||
|
||||
@@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
int retries = 0;
|
||||
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vma->vm_file);
|
||||
/* Delalloc case is easy... */
|
||||
if (test_opt(inode->i_sb, DELALLOC) &&
|
||||
!ext4_should_journal_data(inode) &&
|
||||
|
||||
@@ -366,26 +366,11 @@ group_add_out:
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (EXT4_HAS_INCOMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_INCOMPAT_META_BG)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"Online resizing not (yet) supported with meta_bg");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
|
||||
sizeof(__u64))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (n_blocks_count > MAX_32_NUM &&
|
||||
!EXT4_HAS_INCOMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_INCOMPAT_64BIT)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"File system only supports 32-bit block numbers");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
err = ext4_resize_begin(sb);
|
||||
if (err)
|
||||
return err;
|
||||
@@ -420,13 +405,6 @@ resizefs_out:
|
||||
if (!blk_queue_discard(q))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
|
||||
ext4_msg(sb, KERN_ERR,
|
||||
"FITRIM not supported with bigalloc");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
if (copy_from_user(&range, (struct fstrim_range __user *)arg,
|
||||
sizeof(range)))
|
||||
return -EFAULT;
|
||||
|
||||
+66
-63
@@ -24,6 +24,7 @@
|
||||
#include "ext4_jbd2.h"
|
||||
#include "mballoc.h"
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/slab.h>
|
||||
#include <trace/events/ext4.h>
|
||||
|
||||
@@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
|
||||
mb_check_buddy(e4b);
|
||||
}
|
||||
|
||||
static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
|
||||
static int mb_find_extent(struct ext4_buddy *e4b, int block,
|
||||
int needed, struct ext4_free_extent *ex)
|
||||
{
|
||||
int next = block;
|
||||
int max;
|
||||
int max, order;
|
||||
void *buddy;
|
||||
|
||||
assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
|
||||
BUG_ON(ex == NULL);
|
||||
|
||||
buddy = mb_find_buddy(e4b, order, &max);
|
||||
buddy = mb_find_buddy(e4b, 0, &max);
|
||||
BUG_ON(buddy == NULL);
|
||||
BUG_ON(block >= max);
|
||||
if (mb_test_bit(block, buddy)) {
|
||||
@@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* FIXME dorp order completely ? */
|
||||
if (likely(order == 0)) {
|
||||
/* find actual order */
|
||||
order = mb_find_order_for_block(e4b, block);
|
||||
block = block >> order;
|
||||
}
|
||||
/* find actual order */
|
||||
order = mb_find_order_for_block(e4b, block);
|
||||
block = block >> order;
|
||||
|
||||
ex->fe_len = 1 << order;
|
||||
ex->fe_start = block << order;
|
||||
@@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
|
||||
/* recheck chunk's availability - we don't know
|
||||
* when it was found (within this lock-unlock
|
||||
* period or not) */
|
||||
max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex);
|
||||
max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
|
||||
if (max >= gex->fe_len) {
|
||||
ext4_mb_use_best_found(ac, e4b);
|
||||
return;
|
||||
@@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
|
||||
return err;
|
||||
|
||||
ext4_lock_group(ac->ac_sb, group);
|
||||
max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex);
|
||||
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
|
||||
|
||||
if (max > 0) {
|
||||
ac->ac_b_ex = ex;
|
||||
@@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
|
||||
int max;
|
||||
int err;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
|
||||
struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
|
||||
struct ext4_free_extent ex;
|
||||
|
||||
if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
|
||||
return 0;
|
||||
if (grp->bb_free == 0)
|
||||
return 0;
|
||||
|
||||
err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
ext4_lock_group(ac->ac_sb, group);
|
||||
max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
|
||||
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
|
||||
ac->ac_g_ex.fe_len, &ex);
|
||||
|
||||
if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
|
||||
@@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
|
||||
break;
|
||||
}
|
||||
|
||||
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
|
||||
mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
|
||||
BUG_ON(ex.fe_len <= 0);
|
||||
if (free < ex.fe_len) {
|
||||
ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
|
||||
@@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
|
||||
|
||||
while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
|
||||
if (!mb_test_bit(i, bitmap)) {
|
||||
max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex);
|
||||
max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
|
||||
if (max >= sbi->s_stripe) {
|
||||
ac->ac_found++;
|
||||
ac->ac_b_ex = ex;
|
||||
@@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
||||
|
||||
BUG_ON(cr < 0 || cr >= 4);
|
||||
|
||||
free = grp->bb_free;
|
||||
if (free == 0)
|
||||
return 0;
|
||||
if (cr <= 2 && free < ac->ac_g_ex.fe_len)
|
||||
return 0;
|
||||
|
||||
/* We only do this if the grp has never been initialized */
|
||||
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
|
||||
int ret = ext4_mb_init_group(ac->ac_sb, group);
|
||||
@@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
|
||||
return 0;
|
||||
}
|
||||
|
||||
free = grp->bb_free;
|
||||
fragments = grp->bb_fragments;
|
||||
if (free == 0)
|
||||
return 0;
|
||||
if (fragments == 0)
|
||||
return 0;
|
||||
|
||||
@@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
|
||||
return cachep;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate the top-level s_group_info array for the specified number
|
||||
* of groups
|
||||
*/
|
||||
int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
unsigned size;
|
||||
struct ext4_group_info ***new_groupinfo;
|
||||
|
||||
size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
|
||||
EXT4_DESC_PER_BLOCK_BITS(sb);
|
||||
if (size <= sbi->s_group_info_size)
|
||||
return 0;
|
||||
|
||||
size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
|
||||
new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
|
||||
if (!new_groupinfo) {
|
||||
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (sbi->s_group_info) {
|
||||
memcpy(new_groupinfo, sbi->s_group_info,
|
||||
sbi->s_group_info_size * sizeof(*sbi->s_group_info));
|
||||
ext4_kvfree(sbi->s_group_info);
|
||||
}
|
||||
sbi->s_group_info = new_groupinfo;
|
||||
sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
|
||||
ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
|
||||
sbi->s_group_info_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Create and initialize ext4_group_info data for the given group. */
|
||||
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
|
||||
struct ext4_group_desc *desc)
|
||||
@@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
|
||||
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
|
||||
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
|
||||
|
||||
meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
|
||||
meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
|
||||
if (meta_group_info[i] == NULL) {
|
||||
ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
|
||||
goto exit_group_info;
|
||||
}
|
||||
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
|
||||
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
|
||||
&(meta_group_info[i]->bb_state));
|
||||
|
||||
@@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb)
|
||||
ext4_group_t ngroups = ext4_get_groups_count(sb);
|
||||
ext4_group_t i;
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_super_block *es = sbi->s_es;
|
||||
int num_meta_group_infos;
|
||||
int num_meta_group_infos_max;
|
||||
int array_size;
|
||||
int err;
|
||||
struct ext4_group_desc *desc;
|
||||
struct kmem_cache *cachep;
|
||||
|
||||
/* This is the number of blocks used by GDT */
|
||||
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
|
||||
1) >> EXT4_DESC_PER_BLOCK_BITS(sb);
|
||||
err = ext4_mb_alloc_groupinfo(sb, ngroups);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/*
|
||||
* This is the total number of blocks used by GDT including
|
||||
* the number of reserved blocks for GDT.
|
||||
* The s_group_info array is allocated with this value
|
||||
* to allow a clean online resize without a complex
|
||||
* manipulation of pointer.
|
||||
* The drawback is the unused memory when no resize
|
||||
* occurs but it's very low in terms of pages
|
||||
* (see comments below)
|
||||
* Need to handle this properly when META_BG resizing is allowed
|
||||
*/
|
||||
num_meta_group_infos_max = num_meta_group_infos +
|
||||
le16_to_cpu(es->s_reserved_gdt_blocks);
|
||||
|
||||
/*
|
||||
* array_size is the size of s_group_info array. We round it
|
||||
* to the next power of two because this approximation is done
|
||||
* internally by kmalloc so we can have some more memory
|
||||
* for free here (e.g. may be used for META_BG resize).
|
||||
*/
|
||||
array_size = 1;
|
||||
while (array_size < sizeof(*sbi->s_group_info) *
|
||||
num_meta_group_infos_max)
|
||||
array_size = array_size << 1;
|
||||
/* An 8TB filesystem with 64-bit pointers requires a 4096 byte
|
||||
* kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
|
||||
* So a two level scheme suffices for now. */
|
||||
sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
|
||||
if (sbi->s_group_info == NULL) {
|
||||
ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
|
||||
return -ENOMEM;
|
||||
}
|
||||
sbi->s_buddy_cache = new_inode(sb);
|
||||
if (sbi->s_buddy_cache == NULL) {
|
||||
ext4_msg(sb, KERN_ERR, "can't get new inode");
|
||||
@@ -2322,7 +2323,7 @@ err_freebuddy:
|
||||
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
|
||||
while (i-- > 0)
|
||||
kmem_cache_free(cachep, ext4_get_group_info(sb, i));
|
||||
i = num_meta_group_infos;
|
||||
i = sbi->s_group_info_size;
|
||||
while (i-- > 0)
|
||||
kfree(sbi->s_group_info[i]);
|
||||
iput(sbi->s_buddy_cache);
|
||||
@@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
|
||||
ext4_get_group_no_and_offset(sb, goal, &group, &block);
|
||||
|
||||
/* set up allocation goals */
|
||||
memset(ac, 0, sizeof(struct ext4_allocation_context));
|
||||
ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
|
||||
ac->ac_status = AC_STATUS_CONTINUE;
|
||||
ac->ac_sb = sb;
|
||||
@@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
|
||||
}
|
||||
}
|
||||
|
||||
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
|
||||
ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
|
||||
if (!ac) {
|
||||
ar->len = 0;
|
||||
*errp = -ENOMEM;
|
||||
@@ -4657,6 +4657,8 @@ do_more:
|
||||
* with group lock held. generate_buddy look at
|
||||
* them with group lock_held
|
||||
*/
|
||||
if (test_opt(sb, DISCARD))
|
||||
ext4_issue_discard(sb, block_group, bit, count);
|
||||
ext4_lock_group(sb, block_group);
|
||||
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
|
||||
mb_free_blocks(inode, &e4b, bit, count_clusters);
|
||||
@@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
|
||||
start = range->start >> sb->s_blocksize_bits;
|
||||
end = start + (range->len >> sb->s_blocksize_bits) - 1;
|
||||
minlen = range->minlen >> sb->s_blocksize_bits;
|
||||
minlen = EXT4_NUM_B2C(EXT4_SB(sb),
|
||||
range->minlen >> sb->s_blocksize_bits);
|
||||
|
||||
if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
|
||||
unlikely(start >= max_blks))
|
||||
@@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
|
||||
atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
|
||||
|
||||
out:
|
||||
range->len = trimmed * sb->s_blocksize;
|
||||
range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -64,11 +64,6 @@ extern u8 mb_enable_debug;
|
||||
*/
|
||||
#define MB_DEFAULT_MIN_TO_SCAN 10
|
||||
|
||||
/*
|
||||
* How many groups mballoc will scan looking for the best chunk
|
||||
*/
|
||||
#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
|
||||
|
||||
/*
|
||||
* with 'ext4_mb_stats' allocator will collect stats that will be
|
||||
* shown at umount. The collecting costs though!
|
||||
|
||||
+315
-203
File diff suppressed because it is too large
Load Diff
+86
-19
@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle,
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
|
||||
if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
|
||||
((inode->i_size >> 10) >=
|
||||
EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
|
||||
*err = -ENOSPC;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
|
||||
|
||||
bh = ext4_bread(handle, inode, *block, 1, err);
|
||||
@@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle,
|
||||
bh = NULL;
|
||||
}
|
||||
}
|
||||
if (!bh && !(*err)) {
|
||||
*err = -EIO;
|
||||
ext4_error(inode->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
inode->i_ino);
|
||||
}
|
||||
return bh;
|
||||
}
|
||||
|
||||
@@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
|
||||
u32 hash;
|
||||
|
||||
frame->bh = NULL;
|
||||
if (!(bh = ext4_bread (NULL,dir, 0, 0, err)))
|
||||
if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
|
||||
if (*err == 0)
|
||||
*err = ERR_BAD_DX_DIR;
|
||||
goto fail;
|
||||
}
|
||||
root = (struct dx_root *) bh->b_data;
|
||||
if (root->info.hash_version != DX_HASH_TEA &&
|
||||
root->info.hash_version != DX_HASH_HALF_MD4 &&
|
||||
@@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
|
||||
frame->entries = entries;
|
||||
frame->at = at;
|
||||
if (!indirect--) return frame;
|
||||
if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err)))
|
||||
if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
|
||||
if (!(*err))
|
||||
*err = ERR_BAD_DX_DIR;
|
||||
goto fail2;
|
||||
}
|
||||
at = entries = ((struct dx_node *) bh->b_data)->entries;
|
||||
|
||||
if (!buffer_verified(bh) &&
|
||||
@@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
|
||||
*/
|
||||
while (num_frames--) {
|
||||
if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
|
||||
0, &err)))
|
||||
0, &err))) {
|
||||
if (!err) {
|
||||
ext4_error(dir->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
dir->i_ino);
|
||||
return -EIO;
|
||||
}
|
||||
return err; /* Failure */
|
||||
}
|
||||
|
||||
if (!buffer_verified(bh) &&
|
||||
!ext4_dx_csum_verify(dir,
|
||||
@@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file,
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
struct ext4_dir_entry_2 *de, *top;
|
||||
int err, count = 0;
|
||||
int err = 0, count = 0;
|
||||
|
||||
dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
|
||||
(unsigned long)block));
|
||||
if (!(bh = ext4_bread (NULL, dir, block, 0, &err)))
|
||||
if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
|
||||
if (!err) {
|
||||
err = -EIO;
|
||||
ext4_error(dir->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
dir->i_ino);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!buffer_verified(bh) &&
|
||||
!ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
|
||||
@@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
|
||||
return NULL;
|
||||
do {
|
||||
block = dx_get_block(frame->at);
|
||||
if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
|
||||
if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
|
||||
if (!(*err)) {
|
||||
*err = -EIO;
|
||||
ext4_error(dir->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
dir->i_ino);
|
||||
}
|
||||
goto errout;
|
||||
}
|
||||
|
||||
if (!buffer_verified(bh) &&
|
||||
!ext4_dirent_csum_verify(dir,
|
||||
@@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
|
||||
}
|
||||
blocks = dir->i_size >> sb->s_blocksize_bits;
|
||||
for (block = 0; block < blocks; block++) {
|
||||
bh = ext4_bread(handle, dir, block, 0, &retval);
|
||||
if(!bh)
|
||||
if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
|
||||
if (!retval) {
|
||||
retval = -EIO;
|
||||
ext4_error(inode->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
inode->i_ino);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
if (!buffer_verified(bh) &&
|
||||
!ext4_dirent_csum_verify(dir,
|
||||
(struct ext4_dir_entry *)bh->b_data))
|
||||
@@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
|
||||
entries = frame->entries;
|
||||
at = frame->at;
|
||||
|
||||
if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
|
||||
if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
|
||||
if (!err) {
|
||||
err = -EIO;
|
||||
ext4_error(dir->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
dir->i_ino);
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
if (!buffer_verified(bh) &&
|
||||
!ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
|
||||
@@ -2149,9 +2202,7 @@ retry:
|
||||
err = PTR_ERR(inode);
|
||||
if (!IS_ERR(inode)) {
|
||||
init_special_inode(inode, inode->i_mode, rdev);
|
||||
#ifdef CONFIG_EXT4_FS_XATTR
|
||||
inode->i_op = &ext4_special_inode_operations;
|
||||
#endif
|
||||
err = ext4_add_nondir(handle, dentry, inode);
|
||||
}
|
||||
ext4_journal_stop(handle);
|
||||
@@ -2199,9 +2250,15 @@ retry:
|
||||
inode->i_op = &ext4_dir_inode_operations;
|
||||
inode->i_fop = &ext4_dir_operations;
|
||||
inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
|
||||
dir_block = ext4_bread(handle, inode, 0, 1, &err);
|
||||
if (!dir_block)
|
||||
if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
|
||||
if (!err) {
|
||||
err = -EIO;
|
||||
ext4_error(inode->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
inode->i_ino);
|
||||
}
|
||||
goto out_clear_inode;
|
||||
}
|
||||
BUFFER_TRACE(dir_block, "get_write_access");
|
||||
err = ext4_journal_get_write_access(handle, dir_block);
|
||||
if (err)
|
||||
@@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode)
|
||||
EXT4_ERROR_INODE(inode,
|
||||
"error %d reading directory "
|
||||
"lblock %u", err, lblock);
|
||||
else
|
||||
ext4_warning(inode->i_sb,
|
||||
"bad directory (dir #%lu) - no data block",
|
||||
inode->i_ino);
|
||||
|
||||
offset += sb->s_blocksize;
|
||||
continue;
|
||||
}
|
||||
@@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
|
||||
struct ext4_iloc iloc;
|
||||
int err = 0, rc;
|
||||
|
||||
if (!ext4_handle_valid(handle))
|
||||
if (!EXT4_SB(sb)->s_journal)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
|
||||
@@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
|
||||
struct ext4_iloc iloc;
|
||||
int err = 0;
|
||||
|
||||
/* ext4_handle_valid() assumes a valid handle_t pointer */
|
||||
if (handle && !ext4_handle_valid(handle))
|
||||
if (!EXT4_SB(inode->i_sb)->s_journal)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
|
||||
@@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
|
||||
* transaction handle with which to update the orphan list on
|
||||
* disk, but we still need to remove the inode from the linked
|
||||
* list in memory. */
|
||||
if (sbi->s_journal && !handle)
|
||||
if (!handle)
|
||||
goto out;
|
||||
|
||||
err = ext4_reserve_inode_write(handle, inode, &iloc);
|
||||
@@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
|
||||
goto end_rename;
|
||||
}
|
||||
retval = -EIO;
|
||||
dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval);
|
||||
if (!dir_bh)
|
||||
if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
|
||||
if (!retval) {
|
||||
retval = -EIO;
|
||||
ext4_error(old_inode->i_sb,
|
||||
"Directory hole detected on inode %lu\n",
|
||||
old_inode->i_ino);
|
||||
}
|
||||
goto end_rename;
|
||||
}
|
||||
if (!buffer_verified(dir_bh) &&
|
||||
!ext4_dirent_csum_verify(old_inode,
|
||||
(struct ext4_dir_entry *)dir_bh->b_data))
|
||||
|
||||
+120
-58
@@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io)
|
||||
int i;
|
||||
|
||||
BUG_ON(!io);
|
||||
BUG_ON(!list_empty(&io->list));
|
||||
BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
|
||||
|
||||
if (io->page)
|
||||
put_page(io->page);
|
||||
for (i = 0; i < io->num_io_pages; i++)
|
||||
@@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io)
|
||||
kmem_cache_free(io_end_cachep, io);
|
||||
}
|
||||
|
||||
/*
|
||||
* check a range of space and convert unwritten extents to written.
|
||||
*
|
||||
* Called with inode->i_mutex; we depend on this when we manipulate
|
||||
* io->flag, since we could otherwise race with ext4_flush_completed_IO()
|
||||
*/
|
||||
int ext4_end_io_nolock(ext4_io_end_t *io)
|
||||
/* check a range of space and convert unwritten extents to written. */
|
||||
static int ext4_end_io(ext4_io_end_t *io)
|
||||
{
|
||||
struct inode *inode = io->inode;
|
||||
loff_t offset = io->offset;
|
||||
@@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
|
||||
"(inode %lu, offset %llu, size %zd, error %d)",
|
||||
inode->i_ino, offset, size, ret);
|
||||
}
|
||||
|
||||
if (io->iocb)
|
||||
aio_complete(io->iocb, io->result, 0);
|
||||
|
||||
if (io->flag & EXT4_IO_END_DIRECT)
|
||||
inode_dio_done(inode);
|
||||
/* Wake up anyone waiting on unwritten extent conversion */
|
||||
if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten))
|
||||
if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
|
||||
wake_up_all(ext4_ioend_wq(io->inode));
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Debug helper: log every io_end on the inode's completed-IO list together
 * with its list neighbours.
 *
 * The body is compiled only when EXT4FS_DEBUG is defined; otherwise this is
 * an empty function.  No lock is taken here: the list is walked as-is, and
 * the visible caller, ext4_do_flush_completed_IO(), calls this while holding
 * i_completed_io_lock.
 */
static void dump_completed_IO(struct inode *inode)
{
#ifdef EXT4FS_DEBUG
	struct list_head *cur, *before, *after;
	ext4_io_end_t *io, *io0, *io1;

	if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
		ext4_debug("inode %lu completed_io list is empty\n",
			   inode->i_ino);
		return;
	}

	ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
		/*
		 * io0/io1 are only printed, never dereferenced: for the first
		 * and last entries the neighbouring node is the list head, so
		 * the container_of() result is not a real ext4_io_end_t.
		 */
		cur = &io->list;
		before = cur->prev;
		io0 = container_of(before, ext4_io_end_t, list);
		after = cur->next;
		io1 = container_of(after, ext4_io_end_t, list);

		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
			   io, inode->i_ino, io0, io1);
	}
#endif
}
|
||||
|
||||
/* Add the io_end to per-inode completed end_io list. */
|
||||
void ext4_add_complete_io(ext4_io_end_t *io_end)
|
||||
{
|
||||
struct ext4_inode_info *ei = EXT4_I(io_end->inode);
|
||||
struct workqueue_struct *wq;
|
||||
unsigned long flags;
|
||||
|
||||
BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
|
||||
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
|
||||
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
if (list_empty(&ei->i_completed_io_list)) {
|
||||
io_end->flag |= EXT4_IO_END_QUEUED;
|
||||
queue_work(wq, &io_end->work);
|
||||
}
|
||||
list_add_tail(&io_end->list, &ei->i_completed_io_list);
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
}
|
||||
|
||||
static int ext4_do_flush_completed_IO(struct inode *inode,
|
||||
ext4_io_end_t *work_io)
|
||||
{
|
||||
ext4_io_end_t *io;
|
||||
struct list_head unwritten, complete, to_free;
|
||||
unsigned long flags;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
int err, ret = 0;
|
||||
|
||||
INIT_LIST_HEAD(&complete);
|
||||
INIT_LIST_HEAD(&to_free);
|
||||
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
dump_completed_IO(inode);
|
||||
list_replace_init(&ei->i_completed_io_list, &unwritten);
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
|
||||
while (!list_empty(&unwritten)) {
|
||||
io = list_entry(unwritten.next, ext4_io_end_t, list);
|
||||
BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
|
||||
list_del_init(&io->list);
|
||||
|
||||
err = ext4_end_io(io);
|
||||
if (unlikely(!ret && err))
|
||||
ret = err;
|
||||
|
||||
list_add_tail(&io->list, &complete);
|
||||
}
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
while (!list_empty(&complete)) {
|
||||
io = list_entry(complete.next, ext4_io_end_t, list);
|
||||
io->flag &= ~EXT4_IO_END_UNWRITTEN;
|
||||
/* end_io context can not be destroyed now because it still
|
||||
* used by queued worker. Worker thread will destroy it later */
|
||||
if (io->flag & EXT4_IO_END_QUEUED)
|
||||
list_del_init(&io->list);
|
||||
else
|
||||
list_move(&io->list, &to_free);
|
||||
}
|
||||
/* If we are called from worker context, it is time to clear queued
|
||||
* flag, and destroy it's end_io if it was converted already */
|
||||
if (work_io) {
|
||||
work_io->flag &= ~EXT4_IO_END_QUEUED;
|
||||
if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
|
||||
list_add_tail(&work_io->list, &to_free);
|
||||
}
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
|
||||
while (!list_empty(&to_free)) {
|
||||
io = list_entry(to_free.next, ext4_io_end_t, list);
|
||||
list_del_init(&io->list);
|
||||
ext4_free_io_end(io);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* work on completed aio dio IO, to convert unwritten extents to extents
|
||||
*/
|
||||
static void ext4_end_io_work(struct work_struct *work)
|
||||
{
|
||||
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
|
||||
struct inode *inode = io->inode;
|
||||
struct ext4_inode_info *ei = EXT4_I(inode);
|
||||
unsigned long flags;
|
||||
ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
|
||||
ext4_do_flush_completed_IO(io->inode, io);
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
|
||||
if (io->flag & EXT4_IO_END_IN_FSYNC)
|
||||
goto requeue;
|
||||
if (list_empty(&io->list)) {
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
goto free;
|
||||
}
|
||||
|
||||
if (!mutex_trylock(&inode->i_mutex)) {
|
||||
bool was_queued;
|
||||
requeue:
|
||||
was_queued = !!(io->flag & EXT4_IO_END_QUEUED);
|
||||
io->flag |= EXT4_IO_END_QUEUED;
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
/*
|
||||
* Requeue the work instead of waiting so that the work
|
||||
* items queued after this can be processed.
|
||||
*/
|
||||
queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work);
|
||||
/*
|
||||
* To prevent the ext4-dio-unwritten thread from keeping
|
||||
* requeueing end_io requests and occupying cpu for too long,
|
||||
* yield the cpu if it sees an end_io request that has already
|
||||
* been requeued.
|
||||
*/
|
||||
if (was_queued)
|
||||
yield();
|
||||
return;
|
||||
}
|
||||
list_del_init(&io->list);
|
||||
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
|
||||
(void) ext4_end_io_nolock(io);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
free:
|
||||
ext4_free_io_end(io);
|
||||
int ext4_flush_unwritten_io(struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
|
||||
!(inode->i_state & I_FREEING));
|
||||
ret = ext4_do_flush_completed_IO(inode, NULL);
|
||||
ext4_unwritten_wait(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
|
||||
@@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh)
|
||||
static void ext4_end_bio(struct bio *bio, int error)
|
||||
{
|
||||
ext4_io_end_t *io_end = bio->bi_private;
|
||||
struct workqueue_struct *wq;
|
||||
struct inode *inode;
|
||||
unsigned long flags;
|
||||
int i;
|
||||
sector_t bi_sector = bio->bi_sector;
|
||||
|
||||
@@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add the io_end to per-inode completed io list*/
|
||||
spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
|
||||
spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
|
||||
|
||||
wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
|
||||
/* queue the work to convert unwritten extents to written */
|
||||
queue_work(wq, &io_end->work);
|
||||
ext4_add_complete_io(io_end);
|
||||
}
|
||||
|
||||
void ext4_io_submit(struct ext4_io_submit *io)
|
||||
|
||||
+344
-90
File diff suppressed because it is too large
Load Diff
+60
-32
@@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
|
||||
*/
|
||||
if (!es->s_error_count)
|
||||
mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
|
||||
es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
|
||||
le32_add_cpu(&es->s_error_count, 1);
|
||||
}
|
||||
|
||||
static void save_error_info(struct super_block *sb, const char *func,
|
||||
@@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb)
|
||||
flush_workqueue(sbi->dio_unwritten_wq);
|
||||
destroy_workqueue(sbi->dio_unwritten_wq);
|
||||
|
||||
lock_super(sb);
|
||||
if (sbi->s_journal) {
|
||||
err = jbd2_journal_destroy(sbi->s_journal);
|
||||
sbi->s_journal = NULL;
|
||||
@@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb)
|
||||
* Now that we are completely done shutting down the
|
||||
* superblock, we need to actually destroy the kobject.
|
||||
*/
|
||||
unlock_super(sb);
|
||||
kobject_put(&sbi->s_kobj);
|
||||
wait_for_completion(&sbi->s_kobj_unregister);
|
||||
if (sbi->s_chksum_driver)
|
||||
@@ -956,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
|
||||
ei->jinode = NULL;
|
||||
INIT_LIST_HEAD(&ei->i_completed_io_list);
|
||||
spin_lock_init(&ei->i_completed_io_lock);
|
||||
ei->cur_aio_dio = NULL;
|
||||
ei->i_sync_tid = 0;
|
||||
ei->i_datasync_tid = 0;
|
||||
atomic_set(&ei->i_ioend_count, 0);
|
||||
atomic_set(&ei->i_aiodio_unwritten, 0);
|
||||
atomic_set(&ei->i_unwritten, 0);
|
||||
|
||||
return &ei->vfs_inode;
|
||||
}
|
||||
@@ -1224,6 +1221,7 @@ enum {
|
||||
Opt_inode_readahead_blks, Opt_journal_ioprio,
|
||||
Opt_dioread_nolock, Opt_dioread_lock,
|
||||
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
|
||||
Opt_max_dir_size_kb,
|
||||
};
|
||||
|
||||
static const match_table_t tokens = {
|
||||
@@ -1297,6 +1295,7 @@ static const match_table_t tokens = {
|
||||
{Opt_init_itable, "init_itable=%u"},
|
||||
{Opt_init_itable, "init_itable"},
|
||||
{Opt_noinit_itable, "noinit_itable"},
|
||||
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
|
||||
{Opt_removed, "check=none"}, /* mount option from ext2/3 */
|
||||
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */
|
||||
{Opt_removed, "reservation"}, /* mount option from ext2/3 */
|
||||
@@ -1477,6 +1476,7 @@ static const struct mount_opts {
|
||||
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
|
||||
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
|
||||
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
|
||||
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
|
||||
{Opt_err, 0, 0}
|
||||
};
|
||||
|
||||
@@ -1592,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
|
||||
if (!args->from)
|
||||
arg = EXT4_DEF_LI_WAIT_MULT;
|
||||
sbi->s_li_wait_mult = arg;
|
||||
} else if (token == Opt_max_dir_size_kb) {
|
||||
sbi->s_max_dir_size_kb = arg;
|
||||
} else if (token == Opt_stripe) {
|
||||
sbi->s_stripe = arg;
|
||||
} else if (m->flags & MOPT_DATAJ) {
|
||||
@@ -1664,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb,
|
||||
* Initialize args struct so we know whether arg was
|
||||
* found; some options take optional arguments.
|
||||
*/
|
||||
args[0].to = args[0].from = 0;
|
||||
args[0].to = args[0].from = NULL;
|
||||
token = match_token(p, tokens, args);
|
||||
if (handle_mount_opt(sb, p, token, args, journal_devnum,
|
||||
journal_ioprio, is_remount) < 0)
|
||||
@@ -1740,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
|
||||
|
||||
static const char *token2str(int token)
|
||||
{
|
||||
static const struct match_token *t;
|
||||
const struct match_token *t;
|
||||
|
||||
for (t = tokens; t->token != Opt_err; t++)
|
||||
if (t->token == token && !strchr(t->pattern, '='))
|
||||
@@ -1823,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
|
||||
if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
|
||||
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
|
||||
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
|
||||
if (nodefs || sbi->s_max_dir_size_kb)
|
||||
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
|
||||
|
||||
ext4_show_quota_options(seq, sb);
|
||||
return 0;
|
||||
@@ -1914,15 +1918,45 @@ done:
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Ensure the s_flex_groups array can hold the flex group containing group
 * (ngroup - 1), growing it when it cannot.
 *
 * Does nothing when s_log_groups_per_flex is zero.  Returns 0 on success
 * (including the no-op cases) and -ENOMEM when allocation fails.
 */
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct flex_groups *grown;
	int nr_needed;
	int nbytes;

	if (!sbi->s_log_groups_per_flex)
		return 0;

	/* Index of the last group's flex group, plus one, is the count. */
	nr_needed = ext4_flex_group(sbi, ngroup - 1) + 1;
	if (nr_needed <= sbi->s_flex_groups_allocated)
		return 0;

	/* Round the byte size up so the spare entries absorb later growth. */
	nbytes = roundup_pow_of_two(nr_needed * sizeof(struct flex_groups));
	grown = ext4_kvzalloc(nbytes, GFP_KERNEL);
	if (!grown) {
		ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
			 nbytes / (int) sizeof(struct flex_groups));
		return -ENOMEM;
	}

	/* Carry the existing entries over before dropping the old array. */
	if (sbi->s_flex_groups) {
		memcpy(grown, sbi->s_flex_groups,
		       (sbi->s_flex_groups_allocated *
			sizeof(struct flex_groups)));
		ext4_kvfree(sbi->s_flex_groups);
	}

	sbi->s_flex_groups = grown;
	sbi->s_flex_groups_allocated = nbytes / sizeof(struct flex_groups);
	return 0;
}
|
||||
|
||||
static int ext4_fill_flex_info(struct super_block *sb)
|
||||
{
|
||||
struct ext4_sb_info *sbi = EXT4_SB(sb);
|
||||
struct ext4_group_desc *gdp = NULL;
|
||||
ext4_group_t flex_group_count;
|
||||
ext4_group_t flex_group;
|
||||
unsigned int groups_per_flex = 0;
|
||||
size_t size;
|
||||
int i;
|
||||
int i, err;
|
||||
|
||||
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
|
||||
if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
|
||||
@@ -1931,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb)
|
||||
}
|
||||
groups_per_flex = 1 << sbi->s_log_groups_per_flex;
|
||||
|
||||
/* We allocate both existing and potentially added groups */
|
||||
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
|
||||
((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
|
||||
EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
|
||||
size = flex_group_count * sizeof(struct flex_groups);
|
||||
sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
|
||||
if (sbi->s_flex_groups == NULL) {
|
||||
ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
|
||||
flex_group_count);
|
||||
err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
|
||||
if (err)
|
||||
goto failed;
|
||||
}
|
||||
|
||||
for (i = 0; i < sbi->s_groups_count; i++) {
|
||||
gdp = ext4_get_group_desc(sb, i, NULL);
|
||||
@@ -2144,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
|
||||
}
|
||||
|
||||
if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
|
||||
if (es->s_last_orphan)
|
||||
/* don't clear list on RO mount w/ errors */
|
||||
if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
|
||||
jbd_debug(1, "Errors on filesystem, "
|
||||
"clearing orphan list.\n");
|
||||
es->s_last_orphan = 0;
|
||||
es->s_last_orphan = 0;
|
||||
}
|
||||
jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
|
||||
return;
|
||||
}
|
||||
@@ -2528,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
|
||||
EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
|
||||
EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
|
||||
EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
|
||||
EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
|
||||
|
||||
static struct attribute *ext4_attrs[] = {
|
||||
@@ -2543,6 +2572,7 @@ static struct attribute *ext4_attrs[] = {
|
||||
ATTR_LIST(mb_stream_req),
|
||||
ATTR_LIST(mb_group_prealloc),
|
||||
ATTR_LIST(max_writeback_mb_bump),
|
||||
ATTR_LIST(extent_max_zeroout_kb),
|
||||
ATTR_LIST(trigger_fs_error),
|
||||
NULL,
|
||||
};
|
||||
@@ -2550,10 +2580,12 @@ static struct attribute *ext4_attrs[] = {
|
||||
/* Features this copy of ext4 supports */
|
||||
EXT4_INFO_ATTR(lazy_itable_init);
|
||||
EXT4_INFO_ATTR(batched_discard);
|
||||
EXT4_INFO_ATTR(meta_bg_resize);
|
||||
|
||||
static struct attribute *ext4_feat_attrs[] = {
|
||||
ATTR_LIST(lazy_itable_init),
|
||||
ATTR_LIST(batched_discard),
|
||||
ATTR_LIST(meta_bg_resize),
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -3374,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
* enable delayed allocation by default
|
||||
* Use -o nodelalloc to turn it off
|
||||
*/
|
||||
if (!IS_EXT3_SB(sb) &&
|
||||
if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
|
||||
((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
|
||||
set_opt(sb, DELALLOC);
|
||||
|
||||
@@ -3743,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
sbi->s_stripe = ext4_get_stripe_size(sbi);
|
||||
sbi->s_max_writeback_mb_bump = 128;
|
||||
sbi->s_extent_max_zeroout_kb = 32;
|
||||
|
||||
/*
|
||||
* set up enough so that it can read an inode
|
||||
@@ -4519,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb)
|
||||
if (sb->s_flags & MS_RDONLY)
|
||||
return 0;
|
||||
|
||||
lock_super(sb);
|
||||
/* Reset the needs_recovery flag before the fs is unlocked. */
|
||||
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
|
||||
ext4_commit_super(sb, 1);
|
||||
unlock_super(sb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4559,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
char *orig_data = kstrdup(data, GFP_KERNEL);
|
||||
|
||||
/* Store the original options */
|
||||
lock_super(sb);
|
||||
old_sb_flags = sb->s_flags;
|
||||
old_opts.s_mount_opt = sbi->s_mount_opt;
|
||||
old_opts.s_mount_opt2 = sbi->s_mount_opt2;
|
||||
@@ -4701,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (sbi->s_journal == NULL)
|
||||
ext4_commit_super(sb, 1);
|
||||
|
||||
unlock_super(sb);
|
||||
#ifdef CONFIG_QUOTA
|
||||
/* Release old quota file names */
|
||||
for (i = 0; i < MAXQUOTAS; i++)
|
||||
@@ -4714,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
|
||||
else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
|
||||
EXT4_FEATURE_RO_COMPAT_QUOTA)) {
|
||||
err = ext4_enable_quotas(sb);
|
||||
if (err) {
|
||||
lock_super(sb);
|
||||
if (err)
|
||||
goto restore_opts;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -4744,7 +4771,6 @@ restore_opts:
|
||||
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
|
||||
}
|
||||
#endif
|
||||
unlock_super(sb);
|
||||
kfree(orig_data);
|
||||
return err;
|
||||
}
|
||||
@@ -5269,8 +5295,10 @@ static int __init ext4_init_fs(void)
|
||||
if (err)
|
||||
goto out6;
|
||||
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
|
||||
if (!ext4_kset)
|
||||
if (!ext4_kset) {
|
||||
err = -ENOMEM;
|
||||
goto out5;
|
||||
}
|
||||
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
|
||||
|
||||
err = ext4_init_feat_adverts();
|
||||
|
||||
@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
|
||||
{
|
||||
return test_bit(BDI_writeback_running, &bdi->state);
|
||||
}
|
||||
EXPORT_SYMBOL(writeback_in_progress);
|
||||
|
||||
static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
|
||||
{
|
||||
|
||||
+29
-11
@@ -1014,17 +1014,35 @@ restart_loop:
|
||||
* there's no point in keeping a checkpoint record for
|
||||
* it. */
|
||||
|
||||
/* A buffer which has been freed while still being
|
||||
* journaled by a previous transaction may end up still
|
||||
* being dirty here, but we want to avoid writing back
|
||||
* that buffer in the future after the "add to orphan"
|
||||
* operation has been committed. That's not only a performance
|
||||
* gain, it also stops aliasing problems if the buffer is
|
||||
* left behind for writeback and gets reallocated for another
|
||||
* use in a different page. */
|
||||
if (buffer_freed(bh) && !jh->b_next_transaction) {
|
||||
clear_buffer_freed(bh);
|
||||
clear_buffer_jbddirty(bh);
|
||||
/*
|
||||
* A buffer which has been freed while still being journaled by
|
||||
* a previous transaction.
|
||||
*/
|
||||
if (buffer_freed(bh)) {
|
||||
/*
|
||||
* If the running transaction is the one containing
|
||||
* "add to orphan" operation (b_next_transaction !=
|
||||
* NULL), we have to wait for that transaction to
|
||||
* commit before we can really get rid of the buffer.
|
||||
* So just clear b_modified to not confuse transaction
|
||||
* credit accounting and refile the buffer to
|
||||
* BJ_Forget of the running transaction. If the just
|
||||
* committed transaction contains "add to orphan"
|
||||
* operation, we can completely invalidate the buffer
|
||||
* now. We are rather thorough in that since the
|
||||
* buffer may be still accessible when blocksize <
|
||||
* pagesize and it is attached to the last partial
|
||||
* page.
|
||||
*/
|
||||
jh->b_modified = 0;
|
||||
if (!jh->b_next_transaction) {
|
||||
clear_buffer_freed(bh);
|
||||
clear_buffer_jbddirty(bh);
|
||||
clear_buffer_mapped(bh);
|
||||
clear_buffer_new(bh);
|
||||
clear_buffer_req(bh);
|
||||
bh->b_bdev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (buffer_jbddirty(bh)) {
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user