Merge tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "The major change in this version is mitigating cpu overheads on write
  paths by replacing redundant inode page updates with mark_inode_dirty
  calls.  And we tried to reduce lock contentions as well to improve
  filesystem scalability.  Other feature is setting F2FS automatically
  when detecting host-managed SMR.

  Enhancements:
   - ioctl to move a range of data between files
   - inject orphan inode errors
   - avoid flush commands congestion
   - support lazytime

  Bug fixes:
   - return proper results for some dentry operations
   - fix deadlock in add_link failure
   - disable extent_cache for fcollapse/finsert"

* tag 'for-f2fs-4.8' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (68 commits)
  f2fs: clean up coding style and redundancy
  f2fs: get victim segment again after new cp
  f2fs: handle error case with f2fs_bug_on
  f2fs: avoid data race when deciding checkpoin in f2fs_sync_file
  f2fs: support an ioctl to move a range of data blocks
  f2fs: fix to report error number of f2fs_find_entry
  f2fs: avoid memory allocation failure due to a long length
  f2fs: reset default idle interval value
  f2fs: use blk_plug in all the possible paths
  f2fs: fix to avoid data update racing between GC and DIO
  f2fs: add maximum prefree segments
  f2fs: disable extent_cache for fcollapse/finsert inodes
  f2fs: refactor __exchange_data_block for speed up
  f2fs: fix ERR_PTR returned by bio
  f2fs: avoid mark_inode_dirty
  f2fs: move i_size_write in f2fs_write_end
  f2fs: fix to avoid redundant discard during fstrim
  f2fs: avoid mismatching block range for discard
  f2fs: fix incorrect f_bfree calculation in ->statfs
  f2fs: use percpu_rw_semaphore
  ...
This commit is contained in:
Linus Torvalds
2016-07-27 10:36:31 -07:00
22 changed files with 1392 additions and 755 deletions
+71 -73
View File
@@ -52,6 +52,10 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
if (excess_cached_nats(sbi))
res = false;
if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD)
res = false;
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->wb.dirty_exceeded)
return false;
@@ -202,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool need = false;
down_read(&nm_i->nat_tree_lock);
percpu_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
if (!get_nat_flag(e, IS_CHECKPOINTED) &&
!get_nat_flag(e, HAS_FSYNCED_INODE))
need = true;
}
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
return need;
}
@@ -219,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
struct nat_entry *e;
bool is_cp = true;
down_read(&nm_i->nat_tree_lock);
percpu_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e && !get_nat_flag(e, IS_CHECKPOINTED))
is_cp = false;
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
return is_cp;
}
@@ -233,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
struct nat_entry *e;
bool need_update = true;
down_read(&nm_i->nat_tree_lock);
percpu_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ino);
if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
(get_nat_flag(e, IS_CHECKPOINTED) ||
get_nat_flag(e, HAS_FSYNCED_INODE)))
need_update = false;
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
return need_update;
}
@@ -280,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
down_write(&nm_i->nat_tree_lock);
percpu_down_write(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, ni->nid);
if (!e) {
e = grab_nat_entry(nm_i, ni->nid);
@@ -330,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
set_nat_flag(e, HAS_FSYNCED_INODE, true);
set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
}
up_write(&nm_i->nat_tree_lock);
percpu_up_write(&nm_i->nat_tree_lock);
}
int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -338,8 +342,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
struct f2fs_nm_info *nm_i = NM_I(sbi);
int nr = nr_shrink;
if (!down_write_trylock(&nm_i->nat_tree_lock))
return 0;
percpu_down_write(&nm_i->nat_tree_lock);
while (nr_shrink && !list_empty(&nm_i->nat_entries)) {
struct nat_entry *ne;
@@ -348,7 +351,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
__del_from_nat_cache(nm_i, ne);
nr_shrink--;
}
up_write(&nm_i->nat_tree_lock);
percpu_up_write(&nm_i->nat_tree_lock);
return nr - nr_shrink;
}
@@ -370,13 +373,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->nid = nid;
/* Check nat cache */
down_read(&nm_i->nat_tree_lock);
percpu_down_read(&nm_i->nat_tree_lock);
e = __lookup_nat_cache(nm_i, nid);
if (e) {
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
return;
}
@@ -400,11 +403,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
down_write(&nm_i->nat_tree_lock);
percpu_down_write(&nm_i->nat_tree_lock);
cache_nat_entry(sbi, nid, &ne);
up_write(&nm_i->nat_tree_lock);
percpu_up_write(&nm_i->nat_tree_lock);
}
/*
@@ -646,6 +649,7 @@ release_out:
if (err == -ENOENT) {
dn->cur_level = i;
dn->max_level = level;
dn->ofs_in_node = offset[level];
}
return err;
}
@@ -670,8 +674,7 @@ static void truncate_node(struct dnode_of_data *dn)
if (dn->nid == dn->inode->i_ino) {
remove_orphan_inode(sbi, dn->nid);
dec_valid_inode_count(sbi);
} else {
sync_inode_page(dn);
f2fs_inode_synced(dn->inode);
}
invalidate:
clear_node_page_dirty(dn->node_page);
@@ -953,7 +956,7 @@ int truncate_xattr_node(struct inode *inode, struct page *page)
if (IS_ERR(npage))
return PTR_ERR(npage);
F2FS_I(inode)->i_xattr_nid = 0;
f2fs_i_xnid_write(inode, 0);
/* need to do checkpoint during fsync */
F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
@@ -1019,7 +1022,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
struct page *page;
int err;
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
return ERR_PTR(-EPERM);
page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
@@ -1042,21 +1045,16 @@ struct page *new_node_page(struct dnode_of_data *dn,
f2fs_wait_on_page_writeback(page, NODE, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
if (set_page_dirty(page))
dn->node_changed = true;
if (f2fs_has_xattr_block(ofs))
F2FS_I(dn->inode)->i_xattr_nid = dn->nid;
f2fs_i_xnid_write(dn->inode, dn->nid);
dn->node_page = page;
if (ipage)
update_inode(dn->inode, ipage);
else
sync_inode_page(dn);
if (ofs == 0)
inc_valid_inode_count(sbi);
return page;
fail:
@@ -1083,6 +1081,9 @@ static int read_node_page(struct page *page, int op_flags)
.encrypted_page = NULL,
};
if (PageUptodate(page))
return LOCKED_PAGE;
get_node_info(sbi, page->index, &ni);
if (unlikely(ni.blk_addr == NULL_ADDR)) {
@@ -1090,9 +1091,6 @@ static int read_node_page(struct page *page, int op_flags)
return -ENOENT;
}
if (PageUptodate(page))
return LOCKED_PAGE;
fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
return f2fs_submit_page_bio(&fio);
}
@@ -1150,16 +1148,21 @@ repeat:
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
f2fs_put_page(page, 1);
goto repeat;
}
if (unlikely(!PageUptodate(page)))
goto out_err;
page_hit:
f2fs_bug_on(sbi, nid != nid_of_node(page));
if(unlikely(nid != nid_of_node(page))) {
f2fs_bug_on(sbi, 1);
ClearPageUptodate(page);
out_err:
f2fs_put_page(page, 1);
return ERR_PTR(-EIO);
}
return page;
}
@@ -1176,24 +1179,6 @@ struct page *get_node_page_ra(struct page *parent, int start)
return __get_node_page(sbi, nid, parent, start);
}
void sync_inode_page(struct dnode_of_data *dn)
{
int ret = 0;
if (IS_INODE(dn->node_page) || dn->inode_page == dn->node_page) {
ret = update_inode(dn->inode, dn->node_page);
} else if (dn->inode_page) {
if (!dn->inode_page_locked)
lock_page(dn->inode_page);
ret = update_inode(dn->inode, dn->inode_page);
if (!dn->inode_page_locked)
unlock_page(dn->inode_page);
} else {
ret = update_inode_page(dn->inode);
}
dn->node_changed = ret ? true: false;
}
static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
struct inode *inode;
@@ -1319,7 +1304,7 @@ continue_unlock:
return last_page;
}
int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
struct writeback_control *wbc, bool atomic)
{
pgoff_t index, end;
@@ -1327,6 +1312,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
int ret = 0;
struct page *last_page = NULL;
bool marked = false;
nid_t ino = inode->i_ino;
if (atomic) {
last_page = last_fsync_dnode(sbi, ino);
@@ -1380,9 +1366,13 @@ continue_unlock:
if (!atomic || page == last_page) {
set_fsync_mark(page, 1);
if (IS_INODE(page))
if (IS_INODE(page)) {
if (is_inode_flag_set(inode,
FI_DIRTY_INODE))
update_inode(inode, page);
set_dentry_mark(page,
need_dentry_mark(sbi, ino));
}
/* may be written by other thread */
if (!PageDirty(page))
set_page_dirty(page);
@@ -1630,6 +1620,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
struct blk_plug plug;
long diff;
/* balancing f2fs's metadata in background */
@@ -1643,7 +1634,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
blk_start_plug(&plug);
sync_node_pages(sbi, wbc);
blk_finish_plug(&plug);
wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
return 0;
@@ -1657,9 +1650,10 @@ static int f2fs_set_node_page_dirty(struct page *page)
{
trace_f2fs_set_page_dirty(page, NODE);
SetPageUptodate(page);
if (!PageUptodate(page))
SetPageUptodate(page);
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
f2fs_set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
f2fs_trace_pid(page);
@@ -1778,7 +1772,7 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
}
}
static void build_free_nids(struct f2fs_sb_info *sbi)
void build_free_nids(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1787,14 +1781,14 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
nid_t nid = nm_i->next_scan_nid;
/* Enough entries */
if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK)
if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
return;
/* readahead nat pages to be scanned */
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
META_NAT, true);
down_read(&nm_i->nat_tree_lock);
percpu_down_read(&nm_i->nat_tree_lock);
while (1) {
struct page *page = get_current_nat_page(sbi, nid);
@@ -1826,7 +1820,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
remove_free_nid(nm_i, nid);
}
up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
percpu_up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
nm_i->ra_nid_pages, META_NAT, false);
@@ -1925,12 +1919,15 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
if (nm_i->fcnt <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
spin_lock(&nm_i->free_nid_list_lock);
list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
if (nr_shrink <= 0 || nm_i->fcnt <= NAT_ENTRY_PER_BLOCK)
if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
break;
if (i->state == NID_ALLOC)
continue;
@@ -1957,7 +1954,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
ri = F2FS_INODE(page);
if (!(ri->i_inline & F2FS_INLINE_XATTR)) {
clear_inode_flag(F2FS_I(inode), FI_INLINE_XATTR);
clear_inode_flag(inode, FI_INLINE_XATTR);
goto update_inode;
}
@@ -1999,13 +1996,11 @@ recover_xnid:
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
F2FS_I(inode)->i_xattr_nid = new_xnid;
f2fs_i_xnid_write(inode, new_xnid);
/* 3: update xattr blkaddr */
refresh_sit_entry(sbi, NEW_ADDR, blkaddr);
set_node_addr(sbi, &ni, blkaddr, false);
update_inode_page(inode);
}
int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
@@ -2027,7 +2022,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
/* Should not use this inode from free nid list */
remove_free_nid(NM_I(sbi), ino);
SetPageUptodate(ipage);
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
fill_node_footer(ipage, ino, ino, 0, true);
src = F2FS_INODE(page);
@@ -2213,7 +2209,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
if (!nm_i->dirty_nat_cnt)
return;
down_write(&nm_i->nat_tree_lock);
percpu_down_write(&nm_i->nat_tree_lock);
/*
* if there are no enough space in journal to store dirty nat
@@ -2236,7 +2232,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
list_for_each_entry_safe(set, tmp, &sets, set_list)
__flush_nat_entry_set(sbi, set);
up_write(&nm_i->nat_tree_lock);
percpu_up_write(&nm_i->nat_tree_lock);
f2fs_bug_on(sbi, nm_i->dirty_nat_cnt);
}
@@ -2272,7 +2268,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
mutex_init(&nm_i->build_lock);
spin_lock_init(&nm_i->free_nid_list_lock);
init_rwsem(&nm_i->nat_tree_lock);
if (percpu_init_rwsem(&nm_i->nat_tree_lock))
return -ENOMEM;
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
@@ -2329,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
spin_unlock(&nm_i->free_nid_list_lock);
/* destroy nat cache */
down_write(&nm_i->nat_tree_lock);
percpu_down_write(&nm_i->nat_tree_lock);
while ((found = __gang_lookup_nat_cache(nm_i,
nid, NATVEC_SIZE, natvec))) {
unsigned idx;
@@ -2354,8 +2351,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
kmem_cache_free(nat_entry_set_slab, setvec[idx]);
}
}
up_write(&nm_i->nat_tree_lock);
percpu_up_write(&nm_i->nat_tree_lock);
percpu_free_rwsem(&nm_i->nat_tree_lock);
kfree(nm_i->nat_bitmap);
sbi->nm_info = NULL;
kfree(nm_i);