Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs updates from Chris Mason:
 "The biggest change here is Josef's rework of the btrfs quota
  accounting, which improves the in-memory tracking of delayed extent
  operations.

  I had been working on Btrfs stack usage for a while, mostly because it
  had become impossible to do long stress runs with slab, lockdep and
  pagealloc debugging turned on without blowing the stack.  Even though
  you upgraded us to a nice king sized stack, I kept most of the
  patches.

  We also have some very hard to find corruption fixes, an awesome sysfs
  use after free, and the usual assortment of optimizations, cleanups
  and other fixes"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (80 commits)
  Btrfs: convert smp_mb__{before,after}_clear_bit
  Btrfs: fix scrub_print_warning to handle skinny metadata extents
  Btrfs: make fsync work after cloning into a file
  Btrfs: use right type to get real comparison
  Btrfs: don't check nodes for extent items
  Btrfs: don't release invalid page in btrfs_page_exists_in_range()
  Btrfs: make sure we retry if page is a retriable exception
  Btrfs: make sure we retry if we couldn't get the page
  btrfs: replace EINVAL with EOPNOTSUPP for dev_replace raid56
  trivial: fs/btrfs/ioctl.c: fix typo s/substract/subtract/
  Btrfs: fix leaf corruption after __btrfs_drop_extents
  Btrfs: ensure btrfs_prev_leaf doesn't miss 1 item
  Btrfs: fix clone to deal with holes when NO_HOLES feature is enabled
  btrfs: free delayed node outside of root->inode_lock
  btrfs: replace EINVAL with ERANGE for resize when ULLONG_MAX
  Btrfs: fix transaction leak during fsync call
  btrfs: Avoid trucating page or punching hole in a already existed hole.
  Btrfs: update commit root on snapshot creation after orphan cleanup
  Btrfs: ioctl, don't re-lock extent range when not necessary
  Btrfs: avoid visiting all extent items when cloning a range
  ...
This commit is contained in:
Linus Torvalds
2014-06-11 09:22:21 -07:00
47 changed files with 3719 additions and 1324 deletions
+1 -1
View File
@@ -16,4 +16,4 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
tests/extent-io-tests.o tests/inode-tests.o
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o
-7
View File
@@ -79,13 +79,6 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
const char *name;
char *value = NULL;
if (acl) {
ret = posix_acl_valid(acl);
if (ret < 0)
return ret;
ret = 0;
}
switch (type) {
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
+27 -14
View File
@@ -900,7 +900,11 @@ again:
goto out;
BUG_ON(ret == 0);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (trans && likely(trans->type != __TRANS_DUMMY)) {
#else
if (trans) {
#endif
/*
* look if there are updates for this ref queued and lock the
* head
@@ -984,11 +988,12 @@ again:
goto out;
}
if (ref->count && ref->parent) {
if (extent_item_pos && !ref->inode_list) {
if (extent_item_pos && !ref->inode_list &&
ref->level == 0) {
u32 bsz;
struct extent_buffer *eb;
bsz = btrfs_level_size(fs_info->extent_root,
info_level);
ref->level);
eb = read_tree_block(fs_info->extent_root,
ref->parent, bsz, 0);
if (!eb || !extent_buffer_uptodate(eb)) {
@@ -1404,9 +1409,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
* returns <0 on error
*/
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
struct btrfs_extent_item *ei, u32 item_size,
struct btrfs_extent_inline_ref **out_eiref,
int *out_type)
struct btrfs_key *key,
struct btrfs_extent_item *ei, u32 item_size,
struct btrfs_extent_inline_ref **out_eiref,
int *out_type)
{
unsigned long end;
u64 flags;
@@ -1416,19 +1422,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
/* first call */
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
info = (struct btrfs_tree_block_info *)(ei + 1);
*out_eiref =
(struct btrfs_extent_inline_ref *)(info + 1);
if (key->type == BTRFS_METADATA_ITEM_KEY) {
/* a skinny metadata extent */
*out_eiref =
(struct btrfs_extent_inline_ref *)(ei + 1);
} else {
WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
info = (struct btrfs_tree_block_info *)(ei + 1);
*out_eiref =
(struct btrfs_extent_inline_ref *)(info + 1);
}
} else {
*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
*ptr = (unsigned long)*out_eiref;
if ((void *)*ptr >= (void *)ei + item_size)
if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
return -ENOENT;
}
end = (unsigned long)ei + item_size;
*out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
*out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1447,8 +1460,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
* <0 on error.
*/
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
struct btrfs_extent_item *ei, u32 item_size,
u64 *out_root, u8 *out_level)
struct btrfs_key *key, struct btrfs_extent_item *ei,
u32 item_size, u64 *out_root, u8 *out_level)
{
int ret;
int type;
@@ -1459,8 +1472,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 1;
while (1) {
ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
&eiref, &type);
ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
&eiref, &type);
if (ret < 0)
return ret;
+4 -4
View File
@@ -40,8 +40,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
struct btrfs_extent_item *ei, u32 item_size,
u64 *out_root, u8 *out_level);
struct btrfs_key *key, struct btrfs_extent_item *ei,
u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
@@ -55,8 +55,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
struct extent_buffer *eb_in, u64 parent,
+2
View File
@@ -284,4 +284,6 @@ static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
&BTRFS_I(inode)->runtime_flags);
}
bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end);
#endif
+4 -1
View File
@@ -1093,6 +1093,7 @@ leaf_item_out_of_bounce_error:
next_stack =
btrfsic_stack_frame_alloc();
if (NULL == next_stack) {
sf->error = -1;
btrfsic_release_block_ctx(
&sf->
next_block_ctx);
@@ -1190,8 +1191,10 @@ continue_with_current_node_stack_frame:
sf->next_block_ctx.datav[0];
next_stack = btrfsic_stack_frame_alloc();
if (NULL == next_stack)
if (NULL == next_stack) {
sf->error = -1;
goto one_stack_frame_backwards;
}
next_stack->i = -1;
next_stack->block = sf->next_block;
+3 -3
View File
@@ -887,7 +887,7 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
workspace = find_workspace(type);
if (IS_ERR(workspace))
return -1;
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
start, len, pages,
@@ -923,7 +923,7 @@ static int btrfs_decompress_biovec(int type, struct page **pages_in,
workspace = find_workspace(type);
if (IS_ERR(workspace))
return -ENOMEM;
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress_biovec(workspace, pages_in,
disk_start,
@@ -945,7 +945,7 @@ int btrfs_decompress(int type, unsigned char *data_in, struct page *dest_page,
workspace = find_workspace(type);
if (IS_ERR(workspace))
return -ENOMEM;
return PTR_ERR(workspace);
ret = btrfs_compress_op[type-1]->decompress(workspace, data_in,
dest_page, start_byte,
+52 -52
View File
@@ -224,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
static void add_root_to_dirty_list(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
if (root->track_dirty && list_empty(&root->dirty_list)) {
if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
@@ -246,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
WARN_ON(root->ref_cows && trans->transid !=
root->fs_info->running_transaction->transid);
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
trans->transid != root->fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
if (level == 0)
@@ -354,43 +356,13 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
* Increment the upper half of tree_mod_seq, set lower half zero.
*
* Must be called with fs_info->tree_mod_seq_lock held.
* Pull a new tree mod seq number for our operation.
*/
static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
{
u64 seq = atomic64_read(&fs_info->tree_mod_seq);
seq &= 0xffffffff00000000ull;
seq += 1ull << 32;
atomic64_set(&fs_info->tree_mod_seq, seq);
return seq;
}
/*
* Increment the lower half of tree_mod_seq.
*
* Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
* are generated should not technically require a spin lock here. (Rationale:
* incrementing the minor while incrementing the major seq number is between its
* atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
* just returns a unique sequence number as usual.) We have decided to leave
* that requirement in here and rethink it once we notice it really imposes a
* problem on some workload.
*/
static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
/*
* return the last minor in the previous major tree_mod_seq number
*/
u64 btrfs_tree_mod_seq_prev(u64 seq)
{
return (seq & 0xffffffff00000000ull) - 1ull;
}
/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
@@ -402,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
u64 seq;
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
return seq;
return elem->seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -487,9 +456,7 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
BUG_ON(!tm);
spin_lock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -997,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
*/
if (root->ref_cows &&
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (root->ref_cows &&
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
return 1;
#endif
@@ -1146,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
WARN_ON(root->ref_cows && trans->transid !=
root->fs_info->running_transaction->transid);
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
trans->transid != root->fs_info->running_transaction->transid);
WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -1193,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
if (root->ref_cows) {
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
return ret;
@@ -1538,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
return 0;
#endif
/* ensure we can see the force_cow */
smp_rmb();
@@ -1556,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
!root->force_cow)
!test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
}
@@ -5125,7 +5097,17 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return ret;
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
if (ret < 0)
/*
* We might have had an item with the previous key in the tree right
* before we released our path. And after we released our path, that
* item might have been pushed to the first slot (0) of the leaf we
* were holding due to a tree balance. Alternatively, an item with the
* previous key can exist as the only element of a leaf (big fat item).
* Therefore account for these 2 cases, so that our callers (like
* btrfs_previous_item) don't miss an existing item with a key matching
* the previous key we computed above.
*/
if (ret <= 0)
return 0;
return 1;
}
@@ -5736,6 +5718,24 @@ again:
ret = 0;
goto done;
}
/*
* So the above check misses one case:
* - after releasing the path above, someone has removed the item that
* used to be at the very end of the block, and balance between leafs
* gets another one with bigger key.offset to replace it.
*
* This one should be returned as well, or we can get leaf corruption
* later(esp. in __btrfs_drop_extents()).
*
* And a bit more explanation about this check,
* with ret > 0, the key isn't found, the path points to the slot
* where it should be inserted, so the path->slots[0] item must be the
* bigger one.
*/
if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
ret = 0;
goto done;
}
while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) {
+73 -70
View File
@@ -33,6 +33,7 @@
#include <asm/kmap_types.h>
#include <linux/pagemap.h>
#include <linux/btrfs.h>
#include <linux/workqueue.h>
#include "extent_io.h"
#include "extent_map.h"
#include "async-thread.h"
@@ -756,6 +757,12 @@ struct btrfs_dir_item {
#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
/*
* Internal in-memory flag that a subvolume has been marked for deletion but
* still visible as a directory
*/
#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
struct btrfs_root_item {
struct btrfs_inode_item inode;
__le64 generation;
@@ -840,7 +847,10 @@ struct btrfs_disk_balance_args {
/* BTRFS_BALANCE_ARGS_* */
__le64 flags;
__le64 unused[8];
/* BTRFS_BALANCE_ARGS_LIMIT value */
__le64 limit;
__le64 unused[7];
} __attribute__ ((__packed__));
/*
@@ -1113,6 +1123,12 @@ struct btrfs_qgroup_limit_item {
__le64 rsv_excl;
} __attribute__ ((__packed__));
/* For raid type sysfs entries */
struct raid_kobject {
int raid_type;
struct kobject kobj;
};
struct btrfs_space_info {
spinlock_t lock;
@@ -1163,7 +1179,7 @@ struct btrfs_space_info {
wait_queue_head_t wait;
struct kobject kobj;
struct kobject block_group_kobjs[BTRFS_NR_RAID_TYPES];
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
};
#define BTRFS_BLOCK_RSV_GLOBAL 1
@@ -1313,6 +1329,8 @@ struct btrfs_stripe_hash_table {
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
void btrfs_init_async_reclaim_work(struct work_struct *work);
/* fs_info */
struct reloc_control;
struct btrfs_device;
@@ -1534,6 +1552,9 @@ struct btrfs_fs_info {
*/
struct btrfs_workqueue *fixup_workers;
struct btrfs_workqueue *delayed_workers;
/* the extent workers do delayed refs on the extent allocation tree */
struct btrfs_workqueue *extent_workers;
struct task_struct *transaction_kthread;
struct task_struct *cleaner_kthread;
int thread_pool_size;
@@ -1636,7 +1657,10 @@ struct btrfs_fs_info {
/* holds configuration and tracking. Protected by qgroup_lock */
struct rb_root qgroup_tree;
struct rb_root qgroup_op_tree;
spinlock_t qgroup_lock;
spinlock_t qgroup_op_lock;
atomic_t qgroup_op_seq;
/*
* used to avoid frequently calling ulist_alloc()/ulist_free()
@@ -1688,6 +1712,9 @@ struct btrfs_fs_info {
struct semaphore uuid_tree_rescan_sem;
unsigned int update_uuid_tree_gen:1;
/* Used to reclaim the metadata space in the background. */
struct work_struct async_reclaim_work;
};
struct btrfs_subvolume_writers {
@@ -1695,6 +1722,26 @@ struct btrfs_subvolume_writers {
wait_queue_head_t wait;
};
/*
* The state of btrfs root
*/
/*
* btrfs_record_root_in_trans is a multi-step process,
* and it can race with the balancing code. But the
* race is very small, and only the first time the root
* is added to each transaction. So IN_TRANS_SETUP
* is used to tell us when more checks are required
*/
#define BTRFS_ROOT_IN_TRANS_SETUP 0
#define BTRFS_ROOT_REF_COWS 1
#define BTRFS_ROOT_TRACK_DIRTY 2
#define BTRFS_ROOT_IN_RADIX 3
#define BTRFS_ROOT_DUMMY_ROOT 4
#define BTRFS_ROOT_ORPHAN_ITEM_INSERTED 5
#define BTRFS_ROOT_DEFRAG_RUNNING 6
#define BTRFS_ROOT_FORCE_COW 7
#define BTRFS_ROOT_MULTI_LOG_TASKS 8
/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
@@ -1706,6 +1753,7 @@ struct btrfs_root {
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;
unsigned long state;
struct btrfs_root_item root_item;
struct btrfs_key root_key;
struct btrfs_fs_info *fs_info;
@@ -1740,7 +1788,6 @@ struct btrfs_root {
/* Just be updated when the commit succeeds. */
int last_log_commit;
pid_t log_start_pid;
bool log_multiple_pids;
u64 objectid;
u64 last_trans;
@@ -1760,23 +1807,13 @@ struct btrfs_root {
u64 highest_objectid;
/* btrfs_record_root_in_trans is a multi-step process,
* and it can race with the balancing code. But the
* race is very small, and only the first time the root
* is added to each transaction. So in_trans_setup
* is used to tell us when more checks are required
*/
unsigned long in_trans_setup;
int ref_cows;
int track_dirty;
int in_radix;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int dummy_root;
u64 alloc_bytenr;
#endif
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
int defrag_running;
char *name;
/* the dirty list is only used by non-reference counted roots */
@@ -1790,7 +1827,6 @@ struct btrfs_root {
spinlock_t orphan_lock;
atomic_t orphan_inodes;
struct btrfs_block_rsv *orphan_block_rsv;
int orphan_item_inserted;
int orphan_cleanup_state;
spinlock_t inode_lock;
@@ -1808,8 +1844,6 @@ struct btrfs_root {
*/
dev_t anon_dev;
int force_cow;
spinlock_t root_item_lock;
atomic_t refs;
@@ -2788,6 +2822,11 @@ static inline bool btrfs_root_readonly(struct btrfs_root *root)
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
static inline bool btrfs_root_dead(struct btrfs_root *root)
{
return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0;
}
/* struct btrfs_root_backup */
BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
tree_root, 64);
@@ -2897,6 +2936,7 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
cpu->vend = le64_to_cpu(disk->vend);
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
cpu->limit = le64_to_cpu(disk->limit);
}
static inline void
@@ -2914,6 +2954,7 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
disk->vend = cpu_to_le64(cpu->vend);
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
disk->limit = cpu_to_le64(cpu->limit);
}
/* struct btrfs_super_block */
@@ -3236,6 +3277,8 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans,
void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_root *root, unsigned long count);
int btrfs_async_run_delayed_refs(struct btrfs_root *root,
unsigned long count, int wait);
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr,
@@ -3275,9 +3318,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
u64 min_alloc_size, u64 empty_size, u64 hint_byte,
struct btrfs_key *ins, int is_data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref, int for_cow);
struct extent_buffer *buf, int full_backref, int no_quota);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 flags,
@@ -3285,7 +3328,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
int btrfs_free_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
u64 owner, u64 offset, int for_cow);
u64 owner, u64 offset, int no_quota);
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
@@ -3297,7 +3340,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u64 num_bytes, u64 parent,
u64 root_objectid, u64 owner, u64 offset, int for_cow);
u64 root_objectid, u64 owner, u64 offset, int no_quota);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
@@ -3385,7 +3428,6 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */
@@ -3561,7 +3603,6 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem);
u64 btrfs_tree_mod_seq_prev(u64 seq);
int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq);
/* root-item.c */
@@ -3708,6 +3749,12 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
void btrfs_extent_item_to_extent_map(struct inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const bool new_inline,
struct extent_map *em);
/* inode.c */
struct btrfs_delalloc_work {
struct inode *inode;
@@ -4069,52 +4116,6 @@ void btrfs_reada_detach(void *handle);
int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
/* qgroup.c */
struct qgroup_update {
struct list_head list;
struct btrfs_delayed_ref_node *node;
struct btrfs_delayed_extent_op *extent_op;
};
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
char *name);
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid);
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 qgroupid,
struct btrfs_qgroup_limit *limit);
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
struct btrfs_delayed_extent_op;
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op);
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
struct btrfs_qgroup_inherit *inherit);
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes);
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes);
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans);
static inline int is_fstree(u64 rootid)
{
if (rootid == BTRFS_FS_TREE_OBJECTID ||
@@ -4131,6 +4132,8 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
/* Sanity test specific functions */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
void btrfs_test_destroy_inode(struct inode *inode);
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
u64 rfer, u64 excl);
#endif
#endif
+5 -2
View File
@@ -149,8 +149,8 @@ again:
spin_lock(&root->inode_lock);
ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
if (ret == -EEXIST) {
kmem_cache_free(delayed_node_cache, node);
spin_unlock(&root->inode_lock);
kmem_cache_free(delayed_node_cache, node);
radix_tree_preload_end();
goto again;
}
@@ -267,14 +267,17 @@ static void __btrfs_release_delayed_node(
mutex_unlock(&delayed_node->mutex);
if (atomic_dec_and_test(&delayed_node->refs)) {
bool free = false;
struct btrfs_root *root = delayed_node->root;
spin_lock(&root->inode_lock);
if (atomic_read(&delayed_node->refs) == 0) {
radix_tree_delete(&root->delayed_nodes_tree,
delayed_node->inode_id);
kmem_cache_free(delayed_node_cache, delayed_node);
free = true;
}
spin_unlock(&root->inode_lock);
if (free)
kmem_cache_free(delayed_node_cache, delayed_node);
}
}
+23 -16
View File
@@ -106,6 +106,10 @@ static int comp_entry(struct btrfs_delayed_ref_node *ref2,
return -1;
if (ref1->type > ref2->type)
return 1;
if (ref1->no_quota > ref2->no_quota)
return 1;
if (ref1->no_quota < ref2->no_quota)
return -1;
/* merging of sequenced refs is not allowed */
if (compare_seq) {
if (ref1->seq < ref2->seq)
@@ -635,7 +639,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, int level,
int action, int for_cow)
int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_tree_ref *full_ref;
@@ -645,6 +649,8 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
if (action == BTRFS_ADD_DELAYED_EXTENT)
action = BTRFS_ADD_DELAYED_REF;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
delayed_refs = &trans->transaction->delayed_refs;
/* first set the basic ref node struct up */
@@ -655,9 +661,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -697,7 +701,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_delayed_ref_node *ref, u64 bytenr,
u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
u64 offset, int action, int for_cow)
u64 offset, int action, int no_quota)
{
struct btrfs_delayed_ref_node *existing;
struct btrfs_delayed_data_ref *full_ref;
@@ -709,6 +713,9 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
delayed_refs = &trans->transaction->delayed_refs;
if (is_fstree(ref_root))
seq = atomic64_read(&fs_info->tree_mod_seq);
/* first set the basic ref node struct up */
atomic_set(&ref->refs, 1);
ref->bytenr = bytenr;
@@ -717,9 +724,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->no_quota = no_quota;
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -762,12 +767,15 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
int no_quota)
{
struct btrfs_delayed_tree_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
if (!ref)
@@ -793,10 +801,8 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
no_quota);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
@@ -810,12 +816,15 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow)
int no_quota)
{
struct btrfs_delayed_data_ref *ref;
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
if (!is_fstree(ref_root) || !fs_info->quota_enabled)
no_quota = 0;
BUG_ON(extent_op && !extent_op->is_data);
ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
if (!ref)
@@ -841,10 +850,8 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
action, no_quota);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
+3 -21
View File
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
unsigned int no_quota:1;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
@@ -196,14 +197,14 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 num_bytes, u64 parent,
u64 ref_root, int level, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int no_quota);
int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 parent, u64 ref_root,
u64 owner, u64 offset, int action,
struct btrfs_delayed_extent_op *extent_op,
int for_cow);
int no_quota);
int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
@@ -230,25 +231,6 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
if (for_cow)
return 0;
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
* test for the proper type to use.
+1 -1
View File
@@ -313,7 +313,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
if (btrfs_fs_incompat(fs_info, RAID56)) {
btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6");
return -EINVAL;
return -EOPNOTSUPP;
}
switch (args->start.cont_reading_from_srcdev_mode) {
+84 -31
View File
@@ -49,6 +49,7 @@
#include "dev-replace.h"
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -1109,6 +1110,11 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
return alloc_test_extent_buffer(root->fs_info, bytenr,
blocksize);
#endif
return alloc_extent_buffer(root->fs_info, bytenr, blocksize);
}
@@ -1201,10 +1207,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->nodesize = nodesize;
root->leafsize = leafsize;
root->stripesize = stripesize;
root->ref_cows = 0;
root->track_dirty = 0;
root->in_radix = 0;
root->orphan_item_inserted = 0;
root->state = 0;
root->orphan_cleanup_state = 0;
root->objectid = objectid;
@@ -1265,7 +1268,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
else
root->defrag_trans_start = 0;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
@@ -1290,7 +1292,8 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(4096, 4096, 4096, 4096, root, NULL, 1);
root->dummy_root = 1;
set_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state);
root->alloc_bytenr = 0;
return root;
}
@@ -1341,8 +1344,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
root->root_item.flags = 0;
root->root_item.byte_limit = 0;
@@ -1371,6 +1373,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
fail:
if (leaf) {
btrfs_tree_unlock(leaf);
free_extent_buffer(root->commit_root);
free_extent_buffer(leaf);
}
kfree(root);
@@ -1396,13 +1399,15 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
* DON'T set REF_COWS for log trees
*
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
*/
root->ref_cows = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
BTRFS_TREE_LOG_OBJECTID, NULL,
@@ -1536,7 +1541,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
return root;
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
set_bit(BTRFS_ROOT_REF_COWS, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1606,7 +1611,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
(unsigned long)root->root_key.objectid,
root);
if (ret == 0)
root->in_radix = 1;
set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
@@ -1662,7 +1667,7 @@ again:
if (ret < 0)
goto fail;
if (ret == 0)
root->orphan_item_inserted = 1;
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
@@ -2064,6 +2069,7 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
btrfs_destroy_workqueue(fs_info->extent_workers);
}
static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2090,7 +2096,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_root_extent_buffers(info->chunk_root);
}
static void del_fs_roots(struct btrfs_fs_info *fs_info)
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
int ret;
struct btrfs_root *gang[8];
@@ -2101,7 +2107,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info)
struct btrfs_root, root_list);
list_del(&gang[0]->root_list);
if (gang[0]->in_radix) {
if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state)) {
btrfs_drop_and_free_fs_root(fs_info, gang[0]);
} else {
free_extent_buffer(gang[0]->node);
@@ -2221,6 +2227,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
spin_lock_init(&fs_info->super_lock);
spin_lock_init(&fs_info->qgroup_op_lock);
spin_lock_init(&fs_info->buffer_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
@@ -2246,6 +2253,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
@@ -2291,6 +2299,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->balance_cancel_req, 0);
fs_info->balance_ctl = NULL;
init_waitqueue_head(&fs_info->balance_wait_q);
btrfs_init_async_reclaim_work(&fs_info->async_reclaim_work);
sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
@@ -2354,6 +2363,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->qgroup_lock);
mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
fs_info->qgroup_op_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
@@ -2577,6 +2587,10 @@ int open_ctree(struct super_block *sb,
btrfs_alloc_workqueue("readahead", flags, max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
fs_info->extent_workers =
btrfs_alloc_workqueue("extent-refs", flags,
min_t(u64, fs_devices->num_devices,
max_active), 8);
if (!(fs_info->workers && fs_info->delalloc_workers &&
fs_info->submit_workers && fs_info->flush_workers &&
@@ -2586,6 +2600,7 @@ int open_ctree(struct super_block *sb,
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
fs_info->fixup_workers && fs_info->extent_workers &&
fs_info->qgroup_rescan_workers)) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2693,7 +2708,7 @@ retry_root_backup:
ret = PTR_ERR(extent_root);
goto recovery_tree_root;
}
extent_root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &extent_root->state);
fs_info->extent_root = extent_root;
location.objectid = BTRFS_DEV_TREE_OBJECTID;
@@ -2702,7 +2717,7 @@ retry_root_backup:
ret = PTR_ERR(dev_root);
goto recovery_tree_root;
}
dev_root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &dev_root->state);
fs_info->dev_root = dev_root;
btrfs_init_devices_late(fs_info);
@@ -2712,13 +2727,13 @@ retry_root_backup:
ret = PTR_ERR(csum_root);
goto recovery_tree_root;
}
csum_root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &csum_root->state);
fs_info->csum_root = csum_root;
location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
quota_root = btrfs_read_tree_root(tree_root, &location);
if (!IS_ERR(quota_root)) {
quota_root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &quota_root->state);
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
fs_info->quota_root = quota_root;
@@ -2733,7 +2748,7 @@ retry_root_backup:
create_uuid_tree = true;
check_uuid_tree = false;
} else {
uuid_root->track_dirty = 1;
set_bit(BTRFS_ROOT_TRACK_DIRTY, &uuid_root->state);
fs_info->uuid_root = uuid_root;
create_uuid_tree = false;
check_uuid_tree =
@@ -2966,7 +2981,7 @@ fail_qgroup:
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
btrfs_cleanup_transaction(fs_info->tree_root);
del_fs_roots(fs_info);
btrfs_free_fs_roots(fs_info);
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
@@ -3501,8 +3516,10 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_free_log(NULL, root);
__btrfs_remove_free_space_cache(root->free_ino_pinned);
__btrfs_remove_free_space_cache(root->free_ino_ctl);
if (root->free_ino_pinned)
__btrfs_remove_free_space_cache(root->free_ino_pinned);
if (root->free_ino_ctl)
__btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
}
@@ -3533,28 +3550,51 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
u64 root_objectid = 0;
struct btrfs_root *gang[8];
int i;
int ret;
int i = 0;
int err = 0;
unsigned int ret = 0;
int index;
while (1) {
index = srcu_read_lock(&fs_info->subvol_srcu);
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
(void **)gang, root_objectid,
ARRAY_SIZE(gang));
if (!ret)
if (!ret) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
break;
}
root_objectid = gang[ret - 1]->root_key.objectid + 1;
for (i = 0; i < ret; i++) {
int err;
for (i = 0; i < ret; i++) {
/* Avoid to grab roots in dead_roots */
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
gang[i] = NULL;
continue;
}
/* grab all the search result for later use */
gang[i] = btrfs_grab_fs_root(gang[i]);
}
srcu_read_unlock(&fs_info->subvol_srcu, index);
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
root_objectid = gang[i]->root_key.objectid;
err = btrfs_orphan_cleanup(gang[i]);
if (err)
return err;
break;
btrfs_put_fs_root(gang[i]);
}
root_objectid++;
}
return 0;
/* release the uncleaned roots due to error */
for (; i < ret; i++) {
if (gang[i])
btrfs_put_fs_root(gang[i]);
}
return err;
}
int btrfs_commit_super(struct btrfs_root *root)
@@ -3603,6 +3643,8 @@ int close_ctree(struct btrfs_root *root)
/* clear out the rbtree of defraggable inodes */
btrfs_cleanup_defrag_inodes(fs_info);
cancel_work_sync(&fs_info->async_reclaim_work);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
ret = btrfs_commit_super(root);
if (ret)
@@ -3627,12 +3669,17 @@ int close_ctree(struct btrfs_root *root)
btrfs_sysfs_remove_one(fs_info);
del_fs_roots(fs_info);
btrfs_free_fs_roots(fs_info);
btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
/*
* we must make sure there is not any read request to
* submit after we stopping all workers.
*/
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
free_root_pointers(fs_info, 1);
@@ -3709,6 +3756,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
__percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
buf->len,
root->fs_info->dirty_metadata_batch);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) {
btrfs_print_leaf(root, buf);
ASSERT(0);
}
#endif
}
static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
+1
View File
@@ -68,6 +68,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root);
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_key *key,
+371 -99
View File
File diff suppressed because it is too large Load Diff
+260 -147
View File
File diff suppressed because it is too large Load Diff
+2
View File
@@ -350,5 +350,7 @@ noinline u64 find_lock_delalloc_range(struct inode *inode,
struct extent_io_tree *tree,
struct page *locked_page, u64 *start,
u64 *end, u64 max_bytes);
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
u64 start, unsigned long len);
#endif
#endif
+78 -2
View File
@@ -281,10 +281,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
found:
csum += count * csum_size;
nblocks -= count;
bio_index += count;
while (count--) {
disk_bytenr += bvec->bv_len;
offset += bvec->bv_len;
bio_index++;
bvec++;
}
}
@@ -750,7 +750,7 @@ again:
int slot = path->slots[0] + 1;
/* we didn't find a csum item, insert one */
nritems = btrfs_header_nritems(path->nodes[0]);
if (path->slots[0] >= nritems - 1) {
if (!nritems || (path->slots[0] >= nritems - 1)) {
ret = btrfs_next_leaf(root, path);
if (ret == 1)
found_next = 1;
@@ -885,3 +885,79 @@ out:
fail_unlock:
goto out;
}
void btrfs_extent_item_to_extent_map(struct inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
const bool new_inline,
struct extent_map *em)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_key key;
u64 extent_start, extent_end;
u64 bytenr;
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
em->bdev = root->fs_info->fs_devices->latest_bdev;
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
extent_end = extent_start +
btrfs_file_extent_num_bytes(leaf, fi);
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
size_t size;
size = btrfs_file_extent_inline_len(leaf, slot, fi);
extent_end = ALIGN(extent_start + size, root->sectorsize);
}
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
em->len = extent_end - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
return;
}
if (compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
em->block_start = bytenr;
em->block_len = em->orig_block_len;
} else {
bytenr += btrfs_file_extent_offset(leaf, fi);
em->block_start = bytenr;
em->block_len = em->len;
if (type == BTRFS_FILE_EXTENT_PREALLOC)
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
em->block_start = EXTENT_MAP_INLINE;
em->start = extent_start;
em->len = extent_end - extent_start;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
*/
em->orig_start = EXTENT_MAP_HOLE;
em->block_len = (u64)-1;
if (!new_inline && compress_type != BTRFS_COMPRESS_NONE) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
}
} else {
btrfs_err(root->fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, root %llu",
type, btrfs_ino(inode), extent_start,
root->root_key.objectid);
}
}
+129 -23
View File
@@ -40,6 +40,7 @@
#include "tree-log.h"
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
@@ -715,7 +716,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
int recow;
int ret;
int modify_tree = -1;
int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
int update_refs;
int found = 0;
int leafs_visited = 0;
@@ -725,6 +726,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent)
modify_tree = 0;
update_refs = (test_bit(BTRFS_ROOT_REF_COWS, &root->state) ||
root == root->fs_info->tree_root);
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -781,6 +784,18 @@ next_slot:
extent_end = search_start;
}
/*
* Don't skip extent items representing 0 byte lengths. They
* used to be created (bug) if while punching holes we hit
* -ENOSPC condition. So if we find one here, just ensure we
* delete it, otherwise we would insert a new file extent item
* with the same key (offset) as that 0 bytes length file
* extent item in the call to setup_items_for_insert() later
* in this function.
*/
if (extent_end == key.offset && extent_end >= search_start)
goto delete_extent_item;
if (extent_end <= search_start) {
path->slots[0]++;
goto next_slot;
@@ -836,7 +851,7 @@ next_slot:
disk_bytenr, num_bytes, 0,
root->root_key.objectid,
new_key.objectid,
start - extent_offset, 0);
start - extent_offset, 1);
BUG_ON(ret); /* -ENOMEM */
}
key.offset = start;
@@ -894,6 +909,7 @@ next_slot:
* | ------ extent ------ |
*/
if (start <= key.offset && end >= extent_end) {
delete_extent_item:
if (del_nr == 0) {
del_slot = path->slots[0];
del_nr = 1;
@@ -1192,7 +1208,7 @@ again:
ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
root->root_key.objectid,
ino, orig_offset, 0);
ino, orig_offset, 1);
BUG_ON(ret); /* -ENOMEM */
if (split == start) {
@@ -2010,8 +2026,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
if (!full_sync) {
ret = btrfs_wait_ordered_range(inode, start,
end - start + 1);
if (ret)
if (ret) {
btrfs_end_transaction(trans, root);
goto out;
}
}
ret = btrfs_commit_transaction(trans, root);
} else {
@@ -2169,6 +2187,37 @@ out:
return 0;
}
/*
* Find a hole extent on given inode and change start/len to the end of hole
* extent.(hole/vacuum extent whose em->start <= start &&
* em->start + em->len > start)
* When a hole extent is found, return 1 and modify start/len.
*/
static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
{
struct extent_map *em;
int ret = 0;
em = btrfs_get_extent(inode, NULL, 0, *start, *len, 0);
if (IS_ERR_OR_NULL(em)) {
if (!em)
ret = -ENOMEM;
else
ret = PTR_ERR(em);
return ret;
}
/* Hole or vacuum extent(only exists in no-hole mode) */
if (em->block_start == EXTENT_MAP_HOLE) {
ret = 1;
*len = em->start + em->len > *start + *len ?
0 : *start + *len - em->start - em->len;
*start = em->start + em->len;
}
free_extent_map(em);
return ret;
}
static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2176,25 +2225,42 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
struct btrfs_path *path;
struct btrfs_block_rsv *rsv;
struct btrfs_trans_handle *trans;
u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
u64 lockend = round_down(offset + len,
BTRFS_I(inode)->root->sectorsize) - 1;
u64 cur_offset = lockstart;
u64 lockstart;
u64 lockend;
u64 tail_start;
u64 tail_len;
u64 orig_start = offset;
u64 cur_offset;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
u64 drop_end;
int ret = 0;
int err = 0;
int rsv_count;
bool same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
bool same_page;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
u64 ino_size;
ret = btrfs_wait_ordered_range(inode, offset, len);
if (ret)
return ret;
mutex_lock(&inode->i_mutex);
ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
goto out_only_mutex;
if (ret && !len) {
/* Already in a large hole */
ret = 0;
goto out_only_mutex;
}
lockstart = round_up(offset , BTRFS_I(inode)->root->sectorsize);
lockend = round_down(offset + len,
BTRFS_I(inode)->root->sectorsize) - 1;
same_page = ((offset >> PAGE_CACHE_SHIFT) ==
((offset + len - 1) >> PAGE_CACHE_SHIFT));
/*
* We needn't truncate any page which is beyond the end of the file
* because we are sure there is no data there.
@@ -2206,8 +2272,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (same_page && len < PAGE_CACHE_SIZE) {
if (offset < ino_size)
ret = btrfs_truncate_page(inode, offset, len, 0);
mutex_unlock(&inode->i_mutex);
return ret;
goto out_only_mutex;
}
/* zero back part of the first page */
@@ -2219,12 +2284,39 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
}
/* zero the front end of the last page */
if (offset + len < ino_size) {
ret = btrfs_truncate_page(inode, offset + len, 0, 1);
if (ret) {
mutex_unlock(&inode->i_mutex);
return ret;
/* Check the aligned pages after the first unaligned page,
* if offset != orig_start, which means the first unaligned page
* including serveral following pages are already in holes,
* the extra check can be skipped */
if (offset == orig_start) {
/* after truncate page, check hole again */
len = offset + len - lockstart;
offset = lockstart;
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
goto out_only_mutex;
if (ret && !len) {
ret = 0;
goto out_only_mutex;
}
lockstart = offset;
}
/* Check the tail unaligned part is in a hole */
tail_start = lockend + 1;
tail_len = offset + len - tail_start;
if (tail_len) {
ret = find_first_non_hole(inode, &tail_start, &tail_len);
if (unlikely(ret < 0))
goto out_only_mutex;
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
ret = btrfs_truncate_page(inode,
tail_start + tail_len, 0, 1);
if (ret)
goto out_only_mutex;
}
}
}
@@ -2250,9 +2342,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if ((!ordered ||
(ordered->file_offset + ordered->len <= lockstart ||
ordered->file_offset > lockend)) &&
!test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_UPTODATE, 0,
cached_state)) {
!btrfs_page_exists_in_range(inode, lockstart, lockend)) {
if (ordered)
btrfs_put_ordered_extent(ordered);
break;
@@ -2300,6 +2390,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
BUG_ON(ret);
trans->block_rsv = rsv;
cur_offset = lockstart;
len = lockend - cur_offset;
while (cur_offset < lockend) {
ret = __btrfs_drop_extents(trans, root, inode, path,
cur_offset, lockend + 1,
@@ -2340,6 +2432,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
rsv, min_size);
BUG_ON(ret); /* shouldn't happen */
trans->block_rsv = rsv;
ret = find_first_non_hole(inode, &cur_offset, &len);
if (unlikely(ret < 0))
break;
if (ret && !len) {
ret = 0;
break;
}
}
if (ret) {
@@ -2348,7 +2448,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
}
trans->block_rsv = &root->fs_info->trans_block_rsv;
if (cur_offset < ino_size) {
/*
* Don't insert file hole extent item if it's for a range beyond eof
* (because it's useless) or if it represents a 0 bytes range (when
* cur_offset == drop_end).
*/
if (cur_offset < ino_size && cur_offset < drop_end) {
ret = fill_holes(trans, inode, path, cur_offset, drop_end);
if (ret) {
err = ret;
@@ -2373,6 +2478,7 @@ out_free:
out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
mutex_unlock(&inode->i_mutex);
if (ret && !err)
err = ret;

Some files were not shown because too many files have changed in this diff Show More