Merge tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "User visible changes, features:

   - rebuilding of the free space tree at mount time is done in more
     transactions, fix potential hangs when the transaction thread is
     blocked due to large amount of block groups

   - more read IO balancing strategies (experimental config), add two
     new ways how to select a device for read if the profiles allow that
     (all RAID1*), the current default selects the device by pid which
     is good on average but less performant for single reader workloads

       - select preferred device for all reads (namely for testing)
       - round-robin, balance reads across devices relevant for the
         requested IO range

   - add encoded write ioctl support to io_uring (read was added in
     6.12), basis for writing send stream using that instead of
     syscalls, non-blocking mode is not yet implemented

   - support FS_IOC_READ_VERITY_METADATA, applications can use the
     metadata to do their own verification

   - pass inode's i_write_hint to bios, for parity with other
     filesystems, ioctls F_GET_RW_HINT/F_SET_RW_HINT

  Core:

   - in zoned mode: allow to directly reclaim a block group by simply
     resetting it, then it can be reused and another block group does
     not need to be allocated

   - super block validation now also does more comprehensive sys array
     validation, adding it to the points where superblock is validated
     (post-read, pre-write)

   - subpage mode fixes:
      - fix double accounting of blocks due to some races
      - improved or fixed error handling in a few cases (compression,
        delalloc)

   - raid stripe tree:
      - fix various cases with extent range splitting or deleting
      - implement hole punching to extent range
      - reduce number of stripe tree lookups during bio submission
      - more self-tests

   - updated self-tests (delayed refs)

   - error handling improvements

   - cleanups, refactoring
      - remove rest of backref caching infrastructure from relocation,
        not needed anymore
      - error message updates
      - remove unnecessary calls when extent buffer was marked dirty
      - unused parameter removal
      - code moved to new files

  Other code changes: add rb_find_add_cached() to the rb-tree API"

* tag 'for-6.14-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (127 commits)
  btrfs: selftests: add a selftest for deleting two out of three extents
  btrfs: selftests: add test for punching a hole into 3 RAID stripe-extents
  btrfs: selftests: add selftest for punching holes into the RAID stripe extents
  btrfs: selftests: test RAID stripe-tree deletion spanning two items
  btrfs: selftests: don't split RAID extents in half
  btrfs: selftests: check for correct return value of failed lookup
  btrfs: don't use btrfs_set_item_key_safe on RAID stripe-extents
  btrfs: implement hole punching for RAID stripe extents
  btrfs: fix deletion of a range spanning parts two RAID stripe extents
  btrfs: fix tail delete of RAID stripe-extents
  btrfs: fix front delete range calculation for RAID stripe extents
  btrfs: assert RAID stripe-extent length is always greater than 0
  btrfs: don't try to delete RAID stripe-extents if we don't need to
  btrfs: selftests: correct RAID stripe-tree feature flag setting
  btrfs: add io_uring interface for encoded writes
  btrfs: remove the unused locked_folio parameter from btrfs_cleanup_ordered_extents()
  btrfs: add extra error messages for delalloc range related errors
  btrfs: subpage: dump the involved bitmap when ASSERT() failed
  btrfs: subpage: fix the bitmap dump of the locked flags
  btrfs: do proper folio cleanup when run_delalloc_nocow() failed
  ...
This commit is contained in:
Linus Torvalds
2025-01-20 13:09:30 -08:00
63 changed files with 3751 additions and 1343 deletions

View File

@@ -44,4 +44,4 @@ btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
tests/free-space-tree-tests.o tests/extent-map-tests.o \
tests/raid-stripe-tree-tests.o
tests/raid-stripe-tree-tests.o tests/delayed-refs-tests.o

View File

@@ -18,7 +18,7 @@ enum {
};
#define NO_THRESHOLD (-1)
#define DFT_THRESHOLD (32)
#define DEFAULT_THRESHOLD (32)
struct btrfs_workqueue {
struct workqueue_struct *normal_wq;
@@ -94,9 +94,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
ret->limit_active = limit_active;
if (thresh == 0)
thresh = DFT_THRESHOLD;
thresh = DEFAULT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
if (thresh < DFT_THRESHOLD) {
if (thresh < DEFAULT_THRESHOLD) {
ret->current_active = limit_active;
ret->thresh = NO_THRESHOLD;
} else {

View File

@@ -250,6 +250,21 @@ static int prelim_ref_compare(const struct prelim_ref *ref1,
return 0;
}
static int prelim_ref_rb_add_cmp(const struct rb_node *new,
const struct rb_node *exist)
{
const struct prelim_ref *ref_new =
rb_entry(new, struct prelim_ref, rbnode);
const struct prelim_ref *ref_exist =
rb_entry(exist, struct prelim_ref, rbnode);
/*
* prelim_ref_compare() expects the first parameter as the existing one,
* different from the rb_find_add_cached() order.
*/
return prelim_ref_compare(ref_exist, ref_new);
}
static void update_share_count(struct share_check *sc, int oldcount,
int newcount, const struct prelim_ref *newref)
{
@@ -278,55 +293,39 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info,
struct share_check *sc)
{
struct rb_root_cached *root;
struct rb_node **p;
struct rb_node *parent = NULL;
struct prelim_ref *ref;
int result;
bool leftmost = true;
struct rb_node *exist;
root = &preftree->root;
p = &root->rb_root.rb_node;
exist = rb_find_add_cached(&newref->rbnode, root, prelim_ref_rb_add_cmp);
if (exist) {
struct prelim_ref *ref = rb_entry(exist, struct prelim_ref, rbnode);
/* Identical refs, merge them and free @newref */
struct extent_inode_elem *eie = ref->inode_list;
while (*p) {
parent = *p;
ref = rb_entry(parent, struct prelim_ref, rbnode);
result = prelim_ref_compare(ref, newref);
if (result < 0) {
p = &(*p)->rb_left;
} else if (result > 0) {
p = &(*p)->rb_right;
leftmost = false;
} else {
/* Identical refs, merge them and free @newref */
struct extent_inode_elem *eie = ref->inode_list;
while (eie && eie->next)
eie = eie->next;
while (eie && eie->next)
eie = eie->next;
if (!eie)
ref->inode_list = newref->inode_list;
else
eie->next = newref->inode_list;
trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
preftree->count);
/*
* A delayed ref can have newref->count < 0.
* The ref->count is updated to follow any
* BTRFS_[ADD|DROP]_DELAYED_REF actions.
*/
update_share_count(sc, ref->count,
ref->count + newref->count, newref);
ref->count += newref->count;
free_pref(newref);
return;
}
if (!eie)
ref->inode_list = newref->inode_list;
else
eie->next = newref->inode_list;
trace_btrfs_prelim_ref_merge(fs_info, ref, newref,
preftree->count);
/*
* A delayed ref can have newref->count < 0.
* The ref->count is updated to follow any
* BTRFS_[ADD|DROP]_DELAYED_REF actions.
*/
update_share_count(sc, ref->count,
ref->count + newref->count, newref);
ref->count += newref->count;
free_pref(newref);
return;
}
update_share_count(sc, 0, newref->count, newref);
preftree->count++;
trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count);
rb_link_node(&newref->rbnode, parent, p);
rb_insert_color_cached(&newref->rbnode, root, leftmost);
}
/*
@@ -3022,9 +3021,6 @@ void btrfs_backref_init_cache(struct btrfs_fs_info *fs_info,
cache->rb_root = RB_ROOT;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
INIT_LIST_HEAD(&cache->pending[i]);
INIT_LIST_HEAD(&cache->changed);
INIT_LIST_HEAD(&cache->detached);
INIT_LIST_HEAD(&cache->leaves);
INIT_LIST_HEAD(&cache->pending_edge);
INIT_LIST_HEAD(&cache->useless_node);
cache->fs_info = fs_info;
@@ -3132,29 +3128,17 @@ void btrfs_backref_drop_node(struct btrfs_backref_cache *tree,
void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
struct btrfs_backref_node *node)
{
struct btrfs_backref_node *upper;
struct btrfs_backref_edge *edge;
if (!node)
return;
BUG_ON(!node->lowest && !node->detached);
while (!list_empty(&node->upper)) {
edge = list_entry(node->upper.next, struct btrfs_backref_edge,
list[LOWER]);
upper = edge->node[UPPER];
list_del(&edge->list[LOWER]);
list_del(&edge->list[UPPER]);
btrfs_backref_free_edge(cache, edge);
/*
* Add the node to leaf node list if no other child block
* cached.
*/
if (list_empty(&upper->lower)) {
list_add_tail(&upper->lower, &cache->leaves);
upper->lowest = 1;
}
}
btrfs_backref_drop_node(cache, node);
@@ -3166,33 +3150,13 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache,
void btrfs_backref_release_cache(struct btrfs_backref_cache *cache)
{
struct btrfs_backref_node *node;
int i;
while (!list_empty(&cache->detached)) {
node = list_entry(cache->detached.next,
struct btrfs_backref_node, list);
while ((node = rb_entry_safe(rb_first(&cache->rb_root),
struct btrfs_backref_node, rb_node)))
btrfs_backref_cleanup_node(cache, node);
}
while (!list_empty(&cache->leaves)) {
node = list_entry(cache->leaves.next,
struct btrfs_backref_node, lower);
btrfs_backref_cleanup_node(cache, node);
}
for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
while (!list_empty(&cache->pending[i])) {
node = list_first_entry(&cache->pending[i],
struct btrfs_backref_node,
list);
btrfs_backref_cleanup_node(cache, node);
}
}
ASSERT(list_empty(&cache->pending_edge));
ASSERT(list_empty(&cache->useless_node));
ASSERT(list_empty(&cache->changed));
ASSERT(list_empty(&cache->detached));
ASSERT(RB_EMPTY_ROOT(&cache->rb_root));
ASSERT(!cache->nr_nodes);
ASSERT(!cache->nr_edges);
}
@@ -3316,8 +3280,12 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
root = btrfs_get_fs_root(fs_info, ref_key->offset, false);
if (IS_ERR(root))
return PTR_ERR(root);
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
cur->cowonly = 1;
/* We shouldn't be using backref cache for non-shareable roots. */
if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
btrfs_put_root(root);
return -EUCLEAN;
}
if (btrfs_root_level(&root->root_item) == cur->level) {
/* Tree root */
@@ -3403,8 +3371,15 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
goto out;
}
upper->owner = btrfs_header_owner(eb);
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
upper->cowonly = 1;
/* We shouldn't be using backref cache for non shareable roots. */
if (unlikely(!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))) {
btrfs_put_root(root);
btrfs_backref_free_edge(cache, edge);
btrfs_backref_free_node(cache, upper);
ret = -EUCLEAN;
goto out;
}
/*
* If we know the block isn't shared we can avoid
@@ -3595,15 +3570,9 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
ASSERT(start->checked);
/* Insert this node to cache if it's not COW-only */
if (!start->cowonly) {
rb_node = rb_simple_insert(&cache->rb_root, start->bytenr,
&start->rb_node);
if (rb_node)
btrfs_backref_panic(cache->fs_info, start->bytenr,
-EEXIST);
list_add_tail(&start->lower, &cache->leaves);
}
rb_node = rb_simple_insert(&cache->rb_root, start->bytenr, &start->rb_node);
if (rb_node)
btrfs_backref_panic(cache->fs_info, start->bytenr, -EEXIST);
/*
* Use breadth first search to iterate all related edges.
@@ -3642,11 +3611,6 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
* parents have already been linked.
*/
if (!RB_EMPTY_NODE(&upper->rb_node)) {
if (upper->lowest) {
list_del_init(&upper->lower);
upper->lowest = 0;
}
list_add_tail(&edge->list[UPPER], &upper->lower);
continue;
}
@@ -3657,23 +3621,13 @@ int btrfs_backref_finish_upper_links(struct btrfs_backref_cache *cache,
return -EUCLEAN;
}
/* Sanity check, COW-only node has non-COW-only parent */
if (start->cowonly != upper->cowonly) {
ASSERT(0);
rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
&upper->rb_node);
if (unlikely(rb_node)) {
btrfs_backref_panic(cache->fs_info, upper->bytenr, -EEXIST);
return -EUCLEAN;
}
/* Only cache non-COW-only (subvolume trees) tree blocks */
if (!upper->cowonly) {
rb_node = rb_simple_insert(&cache->rb_root, upper->bytenr,
&upper->rb_node);
if (rb_node) {
btrfs_backref_panic(cache->fs_info,
upper->bytenr, -EEXIST);
return -EUCLEAN;
}
}
list_add_tail(&edge->list[UPPER], &upper->lower);
/*

View File

@@ -318,6 +318,12 @@ struct btrfs_backref_node {
u64 bytenr;
}; /* Use rb_simple_node for search/insert */
/*
* This is a sanity check, whenever we COW a block we will update
* new_bytenr with it's current location, and we will check this in
* various places to validate that the cache makes sense, it shouldn't
* be used for anything else.
*/
u64 new_bytenr;
/* Objectid of tree block owner, can be not uptodate */
u64 owner;
@@ -335,10 +341,6 @@ struct btrfs_backref_node {
struct extent_buffer *eb;
/* Level of the tree block */
unsigned int level:8;
/* Is the block in a non-shareable tree */
unsigned int cowonly:1;
/* 1 if no child node is in the cache */
unsigned int lowest:1;
/* Is the extent buffer locked */
unsigned int locked:1;
/* Has the block been processed */
@@ -391,12 +393,6 @@ struct btrfs_backref_cache {
* level blocks may not reflect the new location
*/
struct list_head pending[BTRFS_MAX_LEVEL];
/* List of backref nodes with no child node */
struct list_head leaves;
/* List of blocks that have been COWed in current transaction */
struct list_head changed;
/* List of detached backref node. */
struct list_head detached;
u64 last_trans;

View File

@@ -453,6 +453,14 @@ static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
(unsigned long)dev->bdev->bd_dev, btrfs_dev_name(dev),
dev->devid, bio->bi_iter.bi_size);
/*
* Track reads if tracking is enabled; ignore I/O operations before the
* filesystem is fully initialized.
*/
if (dev->fs_devices->collect_fs_stats && bio_op(bio) == REQ_OP_READ && dev->fs_info)
percpu_counter_add(&dev->fs_info->stats_read_blocks,
bio->bi_iter.bi_size >> dev->fs_info->sectorsize_bits);
if (bio->bi_opf & REQ_BTRFS_CGROUP_PUNT)
blkcg_punt_bio_submit(bio);
else
@@ -725,8 +733,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
bio->bi_opf |= REQ_OP_ZONE_APPEND;
}
if (is_data_bbio(bbio) && bioc &&
btrfs_need_stripe_tree_update(bioc->fs_info, bioc->map_type)) {
if (is_data_bbio(bbio) && bioc && bioc->use_rst) {
/*
* No locking for the list update, as we only add to
* the list in the I/O submission path, and list

View File

@@ -173,43 +173,41 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
}
}
static int btrfs_bg_start_cmp(const struct rb_node *new,
const struct rb_node *exist)
{
const struct btrfs_block_group *new_bg =
rb_entry(new, struct btrfs_block_group, cache_node);
const struct btrfs_block_group *exist_bg =
rb_entry(exist, struct btrfs_block_group, cache_node);
if (new_bg->start < exist_bg->start)
return -1;
if (new_bg->start > exist_bg->start)
return 1;
return 0;
}
/*
* This adds the block group to the fs_info rb tree for the block group cache
*/
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_block_group *cache;
bool leftmost = true;
struct rb_node *exist;
int ret = 0;
ASSERT(block_group->length != 0);
write_lock(&info->block_group_cache_lock);
p = &info->block_group_cache_tree.rb_root.rb_node;
while (*p) {
parent = *p;
cache = rb_entry(parent, struct btrfs_block_group, cache_node);
if (block_group->start < cache->start) {
p = &(*p)->rb_left;
} else if (block_group->start > cache->start) {
p = &(*p)->rb_right;
leftmost = false;
} else {
write_unlock(&info->block_group_cache_lock);
return -EEXIST;
}
}
rb_link_node(&block_group->cache_node, parent, p);
rb_insert_color_cached(&block_group->cache_node,
&info->block_group_cache_tree, leftmost);
exist = rb_find_add_cached(&block_group->cache_node,
&info->block_group_cache_tree, btrfs_bg_start_cmp);
if (exist)
ret = -EEXIST;
write_unlock(&info->block_group_cache_lock);
return 0;
return ret;
}
/*
@@ -1223,7 +1221,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
btrfs_space_info_update_bytes_zone_unusable(block_group->space_info,
-block_group->zone_unusable);
block_group->space_info->disk_total -= block_group->length * factor;
@@ -1396,8 +1394,7 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes to readonly */
sinfo->bytes_readonly += cache->zone_unusable;
btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
-cache->zone_unusable);
btrfs_space_info_update_bytes_zone_unusable(sinfo, -cache->zone_unusable);
cache->zone_unusable = 0;
}
cache->ro++;
@@ -1645,8 +1642,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
btrfs_space_info_update_bytes_pinned(fs_info, space_info,
-block_group->pinned);
btrfs_space_info_update_bytes_pinned(space_info, -block_group->pinned);
space_info->bytes_readonly += block_group->pinned;
block_group->pinned = 0;
@@ -2672,7 +2668,6 @@ static int insert_dev_extent(struct btrfs_trans_handle *trans,
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
btrfs_set_dev_extent_length(leaf, extent, num_bytes);
btrfs_mark_buffer_dirty(trans, leaf);
out:
btrfs_free_path(path);
return ret;
@@ -3060,8 +3055,7 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
(cache->alloc_offset - cache->used - cache->pinned -
cache->reserved) +
(cache->length - cache->zone_capacity);
btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
cache->zone_unusable);
btrfs_space_info_update_bytes_zone_unusable(sinfo, cache->zone_unusable);
sinfo->bytes_readonly -= cache->zone_unusable;
}
num_bytes = cache->length - cache->reserved -
@@ -3123,7 +3117,6 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
cache->global_root_id);
btrfs_set_stack_block_group_flags(&bgi, cache->flags);
write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
btrfs_mark_buffer_dirty(trans, leaf);
fail:
btrfs_release_path(path);
/*
@@ -3699,7 +3692,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
old_val -= num_bytes;
cache->used = old_val;
cache->pinned += num_bytes;
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
btrfs_space_info_update_bytes_pinned(space_info, num_bytes);
space_info->bytes_used -= num_bytes;
space_info->disk_used -= num_bytes * factor;
if (READ_ONCE(space_info->periodic_reclaim))
@@ -3781,8 +3774,7 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
space_info->bytes_reserved += num_bytes;
trace_btrfs_space_reservation(cache->fs_info, "space_info",
space_info->flags, num_bytes, 1);
btrfs_space_info_update_bytes_may_use(cache->fs_info,
space_info, -ram_bytes);
btrfs_space_info_update_bytes_may_use(space_info, -ram_bytes);
if (delalloc)
cache->delalloc_bytes += num_bytes;

View File

@@ -150,9 +150,7 @@ static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
spin_unlock(&dest->lock);
}
if (num_bytes)
btrfs_space_info_free_bytes_may_use(fs_info,
space_info,
num_bytes);
btrfs_space_info_free_bytes_may_use(space_info, num_bytes);
}
if (qgroup_to_release_ret)
*qgroup_to_release_ret = qgroup_to_release;
@@ -383,13 +381,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
if (block_rsv->reserved < block_rsv->size) {
num_bytes = block_rsv->size - block_rsv->reserved;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
num_bytes);
btrfs_space_info_update_bytes_may_use(sinfo, num_bytes);
block_rsv->reserved = block_rsv->size;
} else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
-num_bytes);
btrfs_space_info_update_bytes_may_use(sinfo, -num_bytes);
block_rsv->reserved = block_rsv->size;
btrfs_try_granting_tickets(fs_info, sinfo);
}

View File

@@ -526,7 +526,7 @@ bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
struct btrfs_file_extent *file_extent,
bool nowait, bool strict);
bool nowait);
void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);

View File

@@ -37,19 +37,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
static const struct btrfs_csums {
u16 size;
const char name[10];
const char driver[12];
} btrfs_csums[] = {
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
[BTRFS_CSUM_TYPE_SHA256] = { .size = 32, .name = "sha256" },
[BTRFS_CSUM_TYPE_BLAKE2] = { .size = 32, .name = "blake2b",
.driver = "blake2b-256" },
};
/*
* The leaf data grows from end-to-front in the node. this returns the address
* of the start of the last item, which is the stop of the leaf data stack.
@@ -148,44 +135,6 @@ static inline void copy_leaf_items(const struct extent_buffer *dst,
nr_items * sizeof(struct btrfs_item));
}
/* This exists for btrfs-progs usages. */
u16 btrfs_csum_type_size(u16 type)
{
return btrfs_csums[type].size;
}
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
* csum type is validated at mount time
*/
return btrfs_csum_type_size(t);
}
const char *btrfs_super_csum_name(u16 csum_type)
{
/* csum type is validated at mount time */
return btrfs_csums[csum_type].name;
}
/*
* Return driver name if defined, otherwise the name that's also a valid driver
* name
*/
const char *btrfs_super_csum_driver(u16 csum_type)
{
/* csum type is validated at mount time */
return btrfs_csums[csum_type].driver[0] ?
btrfs_csums[csum_type].driver :
btrfs_csums[csum_type].name;
}
size_t __attribute_const__ btrfs_get_num_csums(void)
{
return ARRAY_SIZE(btrfs_csums);
}
struct btrfs_path *btrfs_alloc_path(void)
{
might_sleep();
@@ -225,22 +174,6 @@ noinline void btrfs_release_path(struct btrfs_path *p)
}
}
/*
* We want the transaction abort to print stack trace only for errors where the
* cause could be a bug, eg. due to ENOSPC, and not for common errors that are
* caused by external factors.
*/
bool __cold abort_should_print_stack(int error)
{
switch (error) {
case -EIO:
case -EROFS:
case -ENOMEM:
return false;
}
return true;
}
/*
* safely gets a reference on the root node of a tree. A lock
* is not taken, so a concurrent writer may put a different node
@@ -3900,6 +3833,7 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
key.type != BTRFS_RAID_STRIPE_KEY &&
key.type != BTRFS_EXTENT_CSUM_KEY);
if (btrfs_leaf_free_space(leaf) >= ins_len)

View File

@@ -7,7 +7,6 @@
#define BTRFS_CTREE_H
#include "linux/cleanup.h"
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/rbtree.h>
#include <linux/mutex.h>
@@ -506,20 +505,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
}
#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
((bytes) >> (fs_info)->sectorsize_bits)
static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping)
{
return mapping_gfp_constraint(mapping, ~__GFP_FS);
}
void btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info, u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
/* ctree.c */
int __init btrfs_ctree_init(void);
void __cold btrfs_ctree_exit(void);
@@ -756,18 +741,4 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}
u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);
size_t __attribute_const__ btrfs_get_num_csums(void);
/*
* We use folio flag owner_2 to indicate there is an ordered extent with
* unfinished IO.
*/
#define folio_test_ordered(folio) folio_test_owner_2(folio)
#define folio_set_ordered(folio) folio_set_owner_2(folio)
#define folio_clear_ordered(folio) folio_clear_owner_2(folio)
#endif

View File

@@ -176,7 +176,7 @@ void btrfs_free_reserved_data_space_noquota(struct btrfs_fs_info *fs_info,
ASSERT(IS_ALIGNED(len, fs_info->sectorsize));
data_sinfo = fs_info->data_sinfo;
btrfs_space_info_free_bytes_may_use(fs_info, data_sinfo, len);
btrfs_space_info_free_bytes_may_use(data_sinfo, len);
}
/*

View File

@@ -366,40 +366,35 @@ static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
return NULL;
}
static int btrfs_delayed_item_cmp(const struct rb_node *new,
const struct rb_node *exist)
{
const struct btrfs_delayed_item *new_item =
rb_entry(new, struct btrfs_delayed_item, rb_node);
const struct btrfs_delayed_item *exist_item =
rb_entry(exist, struct btrfs_delayed_item, rb_node);
if (new_item->index < exist_item->index)
return -1;
if (new_item->index > exist_item->index)
return 1;
return 0;
}
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
struct btrfs_delayed_item *ins)
{
struct rb_node **p, *node;
struct rb_node *parent_node = NULL;
struct rb_root_cached *root;
struct btrfs_delayed_item *item;
bool leftmost = true;
struct rb_node *exist;
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_node->ins_root;
else
root = &delayed_node->del_root;
p = &root->rb_root.rb_node;
node = &ins->rb_node;
while (*p) {
parent_node = *p;
item = rb_entry(parent_node, struct btrfs_delayed_item,
rb_node);
if (item->index < ins->index) {
p = &(*p)->rb_right;
leftmost = false;
} else if (item->index > ins->index) {
p = &(*p)->rb_left;
} else {
return -EEXIST;
}
}
rb_link_node(node, parent_node, p);
rb_insert_color_cached(node, root, leftmost);
exist = rb_find_add_cached(&ins->rb_node, root, btrfs_delayed_item_cmp);
if (exist)
return -EEXIST;
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM &&
ins->index >= delayed_node->index_cnt)
@@ -1038,7 +1033,6 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_inode_item);
write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
sizeof(struct btrfs_inode_item));
btrfs_mark_buffer_dirty(trans, leaf);
if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
goto out;
@@ -1561,8 +1555,7 @@ release_node:
return ret;
}
static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_node *node,
static int btrfs_delete_delayed_insertion_item(struct btrfs_delayed_node *node,
u64 index)
{
struct btrfs_delayed_item *item;
@@ -1620,7 +1613,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
if (IS_ERR(node))
return PTR_ERR(node);
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index);
ret = btrfs_delete_delayed_insertion_item(node, index);
if (!ret)
goto end;

View File

@@ -93,6 +93,9 @@ void btrfs_update_delayed_refs_rsv(struct btrfs_trans_handle *trans)
u64 num_bytes;
u64 reserved_bytes;
if (btrfs_is_testing(fs_info))
return;
num_bytes = btrfs_calc_delayed_ref_bytes(fs_info, trans->delayed_ref_updates);
num_bytes += btrfs_calc_delayed_ref_csum_bytes(fs_info,
trans->delayed_ref_csum_deletions);
@@ -254,7 +257,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
spin_unlock(&block_rsv->lock);
if (to_free > 0)
btrfs_space_info_free_bytes_may_use(fs_info, space_info, to_free);
btrfs_space_info_free_bytes_may_use(space_info, to_free);
if (refilled_bytes > 0)
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv", 0,
@@ -265,8 +268,8 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
/*
* compare two delayed data backrefs with same bytenr and type
*/
static int comp_data_refs(struct btrfs_delayed_ref_node *ref1,
struct btrfs_delayed_ref_node *ref2)
static int comp_data_refs(const struct btrfs_delayed_ref_node *ref1,
const struct btrfs_delayed_ref_node *ref2)
{
if (ref1->data_ref.objectid < ref2->data_ref.objectid)
return -1;
@@ -279,8 +282,8 @@ static int comp_data_refs(struct btrfs_delayed_ref_node *ref1,
return 0;
}
static int comp_refs(struct btrfs_delayed_ref_node *ref1,
struct btrfs_delayed_ref_node *ref2,
static int comp_refs(const struct btrfs_delayed_ref_node *ref1,
const struct btrfs_delayed_ref_node *ref2,
bool check_seq)
{
int ret = 0;
@@ -314,34 +317,25 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
return 0;
}
static int cmp_refs_node(const struct rb_node *new, const struct rb_node *exist)
{
const struct btrfs_delayed_ref_node *new_node =
rb_entry(new, struct btrfs_delayed_ref_node, ref_node);
const struct btrfs_delayed_ref_node *exist_node =
rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
return comp_refs(new_node, exist_node, true);
}
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
struct btrfs_delayed_ref_node *ins)
{
struct rb_node **p = &root->rb_root.rb_node;
struct rb_node *node = &ins->ref_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_node *entry;
bool leftmost = true;
struct rb_node *exist;
while (*p) {
int comp;
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
ref_node);
comp = comp_refs(ins, entry, true);
if (comp < 0) {
p = &(*p)->rb_left;
} else if (comp > 0) {
p = &(*p)->rb_right;
leftmost = false;
} else {
return entry;
}
}
rb_link_node(node, parent_node, p);
rb_insert_color_cached(node, root, leftmost);
exist = rb_find_add_cached(node, root, cmp_refs_node);
if (exist)
return rb_entry(exist, struct btrfs_delayed_ref_node, ref_node);
return NULL;
}
@@ -555,6 +549,32 @@ void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
delayed_refs->num_heads_ready--;
}
struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_node *ref;
lockdep_assert_held(&head->mutex);
lockdep_assert_held(&head->lock);
if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
return NULL;
/*
* Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
* This is to prevent a ref count from going down to zero, which deletes
* the extent item from the extent tree, when there still are references
* to add, which would fail because they would not find the extent item.
*/
if (!list_empty(&head->ref_add_list))
return list_first_entry(&head->ref_add_list,
struct btrfs_delayed_ref_node, add_list);
ref = rb_entry(rb_first_cached(&head->ref_tree),
struct btrfs_delayed_ref_node, ref_node);
ASSERT(list_empty(&ref->add_list));
return ref;
}
/*
* Helper to insert the ref_node to the tail or merge with tail.
*
@@ -1234,6 +1254,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
{
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
struct btrfs_fs_info *fs_info = trans->fs_info;
bool testing = btrfs_is_testing(fs_info);
spin_lock(&delayed_refs->lock);
while (true) {
@@ -1263,7 +1284,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
spin_unlock(&delayed_refs->lock);
mutex_unlock(&head->mutex);
if (pin_bytes) {
if (!testing && pin_bytes) {
struct btrfs_block_group *bg;
bg = btrfs_lookup_block_group(fs_info, head->bytenr);
@@ -1281,8 +1302,7 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
spin_lock(&bg->space_info->lock);
spin_lock(&bg->lock);
bg->pinned += head->num_bytes;
btrfs_space_info_update_bytes_pinned(fs_info,
bg->space_info,
btrfs_space_info_update_bytes_pinned(bg->space_info,
head->num_bytes);
bg->reserved -= head->num_bytes;
bg->space_info->bytes_reserved -= head->num_bytes;
@@ -1295,12 +1315,15 @@ void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
head->bytenr + head->num_bytes - 1);
}
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
if (!testing)
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
btrfs_put_delayed_ref_head(head);
cond_resched();
spin_lock(&delayed_refs->lock);
}
btrfs_qgroup_destroy_extent_records(trans);
if (!testing)
btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
}

View File

@@ -402,6 +402,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
struct btrfs_delayed_ref_root *delayed_refs);
void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head);
struct btrfs_delayed_ref_node *btrfs_select_delayed_ref(struct btrfs_delayed_ref_head *head);
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);

View File

@@ -440,9 +440,6 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans)
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
up_write(&dev_replace->rwsem);
btrfs_mark_buffer_dirty(trans, eb);
out:
btrfs_free_path(path);

View File

@@ -92,7 +92,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, name, name_ptr, name_len);
write_extent_buffer(leaf, data, data_ptr, data_len);
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
return ret;
}
@@ -152,7 +151,6 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
name_ptr = (unsigned long)(dir_item + 1);
write_extent_buffer(leaf, name->name, name_ptr, name->len);
btrfs_mark_buffer_dirty(trans, leaf);
second_insert:
/* FIXME, use some real flag for selecting the extra index */

View File

@@ -248,8 +248,7 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
len = min(len, em->len - (start - em->start));
block_start = extent_map_block_start(em) + (start - em->start);
if (can_nocow_extent(inode, start, &len,
&file_extent, false, false) == 1) {
if (can_nocow_extent(inode, start, &len, &file_extent, false) == 1) {
bg = btrfs_inc_nocow_writers(fs_info, block_start);
if (bg)
can_nocow = true;

View File

@@ -226,7 +226,7 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
while (1) {
clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check);
ret = read_extent_buffer_pages(eb, mirror_num, check);
if (!ret)
break;
@@ -1258,6 +1258,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{
struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
percpu_counter_destroy(&fs_info->stats_read_blocks);
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->ordered_bytes);
@@ -2327,6 +2328,71 @@ out:
return ret;
}
static int validate_sys_chunk_array(const struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *sb)
{
unsigned int cur = 0; /* Offset inside the sys chunk array */
/*
* At sb read time, fs_info is not fully initialized. Thus we have
* to use super block sectorsize, which should have been validated.
*/
const u32 sectorsize = btrfs_super_sectorsize(sb);
u32 sys_array_size = btrfs_super_sys_array_size(sb);
if (sys_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
btrfs_err(fs_info, "system chunk array too big %u > %u",
sys_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
return -EUCLEAN;
}
while (cur < sys_array_size) {
struct btrfs_disk_key *disk_key;
struct btrfs_chunk *chunk;
struct btrfs_key key;
u64 type;
u16 num_stripes;
u32 len;
int ret;
disk_key = (struct btrfs_disk_key *)(sb->sys_chunk_array + cur);
len = sizeof(*disk_key);
if (cur + len > sys_array_size)
goto short_read;
cur += len;
btrfs_disk_key_to_cpu(&key, disk_key);
if (key.type != BTRFS_CHUNK_ITEM_KEY) {
btrfs_err(fs_info,
"unexpected item type %u in sys_array at offset %u",
key.type, cur);
return -EUCLEAN;
}
chunk = (struct btrfs_chunk *)(sb->sys_chunk_array + cur);
num_stripes = btrfs_stack_chunk_num_stripes(chunk);
if (cur + btrfs_chunk_item_size(num_stripes) > sys_array_size)
goto short_read;
type = btrfs_stack_chunk_type(chunk);
if (!(type & BTRFS_BLOCK_GROUP_SYSTEM)) {
btrfs_err(fs_info,
"invalid chunk type %llu in sys_array at offset %u",
type, cur);
return -EUCLEAN;
}
ret = btrfs_check_chunk_valid(fs_info, NULL, chunk, key.offset,
sectorsize);
if (ret < 0)
return ret;
cur += btrfs_chunk_item_size(num_stripes);
}
return 0;
short_read:
btrfs_err(fs_info,
"super block sys chunk array short read, cur=%u sys_array_size=%u",
cur, sys_array_size);
return -EUCLEAN;
}
/*
* Real super block validation
* NOTE: super csum type and incompat features will not be checked here.
@@ -2495,6 +2561,8 @@ int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
ret = validate_sys_chunk_array(fs_info, sb);
/*
* Obvious sys_chunk_array corruptions, it must hold at least one key
* and one chunk
@@ -2856,6 +2924,10 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
ret = percpu_counter_init(&fs_info->stats_read_blocks, 0, GFP_KERNEL);
if (ret)
return ret;
fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
@@ -3321,6 +3393,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
fs_info->sectors_per_page = (PAGE_SIZE >> fs_info->sectorsize_bits);
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
fs_info->stripesize = stripesize;
fs_info->fs_devices->fs_info = fs_info;
/*
* Handle the space caching options appropriately now that we have the

View File

@@ -96,9 +96,6 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
/*
* This function is used to grab the root, and avoid it is freed when we
* access it. But it doesn't ensure that the tree is not dropped.
*
* If you want to ensure the whole tree is safe, you should use
* fs_info->subvol_srcu
*/
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
{

View File

@@ -570,7 +570,6 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
}
}
btrfs_mark_buffer_dirty(trans, leaf);
ret = 0;
fail:
btrfs_release_path(path);
@@ -618,7 +617,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
btrfs_mark_buffer_dirty(trans, leaf);
}
return ret;
}
@@ -1050,7 +1048,6 @@ void setup_inline_extent_backref(struct btrfs_trans_handle *trans,
} else {
btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
}
btrfs_mark_buffer_dirty(trans, leaf);
}
static int lookup_extent_backref(struct btrfs_trans_handle *trans,
@@ -1195,7 +1192,6 @@ static noinline_for_stack int update_inline_extent_backref(
item_size -= size;
btrfs_truncate_item(trans, path, item_size, 1);
}
btrfs_mark_buffer_dirty(trans, leaf);
return 0;
}
@@ -1260,12 +1256,12 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
{
int j, ret = 0;
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
u64 aligned_start = ALIGN(start, SECTOR_SIZE);
/* Adjust the range to be aligned to 512B sectors if necessary. */
if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, 1 << SECTOR_SHIFT);
len = round_down(len, SECTOR_SIZE);
start = aligned_start;
}
@@ -1527,7 +1523,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, item);
btrfs_mark_buffer_dirty(trans, leaf);
btrfs_release_path(path);
/* now insert the actual backref */
@@ -1711,8 +1706,6 @@ again:
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
__run_delayed_extent_op(extent_op, leaf, ei);
btrfs_mark_buffer_dirty(trans, leaf);
out:
btrfs_free_path(path);
return ret;
@@ -1803,30 +1796,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
return ret;
}
static inline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
struct btrfs_delayed_ref_node *ref;
if (RB_EMPTY_ROOT(&head->ref_tree.rb_root))
return NULL;
/*
* Select a delayed ref of type BTRFS_ADD_DELAYED_REF first.
* This is to prevent a ref count from going down to zero, which deletes
* the extent item from the extent tree, when there still are references
* to add, which would fail because they would not find the extent item.
*/
if (!list_empty(&head->ref_add_list))
return list_first_entry(&head->ref_add_list,
struct btrfs_delayed_ref_node, add_list);
ref = rb_entry(rb_first_cached(&head->ref_tree),
struct btrfs_delayed_ref_node, ref_node);
ASSERT(list_empty(&ref->add_list));
return ref;
}
static struct btrfs_delayed_extent_op *cleanup_extent_op(
struct btrfs_delayed_ref_head *head)
{
@@ -1959,7 +1928,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
lockdep_assert_held(&locked_ref->mutex);
lockdep_assert_held(&locked_ref->lock);
while ((ref = select_delayed_ref(locked_ref))) {
while ((ref = btrfs_select_delayed_ref(locked_ref))) {
if (ref->seq &&
btrfs_check_delayed_seq(fs_info, ref->seq)) {
spin_unlock(&locked_ref->lock);
@@ -2230,10 +2199,11 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
return ret;
}
static noinline int check_delayed_ref(struct btrfs_root *root,
static noinline int check_delayed_ref(struct btrfs_inode *inode,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr)
u64 offset, u64 bytenr)
{
struct btrfs_root *root = inode->root;
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_node *ref;
struct btrfs_delayed_ref_root *delayed_refs;
@@ -2307,7 +2277,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
* then we have a cross reference.
*/
if (ref->ref_root != btrfs_root_id(root) ||
ref_owner != objectid || ref_offset != offset) {
ref_owner != btrfs_ino(inode) || ref_offset != offset) {
ret = 1;
break;
}
@@ -2318,11 +2288,53 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
return ret;
}
static noinline int check_committed_ref(struct btrfs_root *root,
/*
* Check if there are references for a data extent other than the one belonging
* to the given inode and offset.
*
* @inode: The only inode we expect to find associated with the data extent.
* @path: A path to use for searching the extent tree.
* @offset: The only offset we expect to find associated with the data extent.
* @bytenr: The logical address of the data extent.
*
* When the extent does not have any other references other than the one we
* expect to find, we always return a value of 0 with the path having a locked
* leaf that contains the extent's extent item - this is necessary to ensure
* we don't race with a task running delayed references, and our caller must
* have such a path when calling check_delayed_ref() - it must lock a delayed
* ref head while holding the leaf locked. In case the extent item is not found
* in the extent tree, we return -ENOENT with the path having the leaf (locked)
* where the extent item should be, in order to prevent races with another task
* running delayed references, so that we don't miss any reference when calling
* check_delayed_ref().
*
* Note: this may return false positives, and this is because we want to be
* quick here as we're called in write paths (when flushing delalloc and
* in the direct IO write path). For example we can have an extent with
* a single reference but that reference is not inlined, or we may have
* many references in the extent tree but we also have delayed references
* that cancel all the reference except the one for our inode and offset,
* but it would be expensive to do such checks and complex due to all
* locking to avoid races between the checks and flushing delayed refs,
* plus non-inline references may be located on leaves other than the one
* that contains the extent item in the extent tree. The important thing
* here is to not return false negatives and that the false positives are
* not very common.
*
* Returns: 0 if there are no cross references and with the path having a locked
* leaf from the extent tree that contains the extent's extent item.
*
* 1 if there are cross references (false positives can happen).
*
* < 0 in case of an error. In case of -ENOENT the leaf in the extent
* tree where the extent item should be located at is read locked and
* accessible in the given path.
*/
static noinline int check_committed_ref(struct btrfs_inode *inode,
struct btrfs_path *path,
u64 objectid, u64 offset, u64 bytenr,
bool strict)
u64 offset, u64 bytenr)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bytenr);
struct extent_buffer *leaf;
@@ -2341,35 +2353,32 @@ static noinline int check_committed_ref(struct btrfs_root *root,
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
return ret;
if (ret == 0) {
/*
* Key with offset -1 found, there would have to exist an extent
* item with such offset, but this is out of the valid range.
*/
ret = -EUCLEAN;
goto out;
return -EUCLEAN;
}
ret = -ENOENT;
if (path->slots[0] == 0)
goto out;
return -ENOENT;
path->slots[0]--;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY)
goto out;
return -ENOENT;
ret = 1;
item_size = btrfs_item_size(leaf, path->slots[0]);
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
expected_size = sizeof(*ei) + btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY);
/* No inline refs; we need to bail before checking for owner ref. */
if (item_size == sizeof(*ei))
goto out;
return 1;
/* Check for an owner ref; skip over it to the real inline refs. */
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
@@ -2377,56 +2386,69 @@ static noinline int check_committed_ref(struct btrfs_root *root,
if (btrfs_fs_incompat(fs_info, SIMPLE_QUOTA) && type == BTRFS_EXTENT_OWNER_REF_KEY) {
expected_size += btrfs_extent_inline_ref_size(BTRFS_EXTENT_OWNER_REF_KEY);
iref = (struct btrfs_extent_inline_ref *)(iref + 1);
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
}
/* If extent item has more than 1 inline ref then it's shared */
if (item_size != expected_size)
goto out;
/*
* If extent created before last snapshot => it's shared unless the
* snapshot has been deleted. Use the heuristic if strict is false.
*/
if (!strict &&
(btrfs_extent_generation(leaf, ei) <=
btrfs_root_last_snapshot(&root->root_item)))
goto out;
return 1;
/* If this extent has SHARED_DATA_REF then it's shared */
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_DATA);
if (type != BTRFS_EXTENT_DATA_REF_KEY)
goto out;
return 1;
ref = (struct btrfs_extent_data_ref *)(&iref->offset);
if (btrfs_extent_refs(leaf, ei) !=
btrfs_extent_data_ref_count(leaf, ref) ||
btrfs_extent_data_ref_root(leaf, ref) != btrfs_root_id(root) ||
btrfs_extent_data_ref_objectid(leaf, ref) != objectid ||
btrfs_extent_data_ref_objectid(leaf, ref) != btrfs_ino(inode) ||
btrfs_extent_data_ref_offset(leaf, ref) != offset)
goto out;
return 1;
ret = 0;
out:
return ret;
return 0;
}
int btrfs_cross_ref_exist(struct btrfs_root *root, u64 objectid, u64 offset,
u64 bytenr, bool strict, struct btrfs_path *path)
int btrfs_cross_ref_exist(struct btrfs_inode *inode, u64 offset,
u64 bytenr, struct btrfs_path *path)
{
int ret;
do {
ret = check_committed_ref(root, path, objectid,
offset, bytenr, strict);
ret = check_committed_ref(inode, path, offset, bytenr);
if (ret && ret != -ENOENT)
goto out;
ret = check_delayed_ref(root, path, objectid, offset, bytenr);
/*
* The path must have a locked leaf from the extent tree where
* the extent item for our extent is located, in case it exists,
* or where it should be located in case it doesn't exist yet
* because it's new and its delayed ref was not yet flushed.
* We need to lock the delayed ref head at check_delayed_ref(),
* if one exists, while holding the leaf locked in order to not
* race with delayed ref flushing, missing references and
* incorrectly reporting that the extent is not shared.
*/
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
struct extent_buffer *leaf = path->nodes[0];
ASSERT(leaf != NULL);
btrfs_assert_tree_read_locked(leaf);
if (ret != -ENOENT) {
struct btrfs_key key;
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
ASSERT(key.objectid == bytenr);
ASSERT(key.type == BTRFS_EXTENT_ITEM_KEY);
}
}
ret = check_delayed_ref(inode, path, offset, bytenr);
} while (ret == -EAGAIN && !path->nowait);
out:
btrfs_release_path(path);
if (btrfs_is_data_reloc_root(root))
if (btrfs_is_data_reloc_root(inode->root))
WARN_ON(ret > 0);
return ret;
}
@@ -2571,13 +2593,10 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
struct btrfs_block_group *cache,
u64 bytenr, u64 num_bytes, int reserved)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
spin_lock(&cache->space_info->lock);
spin_lock(&cache->lock);
cache->pinned += num_bytes;
btrfs_space_info_update_bytes_pinned(fs_info, cache->space_info,
num_bytes);
btrfs_space_info_update_bytes_pinned(cache->space_info, num_bytes);
if (reserved) {
cache->reserved -= num_bytes;
cache->space_info->bytes_reserved -= num_bytes;
@@ -2724,15 +2743,15 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
{
struct btrfs_block_group *cache = NULL;
struct btrfs_space_info *space_info;
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
struct btrfs_free_cluster *cluster = NULL;
u64 len;
u64 total_unpinned = 0;
u64 empty_cluster = 0;
bool readonly;
int ret = 0;
while (start <= end) {
u64 len;
readonly = false;
if (!cache ||
start >= cache->start + cache->length) {
@@ -2778,37 +2797,19 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
spin_lock(&space_info->lock);
spin_lock(&cache->lock);
cache->pinned -= len;
btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len);
btrfs_space_info_update_bytes_pinned(space_info, -len);
space_info->max_extent_size = 0;
if (cache->ro) {
space_info->bytes_readonly += len;
readonly = true;
} else if (btrfs_is_zoned(fs_info)) {
/* Need reset before reusing in a zoned block group */
btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info,
len);
btrfs_space_info_update_bytes_zone_unusable(space_info, len);
readonly = true;
}
spin_unlock(&cache->lock);
if (!readonly && return_free_space &&
global_rsv->space_info == space_info) {
spin_lock(&global_rsv->lock);
if (!global_rsv->full) {
u64 to_add = min(len, global_rsv->size -
global_rsv->reserved);
global_rsv->reserved += to_add;
btrfs_space_info_update_bytes_may_use(fs_info,
space_info, to_add);
if (global_rsv->reserved >= global_rsv->size)
global_rsv->full = 1;
len -= to_add;
}
spin_unlock(&global_rsv->lock);
}
/* Add to any tickets we may have */
if (!readonly && return_free_space && len)
btrfs_try_granting_tickets(fs_info, space_info);
if (!readonly && return_free_space)
btrfs_return_free_space(space_info, len);
spin_unlock(&space_info->lock);
}
@@ -3259,7 +3260,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
} else {
btrfs_set_extent_refs(leaf, ei, refs);
btrfs_mark_buffer_dirty(trans, leaf);
}
if (found_extent) {
ret = remove_extent_backref(trans, extent_root, path,
@@ -4827,7 +4827,6 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
btrfs_set_extent_data_ref_count(leaf, ref, ref_mod);
}
btrfs_mark_buffer_dirty(trans, path->nodes[0]);
btrfs_free_path(path);
return alloc_reserved_extent(trans, ins->objectid, ins->offset);
@@ -4902,7 +4901,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
btrfs_set_extent_inline_ref_offset(leaf, iref, node->ref_root);
}
btrfs_mark_buffer_dirty(trans, leaf);
btrfs_free_path(path);
return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);

Some files were not shown because too many files have changed in this diff Show More