Merge tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This update brings a few minor performance improvements, otherwise
  there's a lot of refactoring, cleanups and other sort of not user
  visible changes.

  Performance improvements:

   - inline b-tree locking functions, improvement in metadata-heavy
     changes

   - relax locking on a range that's being reflinked, allows read
     operations to run in parallel

   - speed up NOCOW write checks (throughput +9% on a sample test)

   - extent locking ranges have been reduced in several places, namely
     around delayed ref processing

  Core:

   - more page to folio conversions:
      - relocation
      - send
      - compression
      - inline extent handling
      - super block write and wait

   - extent_map structure optimizations:
      - reduced structure size
      - code simplifications
      - add shrinker for allocated objects, the numbers can go high and
        could exhaust memory on smaller systems (reported) as they may
        not get an opportunity to be freed fast enough

   - extent locking optimizations:
      - reduce locking ranges where it does not seem to be necessary and
        are safe due to other means of synchronization
      - potential improvements due to lower contention,
        allocation/freeing and state management operations of extent
        state tracking structures

   - delayed ref cleanups and simplifications

   - updated trace points

   - improved error handling, warnings and assertions

   - cleanups and refactoring, unification of error handling paths"

* tag 'for-6.10-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (122 commits)
  btrfs: qgroup: fix initialization of auto inherit array
  btrfs: count super block write errors in device instead of tracking folio error state
  btrfs: use the folio iterator in btrfs_end_super_write()
  btrfs: convert super block writes to folio in write_dev_supers()
  btrfs: convert super block writes to folio in wait_dev_supers()
  bio: Export bio_add_folio_nofail to modules
  btrfs: remove duplicate included header from fs.h
  btrfs: add a cached state to extent_clear_unlock_delalloc
  btrfs: push extent lock down in submit_one_async_extent
  btrfs: push lock_extent down in cow_file_range()
  btrfs: move can_cow_file_range_inline() outside of the extent lock
  btrfs: push lock_extent into cow_file_range_inline
  btrfs: push extent lock into cow_file_range
  btrfs: push extent lock into run_delalloc_cow
  btrfs: remove unlock_extent from run_delalloc_compressed
  btrfs: push extent lock down in run_delalloc_nocow
  btrfs: adjust while loop condition in run_delalloc_nocow
  btrfs: push extent lock into run_delalloc_nocow
  btrfs: push the extent lock into btrfs_run_delalloc_range
  btrfs: lock extent when doing inline extent in compression
  ...
This commit is contained in:
Linus Torvalds
2024-05-14 17:25:36 -07:00
52 changed files with 2650 additions and 2357 deletions

View File

@@ -1136,6 +1136,7 @@ void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len,
WARN_ON_ONCE(off > UINT_MAX);
__bio_add_page(bio, &folio->page, len, off);
}
EXPORT_SYMBOL_GPL(bio_add_folio_nofail);
/**
* bio_add_folio - Attempt to add part of a folio to a bio.

View File

@@ -261,7 +261,7 @@ static void update_share_count(struct share_check *sc, int oldcount,
else if (oldcount < 1 && newcount > 0)
sc->share_count++;
if (newref->root_id == sc->root->root_key.objectid &&
if (newref->root_id == btrfs_root_id(sc->root) &&
newref->wanted_disk_byte == sc->data_bytenr &&
newref->key_for_search.objectid == sc->inum)
sc->self_ref_count += newref->count;
@@ -769,7 +769,7 @@ static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx,
continue;
}
if (sc && ref->root_id != sc->root->root_key.objectid) {
if (sc && ref->root_id != btrfs_root_id(sc->root)) {
free_pref(ref);
ret = BACKREF_FOUND_SHARED;
goto out;
@@ -919,40 +919,38 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
switch (node->type) {
case BTRFS_TREE_BLOCK_REF_KEY: {
/* NORMAL INDIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
struct btrfs_key *key_ptr = NULL;
/* The owner of a tree block ref is the level. */
int level = btrfs_delayed_ref_owner(node);
if (head->extent_op && head->extent_op->update_key) {
btrfs_disk_key_to_cpu(&key, &head->extent_op->key);
key_ptr = &key;
}
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_indirect_ref(fs_info, preftrees, ref->root,
key_ptr, ref->level + 1,
node->bytenr, count, sc,
GFP_ATOMIC);
ret = add_indirect_ref(fs_info, preftrees, node->ref_root,
key_ptr, level + 1, node->bytenr,
count, sc, GFP_ATOMIC);
break;
}
case BTRFS_SHARED_BLOCK_REF_KEY: {
/* SHARED DIRECT METADATA backref */
struct btrfs_delayed_tree_ref *ref;
/*
* SHARED DIRECT METADATA backref
*
* The owner of a tree block ref is the level.
*/
int level = btrfs_delayed_ref_owner(node);
ref = btrfs_delayed_node_to_tree_ref(node);
ret = add_direct_ref(fs_info, preftrees, ref->level + 1,
ref->parent, node->bytenr, count,
ret = add_direct_ref(fs_info, preftrees, level + 1,
node->parent, node->bytenr, count,
sc, GFP_ATOMIC);
break;
}
case BTRFS_EXTENT_DATA_REF_KEY: {
/* NORMAL INDIRECT DATA backref */
struct btrfs_delayed_data_ref *ref;
ref = btrfs_delayed_node_to_data_ref(node);
key.objectid = ref->objectid;
key.objectid = btrfs_delayed_ref_owner(node);
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = ref->offset;
key.offset = btrfs_delayed_ref_offset(node);
/*
* If we have a share check context and a reference for
@@ -972,18 +970,14 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
if (sc && count < 0)
sc->have_delayed_delete_refs = true;
ret = add_indirect_ref(fs_info, preftrees, ref->root,
ret = add_indirect_ref(fs_info, preftrees, node->ref_root,
&key, 0, node->bytenr, count, sc,
GFP_ATOMIC);
break;
}
case BTRFS_SHARED_DATA_REF_KEY: {
/* SHARED DIRECT FULL backref */
struct btrfs_delayed_data_ref *ref;
ref = btrfs_delayed_node_to_data_ref(node);
ret = add_direct_ref(fs_info, preftrees, 0, ref->parent,
ret = add_direct_ref(fs_info, preftrees, 0, node->parent,
node->bytenr, count, sc,
GFP_ATOMIC);
break;
@@ -2629,7 +2623,7 @@ static int iterate_inode_refs(u64 inum, struct inode_fs_paths *ipath)
btrfs_debug(fs_root->fs_info,
"following ref at offset %u for inode %llu in tree %llu",
cur, found_key.objectid,
fs_root->root_key.objectid);
btrfs_root_id(fs_root));
ret = inode_to_path(parent, name_len,
(unsigned long)(iref + 1), eb, ipath);
if (ret)
@@ -3361,7 +3355,7 @@ static int handle_indirect_tree_backref(struct btrfs_trans_handle *trans,
if (btrfs_node_blockptr(eb, path->slots[level]) != cur->bytenr) {
btrfs_err(fs_info,
"couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
cur->bytenr, level - 1, root->root_key.objectid,
cur->bytenr, level - 1, btrfs_root_id(root),
tree_key->objectid, tree_key->type, tree_key->offset);
btrfs_put_root(root);
ret = -ENOENT;

View File

@@ -341,9 +341,9 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
read_lock(&fs_info->global_root_lock);
rbtree_postorder_for_each_entry_safe(root, tmp, &fs_info->global_root_tree,
rb_node) {
if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID ||
root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
root->root_key.objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID ||
btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
btrfs_root_id(root) == BTRFS_FREE_SPACE_TREE_OBJECTID) {
num_bytes += btrfs_root_used(&root->root_item);
min_items++;
}
@@ -406,7 +406,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
switch (root->root_key.objectid) {
switch (btrfs_root_id(root)) {
case BTRFS_CSUM_TREE_OBJECTID:
case BTRFS_EXTENT_TREE_OBJECTID:
case BTRFS_FREE_SPACE_TREE_OBJECTID:
@@ -468,8 +468,7 @@ static struct btrfs_block_rsv *get_block_rsv(
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) ||
(root == fs_info->uuid_root) ||
(trans->adding_csums &&
root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID))
(trans->adding_csums && btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID))
block_rsv = trans->block_rsv;
if (!block_rsv)

View File

@@ -381,9 +381,11 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
}
/*
* Should be called while holding the inode's VFS lock in exclusive mode or in a
* context where no one else can access the inode concurrently (during inode
* creation or when loading an inode from disk).
* Should be called while holding the inode's VFS lock in exclusive mode, or
* while holding the inode's mmap lock (struct btrfs_inode::i_mmap_lock) in
* either shared or exclusive mode, or in a context where no one else can access
* the inode concurrently (during inode creation or when loading an inode from
* disk).
*/
static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
{
@@ -496,7 +498,6 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
struct extent_state *orig, u64 split);
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
void btrfs_evict_inode(struct inode *inode);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
@@ -544,6 +545,7 @@ ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
size_t done_before);
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
size_t done_before);
struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino);
extern const struct dentry_operations btrfs_dentry_operations;

View File

@@ -90,20 +90,20 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
}
static int compression_compress_pages(int type, struct list_head *ws,
struct address_space *mapping, u64 start, struct page **pages,
unsigned long *out_pages, unsigned long *total_in,
unsigned long *total_out)
struct address_space *mapping, u64 start,
struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
switch (type) {
case BTRFS_COMPRESS_ZLIB:
return zlib_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
return zlib_compress_folios(ws, mapping, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_LZO:
return lzo_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
return lzo_compress_folios(ws, mapping, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_ZSTD:
return zstd_compress_pages(ws, mapping, start, pages,
out_pages, total_in, total_out);
return zstd_compress_folios(ws, mapping, start, folios,
out_folios, total_in, total_out);
case BTRFS_COMPRESS_NONE:
default:
/*
@@ -115,7 +115,7 @@ static int compression_compress_pages(int type, struct list_head *ws,
* Not a big deal, just need to inform caller that we
* haven't allocated any pages yet.
*/
*out_pages = 0;
*out_folios = 0;
return -E2BIG;
}
}
@@ -158,11 +158,11 @@ static int compression_decompress(int type, struct list_head *ws,
}
}
static void btrfs_free_compressed_pages(struct compressed_bio *cb)
static void btrfs_free_compressed_folios(struct compressed_bio *cb)
{
for (unsigned int i = 0; i < cb->nr_pages; i++)
btrfs_free_compr_page(cb->compressed_pages[i]);
kfree(cb->compressed_pages);
for (unsigned int i = 0; i < cb->nr_folios; i++)
btrfs_free_compr_folio(cb->compressed_folios[i]);
kfree(cb->compressed_folios);
}
static int btrfs_decompress_bio(struct compressed_bio *cb);
@@ -223,25 +223,25 @@ static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_co
/*
* Common wrappers for page allocation from compression wrappers
*/
struct page *btrfs_alloc_compr_page(void)
struct folio *btrfs_alloc_compr_folio(void)
{
struct page *page = NULL;
struct folio *folio = NULL;
spin_lock(&compr_pool.lock);
if (compr_pool.count > 0) {
page = list_first_entry(&compr_pool.list, struct page, lru);
list_del_init(&page->lru);
folio = list_first_entry(&compr_pool.list, struct folio, lru);
list_del_init(&folio->lru);
compr_pool.count--;
}
spin_unlock(&compr_pool.lock);
if (page)
return page;
if (folio)
return folio;
return alloc_page(GFP_NOFS);
return folio_alloc(GFP_NOFS, 0);
}
void btrfs_free_compr_page(struct page *page)
void btrfs_free_compr_folio(struct folio *folio)
{
bool do_free = false;
@@ -249,7 +249,7 @@ void btrfs_free_compr_page(struct page *page)
if (compr_pool.count > compr_pool.thresh) {
do_free = true;
} else {
list_add(&page->lru, &compr_pool.list);
list_add(&folio->lru, &compr_pool.list);
compr_pool.count++;
}
spin_unlock(&compr_pool.lock);
@@ -257,8 +257,8 @@ void btrfs_free_compr_page(struct page *page)
if (!do_free)
return;
ASSERT(page_ref_count(page) == 1);
put_page(page);
ASSERT(folio_ref_count(folio) == 1);
folio_put(folio);
}
static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
@@ -269,7 +269,7 @@ static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
if (!status)
status = errno_to_blk_status(btrfs_decompress_bio(cb));
btrfs_free_compressed_pages(cb);
btrfs_free_compressed_folios(cb);
btrfs_bio_end_io(cb->orig_bbio, status);
bio_put(&bbio->bio);
}
@@ -323,7 +323,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
end_compressed_writeback(cb);
/* Note, our inode could be gone now */
btrfs_free_compressed_pages(cb);
btrfs_free_compressed_folios(cb);
bio_put(&cb->bbio.bio);
}
@@ -342,17 +342,19 @@ static void end_bbio_comprssed_write(struct btrfs_bio *bbio)
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
}
static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
{
struct bio *bio = &cb->bbio.bio;
u32 offset = 0;
while (offset < cb->compressed_len) {
int ret;
u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
/* Maximum compressed extent is smaller than bio size limit. */
__bio_add_page(bio, cb->compressed_pages[offset >> PAGE_SHIFT],
len, 0);
ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
len, 0);
ASSERT(ret);
offset += len;
}
}
@@ -367,8 +369,8 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
* the end io hooks.
*/
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
struct folio **compressed_folios,
unsigned int nr_folios,
blk_opf_t write_flags,
bool writeback)
{
@@ -384,14 +386,14 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
end_bbio_comprssed_write);
cb->start = ordered->file_offset;
cb->len = ordered->num_bytes;
cb->compressed_pages = compressed_pages;
cb->compressed_folios = compressed_folios;
cb->compressed_len = ordered->disk_num_bytes;
cb->writeback = writeback;
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
cb->nr_folios = nr_folios;
cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_pages(cb);
btrfs_add_compressed_bio_folios(cb);
btrfs_submit_bio(&cb->bbio, 0);
}
@@ -599,14 +601,14 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
free_extent_map(em);
cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS);
if (!cb->compressed_pages) {
cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct page *), GFP_NOFS);
if (!cb->compressed_folios) {
ret = BLK_STS_RESOURCE;
goto out_free_bio;
}
ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios, 0);
if (ret2) {
ret = BLK_STS_RESOURCE;
goto out_free_compressed_pages;
@@ -618,7 +620,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
/* include any pages we added in add_ra-bio_pages */
cb->len = bbio->bio.bi_iter.bi_size;
cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
btrfs_add_compressed_bio_pages(cb);
btrfs_add_compressed_bio_folios(cb);
if (memstall)
psi_memstall_leave(&pflags);
@@ -627,7 +629,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
return;
out_free_compressed_pages:
kfree(cb->compressed_pages);
kfree(cb->compressed_folios);
out_free_bio:
bio_put(&cb->bbio.bio);
out:
@@ -974,6 +976,29 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
return level;
}
/* Wrapper around find_get_page(), with extra error message. */
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
struct folio **in_folio_ret)
{
struct folio *in_folio;
/*
* The compressed write path should have the folio locked already, thus
* we only need to grab one reference.
*/
in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT);
if (IS_ERR(in_folio)) {
struct btrfs_inode *inode = BTRFS_I(mapping->host);
btrfs_crit(inode->root->fs_info,
"failed to get page cache, root %lld ino %llu file offset %llu",
btrfs_root_id(inode->root), btrfs_ino(inode), start);
return -ENOENT;
}
*in_folio_ret = in_folio;
return 0;
}
/*
* Given an address space and start and length, compress the bytes into @pages
* that are allocated on demand.
@@ -994,11 +1019,9 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
* @total_out is an in/out parameter, must be set to the input length and will
* be also used to return the total number of compressed bytes
*/
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out)
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out)
{
int type = btrfs_compress_type(type_level);
int level = btrfs_compress_level(type_level);
@@ -1007,8 +1030,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
level = btrfs_compress_set_level(type, level);
workspace = get_workspace(type, level);
ret = compression_compress_pages(type, workspace, mapping, start, pages,
out_pages, total_in, total_out);
ret = compression_compress_pages(type, workspace, mapping, start, folios,
out_folios, total_in, total_out);
put_workspace(type, workspace);
return ret;
}

View File

@@ -41,11 +41,11 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
struct compressed_bio {
/* Number of compressed pages in the array */
unsigned int nr_pages;
/* Number of compressed folios in the array. */
unsigned int nr_folios;
/* the pages with the compressed data on them */
struct page **compressed_pages;
/* The folios with the compressed data on them. */
struct folio **compressed_folios;
/* starting offset in the inode for our pages */
u64 start;
@@ -85,27 +85,24 @@ static inline unsigned int btrfs_compress_level(unsigned int type_level)
int __init btrfs_init_compress(void);
void __cold btrfs_exit_compress(void);
int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
u64 start, struct page **pages,
unsigned long *out_pages,
unsigned long *total_in,
unsigned long *total_out);
int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
unsigned long start_byte, size_t srclen, size_t destlen);
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback);
struct folio **compressed_folios,
unsigned int nr_folios, blk_opf_t write_flags,
bool writeback);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
struct page *btrfs_alloc_compr_page(void);
void btrfs_free_compr_page(struct page *page);
struct folio *btrfs_alloc_compr_folio(void);
void btrfs_free_compr_folio(struct folio *folio);
enum btrfs_compression_type {
BTRFS_COMPRESS_NONE = 0,
@@ -149,8 +146,11 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len);
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
int zlib_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
struct folio **in_folio_ret);
int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zlib_decompress(struct list_head *ws, const u8 *data_in,
@@ -160,8 +160,8 @@ struct list_head *zlib_alloc_workspace(unsigned int level);
void zlib_free_workspace(struct list_head *ws);
struct list_head *zlib_get_workspace(unsigned int level);
int lzo_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int lzo_decompress(struct list_head *ws, const u8 *data_in,
@@ -170,8 +170,8 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
struct list_head *lzo_alloc_workspace(unsigned int level);
void lzo_free_workspace(struct list_head *ws);
int zstd_compress_pages(struct list_head *ws, struct address_space *mapping,
u64 start, struct page **pages, unsigned long *out_pages,
int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
u64 start, struct folio **folios, unsigned long *out_folios,
unsigned long *total_in, unsigned long *total_out);
int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
int zstd_decompress(struct list_head *ws, const u8 *data_in,

View File

@@ -291,7 +291,7 @@ static void add_root_to_dirty_list(struct btrfs_root *root)
spin_lock(&fs_info->trans_lock);
if (!test_and_set_bit(BTRFS_ROOT_DIRTY, &root->state)) {
/* Want the extent tree to be the last on the list */
if (root->root_key.objectid == BTRFS_EXTENT_TREE_OBJECTID)
if (btrfs_root_id(root) == BTRFS_EXTENT_TREE_OBJECTID)
list_move_tail(&root->dirty_list,
&fs_info->dirty_cowonly_roots);
else
@@ -454,7 +454,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
}
} else {
refs = 1;
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
else
@@ -466,15 +466,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
if (refs > 1) {
if ((owner == root->root_key.objectid ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
if ((owner == btrfs_root_id(root) ||
btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) &&
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
ret = btrfs_inc_ref(trans, root, buf, 1);
if (ret)
return ret;
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID) {
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_dec_ref(trans, root, buf, 0);
if (ret)
return ret;
@@ -485,8 +484,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
} else {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
@@ -500,8 +498,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
}
} else {
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
if (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID)
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
ret = btrfs_inc_ref(trans, root, cow, 1);
else
ret = btrfs_inc_ref(trans, root, cow, 0);
@@ -563,13 +560,13 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_node_key(buf, &disk_key, 0);
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID) {
if (parent)
parent_start = parent->start;
reloc_src_root = btrfs_header_owner(buf);
}
cow = btrfs_alloc_tree_block(trans, root, parent_start,
root->root_key.objectid, &disk_key, level,
btrfs_root_id(root), &disk_key, level,
search_start, empty_size, reloc_src_root, nest);
if (IS_ERR(cow))
return PTR_ERR(cow);
@@ -582,10 +579,10 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV);
btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
BTRFS_HEADER_FLAG_RELOC);
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
else
btrfs_set_header_owner(cow, root->root_key.objectid);
btrfs_set_header_owner(cow, btrfs_root_id(root));
write_extent_buffer_fsid(cow, fs_info->fs_devices->metadata_uuid);
@@ -609,7 +606,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
if (buf == root->node) {
WARN_ON(parent && parent != buf);
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID ||
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
@@ -685,7 +682,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
*/
if (btrfs_header_generation(buf) == trans->transid &&
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
!(btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
!test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
@@ -1003,7 +1000,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto out;
}
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
wret = btrfs_cow_block(trans, root, left,
parent, pslot - 1, &left,
BTRFS_NESTING_LEFT_COW);
@@ -1021,7 +1018,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto out;
}
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
wret = btrfs_cow_block(trans, root, right,
parent, pslot + 1, &right,
BTRFS_NESTING_RIGHT_COW);
@@ -1205,7 +1202,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (IS_ERR(left))
return PTR_ERR(left);
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
left_nr = btrfs_header_nritems(left);
if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -1265,7 +1262,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
if (IS_ERR(right))
return PTR_ERR(right);
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
right_nr = btrfs_header_nritems(right);
if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
@@ -1511,7 +1508,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
check.has_first_key = true;
check.level = parent_level - 1;
check.transid = gen;
check.owner_root = root->root_key.objectid;
check.owner_root = btrfs_root_id(root);
/*
* If we need to read an extent buffer from disk and we are holding locks
@@ -1556,7 +1553,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
btrfs_release_path(p);
return -EIO;
}
if (btrfs_check_eb_owner(tmp, root->root_key.objectid)) {
if (btrfs_check_eb_owner(tmp, btrfs_root_id(root))) {
free_extent_buffer(tmp);
btrfs_release_path(p);
return -EUCLEAN;
@@ -2865,7 +2862,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
else
btrfs_node_key(lower, &lower_key, 0);
c = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
c = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
&lower_key, level, root->node->start, 0,
0, BTRFS_NESTING_NEW_ROOT);
if (IS_ERR(c))
@@ -3009,7 +3006,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
mid = (c_nritems + 1) / 2;
btrfs_node_key(c, &disk_key, mid);
split = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
split = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
&disk_key, level, c->start, 0,
0, BTRFS_NESTING_SPLIT);
if (IS_ERR(split))
@@ -3267,7 +3264,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (IS_ERR(right))
return PTR_ERR(right);
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
btrfs_tree_lock_nested(right, BTRFS_NESTING_RIGHT);
free_space = btrfs_leaf_free_space(right);
if (free_space < data_size)
@@ -3483,7 +3480,7 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
if (IS_ERR(left))
return PTR_ERR(left);
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
btrfs_tree_lock_nested(left, BTRFS_NESTING_LEFT);
free_space = btrfs_leaf_free_space(left);
if (free_space < data_size) {
@@ -3761,7 +3758,7 @@ again:
* BTRFS_NESTING_SPLIT_THE_SPLITTENING if we need to, but for now just
* use BTRFS_NESTING_NEW_ROOT.
*/
right = btrfs_alloc_tree_block(trans, root, 0, root->root_key.objectid,
right = btrfs_alloc_tree_block(trans, root, 0, btrfs_root_id(root),
&disk_key, 0, l->start, 0, 0,
num_doubles ? BTRFS_NESTING_NEW_ROOT :
BTRFS_NESTING_SPLIT);

View File

@@ -147,7 +147,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
defrag->ino = btrfs_ino(inode);
defrag->transid = transid;
defrag->root = root->root_key.objectid;
defrag->root = btrfs_root_id(root);
defrag->extent_thresh = extent_thresh;
spin_lock(&fs_info->defrag_inodes_lock);

View File

@@ -1651,7 +1651,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
if (unlikely(ret)) {
btrfs_err(trans->fs_info,
"err add delayed dir index item(index: %llu) into the deletion tree of the delayed node(root id: %llu, inode id: %llu, errno: %d)",
index, node->root->root_key.objectid,
index, btrfs_root_id(node->root),
node->inode_id, ret);
btrfs_delayed_item_release_metadata(dir->root, item);
btrfs_release_delayed_item(item);

File diff suppressed because it is too large Load Diff

View File

@@ -30,6 +30,32 @@ enum btrfs_delayed_ref_action {
BTRFS_UPDATE_DELAYED_HEAD,
} __packed;
struct btrfs_data_ref {
/* For EXTENT_DATA_REF */
/* Inode which refers to this data extent */
u64 objectid;
/*
* file_offset - extent_offset
*
* file_offset is the key.offset of the EXTENT_DATA key.
* extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
*/
u64 offset;
};
struct btrfs_tree_ref {
/*
* Level of this tree block.
*
* Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
*/
int level;
/* For non-skinny metadata, no special member needed */
};
struct btrfs_delayed_ref_node {
struct rb_node ref_node;
/*
@@ -48,6 +74,15 @@ struct btrfs_delayed_ref_node {
/* seq number to keep track of insertion order */
u64 seq;
/* The ref_root for this ref */
u64 ref_root;
/*
* The parent for this ref, if this isn't set the ref_root is the
* reference owner.
*/
u64 parent;
/* ref count on this data structure */
refcount_t refs;
@@ -64,6 +99,11 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
union {
struct btrfs_tree_ref tree_ref;
struct btrfs_data_ref data_ref;
};
};
struct btrfs_delayed_extent_op {
@@ -151,21 +191,6 @@ struct btrfs_delayed_ref_head {
bool processing;
};
struct btrfs_delayed_tree_ref {
struct btrfs_delayed_ref_node node;
u64 root;
u64 parent;
int level;
};
struct btrfs_delayed_data_ref {
struct btrfs_delayed_ref_node node;
u64 root;
u64 parent;
u64 objectid;
u64 offset;
};
enum btrfs_delayed_ref_flags {
/* Indicate that we are flushing delayed refs for the commit */
BTRFS_DELAYED_REFS_FLUSHING,
@@ -214,42 +239,6 @@ enum btrfs_ref_type {
BTRFS_REF_LAST,
} __packed;
struct btrfs_data_ref {
/* For EXTENT_DATA_REF */
/* Root which owns this data reference. */
u64 ref_root;
/* Inode which refers to this data extent */
u64 ino;
/*
* file_offset - extent_offset
*
* file_offset is the key.offset of the EXTENT_DATA key.
* extent_offset is btrfs_file_extent_offset() of the EXTENT_DATA data.
*/
u64 offset;
};
struct btrfs_tree_ref {
/*
* Level of this tree block
*
* Shared for skinny (TREE_BLOCK_REF) and normal tree ref.
*/
int level;
/*
* Root which owns this tree block reference.
*
* For TREE_BLOCK_REF (skinny metadata, either inline or keyed)
*/
u64 ref_root;
/* For non-skinny metadata, no special member needed */
};
struct btrfs_ref {
enum btrfs_ref_type type;
enum btrfs_delayed_ref_action action;
@@ -267,9 +256,15 @@ struct btrfs_ref {
u64 real_root;
#endif
u64 bytenr;
u64 len;
u64 num_bytes;
u64 owning_root;
/*
* The root that owns the reference for this reference, this will be set
* or ->parent will be set, depending on what type of reference this is.
*/
u64 ref_root;
/* Bytenr of the parent tree block */
u64 parent;
union {
@@ -279,8 +274,7 @@ struct btrfs_ref {
};
extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
extern struct kmem_cache *btrfs_delayed_ref_node_cachep;
extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
int __init btrfs_delayed_ref_init(void);
@@ -318,12 +312,10 @@ static inline u64 btrfs_calc_delayed_ref_csum_bytes(const struct btrfs_fs_info *
return btrfs_calc_metadata_size(fs_info, num_csum_items);
}
void btrfs_init_generic_ref(struct btrfs_ref *generic_ref, int action, u64 bytenr,
u64 len, u64 parent, u64 owning_root);
void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 root,
void btrfs_init_tree_ref(struct btrfs_ref *generic_ref, int level, u64 mod_root,
bool skip_qgroup);
void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ino, u64 offset,
u64 mod_root, bool skip_qgroup);
void btrfs_init_data_ref(struct btrfs_ref *generic_ref, u64 ref_root, u64 ino,
u64 offset, u64 mod_root, bool skip_qgroup);
static inline struct btrfs_delayed_extent_op *
btrfs_alloc_delayed_extent_op(void)
@@ -398,19 +390,39 @@ void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
u64 num_bytes);
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
/*
* helper functions to cast a node into its container
*/
static inline struct btrfs_delayed_tree_ref *
btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node)
static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
{
return container_of(node, struct btrfs_delayed_tree_ref, node);
if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
return node->data_ref.objectid;
return node->tree_ref.level;
}
static inline struct btrfs_delayed_data_ref *
btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node)
static inline u64 btrfs_delayed_ref_offset(struct btrfs_delayed_ref_node *node)
{
return container_of(node, struct btrfs_delayed_data_ref, node);
if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
node->type == BTRFS_SHARED_DATA_REF_KEY)
return node->data_ref.offset;
return 0;
}
static inline u8 btrfs_ref_type(struct btrfs_ref *ref)
{
ASSERT(ref->type == BTRFS_REF_DATA || ref->type == BTRFS_REF_METADATA);
if (ref->type == BTRFS_REF_DATA) {
if (ref->parent)
return BTRFS_SHARED_DATA_REF_KEY;
else
return BTRFS_EXTENT_DATA_REF_KEY;
} else {
if (ref->parent)
return BTRFS_SHARED_BLOCK_REF_KEY;
else
return BTRFS_TREE_BLOCK_REF_KEY;
}
return 0;
}
#endif

View File

@@ -646,7 +646,7 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
u64 objectid)
{
bool dummy = test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
bool dummy = btrfs_is_testing(fs_info);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
@@ -663,8 +663,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
root->inode_tree = RB_ROOT;
/* GFP flags are compatible with XA_FLAGS_*. */
xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
xa_init(&root->delayed_nodes);
btrfs_init_root_block_rsv(root);
@@ -776,7 +775,7 @@ int btrfs_global_root_insert(struct btrfs_root *root)
if (tmp) {
ret = -EEXIST;
btrfs_warn(fs_info, "global root %llu %llu already exists",
root->root_key.objectid, root->root_key.offset);
btrfs_root_id(root), root->root_key.offset);
}
return ret;
}
@@ -1012,7 +1011,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
}
log_root->last_trans = trans->transid;
log_root->root_key.offset = root->root_key.objectid;
log_root->root_key.offset = btrfs_root_id(root);
inode_item = &log_root->root_item.inode;
btrfs_set_stack_inode_generation(inode_item, 1);
@@ -1076,15 +1075,15 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
* For real fs, and not log/reloc trees, root owner must
* match its root node owner
*/
if (!test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state) &&
root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
root->root_key.objectid != btrfs_header_owner(root->node)) {
if (!btrfs_is_testing(fs_info) &&
btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
btrfs_root_id(root) != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_root_id(root) != btrfs_header_owner(root->node)) {
btrfs_crit(fs_info,
"root=%llu block=%llu, tree root owner mismatch, have %llu expect %llu",
root->root_key.objectid, root->node->start,
btrfs_root_id(root), root->node->start,
btrfs_header_owner(root->node),
root->root_key.objectid);
btrfs_root_id(root));
ret = -EUCLEAN;
goto fail;
}
@@ -1121,9 +1120,9 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
btrfs_drew_lock_init(&root->snapshot_lock);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
if (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID &&
!btrfs_is_data_reloc_root(root) &&
is_fstree(root->root_key.objectid)) {
is_fstree(btrfs_root_id(root))) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1132,7 +1131,7 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
* Don't assign anonymous block device to roots that are not exposed to
* userspace, the id pool is limited to 1M
*/
if (is_fstree(root->root_key.objectid) &&
if (is_fstree(btrfs_root_id(root)) &&
btrfs_root_refs(&root->root_item) > 0) {
if (!anon_dev) {
ret = get_anon_bdev(&root->anon_dev);
@@ -1219,7 +1218,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
(unsigned long)btrfs_root_id(root),
root);
if (ret == 0) {
btrfs_grab_root(root);
@@ -1266,9 +1265,14 @@ static void free_global_roots(struct btrfs_fs_info *fs_info)
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
{
struct percpu_counter *em_counter = &fs_info->evictable_extent_maps;
percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
percpu_counter_destroy(&fs_info->delalloc_bytes);
percpu_counter_destroy(&fs_info->ordered_bytes);
if (percpu_counter_initialized(em_counter))
ASSERT(percpu_counter_sum_positive(em_counter) == 0);
percpu_counter_destroy(em_counter);
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
@@ -2584,7 +2588,7 @@ static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int lev
struct btrfs_tree_parent_check check = {
.level = level,
.transid = gen,
.owner_root = root->root_key.objectid
.owner_root = btrfs_root_id(root)
};
int ret = 0;
@@ -2848,6 +2852,10 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
if (ret)
return ret;
ret = percpu_counter_init(&fs_info->evictable_extent_maps, 0, GFP_KERNEL);
if (ret)
return ret;
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
if (ret)
return ret;
@@ -2930,7 +2938,7 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->fs_roots_radix_lock);
break;
}
root_objectid = gang[ret - 1]->root_key.objectid + 1;
root_objectid = btrfs_root_id(gang[ret - 1]) + 1;
for (i = 0; i < ret; i++) {
/* Avoid to grab roots in dead_roots. */
@@ -2946,7 +2954,7 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
root_objectid = gang[i]->root_key.objectid;
root_objectid = btrfs_root_id(gang[i]);
err = btrfs_orphan_cleanup(gang[i]);
if (err)
goto out;
@@ -3618,28 +3626,25 @@ ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
static void btrfs_end_super_write(struct bio *bio)
{
struct btrfs_device *device = bio->bi_private;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
struct page *page;
bio_for_each_segment_all(bvec, bio, iter_all) {
page = bvec->bv_page;
struct folio_iter fi;
bio_for_each_folio_all(fi, bio) {
if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
"lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
ClearPageUptodate(page);
SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
} else {
SetPageUptodate(page);
/* Ensure failure if the primary sb fails. */
if (bio->bi_opf & REQ_FUA)
atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
&device->sb_write_errors);
else
atomic_inc(&device->sb_write_errors);
}
put_page(page);
unlock_page(page);
folio_unlock(fi.folio);
folio_put(fi.folio);
}
bio_put(bio);
@@ -3726,13 +3731,13 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* pages we use for writing are locked.
* folios we use for writing are locked.
*
* Write @max_mirrors copies of the superblock, where 0 means default that fit
* the expected device size at commit time. Note that max_mirrors must be
* same for write and wait phases.
*
* Return number of errors when page is not found or submission fails.
* Return number of errors when folio is not found or submission fails.
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
@@ -3741,19 +3746,21 @@ static int write_dev_supers(struct btrfs_device *device,
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
int ret;
u64 bytenr, bytenr_orig;
atomic_set(&device->sb_write_errors, 0);
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
shash->tfm = fs_info->csum_shash;
for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct folio *folio;
struct bio *bio;
struct btrfs_super_block *disk_super;
size_t offset;
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
@@ -3763,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d",
i);
errors++;
atomic_inc(&device->sb_write_errors);
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
@@ -3776,20 +3783,20 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
sb->csum);
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);
if (!page) {
folio = __filemap_get_folio(mapping, bytenr >> PAGE_SHIFT,
FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
GFP_NOFS);
if (IS_ERR(folio)) {
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu",
bytenr);
errors++;
atomic_inc(&device->sb_write_errors);
continue;
}
ASSERT(folio_order(folio) == 0);
/* Bump the refcount for wait_dev_supers() */
get_page(page);
disk_super = page_address(page);
offset = offset_in_folio(folio, bytenr);
disk_super = folio_address(folio) + offset;
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
/*
@@ -3803,8 +3810,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
offset_in_page(bytenr));
bio_add_folio_nofail(bio, folio, BTRFS_SUPER_INFO_SIZE, offset);
/*
* We FUA only the first super block. The others we allow to
@@ -3816,17 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device,
submit_bio(bio);
if (btrfs_advance_sb_log(device, i))
errors++;
atomic_inc(&device->sb_write_errors);
}
return errors < i ? 0 : -1;
return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
}
/*
* Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases.
*
* Return number of errors when page is not found or not marked up to
* date.
* Return -1 if primary super block write failed or when there were no super block
* copies written. Otherwise 0.
*/
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
@@ -3840,7 +3846,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct folio *folio;
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
if (ret == -ENOENT) {
@@ -3855,30 +3861,21 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
device->commit_total_bytes)
break;
page = find_get_page(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT);
if (!page) {
errors++;
if (i == 0)
primary_failed = true;
folio = filemap_get_folio(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT);
/* If the folio has been removed, then we know it completed. */
if (IS_ERR(folio))
continue;
}
/* Page is submitted locked and unlocked once the IO completes */
wait_on_page_locked(page);
if (PageError(page)) {
errors++;
if (i == 0)
primary_failed = true;
}
ASSERT(folio_order(folio) == 0);
/* Drop our reference */
put_page(page);
/* Drop the reference from the writing run */
put_page(page);
/* Folio will be unlocked once the write completes. */
folio_wait_locked(folio);
folio_put(folio);
}
/* log error, force error return */
errors += atomic_read(&device->sb_write_errors);
if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
primary_failed = true;
if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu",
device->devid);
@@ -4139,7 +4136,7 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid);
(unsigned long)btrfs_root_id(root));
if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
drop_ref = true;
spin_unlock(&fs_info->fs_roots_radix_lock);
@@ -4182,9 +4179,6 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
struct btrfs_transaction *tmp;
bool found = false;
if (list_empty(&fs_info->trans_list))
return;
/*
* This function is only called at the very end of close_ctree(),
* thus no other running transaction, no need to take trans_lock.
@@ -4484,7 +4478,7 @@ static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
for (i = 0; i < ret; i++) {
if (!gang[i])
continue;
root_objectid = gang[i]->root_key.objectid;
root_objectid = btrfs_root_id(gang[i]);
btrfs_free_log(NULL, gang[i]);
btrfs_put_root(gang[i]);
}
@@ -4815,7 +4809,7 @@ static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
btrfs_qgroup_free_meta_all_pertrans(root);
radix_tree_tag_clear(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
(unsigned long)btrfs_root_id(root),
BTRFS_ROOT_TRANS_TAG);
}
}
@@ -4844,14 +4838,10 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
cur_trans->state = TRANS_STATE_UNBLOCKED;
wake_up(&fs_info->transaction_wait);
btrfs_destroy_delayed_inodes(fs_info);
btrfs_destroy_marked_extents(fs_info, &cur_trans->dirty_pages,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
btrfs_free_all_qgroup_pertrans(fs_info);
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}
@@ -4904,6 +4894,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
btrfs_assert_delayed_root_empty(fs_info);
btrfs_destroy_all_delalloc_inodes(fs_info);
btrfs_drop_all_logs(fs_info);
btrfs_free_all_qgroup_pertrans(fs_info);
mutex_unlock(&fs_info->transaction_kthread_mutex);
return 0;
@@ -4959,7 +4950,7 @@ int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid)
if (unlikely(root->free_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
btrfs_warn(root->fs_info,
"the objectid of root %llu reaches its highest value",
root->root_key.objectid);
btrfs_root_id(root));
ret = -ENOSPC;
goto out;
}

View File

@@ -34,7 +34,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
type = FILEID_BTRFS_WITHOUT_PARENT;
fid->objectid = btrfs_ino(BTRFS_I(inode));
fid->root_objectid = BTRFS_I(inode)->root->root_key.objectid;
fid->root_objectid = btrfs_root_id(BTRFS_I(inode)->root);
fid->gen = inode->i_generation;
if (parent) {
@@ -42,7 +42,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
fid->parent_objectid = BTRFS_I(parent)->location.objectid;
fid->parent_gen = parent->i_generation;
parent_root_id = BTRFS_I(parent)->root->root_key.objectid;
parent_root_id = btrfs_root_id(BTRFS_I(parent)->root);
if (parent_root_id != fid->root_objectid) {
fid->parent_root_objectid = parent_root_id;
@@ -160,7 +160,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
return ERR_PTR(-ENOMEM);
if (btrfs_ino(BTRFS_I(dir)) == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = root->root_key.objectid;
key.objectid = btrfs_root_id(root);
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = fs_info->tree_root;
@@ -243,7 +243,7 @@ static int btrfs_get_name(struct dentry *parent, char *name,
return -ENOMEM;
if (ino == BTRFS_FIRST_FREE_OBJECTID) {
key.objectid = BTRFS_I(inode)->root->root_key.objectid;
key.objectid = btrfs_root_id(BTRFS_I(inode)->root);
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = (u64)-1;
root = fs_info->tree_root;

View File

@@ -1059,7 +1059,7 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *prealloc = NULL;
struct rb_node **p = NULL;
struct rb_node *parent = NULL;
int err = 0;
int ret = 0;
u64 last_start;
u64 last_end;
u32 exclusive_bits = (bits & EXTENT_LOCKED);
@@ -1122,7 +1122,7 @@ hit_next:
if (state->state & exclusive_bits) {
*failed_start = state->start;
cache_state(state, failed_state);
err = -EEXIST;
ret = -EEXIST;
goto out;
}
@@ -1158,7 +1158,7 @@ hit_next:
if (state->state & exclusive_bits) {
*failed_start = start;
cache_state(state, failed_state);
err = -EEXIST;
ret = -EEXIST;
goto out;
}
@@ -1175,12 +1175,12 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
goto search_again;
err = split_state(tree, state, prealloc, start);
if (err)
extent_io_tree_panic(tree, state, "split", err);
ret = split_state(tree, state, prealloc, start);
if (ret)
extent_io_tree_panic(tree, state, "split", ret);
prealloc = NULL;
if (err)
if (ret)
goto out;
if (state->end <= end) {
set_state_bits(tree, state, bits, changeset);
@@ -1224,8 +1224,8 @@ hit_next:
prealloc->end = this_end;
inserted_state = insert_state(tree, prealloc, bits, changeset);
if (IS_ERR(inserted_state)) {
err = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", err);
ret = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", ret);
}
cache_state(inserted_state, cached_state);
@@ -1244,16 +1244,16 @@ hit_next:
if (state->state & exclusive_bits) {
*failed_start = start;
cache_state(state, failed_state);
err = -EEXIST;
ret = -EEXIST;
goto out;
}
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc)
goto search_again;
err = split_state(tree, state, prealloc, end + 1);
if (err)
extent_io_tree_panic(tree, state, "split", err);
ret = split_state(tree, state, prealloc, end + 1);
if (ret)
extent_io_tree_panic(tree, state, "split", ret);
set_state_bits(tree, prealloc, bits, changeset);
cache_state(prealloc, cached_state);
@@ -1275,7 +1275,7 @@ out:
if (prealloc)
free_extent_state(prealloc);
return err;
return ret;
}
@@ -1312,7 +1312,7 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *prealloc = NULL;
struct rb_node **p = NULL;
struct rb_node *parent = NULL;
int err = 0;
int ret = 0;
u64 last_start;
u64 last_end;
bool first_iteration = true;
@@ -1351,7 +1351,7 @@ again:
if (!state) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
err = -ENOMEM;
ret = -ENOMEM;
goto out;
}
prealloc->start = start;
@@ -1402,14 +1402,14 @@ hit_next:
if (state->start < start) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
err = -ENOMEM;
ret = -ENOMEM;
goto out;
}
err = split_state(tree, state, prealloc, start);
if (err)
extent_io_tree_panic(tree, state, "split", err);
ret = split_state(tree, state, prealloc, start);
if (ret)
extent_io_tree_panic(tree, state, "split", ret);
prealloc = NULL;
if (err)
if (ret)
goto out;
if (state->end <= end) {
set_state_bits(tree, state, bits, NULL);
@@ -1442,7 +1442,7 @@ hit_next:
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
err = -ENOMEM;
ret = -ENOMEM;
goto out;
}
@@ -1454,8 +1454,8 @@ hit_next:
prealloc->end = this_end;
inserted_state = insert_state(tree, prealloc, bits, NULL);
if (IS_ERR(inserted_state)) {
err = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", err);
ret = PTR_ERR(inserted_state);
extent_io_tree_panic(tree, prealloc, "insert", ret);
}
cache_state(inserted_state, cached_state);
if (inserted_state == prealloc)
@@ -1472,13 +1472,13 @@ hit_next:
if (state->start <= end && state->end > end) {
prealloc = alloc_extent_state_atomic(prealloc);
if (!prealloc) {
err = -ENOMEM;
ret = -ENOMEM;
goto out;
}
err = split_state(tree, state, prealloc, end + 1);
if (err)
extent_io_tree_panic(tree, state, "split", err);
ret = split_state(tree, state, prealloc, end + 1);
if (ret)
extent_io_tree_panic(tree, state, "split", ret);
set_state_bits(tree, prealloc, bits, NULL);
cache_state(prealloc, cached_state);
@@ -1500,7 +1500,7 @@ out:
if (prealloc)
free_extent_state(prealloc);
return err;
return ret;
}
/*

File diff suppressed because it is too large Load Diff

View File

@@ -396,15 +396,14 @@ again:
/* then test to make sure it is all still delalloc */
ret = test_range_bit(tree, delalloc_start, delalloc_end,
EXTENT_DELALLOC, cached_state);
unlock_extent(tree, delalloc_start, delalloc_end, &cached_state);
if (!ret) {
unlock_extent(tree, delalloc_start, delalloc_end,
&cached_state);
__unlock_for_delalloc(inode, locked_page,
delalloc_start, delalloc_end);
cond_resched();
goto again;
}
free_extent_state(cached_state);
*start = delalloc_start;
*end = delalloc_end;
out_failed:
@@ -413,9 +412,10 @@ out_failed:
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
struct extent_state **cached,
u32 clear_bits, unsigned long page_ops)
{
clear_extent_bit(&inode->io_tree, start, end, clear_bits, NULL);
clear_extent_bit(&inode->io_tree, start, end, clear_bits, cached);
__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
start, end, page_ops);
@@ -666,6 +666,37 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
bio_put(bio);
}
/*
* Populate every free slot in a provided array with folios.
*
* @nr_folios: number of folios to allocate
* @folio_array: the array to fill with folios; any existing non-NULL entries in
* the array will be skipped
* @extra_gfp: the extra GFP flags for the allocation
*
* Return: 0 if all folios were able to be allocated;
* -ENOMEM otherwise, the partially allocated folios would be freed and
* the array slots zeroed
*/
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array,
gfp_t extra_gfp)
{
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
continue;
folio_array[i] = folio_alloc(GFP_NOFS | extra_gfp, 0);
if (!folio_array[i])
goto error;
}
return 0;
error:
for (int i = 0; i < nr_folios; i++) {
if (folio_array[i])
folio_put(folio_array[i]);
}
return -ENOMEM;
}
/*
* Populate every free slot in a provided array with pages.
*
@@ -1571,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
* can be no longer dirty nor marked anymore for writeback (if a
* subsequent modification to the extent buffer didn't happen before the
* transaction commit), which makes filemap_fdata[write|wait]_range not
* able to find the pages tagged with SetPageError at transaction
* able to find the pages which contain errors at transaction
* commit time. So if this happens we must abort the transaction,
* otherwise we commit a super block with btree roots that point to
* btree nodes/leafs whose content on disk is invalid - either garbage
@@ -2246,8 +2277,7 @@ next_page:
submit_write_bio(&bio_ctrl, found_error ? ret : 0);
}
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc)
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
int ret = 0;
@@ -2267,7 +2297,7 @@ int extent_writepages(struct address_space *mapping,
return ret;
}
void extent_readahead(struct readahead_control *rac)
void btrfs_readahead(struct readahead_control *rac)
{
struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD };
struct page *pagepool[16];
@@ -2325,19 +2355,20 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
* are locked or under IO and drops the related state bits if it is safe
* to drop the page.
*/
static int try_release_extent_state(struct extent_io_tree *tree,
static bool try_release_extent_state(struct extent_io_tree *tree,
struct page *page, gfp_t mask)
{
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
int ret = 1;
bool ret;
if (test_range_bit_exists(tree, start, end, EXTENT_LOCKED)) {
ret = 0;
ret = false;
} else {
u32 clear_bits = ~(EXTENT_LOCKED | EXTENT_NODATASUM |
EXTENT_DELALLOC_NEW | EXTENT_CTLBITS |
EXTENT_QGROUP_RESERVED);
int ret2;
/*
* At this point we can safely clear everything except the
@@ -2345,15 +2376,15 @@ static int try_release_extent_state(struct extent_io_tree *tree,
* The delalloc new bit will be cleared by ordered extent
* completion.
*/
ret = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
ret2 = __clear_extent_bit(tree, start, end, clear_bits, NULL, NULL);
/* if clear_extent_bit failed for enomem reasons,
* we can't allow the release to continue.
*/
if (ret < 0)
ret = 0;
if (ret2 < 0)
ret = false;
else
ret = 1;
ret = true;
}
return ret;
}
@@ -2363,84 +2394,80 @@ static int try_release_extent_state(struct extent_io_tree *tree,
* in the range corresponding to the page, both state records and extent
* map records are removed
*/
int try_release_extent_mapping(struct page *page, gfp_t mask)
bool try_release_extent_mapping(struct page *page, gfp_t mask)
{
struct extent_map *em;
u64 start = page_offset(page);
u64 end = start + PAGE_SIZE - 1;
struct btrfs_inode *btrfs_inode = page_to_inode(page);
struct extent_io_tree *tree = &btrfs_inode->io_tree;
struct extent_map_tree *map = &btrfs_inode->extent_tree;
struct btrfs_inode *inode = page_to_inode(page);
struct extent_io_tree *io_tree = &inode->io_tree;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
u64 len;
while (start <= end) {
struct btrfs_fs_info *fs_info;
u64 cur_gen;
while (start <= end) {
const u64 cur_gen = btrfs_get_fs_generation(inode->root->fs_info);
const u64 len = end - start + 1;
struct extent_map_tree *extent_tree = &inode->extent_tree;
struct extent_map *em;
len = end - start + 1;
write_lock(&map->lock);
em = lookup_extent_mapping(map, start, len);
if (!em) {
write_unlock(&map->lock);
break;
}
if ((em->flags & EXTENT_FLAG_PINNED) ||
em->start != start) {
write_unlock(&map->lock);
free_extent_map(em);
break;
}
if (test_range_bit_exists(tree, em->start,
extent_map_end(em) - 1,
EXTENT_LOCKED))
goto next;
/*
* If it's not in the list of modified extents, used
* by a fast fsync, we can remove it. If it's being
* logged we can safely remove it since fsync took an
* extra reference on the em.
*/
if (list_empty(&em->list) ||
(em->flags & EXTENT_FLAG_LOGGING))
goto remove_em;
/*
* If it's in the list of modified extents, remove it
* only if its generation is older then the current one,
* in which case we don't need it for a fast fsync.
* Otherwise don't remove it, we could be racing with an
* ongoing fast fsync that could miss the new extent.
*/
fs_info = btrfs_inode->root->fs_info;
spin_lock(&fs_info->trans_lock);
cur_gen = fs_info->generation;
spin_unlock(&fs_info->trans_lock);
if (em->generation >= cur_gen)
goto next;
write_lock(&extent_tree->lock);
em = lookup_extent_mapping(extent_tree, start, len);
if (!em) {
write_unlock(&extent_tree->lock);
break;
}
if ((em->flags & EXTENT_FLAG_PINNED) || em->start != start) {
write_unlock(&extent_tree->lock);
free_extent_map(em);
break;
}
if (test_range_bit_exists(io_tree, em->start,
extent_map_end(em) - 1, EXTENT_LOCKED))
goto next;
/*
* If it's not in the list of modified extents, used by a fast
* fsync, we can remove it. If it's being logged we can safely
* remove it since fsync took an extra reference on the em.
*/
if (list_empty(&em->list) || (em->flags & EXTENT_FLAG_LOGGING))
goto remove_em;
/*
* If it's in the list of modified extents, remove it only if
* its generation is older then the current one, in which case
* we don't need it for a fast fsync. Otherwise don't remove it,
* we could be racing with an ongoing fast fsync that could miss
* the new extent.
*/
if (em->generation >= cur_gen)
goto next;
remove_em:
/*
* We only remove extent maps that are not in the list of
* modified extents or that are in the list but with a
* generation lower then the current generation, so there
* is no need to set the full fsync flag on the inode (it
* hurts the fsync performance for workloads with a data
* size that exceeds or is close to the system's memory).
*/
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
/*
* We only remove extent maps that are not in the list of
* modified extents or that are in the list but with a
* generation lower then the current generation, so there is no
* need to set the full fsync flag on the inode (it hurts the
* fsync performance for workloads with a data size that exceeds
* or is close to the system's memory).
*/
remove_extent_mapping(inode, em);
/* Once for the inode's extent map tree. */
free_extent_map(em);
next:
start = extent_map_end(em);
write_unlock(&map->lock);
start = extent_map_end(em);
write_unlock(&extent_tree->lock);
/* once for us */
free_extent_map(em);
/* Once for us, for the lookup_extent_mapping() reference. */
free_extent_map(em);
cond_resched(); /* Allow large-extent preemption. */
if (need_resched()) {
/*
* If we need to resched but we can't block just exit
* and leave any remaining extent maps.
*/
if (!gfpflags_allow_blocking(mask))
break;
cond_resched();
}
}
return try_release_extent_state(tree, page, mask);
return try_release_extent_state(io_tree, page, mask);
}
struct btrfs_fiemap_entry {
@@ -2773,13 +2800,19 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p
goto out;
}
/* See the comment at fiemap_search_slot() about why we clone. */
copy_extent_buffer_full(clone, path->nodes[0]);
/*
* Important to preserve the start field, for the optimizations when
* checking if extents are shared (see extent_fiemap()).
*
* We must set ->start before calling copy_extent_buffer_full(). If we
* are on sub-pagesize blocksize, we use ->start to determine the offset
* into the folio where our eb exists, and if we update ->start after
* the fact then any subsequent reads of the eb may read from a
* different offset in the folio than where we originally copied into.
*/
clone->start = path->nodes[0]->start;
/* See the comment at fiemap_search_slot() about why we clone. */
copy_extent_buffer_full(clone, path->nodes[0]);
slot = path->slots[0];
btrfs_release_path(path);
@@ -4261,6 +4294,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
}
}
static void clear_extent_buffer_reading(struct extent_buffer *eb)
{
clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
}
static void end_bbio_meta_read(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
@@ -4269,6 +4309,13 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
struct folio_iter fi;
u32 bio_offset = 0;
/*
* If the extent buffer is marked UPTODATE before the read operation
* completes, other calls to read_extent_buffer_pages() will return
* early without waiting for the read to finish, causing data races.
*/
WARN_ON(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags));
eb->read_mirror = bbio->mirror_num;
if (uptodate &&
@@ -4295,9 +4342,7 @@ static void end_bbio_meta_read(struct btrfs_bio *bbio)
bio_offset += len;
}
clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
clear_extent_buffer_reading(eb);
free_extent_buffer(eb);
bio_put(&bbio->bio);
@@ -4331,9 +4376,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
* will now be set, and we shouldn't read it in again.
*/
if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) {
clear_bit(EXTENT_BUFFER_READING, &eb->bflags);
smp_mb__after_atomic();
wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING);
clear_extent_buffer_reading(eb);
return 0;
}

View File

@@ -27,6 +27,7 @@ struct address_space;
struct writeback_control;
struct extent_io_tree;
struct extent_map_tree;
struct extent_state;
struct btrfs_block_group;
struct btrfs_fs_info;
struct btrfs_inode;
@@ -230,18 +231,17 @@ static inline void extent_changeset_free(struct extent_changeset *changeset)
kfree(changeset);
}
int try_release_extent_mapping(struct page *page, gfp_t mask);
bool try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
void extent_write_locked_range(struct inode *inode, struct page *locked_page,
u64 start, u64 end, struct writeback_control *wbc,
bool pages_dirty);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc);
void extent_readahead(struct readahead_control *rac);
void btrfs_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len);
int set_folio_extent_mapped(struct folio *folio);
@@ -353,6 +353,7 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
struct extent_state **cached,
u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
struct folio *folio, size_t offset);
@@ -361,6 +362,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
gfp_t extra_gfp);
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array,
gfp_t extra_gfp);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,

File diff suppressed because it is too large Load Diff

View File

@@ -30,28 +30,77 @@ enum {
ENUM_BIT(EXTENT_FLAG_PREALLOC),
/* Logging this extent */
ENUM_BIT(EXTENT_FLAG_LOGGING),
/* Filling in a preallocated extent */
ENUM_BIT(EXTENT_FLAG_FILLING),
/* This em is merged from two or more physically adjacent ems */
ENUM_BIT(EXTENT_FLAG_MERGED),
};
/*
* This structure represents file extents and holes.
*
* Unlike on-disk file extent items, extent maps can be merged to save memory.
* This means members only match file extent items before any merging.
*
* Keep this structure as compact as possible, as we can have really large
* amounts of allocated extent maps at any time.
*/
struct extent_map {
struct rb_node rb_node;
/* all of these are in bytes */
/* All of these are in bytes. */
/* File offset matching the offset of a BTRFS_EXTENT_ITEM_KEY key. */
u64 start;
/*
* Length of the file extent.
*
* For non-inlined file extents it's btrfs_file_extent_item::num_bytes.
* For inline extents it's sectorsize, since inline data starts at
* offsetof(struct btrfs_file_extent_item, disk_bytenr) thus
* btrfs_file_extent_item::num_bytes is not valid.
*/
u64 len;
u64 mod_start;
u64 mod_len;
/*
* The file offset of the original file extent before splitting.
*
* This is an in-memory only member, matching
* extent_map::start - btrfs_file_extent_item::offset for
* regular/preallocated extents. EXTENT_MAP_HOLE otherwise.
*/
u64 orig_start;
/*
* The full on-disk extent length, matching
* btrfs_file_extent_item::disk_num_bytes.
*/
u64 orig_block_len;
/*
* The decompressed size of the whole on-disk extent, matching
* btrfs_file_extent_item::ram_bytes.
*/
u64 ram_bytes;
/*
* The on-disk logical bytenr for the file extent.
*
* For compressed extents it matches btrfs_file_extent_item::disk_bytenr.
* For uncompressed extents it matches
* btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset
*
* For holes it is EXTENT_MAP_HOLE and for inline extents it is
* EXTENT_MAP_INLINE.
*/
u64 block_start;
/*
* The on-disk length for the file extent.
*
* For compressed extents it matches btrfs_file_extent_item::disk_num_bytes.
* For uncompressed extents it matches extent_map::len.
* For holes and inline extents it's -1 and shouldn't be used.
*/
u64 block_len;
/*
@@ -124,7 +173,7 @@ static inline u64 extent_map_end(const struct extent_map *em)
void extent_map_tree_init(struct extent_map_tree *tree);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em);
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
u64 new_logical);
@@ -133,11 +182,10 @@ void free_extent_map(struct extent_map *em);
int __init extent_map_init(void);
void __cold extent_map_exit(void);
int unpin_extent_cache(struct btrfs_inode *inode, u64 start, u64 len, u64 gen);
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
void clear_em_logging(struct btrfs_inode *inode, struct extent_map *em);
struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 len);
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree,
int btrfs_add_extent_mapping(struct btrfs_inode *inode,
struct extent_map **em_in, u64 start, u64 len);
void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
u64 start, u64 end,
@@ -145,5 +193,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
struct extent_map *new_em,
bool modified);
long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan);
#endif

View File

@@ -430,8 +430,7 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
memset(csum_dst, 0, csum_size);
count = 1;
if (inode->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
if (btrfs_root_id(inode->root) == BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset = bbio->file_offset + bio_offset;
set_extent_bit(&inode->io_tree, file_offset,
@@ -450,9 +449,22 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
return ret;
}
/*
* Search for checksums for a given logical range.
*
* @root: The root where to look for checksums.
* @start: Logical address of target checksum range.
* @end: End offset (inclusive) of the target checksum range.
* @list: List for adding each checksum that was found.
* Can be NULL in case the caller only wants to check if
* there any checksums for the range.
* @nowait: Indicate if the search must be non-blocking or not.
*
* Return < 0 on error, 0 if no checksums were found, or 1 if checksums were
* found.
*/
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait)
struct list_head *list, bool nowait)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
@@ -460,8 +472,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
struct extent_buffer *leaf;
struct btrfs_ordered_sum *sums;
struct btrfs_csum_item *item;
LIST_HEAD(tmplist);
int ret;
bool found_csums = false;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(end + 1, fs_info->sectorsize));
@@ -471,11 +483,6 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
return -ENOMEM;
path->nowait = nowait;
if (search_commit) {
path->skip_locking = 1;
path->reada = READA_FORWARD;
path->search_commit_root = 1;
}
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.offset = start;
@@ -483,7 +490,7 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
goto out;
if (ret > 0 && path->slots[0] > 0) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
@@ -518,7 +525,7 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto fail;
goto out;
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -540,6 +547,10 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
continue;
}
found_csums = true;
if (!list)
goto out;
csum_end = min(csum_end, end + 1);
item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_csum_item);
@@ -553,7 +564,7 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
GFP_NOFS);
if (!sums) {
ret = -ENOMEM;
goto fail;
goto out;
}
sums->logical = start;
@@ -567,21 +578,24 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
bytes_to_csum_size(fs_info, size));
start += size;
list_add_tail(&sums->list, &tmplist);
list_add_tail(&sums->list, list);
}
path->slots[0]++;
}
ret = 0;
fail:
while (ret < 0 && !list_empty(&tmplist)) {
sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
list_splice_tail(&tmplist, list);
out:
btrfs_free_path(path);
return ret;
if (ret < 0) {
if (list) {
struct btrfs_ordered_sum *tmp_sums;
list_for_each_entry_safe(sums, tmp_sums, list, list)
kfree(sums);
}
return ret;
}
return found_csums ? 1 : 0;
}
/*
@@ -870,8 +884,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
const u32 csum_size = fs_info->csum_size;
u32 blocksize_bits = fs_info->sectorsize_bits;
ASSERT(root->root_key.objectid == BTRFS_CSUM_TREE_OBJECTID ||
root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
ASSERT(btrfs_root_id(root) == BTRFS_CSUM_TREE_OBJECTID ||
btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID);
path = btrfs_alloc_path();
if (!path)
@@ -1171,7 +1185,7 @@ extend_csum:
* search, etc, because log trees are temporary anyway and it
* would only save a few bytes of leaf space.
*/
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
if (btrfs_root_id(root) == BTRFS_TREE_LOG_OBJECTID) {
if (path->slots[0] + 1 >=
btrfs_header_nritems(path->nodes[0])) {
ret = find_next_csum_offset(root, path, &next_offset);
@@ -1265,20 +1279,19 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
struct extent_buffer *leaf = path->nodes[0];
const int slot = path->slots[0];
struct btrfs_key key;
u64 extent_start, extent_end;
u64 extent_start;
u64 bytenr;
u8 type = btrfs_file_extent_type(leaf, fi);
int compress_type = btrfs_file_extent_compression(leaf, fi);
btrfs_item_key_to_cpu(leaf, &key, slot);
extent_start = key.offset;
extent_end = btrfs_file_extent_end(path);
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
em->generation = btrfs_file_extent_generation(leaf, fi);
if (type == BTRFS_FILE_EXTENT_REG ||
type == BTRFS_FILE_EXTENT_PREALLOC) {
em->start = extent_start;
em->len = extent_end - extent_start;
em->len = btrfs_file_extent_end(path) - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, fi);
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
@@ -1299,9 +1312,12 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
em->flags |= EXTENT_FLAG_PREALLOC;
}
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
/* Tree-checker has ensured this. */
ASSERT(extent_start == 0);
em->block_start = EXTENT_MAP_INLINE;
em->start = extent_start;
em->len = extent_end - extent_start;
em->start = 0;
em->len = fs_info->sectorsize;
/*
* Initialize orig_start and block_len with the same values
* as in inode.c:btrfs_get_extent().
@@ -1313,7 +1329,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
btrfs_err(fs_info,
"unknown file extent item type %d, inode %llu, offset %llu, "
"root %llu", type, btrfs_ino(inode), extent_start,
root->root_key.objectid);
btrfs_root_id(root));
}
}
@@ -1334,12 +1350,10 @@ u64 btrfs_file_extent_end(const struct btrfs_path *path)
ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
end = btrfs_file_extent_ram_bytes(leaf, fi);
end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
} else {
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE)
end = leaf->fs_info->sectorsize;
else
end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
}
return end;
}

Some files were not shown because too many files have changed in this diff Show More