Merge tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"There's a bunch of performance improvements, most notably the FIEMAP
speedup, the new block group tree to speed up mount on large
filesystems, more io_uring integration, some sysfs exports and the
usual fixes and core updates.
Summary:
Performance:
- outstanding FIEMAP speed improvement (see the FIEMAP ioctl sketch after this list)
- an algorithmic change to how extents are enumerated leads to an
orders-of-magnitude speed boost (uncached and cached)
- extent sharing check speedup (2.2x uncached, 3x cached)
- add more cancellation points, allowing seeks in files with a large
number of extents to be interrupted
- more efficient hole and data seeking (4x uncached, 1.3x cached)
- sample results:
256M, 32K extents: 4s -> 29ms (~150x)
512M, 64K extents: 30s -> 59ms (~550x)
1G, 128K extents: 225s -> 120ms (~1800x)
- improved inode logging, especially for directories (on dbench
workload throughput +25%, max latency -21%)
- improved buffered IO, remove redundant extent state tracking,
lowering memory consumption and avoiding rb tree traversal
- add a sysfs tunable to let qgroups temporarily skip exact accounting
when deleting a snapshot, leading to a speedup but requiring a rescan
afterwards; this will be used by snapper
- support io_uring for buffered writes; until now it was only for
direct IO, but with the no-wait semantics implemented in the buffered
write path it now works and leads to speed improvements in IOPS
(2x), throughput (2.2x) and latency (2x to 150x, depending on the
workload); see the liburing sketch after this list
- small performance improvements when dropping and searching for
extent maps as well as when flushing delalloc in COW mode
(throughput +5MB/s)
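
For context, the FIEMAP speedup above is about how fast the kernel can answer the FIEMAP ioctl; a minimal userspace sketch of such a query is below. The file path and the 128-extent buffer are purely illustrative, and error handling is kept short; nothing in it is specific to this kernel change.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
	int fd = open("/mnt/btrfs/file", O_RDONLY);	/* illustrative path */
	if (fd < 0) { perror("open"); return 1; }

	/* Ask for up to 128 extent records covering the whole file. */
	unsigned int count = 128;
	struct fiemap *fm = calloc(1, sizeof(*fm) + count * sizeof(struct fiemap_extent));
	if (!fm) { close(fd); return 1; }
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush delalloc so extents are reported */
	fm->fm_extent_count = count;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) { perror("FIEMAP"); free(fm); close(fd); return 1; }

	for (unsigned int i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];
		printf("logical %llu physical %llu len %llu shared=%d\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_physical,
		       (unsigned long long)fe->fe_length,
		       !!(fe->fe_flags & FIEMAP_EXTENT_SHARED));
	}
	free(fm);
	close(fd);
	return 0;
}

The FIEMAP_EXTENT_SHARED flag in the output is what the "extent sharing check" items above are about: computing it is the expensive part that the new per-level backref cache speeds up.
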
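
The io_uring item above refers to buffered (non-O_DIRECT) writes now being able to use io_uring's no-wait submission path; the liburing sketch below shows the kind of workload that benefits. The file path is illustrative, liburing is assumed to be installed, and nothing here is btrfs-specific.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* Plain buffered file: no O_DIRECT, so this exercises the buffered write path. */
	int fd = open("/mnt/btrfs/log.txt", O_WRONLY | O_CREAT | O_TRUNC, 0644);
	if (fd < 0) { perror("open"); return 1; }

	static const char msg[] = "hello from an io_uring buffered write\n";
	struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
	io_uring_prep_write(sqe, fd, msg, sizeof(msg) - 1, 0);

	io_uring_submit(&ring);

	struct io_uring_cqe *cqe;
	if (io_uring_wait_cqe(&ring, &cqe) == 0) {
		if (cqe->res < 0)
			fprintf(stderr, "write failed: %s\n", strerror(-cqe->res));
		io_uring_cqe_seen(&ring, cqe);
	}

	close(fd);
	io_uring_queue_exit(&ring);
	return 0;
}

Before 6.1 such a write would often punt to the io_uring worker threads because btrfs could not complete it without blocking; the nowait plumbing in this pull lets it complete inline in the common case.
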
User visible changes:
- new incompatible feature block-group-tree, adding a dedicated tree
for tracking block groups; this allows a much faster load during
mount and avoids the seeking that happens when the items are
scattered in the extent tree
- this reduces mount time for many-terabyte sized filesystems
- a conversion tool will be provided so existing filesystems can also
be updated in place
- to reduce the test matrix and feature combinations, it requires
no-holes and free-space-tree (mkfs defaults since 5.15)
- improved reporting of super block corruption detected by scrub
- scrub also tries to repair the super block and does not wait until
the next commit
- discard stats and tunables are exported in sysfs
(/sys/fs/btrfs/FSID/discard); a small read sketch follows this list
- qgroup status is exported in sysfs
(/sys/fs/btrfs/FSID/qgroups/)
- verify that super block was not modified when thawing filesystem
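
The sysfs exports above are ordinary text attributes; the sketch below reads one of the discard counters. The UUID in the path is a placeholder for the mounted filesystem's UUID, and discardable_extents is used as a representative attribute name under the discard directory.

#include <stdio.h>

int main(void)
{
	/* Substitute the mounted filesystem's UUID; the layout follows the paths above. */
	const char *path =
		"/sys/fs/btrfs/0a1b2c3d-ffff-4444-aaaa-0123456789ab/discard/discardable_extents";
	FILE *f = fopen(path, "r");
	if (!f) { perror("fopen"); return 1; }

	long long value;
	if (fscanf(f, "%lld", &value) == 1)
		printf("discardable extents: %lld\n", value);
	fclose(f);
	return 0;
}

The tunables in the same directory (for example the iops and kbps limits) are written the same way, by echoing a number into the attribute.
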
Fixes:
- FIEMAP fixes
- fix extent sharing status reporting so it no longer depends on the
cached status of merged extents
- flush delalloc so compressed extents are reported correctly
- fix alignment of VMA for memory mapped files on THP
- send: fix failures when processing inodes with no links (orphan
files and directories)
- fix race between quota enable and quota rescan ioctl
- handle more corner cases for read-only compat feature verification
- fix missed extent on fsync after dropping extent maps
Core:
- lockdep annotations to validate various transactions states and
state transitions
- preliminary support for fs-verity in send
- more effective memory use in scrub for subpage, where the sector size
is smaller than the page size
- block group caching progress logic has been removed, load is now
synchronous
- simplify end IO callbacks and bio handling, use chained bios
instead of own tracking
- add no-wait semantics to several functions (tree search, nocow,
flushing, buffered write); see the nowait search sketch after this list
- cleanups and refactoring
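
The no-wait semantics above hang off a new nowait flag on the btree search path (visible in the ctree.c hunks further down). The sketch below is a schematic in-kernel usage pattern reconstructed from this diff, not a quote of a real call site; the helper name is made up for illustration.

/* Sketch of a nowait btree lookup as used by the NOWAIT write paths. */
static int lookup_item_nowait(struct btrfs_root *root, struct btrfs_key *key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Only read-only searches may use nowait (see the ASSERT in btrfs_search_slot). */
	path->nowait = 1;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret == -EAGAIN) {
		/*
		 * A lock or a tree block read would have blocked; the caller
		 * is expected to retry from a context that may sleep.
		 */
	}

	btrfs_free_path(path);
	return ret;
}
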
MM changes:
- export balance_dirty_pages_ratelimited_flags"
* tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (177 commits)
btrfs: set generation before calling btrfs_clean_tree_block in btrfs_init_new_buffer
btrfs: drop extent map range more efficiently
btrfs: avoid pointless extent map tree search when flushing delalloc
btrfs: remove unnecessary next extent map search
btrfs: remove unnecessary NULL pointer checks when searching extent maps
btrfs: assert tree is locked when clearing extent map from logging
btrfs: remove unnecessary extent map initializations
btrfs: remove the refcount warning/check at free_extent_map()
btrfs: add helper to replace extent map range with a new extent map
btrfs: move open coded extent map tree deletion out of inode eviction
btrfs: use cond_resched_rwlock_write() during inode eviction
btrfs: use extent_map_end() at btrfs_drop_extent_map_range()
btrfs: move btrfs_drop_extent_cache() to extent_map.c
btrfs: fix missed extent on fsync after dropping extent maps
btrfs: remove stale prototype of btrfs_write_inode
btrfs: enable nowait async buffered writes
btrfs: assert nowait mode is not used for some btree search functions
btrfs: make btrfs_buffered_write nowait compatible
btrfs: plumb NOWAIT through the write path
btrfs: make lock_and_cleanup_extent_if_need nowait compatible
...
fs/btrfs/Makefile
@@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o tree-mod-log.o
+	   subpage.o tree-mod-log.o extent-io-tree.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
@@ -1511,16 +1511,118 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an extent is shared or not
|
||||
/*
|
||||
* The caller has joined a transaction or is holding a read lock on the
|
||||
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
|
||||
* snapshot field changing while updating or checking the cache.
|
||||
*/
|
||||
static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, int level, bool *is_shared)
|
||||
{
|
||||
struct btrfs_backref_shared_cache_entry *entry;
|
||||
|
||||
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Level -1 is used for the data extent, which is not reliable to cache
|
||||
* because its reference count can increase or decrease without us
|
||||
* realizing. We cache results only for extent buffers that lead from
|
||||
* the root node down to the leaf with the file extent item.
|
||||
*/
|
||||
ASSERT(level >= 0);
|
||||
|
||||
entry = &cache->entries[level];
|
||||
|
||||
/* Unused cache entry or being used for some other extent buffer. */
|
||||
if (entry->bytenr != bytenr)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* We cached a false result, but the last snapshot generation of the
|
||||
* root changed, so we now have a snapshot. Don't trust the result.
|
||||
*/
|
||||
if (!entry->is_shared &&
|
||||
entry->gen != btrfs_root_last_snapshot(&root->root_item))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we cached a true result and the last generation used for dropping
|
||||
* a root changed, we can not trust the result, because the dropped root
|
||||
* could be a snapshot sharing this extent buffer.
|
||||
*/
|
||||
if (entry->is_shared &&
|
||||
entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
|
||||
return false;
|
||||
|
||||
*is_shared = entry->is_shared;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has joined a transaction or is holding a read lock on the
|
||||
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
|
||||
* snapshot field changing while updating or checking the cache.
|
||||
*/
|
||||
static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, int level, bool is_shared)
|
||||
{
|
||||
struct btrfs_backref_shared_cache_entry *entry;
|
||||
u64 gen;
|
||||
|
||||
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Level -1 is used for the data extent, which is not reliable to cache
|
||||
* because its reference count can increase or decrease without us
|
||||
* realizing. We cache results only for extent buffers that lead from
|
||||
* the root node down to the leaf with the file extent item.
|
||||
*/
|
||||
ASSERT(level >= 0);
|
||||
|
||||
if (is_shared)
|
||||
gen = btrfs_get_last_root_drop_gen(root->fs_info);
|
||||
else
|
||||
gen = btrfs_root_last_snapshot(&root->root_item);
|
||||
|
||||
entry = &cache->entries[level];
|
||||
entry->bytenr = bytenr;
|
||||
entry->is_shared = is_shared;
|
||||
entry->gen = gen;
|
||||
|
||||
/*
|
||||
* If we found an extent buffer is shared, set the cache result for all
|
||||
* extent buffers below it to true. As nodes in the path are COWed,
|
||||
* their sharedness is moved to their children, and if a leaf is COWed,
|
||||
* then the sharedness of a data extent becomes direct, the refcount of
|
||||
* data extent is increased in the extent item at the extent tree.
|
||||
*/
|
||||
if (is_shared) {
|
||||
for (int i = 0; i < level; i++) {
|
||||
entry = &cache->entries[i];
|
||||
entry->is_shared = is_shared;
|
||||
entry->gen = gen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a data extent is shared or not.
|
||||
*
|
||||
* @root: root inode belongs to
|
||||
* @inum: inode number of the inode whose extent we are checking
|
||||
* @bytenr: logical bytenr of the extent we are checking
|
||||
* @roots: list of roots this extent is shared among
|
||||
* @tmp: temporary list used for iteration
|
||||
* @root: The root the inode belongs to.
|
||||
* @inum: Number of the inode whose extent we are checking.
|
||||
* @bytenr: Logical bytenr of the extent we are checking.
|
||||
* @extent_gen: Generation of the extent (file extent item) or 0 if it is
|
||||
* not known.
|
||||
* @roots: List of roots this extent is shared among.
|
||||
* @tmp: Temporary list used for iteration.
|
||||
* @cache: A backref lookup result cache.
|
||||
*
|
||||
* btrfs_check_shared uses the backref walking code but will short
|
||||
* btrfs_is_data_extent_shared uses the backref walking code but will short
|
||||
* circuit as soon as it finds a root or inode that doesn't match the
|
||||
* one passed in. This provides a significant performance benefit for
|
||||
* callers (such as fiemap) which want to know whether the extent is
|
||||
@@ -1531,8 +1633,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
*
|
||||
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
|
||||
*/
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
struct ulist *roots, struct ulist *tmp)
|
||||
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
u64 extent_gen,
|
||||
struct ulist *roots, struct ulist *tmp,
|
||||
struct btrfs_backref_shared_cache *cache)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
@@ -1545,6 +1649,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
.inum = inum,
|
||||
.share_count = 0,
|
||||
};
|
||||
int level;
|
||||
|
||||
ulist_init(roots);
|
||||
ulist_init(tmp);
|
||||
@@ -1561,22 +1666,52 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
btrfs_get_tree_mod_seq(fs_info, &elem);
|
||||
}
|
||||
|
||||
/* -1 means we are in the bytenr of the data extent. */
|
||||
level = -1;
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while (1) {
|
||||
bool is_shared;
|
||||
bool cached;
|
||||
|
||||
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
|
||||
roots, NULL, &shared, false);
|
||||
if (ret == BACKREF_FOUND_SHARED) {
|
||||
/* this is the only condition under which we return 1 */
|
||||
ret = 1;
|
||||
if (level >= 0)
|
||||
store_backref_shared_cache(cache, root, bytenr,
|
||||
level, true);
|
||||
break;
|
||||
}
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
break;
|
||||
ret = 0;
|
||||
/*
|
||||
* If our data extent is not shared through reflinks and it was
|
||||
* created in a generation after the last one used to create a
|
||||
* snapshot of the inode's root, then it can not be shared
|
||||
* indirectly through subtrees, as that can only happen with
|
||||
* snapshots. In this case bail out, no need to check for the
|
||||
* sharedness of extent buffers.
|
||||
*/
|
||||
if (level == -1 &&
|
||||
extent_gen > btrfs_root_last_snapshot(&root->root_item))
|
||||
break;
|
||||
|
||||
if (level >= 0)
|
||||
store_backref_shared_cache(cache, root, bytenr,
|
||||
level, false);
|
||||
node = ulist_next(tmp, &uiter);
|
||||
if (!node)
|
||||
break;
|
||||
bytenr = node->val;
|
||||
level++;
|
||||
cached = lookup_backref_shared_cache(cache, root, bytenr, level,
|
||||
&is_shared);
|
||||
if (cached) {
|
||||
ret = (is_shared ? 1 : 0);
|
||||
break;
|
||||
}
|
||||
shared.share_count = 0;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
fs/btrfs/backref.h
@@ -17,6 +17,20 @@ struct inode_fs_paths {
 	struct btrfs_data_container *fspath;
 };
 
+struct btrfs_backref_shared_cache_entry {
+	u64 bytenr;
+	u64 gen;
+	bool is_shared;
+};
+
+struct btrfs_backref_shared_cache {
+	/*
+	 * A path from a root to a leaf that has a file extent item pointing to
+	 * a given data extent should never exceed the maximum b+tree height.
+	 */
+	struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
+};
+
 typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
 		void *ctx);
 
@@ -62,8 +76,10 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
 			  struct btrfs_inode_extref **ret_extref,
 			  u64 *found_off);
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
-		struct ulist *roots, struct ulist *tmp_ulist);
+int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+				u64 extent_gen,
+				struct ulist *roots, struct ulist *tmp,
+				struct btrfs_backref_shared_cache *cache);
 
 int __init btrfs_prelim_ref_init(void);
 void __cold btrfs_prelim_ref_exit(void);
@@ -593,8 +593,6 @@ next:
|
||||
|
||||
if (need_resched() ||
|
||||
rwsem_is_contended(&fs_info->commit_root_sem)) {
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
@@ -618,9 +616,6 @@ next:
|
||||
key.objectid = last;
|
||||
key.offset = 0;
|
||||
key.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
goto next;
|
||||
}
|
||||
@@ -655,7 +650,6 @@ next:
|
||||
|
||||
total_found += add_new_free_space(block_group, last,
|
||||
block_group->start + block_group->length);
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@@ -725,8 +719,6 @@ done:
|
||||
}
|
||||
#endif
|
||||
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
btrfs_free_excluded_extents(block_group);
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
@@ -755,7 +747,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
mutex_init(&caching_ctl->mutex);
|
||||
init_waitqueue_head(&caching_ctl->wait);
|
||||
caching_ctl->block_group = cache;
|
||||
caching_ctl->progress = cache->start;
|
||||
refcount_set(&caching_ctl->count, 2);
|
||||
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
|
||||
|
||||
@@ -772,7 +763,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
WARN_ON(cache->caching_ctl);
|
||||
cache->caching_ctl = caching_ctl;
|
||||
cache->cached = BTRFS_CACHE_STARTED;
|
||||
cache->has_caching_ctl = 1;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
@@ -784,8 +774,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
|
||||
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
|
||||
out:
|
||||
/* REVIEW */
|
||||
if (wait && caching_ctl)
|
||||
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
|
||||
/* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
|
||||
if (caching_ctl)
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
|
||||
@@ -988,32 +980,31 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
kobject_put(kobj);
|
||||
}
|
||||
|
||||
if (block_group->has_caching_ctl)
|
||||
caching_ctl = btrfs_get_caching_control(block_group);
|
||||
if (block_group->cached == BTRFS_CACHE_STARTED)
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
if (block_group->has_caching_ctl) {
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
if (!caching_ctl) {
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
list_for_each_entry(ctl,
|
||||
&fs_info->caching_block_groups, list)
|
||||
if (ctl->block_group == block_group) {
|
||||
caching_ctl = ctl;
|
||||
refcount_inc(&caching_ctl->count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (caching_ctl)
|
||||
list_del_init(&caching_ctl->list);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
if (caching_ctl) {
|
||||
/* Once for the caching bgs list and once for us. */
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
caching_ctl = btrfs_get_caching_control(block_group);
|
||||
if (!caching_ctl) {
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
list_for_each_entry(ctl, &fs_info->caching_block_groups, list) {
|
||||
if (ctl->block_group == block_group) {
|
||||
caching_ctl = ctl;
|
||||
refcount_inc(&caching_ctl->count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (caching_ctl)
|
||||
list_del_init(&caching_ctl->list);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
|
||||
if (caching_ctl) {
|
||||
/* Once for the caching bgs list and once for us. */
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
}
|
||||
|
||||
spin_lock(&trans->transaction->dirty_bgs_lock);
|
||||
WARN_ON(!list_empty(&block_group->dirty_list));
|
||||
@@ -1034,12 +1025,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
< block_group->zone_unusable);
|
||||
WARN_ON(block_group->space_info->disk_total
|
||||
< block_group->length * factor);
|
||||
WARN_ON(block_group->zone_is_active &&
|
||||
WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
&block_group->runtime_flags) &&
|
||||
block_group->space_info->active_total_bytes
|
||||
< block_group->length);
|
||||
}
|
||||
block_group->space_info->total_bytes -= block_group->length;
|
||||
if (block_group->zone_is_active)
|
||||
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
|
||||
block_group->space_info->active_total_bytes -= block_group->length;
|
||||
block_group->space_info->bytes_readonly -=
|
||||
(block_group->length - block_group->zone_unusable);
|
||||
@@ -1069,7 +1061,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
block_group->removed = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
|
||||
|
||||
/*
|
||||
* At this point trimming or scrub can't start on this block group,
|
||||
* because we removed the block group from the rbtree
|
||||
@@ -1304,6 +1297,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Long running balances can keep us blocked here for eternity, so
|
||||
* simply skip deletion if we're unable to get the mutex.
|
||||
@@ -1543,6 +1539,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return;
|
||||
|
||||
if (!btrfs_should_reclaim(fs_info))
|
||||
return;
|
||||
|
||||
@@ -1890,16 +1889,6 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void link_block_group(struct btrfs_block_group *cache)
|
||||
{
|
||||
struct btrfs_space_info *space_info = cache->space_info;
|
||||
int index = btrfs_bg_flags_to_raid_index(cache->flags);
|
||||
|
||||
down_write(&space_info->groups_sem);
|
||||
list_add_tail(&cache->list, &space_info->block_groups[index]);
|
||||
up_write(&space_info->groups_sem);
|
||||
}
|
||||
|
||||
static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
struct btrfs_fs_info *fs_info, u64 start)
|
||||
{
|
||||
@@ -1937,7 +1926,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
|
||||
atomic_set(&cache->frozen, 0);
|
||||
mutex_init(&cache->free_space_lock);
|
||||
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
|
||||
cache->full_stripe_locks_root.root = RB_ROOT;
|
||||
mutex_init(&cache->full_stripe_locks_root.lock);
|
||||
|
||||
return cache;
|
||||
}
|
||||
@@ -2002,7 +1992,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
int need_clear)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
struct btrfs_space_info *space_info;
|
||||
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
|
||||
int ret;
|
||||
|
||||
@@ -2078,11 +2067,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
/* Should not have any excluded extents. Just in case, though. */
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->length == cache->used) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->used == 0) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
add_new_free_space(cache, cache->start,
|
||||
cache->start + cache->length);
|
||||
@@ -2095,14 +2082,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
goto error;
|
||||
}
|
||||
trace_btrfs_add_block_group(info, cache, 0);
|
||||
btrfs_update_space_info(info, cache->flags, cache->length,
|
||||
cache->used, cache->bytes_super,
|
||||
cache->zone_unusable, cache->zone_is_active,
|
||||
&space_info);
|
||||
|
||||
cache->space_info = space_info;
|
||||
|
||||
link_block_group(cache);
|
||||
btrfs_add_bg_to_space_info(info, cache);
|
||||
|
||||
set_avail_alloc_bits(info, cache->flags);
|
||||
if (btrfs_chunk_writeable(info, cache->start)) {
|
||||
@@ -2126,7 +2106,6 @@ error:
|
||||
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct btrfs_space_info *space_info;
|
||||
struct rb_node *node;
|
||||
int ret = 0;
|
||||
|
||||
@@ -2146,7 +2125,6 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
/* Fill dummy cache as FULL */
|
||||
bg->length = em->len;
|
||||
bg->flags = map->type;
|
||||
bg->last_byte_to_unpin = (u64)-1;
|
||||
bg->cached = BTRFS_CACHE_FINISHED;
|
||||
bg->used = em->len;
|
||||
bg->flags = map->type;
|
||||
@@ -2167,10 +2145,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
break;
|
||||
}
|
||||
|
||||
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
|
||||
0, 0, false, &space_info);
|
||||
bg->space_info = space_info;
|
||||
link_block_group(bg);
|
||||
btrfs_add_bg_to_space_info(fs_info, bg);
|
||||
|
||||
set_avail_alloc_bits(fs_info, bg->flags);
|
||||
}
|
||||
@@ -2190,7 +2165,16 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
|
||||
int need_clear = 0;
|
||||
u64 cache_gen;
|
||||
|
||||
if (!root)
|
||||
/*
|
||||
* Either no extent root (with ibadroots rescue option) or we have
|
||||
* unsupported RO options. The fs can never be mounted read-write, so no
|
||||
* need to waste time searching block group items.
|
||||
*
|
||||
* This also allows new extent tree related changes to be RO compat,
|
||||
* no need for a full incompat flag.
|
||||
*/
|
||||
if (!root || (btrfs_super_compat_ro_flags(info->super_copy) &
|
||||
~BTRFS_FEATURE_COMPAT_RO_SUPP))
|
||||
return fill_dummy_bgs(info);
|
||||
|
||||
key.objectid = 0;
|
||||
@@ -2425,7 +2409,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
|
||||
ret = insert_block_group_item(trans, block_group);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
if (!block_group->chunk_item_inserted) {
|
||||
if (!test_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
|
||||
&block_group->runtime_flags)) {
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
@@ -2494,7 +2479,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
set_free_space_tree_thresholds(cache);
|
||||
cache->used = bytes_used;
|
||||
cache->flags = type;
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
|
||||
|
||||
@@ -2519,14 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
|
||||
btrfs_free_excluded_extents(cache);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(cache)) {
|
||||
u64 new_bytes_used = size - bytes_used;
|
||||
|
||||
bytes_used += new_bytes_used >> 1;
|
||||
fragment_free_space(cache);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Ensure the corresponding space_info object is created and
|
||||
* assigned to our block group. We want our bg to be added to the rbtree
|
||||
@@ -2547,12 +2523,17 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
* the rbtree, update the space info's counters.
|
||||
*/
|
||||
trace_btrfs_add_block_group(fs_info, cache, 1);
|
||||
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
|
||||
cache->bytes_super, cache->zone_unusable,
|
||||
cache->zone_is_active, &cache->space_info);
|
||||
btrfs_add_bg_to_space_info(fs_info, cache);
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
|
||||
link_block_group(cache);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(cache)) {
|
||||
u64 new_bytes_used = size - bytes_used;
|
||||
|
||||
cache->space_info->bytes_used += new_bytes_used >> 1;
|
||||
fragment_free_space(cache);
|
||||
}
|
||||
#endif
|
||||
|
||||
list_add_tail(&cache->bg_list, &trans->new_bgs);
|
||||
trans->delayed_ref_updates++;
|
||||
@@ -2869,7 +2850,7 @@ again:
|
||||
cache_size *= fs_info->sectorsize;
|
||||
|
||||
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
|
||||
cache_size);
|
||||
cache_size, false);
|
||||
if (ret)
|
||||
goto out_put;
|
||||
|
||||
@@ -3965,35 +3946,24 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 last = 0;
|
||||
|
||||
while (1) {
|
||||
struct inode *inode;
|
||||
block_group = btrfs_lookup_first_block_group(info, 0);
|
||||
while (block_group) {
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
spin_lock(&block_group->lock);
|
||||
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
|
||||
&block_group->runtime_flags)) {
|
||||
struct inode *inode = block_group->inode;
|
||||
|
||||
block_group = btrfs_lookup_first_block_group(info, last);
|
||||
while (block_group) {
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->iref)
|
||||
break;
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
block_group = btrfs_next_block_group(block_group);
|
||||
}
|
||||
if (!block_group) {
|
||||
if (last == 0)
|
||||
break;
|
||||
last = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
inode = block_group->inode;
|
||||
block_group->iref = 0;
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
ASSERT(block_group->io_ctl.inode == NULL);
|
||||
iput(inode);
|
||||
last = block_group->start + block_group->length;
|
||||
btrfs_put_block_group(block_group);
|
||||
ASSERT(block_group->io_ctl.inode == NULL);
|
||||
iput(inode);
|
||||
} else {
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
block_group = btrfs_next_block_group(block_group);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4129,7 +4099,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
cleanup = (atomic_dec_and_test(&block_group->frozen) &&
|
||||
block_group->removed);
|
||||
test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags));
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (cleanup) {
|
||||
@@ -4150,7 +4120,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
* tasks trimming this block group have left 1 entry each one.
|
||||
* Free them if any.
|
||||
*/
|
||||
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(block_group);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,19 +46,44 @@ enum btrfs_chunk_alloc_enum {
|
||||
CHUNK_ALLOC_FORCE_FOR_EXTENT,
|
||||
};
|
||||
|
||||
/* Block group flags set at runtime */
|
||||
enum btrfs_block_group_flags {
|
||||
BLOCK_GROUP_FLAG_IREF,
|
||||
BLOCK_GROUP_FLAG_REMOVED,
|
||||
BLOCK_GROUP_FLAG_TO_COPY,
|
||||
BLOCK_GROUP_FLAG_RELOCATING_REPAIR,
|
||||
BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
|
||||
BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
};
|
||||
|
||||
enum btrfs_caching_type {
|
||||
BTRFS_CACHE_NO,
|
||||
BTRFS_CACHE_STARTED,
|
||||
BTRFS_CACHE_FINISHED,
|
||||
BTRFS_CACHE_ERROR,
|
||||
};
|
||||
|
||||
struct btrfs_caching_control {
|
||||
struct list_head list;
|
||||
struct mutex mutex;
|
||||
wait_queue_head_t wait;
|
||||
struct btrfs_work work;
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 progress;
|
||||
refcount_t count;
|
||||
};
|
||||
|
||||
/* Once caching_thread() finds this much free space, it will wake up waiters. */
|
||||
#define CACHING_CTL_WAKE_UP SZ_2M
|
||||
|
||||
/*
|
||||
* Tree to record all locked full stripes of a RAID5/6 block group
|
||||
*/
|
||||
struct btrfs_full_stripe_locks_tree {
|
||||
struct rb_root root;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
struct btrfs_block_group {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct inode *inode;
|
||||
@@ -95,23 +120,15 @@ struct btrfs_block_group {
|
||||
|
||||
/* For raid56, this is a full stripe, without parity */
|
||||
unsigned long full_stripe_len;
|
||||
unsigned long runtime_flags;
|
||||
|
||||
unsigned int ro;
|
||||
unsigned int iref:1;
|
||||
unsigned int has_caching_ctl:1;
|
||||
unsigned int removed:1;
|
||||
unsigned int to_copy:1;
|
||||
unsigned int relocating_repair:1;
|
||||
unsigned int chunk_item_inserted:1;
|
||||
unsigned int zone_is_active:1;
|
||||
unsigned int zoned_data_reloc_ongoing:1;
|
||||
|
||||
int disk_cache_state;
|
||||
|
||||
/* Cache tracking stuff */
|
||||
int cached;
|
||||
struct btrfs_caching_control *caching_ctl;
|
||||
u64 last_byte_to_unpin;
|
||||
|
||||
struct btrfs_space_info *space_info;
|
||||
|
||||
@@ -305,8 +322,6 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
|
||||
int btrfs_free_block_groups(struct btrfs_fs_info *info);
|
||||
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
|
||||
struct btrfs_caching_control *caching_ctl);
|
||||
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
struct block_device *bdev, u64 physical, u64 **logical,
|
||||
int *naddrs, int *stripe_len);
|
||||
|
||||
fs/btrfs/block-rsv.c
@@ -286,7 +286,7 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
 	 */
 	if (block_rsv == delayed_rsv)
 		target = global_rsv;
-	else if (block_rsv != global_rsv && !delayed_rsv->full)
+	else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
 		target = delayed_rsv;
 
 	if (target && block_rsv->space_info != target->space_info)
@@ -424,6 +424,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
 	case BTRFS_CSUM_TREE_OBJECTID:
 	case BTRFS_EXTENT_TREE_OBJECTID:
 	case BTRFS_FREE_SPACE_TREE_OBJECTID:
+	case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
 		root->block_rsv = &fs_info->delayed_refs_rsv;
 		break;
 	case BTRFS_ROOT_TREE_OBJECTID:

fs/btrfs/block-rsv.h
@@ -92,4 +92,13 @@ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
 	btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL);
 }
 
+/*
+ * Fast path to check if the reserve is full, may be carefully used outside of
+ * locks.
+ */
+static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
+{
+	return data_race(rsv->full);
+}
+
 #endif /* BTRFS_BLOCK_RSV_H */
fs/btrfs/btrfs_inode.h
@@ -65,6 +65,8 @@ enum {
 	 * on the same file.
 	 */
 	BTRFS_INODE_VERITY_IN_PROGRESS,
+	/* Set when this inode is a free space inode. */
+	BTRFS_INODE_FREE_SPACE_INODE,
 };
 
 /* in memory btrfs inode */
@@ -94,7 +96,8 @@ struct btrfs_inode {
 	/* special utility tree used to record which mirrors have already been
 	 * tried when checksums fail for a given block
 	 */
-	struct extent_io_tree io_failure_tree;
+	struct rb_root io_failure_tree;
+	spinlock_t io_failure_lock;
 
 	/*
 	 * Keep track of where the inode has extent items mapped in order to
@@ -250,11 +253,6 @@ struct btrfs_inode {
 	struct inode vfs_inode;
 };
 
-static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode)
-{
-	return inode->root->fs_info->sectorsize;
-}
-
 static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
 {
 	return container_of(inode, struct btrfs_inode, vfs_inode);
@@ -272,13 +270,6 @@ static inline unsigned long btrfs_inode_hash(u64 objectid,
 	return (unsigned long)h;
 }
 
-static inline void btrfs_insert_inode_hash(struct inode *inode)
-{
-	unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
-
-	__insert_inode_hash(inode, h);
-}
-
 #if BITS_PER_LONG == 32
 
 /*
@@ -312,13 +303,7 @@ static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
 
 static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
 {
-	struct btrfs_root *root = inode->root;
-
-	if (root == root->fs_info->tree_root &&
-	    btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
-		return true;
-
-	return false;
+	return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags);
 }
 
 static inline bool is_data_inode(struct inode *inode)
@@ -152,9 +152,7 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
}
|
||||
|
||||
/* Do io completion on the original bio */
|
||||
if (cb->status != BLK_STS_OK)
|
||||
cb->orig_bio->bi_status = cb->status;
|
||||
bio_endio(cb->orig_bio);
|
||||
btrfs_bio_end_io(btrfs_bio(cb->orig_bio), cb->status);
|
||||
|
||||
/* Finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
@@ -166,16 +164,15 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
* before decompressing it into the original bio and freeing the uncompressed
|
||||
* pages.
|
||||
*/
|
||||
static void end_compressed_bio_read(struct bio *bio)
|
||||
static void end_compressed_bio_read(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct compressed_bio *cb = bbio->private;
|
||||
struct inode *inode = cb->inode;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_inode *bi = BTRFS_I(inode);
|
||||
bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
|
||||
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
blk_status_t status = bio->bi_status;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t status = bbio->bio.bi_status;
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bv;
|
||||
u32 offset;
|
||||
@@ -186,9 +183,8 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
if (!status &&
|
||||
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
|
||||
bv.bv_page, bv.bv_offset))) {
|
||||
clean_io_failure(fs_info, &bi->io_failure_tree,
|
||||
&bi->io_tree, start, bv.bv_page,
|
||||
btrfs_ino(bi), bv.bv_offset);
|
||||
btrfs_clean_io_failure(bi, start, bv.bv_page,
|
||||
bv.bv_offset);
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
@@ -209,7 +205,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
if (refcount_dec_and_test(&cb->pending_ios))
|
||||
finish_compressed_bio_read(cb);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
bio_put(bio);
|
||||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -301,20 +297,20 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
|
||||
* This also calls the writeback end hooks for the file pages so that metadata
|
||||
* and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct bio *bio)
|
||||
static void end_compressed_bio_write(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct compressed_bio *cb = bbio->private;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->status = bio->bi_status;
|
||||
if (bbio->bio.bi_status)
|
||||
cb->status = bbio->bio.bi_status;
|
||||
|
||||
if (refcount_dec_and_test(&cb->pending_ios)) {
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, &bbio->bio);
|
||||
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
|
||||
}
|
||||
bio_put(bio);
|
||||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -335,7 +331,8 @@ static void end_compressed_bio_write(struct bio *bio)
|
||||
|
||||
|
||||
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
|
||||
blk_opf_t opf, bio_end_io_t endio_func,
|
||||
blk_opf_t opf,
|
||||
btrfs_bio_end_io_t endio_func,
|
||||
u64 *next_stripe_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
@@ -344,12 +341,8 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
|
||||
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bio->bi_opf = opf;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = endio_func;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
@@ -478,8 +471,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, true);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(btrfs_bio(bio), ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -596,7 +588,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
}
|
||||
|
||||
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
|
||||
lock_extent(tree, cur, page_end);
|
||||
lock_extent(tree, cur, page_end, NULL);
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
|
||||
read_unlock(&em_tree->lock);
|
||||
@@ -610,7 +602,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
|
||||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
|
||||
free_extent_map(em);
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
@@ -630,7 +622,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
|
||||
if (ret != add_size) {
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
@@ -799,8 +791,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
|
||||
if (ret) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
btrfs_bio_end_io(btrfs_bio(comp_bio), ret);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -826,8 +817,7 @@ fail:
|
||||
kfree(cb);
|
||||
out:
|
||||
free_extent_map(em);
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(btrfs_bio(bio), ret);
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1447,6 +1447,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (p->nowait) {
|
||||
free_extent_buffer(tmp);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
|
||||
@@ -1467,6 +1472,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
ret = -EAGAIN;
|
||||
|
||||
goto out;
|
||||
} else if (p->nowait) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (unlock_up) {
|
||||
@@ -1634,7 +1641,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
|
||||
* We don't know the level of the root node until we actually
|
||||
* have it read locked
|
||||
*/
|
||||
b = btrfs_read_lock_root_node(root);
|
||||
if (p->nowait) {
|
||||
b = btrfs_try_read_lock_root_node(root);
|
||||
if (IS_ERR(b))
|
||||
return b;
|
||||
} else {
|
||||
b = btrfs_read_lock_root_node(root);
|
||||
}
|
||||
level = btrfs_header_level(b);
|
||||
if (level > write_lock_level)
|
||||
goto out;
|
||||
@@ -1910,6 +1923,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
WARN_ON(p->nodes[0] != NULL);
|
||||
BUG_ON(!cow && ins_len);
|
||||
|
||||
/*
|
||||
* For now only allow nowait for read only operations. There's no
|
||||
* strict reason why we can't, we just only need it for reads so it's
|
||||
* only implemented for reads.
|
||||
*/
|
||||
ASSERT(!p->nowait || !cow);
|
||||
|
||||
if (ins_len < 0) {
|
||||
lowest_unlock = 2;
|
||||
|
||||
@@ -1936,7 +1956,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
|
||||
if (p->need_commit_sem) {
|
||||
ASSERT(p->search_commit_root);
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
if (p->nowait) {
|
||||
if (!down_read_trylock(&fs_info->commit_root_sem))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
}
|
||||
}
|
||||
|
||||
again:
|
||||
@@ -2082,7 +2107,15 @@ cow_done:
|
||||
btrfs_tree_lock(b);
|
||||
p->locks[level] = BTRFS_WRITE_LOCK;
|
||||
} else {
|
||||
btrfs_tree_read_lock(b);
|
||||
if (p->nowait) {
|
||||
if (!btrfs_try_tree_read_lock(b)) {
|
||||
free_extent_buffer(b);
|
||||
ret = -EAGAIN;
|
||||
goto done;
|
||||
}
|
||||
} else {
|
||||
btrfs_tree_read_lock(b);
|
||||
}
|
||||
p->locks[level] = BTRFS_READ_LOCK;
|
||||
}
|
||||
p->nodes[level] = b;
|
||||
@@ -2131,6 +2164,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
|
||||
|
||||
lowest_level = p->lowest_level;
|
||||
WARN_ON(p->nodes[0] != NULL);
|
||||
ASSERT(!p->nowait);
|
||||
|
||||
if (p->search_commit_root) {
|
||||
BUG_ON(time_seq);
|
||||
@@ -4432,6 +4466,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
|
||||
int ret = 1;
|
||||
int keep_locks = path->keep_locks;
|
||||
|
||||
ASSERT(!path->nowait);
|
||||
path->keep_locks = 1;
|
||||
again:
|
||||
cur = btrfs_read_lock_root_node(root);
|
||||
@@ -4612,6 +4647,8 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ASSERT(!path->nowait);
|
||||
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
if (nritems == 0)
|
||||
return 1;
|
||||
|
||||
fs/btrfs/ctree.h: 370 changes (file diff suppressed because it is too large)
fs/btrfs/delalloc-space.c
@@ -127,9 +127,11 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
 }
 
 int btrfs_check_data_free_space(struct btrfs_inode *inode,
-			struct extent_changeset **reserved, u64 start, u64 len)
+				struct extent_changeset **reserved, u64 start,
+				u64 len, bool noflush)
 {
 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
+	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA;
 	int ret;
 
 	/* align the range */
@@ -137,7 +139,12 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
 	      round_down(start, fs_info->sectorsize);
 	start = round_down(start, fs_info->sectorsize);
 
-	ret = btrfs_alloc_data_chunk_ondemand(inode, len);
+	if (noflush)
+		flush = BTRFS_RESERVE_NO_FLUSH;
+	else if (btrfs_is_free_space_inode(inode))
+		flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE;
+
+	ret = btrfs_reserve_data_bytes(fs_info, len, flush);
 	if (ret < 0)
 		return ret;
 
@@ -454,7 +461,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
 {
 	int ret;
 
-	ret = btrfs_check_data_free_space(inode, reserved, start, len);
+	ret = btrfs_check_data_free_space(inode, reserved, start, len, false);
 	if (ret < 0)
 		return ret;
 	ret = btrfs_delalloc_reserve_metadata(inode, len, len, false);
fs/btrfs/delalloc-space.h
@@ -7,7 +7,8 @@ struct extent_changeset;
 
 int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
 int btrfs_check_data_free_space(struct btrfs_inode *inode,
-			struct extent_changeset **reserved, u64 start, u64 len);
+			struct extent_changeset **reserved, u64 start, u64 len,
+			bool noflush);
 void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
 			struct extent_changeset *reserved, u64 start, u64 len);
 void btrfs_delalloc_release_space(struct btrfs_inode *inode,
(file diff suppressed because it is too large)
fs/btrfs/delayed-inode.h
@@ -16,9 +16,10 @@
 #include <linux/refcount.h>
 #include "ctree.h"
 
-/* types of the delayed item */
-#define BTRFS_DELAYED_INSERTION_ITEM 1
-#define BTRFS_DELAYED_DELETION_ITEM 2
+enum btrfs_delayed_item_type {
+	BTRFS_DELAYED_INSERTION_ITEM,
+	BTRFS_DELAYED_DELETION_ITEM
+};
 
 struct btrfs_delayed_root {
 	spinlock_t lock;
@@ -73,14 +74,27 @@ struct btrfs_delayed_node {
 
 struct btrfs_delayed_item {
 	struct rb_node rb_node;
-	struct btrfs_key key;
+	/* Offset value of the corresponding dir index key. */
+	u64 index;
 	struct list_head tree_list;	/* used for batch insert/delete items */
 	struct list_head readdir_list;	/* used for readdir items */
+	/*
+	 * Used when logging a directory.
+	 * Insertions and deletions to this list are protected by the parent
+	 * delayed node's mutex.
+	 */
+	struct list_head log_list;
 	u64 bytes_reserved;
 	struct btrfs_delayed_node *delayed_node;
 	refcount_t refs;
-	int ins_or_del;
-	u32 data_len;
+	enum btrfs_delayed_item_type type:8;
+	/*
+	 * Track if this delayed item was already logged.
+	 * Protected by the mutex of the parent delayed inode.
+	 */
+	bool logged;
+	/* The maximum leaf size is 64K, so u16 is more than enough. */
+	u16 data_len;
 	char data[];
 };
 
@@ -144,6 +158,14 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
 int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 				    struct list_head *ins_list);
 
+/* Used during directory logging. */
+void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
+				 struct list_head *ins_list,
+				 struct list_head *del_list);
+void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
+				 struct list_head *ins_list,
+				 struct list_head *del_list);
+
 /* for init */
 int __init btrfs_delayed_inode_init(void);
 void __cold btrfs_delayed_inode_exit(void);
fs/btrfs/dev-replace.c
@@ -545,10 +545,7 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
 		if (!cache)
 			continue;
 
-		spin_lock(&cache->lock);
-		cache->to_copy = 1;
-		spin_unlock(&cache->lock);
-
+		set_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
 		btrfs_put_block_group(cache);
 	}
 	if (iter_ret < 0)
@@ -577,7 +574,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
 		return true;
 
 	spin_lock(&cache->lock);
-	if (cache->removed) {
+	if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) {
 		spin_unlock(&cache->lock);
 		return true;
 	}
@@ -610,9 +607,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
 	}
 
 	/* Last stripe on this device */
-	spin_lock(&cache->lock);
-	cache->to_copy = 0;
-	spin_unlock(&cache->lock);
+	clear_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
 
 	return true;
 }
@@ -1288,11 +1283,6 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
 	return 1;
 }
 
-void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
-{
-	percpu_counter_inc(&fs_info->dev_replace.bio_counter);
-}
-
 void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
 {
 	percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount);
fs/btrfs/dev-replace.h
@@ -7,6 +7,10 @@
 #define BTRFS_DEV_REPLACE_H
 
 struct btrfs_ioctl_dev_replace_args;
 struct btrfs_fs_info;
+struct btrfs_trans_handle;
+struct btrfs_dev_replace;
+struct btrfs_block_group;
+
 int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
 int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
@@ -131,8 +131,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
if (atomic)
|
||||
return -EAGAIN;
|
||||
|
||||
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state);
|
||||
if (extent_buffer_uptodate(eb) &&
|
||||
btrfs_header_generation(eb) == parent_transid) {
|
||||
ret = 0;
|
||||
@@ -145,8 +144,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
ret = 1;
|
||||
clear_extent_buffer_uptodate(eb);
|
||||
out:
|
||||
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -647,16 +646,14 @@ static void run_one_async_start(struct btrfs_work *work)
|
||||
*/
|
||||
static void run_one_async_done(struct btrfs_work *work)
|
||||
{
|
||||
struct async_submit_bio *async;
|
||||
struct inode *inode;
|
||||
|
||||
async = container_of(work, struct async_submit_bio, work);
|
||||
inode = async->inode;
|
||||
struct async_submit_bio *async =
|
||||
container_of(work, struct async_submit_bio, work);
|
||||
struct inode *inode = async->inode;
|
||||
struct btrfs_bio *bbio = btrfs_bio(async->bio);
|
||||
|
||||
/* If an error occurred we just want to clean up the bio and move on */
|
||||
if (async->status) {
|
||||
async->bio->bi_status = async->status;
|
||||
bio_endio(async->bio);
|
||||
btrfs_bio_end_io(bbio, async->status);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -757,6 +754,7 @@ static bool should_async_write(struct btrfs_fs_info *fs_info,
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t ret;
|
||||
|
||||
bio->bi_opf |= REQ_META;
|
||||
@@ -776,8 +774,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
|
||||
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(bbio, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1524,6 +1521,9 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
|
||||
if (objectid == BTRFS_UUID_TREE_OBJECTID)
|
||||
return btrfs_grab_root(fs_info->uuid_root) ?
|
||||
fs_info->uuid_root : ERR_PTR(-ENOENT);
|
||||
if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
|
||||
return btrfs_grab_root(fs_info->block_group_root) ?
|
||||
fs_info->block_group_root : ERR_PTR(-ENOENT);
|
||||
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
|
||||
struct btrfs_root *root = btrfs_global_root(fs_info, &key);
|
||||
|
||||
@@ -1980,14 +1980,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
|
||||
btrfs_set_backup_chunk_root_level(root_backup,
|
||||
btrfs_header_level(info->chunk_root->node));
|
||||
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
|
||||
btrfs_set_backup_block_group_root(root_backup,
|
||||
info->block_group_root->node->start);
|
||||
btrfs_set_backup_block_group_root_gen(root_backup,
|
||||
btrfs_header_generation(info->block_group_root->node));
|
||||
btrfs_set_backup_block_group_root_level(root_backup,
|
||||
btrfs_header_level(info->block_group_root->node));
|
||||
} else {
|
||||
if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
|
||||
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
|
||||
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
|
||||
|
||||
@@ -2225,6 +2218,8 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
|
||||
static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct inode *inode = fs_info->btree_inode;
|
||||
unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
|
||||
fs_info->tree_root);
|
||||
|
||||
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
|
||||
set_nlink(inode, 1);
|
||||
@@ -2238,8 +2233,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
|
||||
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
|
||||
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
|
||||
IO_TREE_BTREE_INODE_IO, inode);
|
||||
BTRFS_I(inode)->io_tree.track_uptodate = false;
|
||||
IO_TREE_BTREE_INODE_IO, NULL);
|
||||
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
|
||||
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
|
||||
@@ -2247,7 +2241,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
BTRFS_I(inode)->location.type = 0;
|
||||
BTRFS_I(inode)->location.offset = 0;
|
||||
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_insert_inode_hash(inode);
|
||||
__insert_inode_hash(inode, hash);
|
||||
}
|
||||
|
||||
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
|
||||
@@ -2266,6 +2260,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
|
||||
fs_info->qgroup_seq = 1;
|
||||
fs_info->qgroup_ulist = NULL;
|
||||
fs_info->qgroup_rescan_running = false;
|
||||
fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
|
||||
mutex_init(&fs_info->qgroup_rescan_lock);
|
||||
}
|
||||
|
||||
@@ -2529,10 +2524,24 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
 	if (ret)
 		return ret;

-	location.objectid = BTRFS_DEV_TREE_OBJECTID;
 	location.type = BTRFS_ROOT_ITEM_KEY;
 	location.offset = 0;

+	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
+		location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
+		root = btrfs_read_tree_root(tree_root, &location);
+		if (IS_ERR(root)) {
+			if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
+				ret = PTR_ERR(root);
+				goto out;
+			}
+		} else {
+			set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
+			fs_info->block_group_root = root;
+		}
+	}
+
+	location.objectid = BTRFS_DEV_TREE_OBJECTID;
 	root = btrfs_read_tree_root(tree_root, &location);
 	if (IS_ERR(root)) {
 		if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
@@ -2600,8 +2609,8 @@ out:
  * 1, 2	2nd and 3rd backup copy
  * -1	skip bytenr check
  */
-static int validate_super(struct btrfs_fs_info *fs_info,
-			    struct btrfs_super_block *sb, int mirror_num)
+int btrfs_validate_super(struct btrfs_fs_info *fs_info,
+			 struct btrfs_super_block *sb, int mirror_num)
 {
 	u64 nodesize = btrfs_super_nodesize(sb);
 	u64 sectorsize = btrfs_super_sectorsize(sb);
@@ -2703,6 +2712,18 @@ static int validate_super(struct btrfs_fs_info *fs_info,
 		ret = -EINVAL;
 	}

+	/*
+	 * Artificial requirement for block-group-tree to force newer features
+	 * (free-space-tree, no-holes) so the test matrix is smaller.
+	 */
+	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
+	    (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
+	     !btrfs_fs_incompat(fs_info, NO_HOLES))) {
+		btrfs_err(fs_info,
+		"block-group-tree feature requires fres-space-tree and no-holes");
+		ret = -EINVAL;
+	}
+
 	if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
 		   BTRFS_FSID_SIZE) != 0) {
 		btrfs_err(fs_info,
@@ -2785,7 +2806,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
  */
 static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
 {
-	return validate_super(fs_info, fs_info->super_copy, 0);
+	return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
 }

 /*
@@ -2799,7 +2820,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
 {
 	int ret;

-	ret = validate_super(fs_info, sb, -1);
+	ret = btrfs_validate_super(fs_info, sb, -1);
 	if (ret < 0)
 		goto out;
 	if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
@@ -2860,17 +2881,7 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
 		btrfs_warn(fs_info, "couldn't read tree root");
 		return ret;
 	}
-
-	if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
-		return 0;
-
-	bytenr = btrfs_super_block_group_root(sb);
-	gen = btrfs_super_block_group_root_generation(sb);
-	level = btrfs_super_block_group_root_level(sb);
-	ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
-	if (ret)
-		btrfs_warn(fs_info, "couldn't read block group root");
-	return ret;
+	return 0;
 }

 static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
@@ -2882,16 +2893,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 	int ret = 0;
 	int i;

-	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
-		struct btrfs_root *root;
-
-		root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
-					GFP_KERNEL);
-		if (!root)
-			return -ENOMEM;
-		fs_info->block_group_root = root;
-	}
-
 	for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
 		if (handle_error) {
 			if (!IS_ERR(tree_root->node))
@@ -2990,6 +2991,19 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
 	mutex_init(&fs_info->zoned_data_reloc_io_lock);
 	seqlock_init(&fs_info->profiles_lock);

+	btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers);
+	btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
+	btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
+	btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
+				     BTRFS_LOCKDEP_TRANS_COMMIT_START);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
+				     BTRFS_LOCKDEP_TRANS_UNBLOCKED);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
+				     BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
+	btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed,
+				     BTRFS_LOCKDEP_TRANS_COMPLETED);
+
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
 	INIT_LIST_HEAD(&fs_info->space_info);
 	INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
@@ -3279,6 +3293,112 @@ out:
 	return ret;
 }

+/*
+ * Do various sanity and dependency checks of different features.
+ *
+ * This is the place for less strict checks (like for subpage or artificial
+ * feature dependencies).
+ *
+ * For strict checks or possible corruption detection, see
+ * btrfs_validate_super().
+ *
+ * This should be called after btrfs_parse_options(), as some mount options
+ * (space cache related) can modify on-disk format like free space tree and
+ * screw up certain feature dependencies.
+ */
+int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
+{
+	struct btrfs_super_block *disk_super = fs_info->super_copy;
+	u64 incompat = btrfs_super_incompat_flags(disk_super);
+	const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
+	const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
+
+	if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
+		btrfs_err(fs_info,
+		"cannot mount because of unknown incompat features (0x%llx)",
+		    incompat);
+		return -EINVAL;
+	}
+
+	/* Runtime limitation for mixed block groups. */
+	if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
+	    (fs_info->sectorsize != fs_info->nodesize)) {
+		btrfs_err(fs_info,
+"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
+			  fs_info->nodesize, fs_info->sectorsize);
+		return -EINVAL;
+	}
+
+	/* Mixed backref is an always-enabled feature. */
+	incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+
+	/* Set compression related flags just in case. */
+	if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
+		incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+	else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
+		incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
+
+	/*
+	 * An ancient flag, which should really be marked deprecated.
+	 * Such runtime limitation doesn't really need a incompat flag.
+	 */
+	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
+		incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+
+	if (compat_ro_unsupp && !sb_rdonly(sb)) {
+		btrfs_err(fs_info,
+	"cannot mount read-write because of unknown compat_ro features (0x%llx)",
+		       compat_ro);
+		return -EINVAL;
+	}
+
+	/*
+	 * We have unsupported RO compat features, although RO mounted, we
+	 * should not cause any metadata writes, including log replay.
+	 * Or we could screw up whatever the new feature requires.
+	 */
+	if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
+	    !btrfs_test_opt(fs_info, NOLOGREPLAY)) {
+		btrfs_err(fs_info,
+"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
+			  compat_ro);
+		return -EINVAL;
+	}
+
+	/*
+	 * Artificial limitations for block group tree, to force
+	 * block-group-tree to rely on no-holes and free-space-tree.
+	 */
+	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
+	    (!btrfs_fs_incompat(fs_info, NO_HOLES) ||
+	     !btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
+		btrfs_err(fs_info,
+"block-group-tree feature requires no-holes and free-space-tree features");
+		return -EINVAL;
+	}
+
+	/*
+	 * Subpage runtime limitation on v1 cache.
+	 *
+	 * V1 space cache still has some hard codeed PAGE_SIZE usage, while
+	 * we're already defaulting to v2 cache, no need to bother v1 as it's
+	 * going to be deprecated anyway.
+	 */
+	if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+		btrfs_warn(fs_info,
+	"v1 space cache is not supported for page size %lu with sectorsize %u",
+			   PAGE_SIZE, fs_info->sectorsize);
+		return -EINVAL;
+	}
+
+	/* This can be called by remount, we need to protect the super block. */
+	spin_lock(&fs_info->super_lock);
+	btrfs_set_super_incompat_flags(disk_super, incompat);
+	spin_unlock(&fs_info->super_lock);
+
+	return 0;
+}
+
 int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
 		      char *options)
 {
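The new btrfs_check_features() centralizes the soft feature checks that open_ctree() used to do inline (see the following hunk): unknown incompat bits fail the mount, unknown compat_ro bits only block read-write mounts and log replay, and block-group-tree is artificially required to come together with no-holes and free-space-tree. The self-contained sketch below illustrates just that dependency-checking pattern with made-up feature bits and names; it is not the kernel's code and the flag values are illustrative only:

#include <stdio.h>
#include <stdint.h>

/* Hypothetical feature bits, loosely modeled on the dependency rule above. */
#define FEAT_RO_FREE_SPACE_TREE   (1ULL << 0)
#define FEAT_RO_BLOCK_GROUP_TREE  (1ULL << 1)
#define FEAT_INCOMPAT_NO_HOLES    (1ULL << 0)

/* block-group-tree is only allowed together with no-holes and free-space-tree. */
static int check_feature_deps(uint64_t compat_ro, uint64_t incompat)
{
	if ((compat_ro & FEAT_RO_BLOCK_GROUP_TREE) &&
	    (!(incompat & FEAT_INCOMPAT_NO_HOLES) ||
	     !(compat_ro & FEAT_RO_FREE_SPACE_TREE))) {
		fprintf(stderr,
			"block-group-tree requires no-holes and free-space-tree\n");
		return -1;
	}
	return 0;
}

int main(void)
{
	/* Valid combination: all three features present. */
	printf("full set: %d\n",
	       check_feature_deps(FEAT_RO_BLOCK_GROUP_TREE | FEAT_RO_FREE_SPACE_TREE,
				  FEAT_INCOMPAT_NO_HOLES));
	/* Invalid: block-group-tree without its dependencies. */
	printf("bgt only: %d\n",
	       check_feature_deps(FEAT_RO_BLOCK_GROUP_TREE, 0));
	return 0;
}

Run as-is, the first call succeeds (0) and the second fails (-1), mirroring how a filesystem with block-group-tree but without its prerequisites is refused.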
@@ -3428,72 +3548,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_device
 		goto fail_alloc;
 	}

-	features = btrfs_super_incompat_flags(disk_super) &
-		~BTRFS_FEATURE_INCOMPAT_SUPP;
-	if (features) {
-		btrfs_err(fs_info,
-		    "cannot mount because of unsupported optional features (0x%llx)",
-		    features);
-		err = -EINVAL;
+	ret = btrfs_check_features(fs_info, sb);
+	if (ret < 0) {
+		err = ret;
 		goto fail_alloc;
 	}

-	features = btrfs_super_incompat_flags(disk_super);
-	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
-	if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
-		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
-	else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
-		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
-
-	/*
-	 * Flag our filesystem as having big metadata blocks if they are bigger
-	 * than the page size.
-	 */
-	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
-		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
-
-	/*
-	 * mixed block groups end up with duplicate but slightly offset
-	 * extent buffers for the same range. It leads to corruptions
-	 */
-	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
-	    (sectorsize != nodesize)) {
-		btrfs_err(fs_info,
-"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
-			nodesize, sectorsize);
-		goto fail_alloc;
-	}
-
-	/*
-	 * Needn't use the lock because there is no other task which will
-	 * update the flag.
-	 */
-	btrfs_set_super_incompat_flags(disk_super, features);
-
-	features = btrfs_super_compat_ro_flags(disk_super) &
-		~BTRFS_FEATURE_COMPAT_RO_SUPP;
-	if (!sb_rdonly(sb) && features) {
-		btrfs_err(fs_info,
-	"cannot mount read-write because of unsupported optional features (0x%llx)",
-		       features);
-		err = -EINVAL;
-		goto fail_alloc;
-	}
-	/*
-	 * We have unsupported RO compat features, although RO mounted, we
-	 * should not cause any metadata write, including log replay.
-	 * Or we could screw up whatever the new feature requires.
-	 */
-	if (unlikely(features && btrfs_super_log_root(disk_super) &&
-		     !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
-		btrfs_err(fs_info,
-"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
-			  features);
-		err = -EINVAL;
-		goto fail_alloc;
-	}
-
-
 	if (sectorsize < PAGE_SIZE) {
 		struct btrfs_subpage_info *subpage_info;

@@ -3833,7 +3893,7 @@ static void btrfs_end_super_write(struct bio *bio)
 }

 struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
-						   int copy_num)
+						   int copy_num, bool drop_cache)
 {
 	struct btrfs_super_block *super;
 	struct page *page;
@@ -3851,6 +3911,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
 	if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
 		return ERR_PTR(-EINVAL);

+	if (drop_cache) {
+		/* This should only be called with the primary sb. */
+		ASSERT(copy_num == 0);
+
+		/*
+		 * Drop the page of the primary superblock, so later read will
+		 * always read from the device.
+		 */
+		invalidate_inode_pages2_range(mapping,
+				bytenr >> PAGE_SHIFT,
+				(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
+	}
+
 	page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
 	if (IS_ERR(page))
 		return ERR_CAST(page);
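The drop_cache branch above invalidates the cached page of the primary super block so the read_cache_page_gfp() that follows is served from the device rather than from a stale cached copy, which matters when the super block may have been repaired or rewritten underneath the cache. As a loose userspace analogy of the same drop-then-reread idea, not btrfs code, the sketch below uses posix_fadvise(POSIX_FADV_DONTNEED) in place of the kernel's invalidate_inode_pages2_range(); the offsets are illustrative (the primary btrfs super block conventionally sits at 64KiB):

#define _XOPEN_SOURCE 600
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define SUPER_OFFSET 65536 /* illustrative: primary btrfs super at 64KiB */
#define SUPER_SIZE   4096  /* illustrative read size */

int main(int argc, char **argv)
{
	unsigned char buf[SUPER_SIZE];
	int fd, err;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <device-or-image>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/*
	 * Drop any cached pages for this range first (the userspace cousin of
	 * the drop_cache branch), so the pread() below really hits the device.
	 */
	err = posix_fadvise(fd, SUPER_OFFSET, SUPER_SIZE, POSIX_FADV_DONTNEED);
	if (err)
		fprintf(stderr, "posix_fadvise: error %d\n", err);

	if (pread(fd, buf, sizeof(buf), SUPER_OFFSET) != (ssize_t)sizeof(buf)) {
		perror("pread");
		close(fd);
		return 1;
	}
	/* On a btrfs device the magic string sits 64 bytes into the super block. */
	printf("read %d bytes at %d, magic: %.8s\n",
	       SUPER_SIZE, SUPER_OFFSET, (char *)buf + 64);
	close(fd);
	return 0;
}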
@@ -3882,7 +3955,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
 	 */
 	for (i = 0; i < 1; i++) {
-		super = btrfs_read_dev_one_super(bdev, i);
+		super = btrfs_read_dev_one_super(bdev, i, false);
 		if (IS_ERR(super))
 			continue;

@@ -46,10 +46,13 @@ int __cold open_ctree(struct super_block *sb,
 		      struct btrfs_fs_devices *fs_devices,
 		      char *options);
 void __cold close_ctree(struct btrfs_fs_info *fs_info);
+int btrfs_validate_super(struct btrfs_fs_info *fs_info,
+			 struct btrfs_super_block *sb, int mirror_num);
+int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
 int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
 struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
 struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
-						   int copy_num);
+						   int copy_num, bool drop_cache);
 int btrfs_commit_super(struct btrfs_fs_info *fs_info);
 struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
 					struct btrfs_key *key);
@@ -103,7 +106,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)

 static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
 {
-	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
 		return fs_info->block_group_root;
 	return btrfs_extent_root(fs_info, 0);
 }
fs/btrfs/extent-io-tree.c: new file, 1673 lines (diff suppressed because it is too large).
Some files were not shown because too many files have changed in this diff.