mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'for-6.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"There are no exciting changes for users, it's been mostly API
conversions and some fixes or refactoring.
The mount API conversion is a base for future improvements that would
come with VFS. Metadata processing has been converted to folios, not
yet enabling the large folios but it's one patch away once everything
gets tested enough.
Core changes:
- convert extent buffers to folios:
- direct API conversion where possible
- performance can drop by a few percent on metadata heavy
workloads, the folio sizes are not constant and the calculations
add up in the item helpers
- both regular and subpage modes
- data cannot be converted yet, we need to port that to iomap and
there are some other generic changes required
- convert mount to the new API, should not be user visible:
- options deprecated long time ago have been removed: inode_cache,
recovery
- the new logic that splits mount to two phases slightly changes
timing of device scanning for multi-device filesystems
- LSM options will now work (like for selinux)
- convert delayed nodes radix tree to xarray, preserving the
preload-like logic that still allows to allocate with GFP_NOFS
- more validation of sysfs value of scrub_speed_max
- refactor chunk map structure, reduce size and improve performance
- extent map refactoring, smaller data structures, improved
performance
- reduce size of struct extent_io_tree, embedded in several
structures
- temporary pages used for compression are cached and attached to a
shrinker, this may slightly improve performance
- in zoned mode, remove redirty extent buffer tracking, zeros are
written in case an out-of-order is detected and proper data are
written to the actual write pointer
- cleanups, refactoring, error message improvements, updated tests"
* tag 'for-6.8-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (89 commits)
btrfs: pass btrfs_io_geometry into btrfs_max_io_len
btrfs: pass struct btrfs_io_geometry to set_io_stripe
btrfs: open code set_io_stripe for RAID56
btrfs: change block mapping to switch/case in btrfs_map_block
btrfs: factor out block mapping for single profiles
btrfs: factor out block mapping for RAID5/6
btrfs: reduce scope of data_stripes in btrfs_map_block
btrfs: factor out block mapping for RAID10
btrfs: factor out block mapping for DUP profiles
btrfs: factor out RAID1 block mapping
btrfs: factor out block-mapping for RAID0
btrfs: re-introduce struct btrfs_io_geometry
btrfs: factor out helper for single device IO check
btrfs: migrate btrfs_repair_io_failure() to folio interfaces
btrfs: migrate eb_bitmap_offset() to folio interfaces
btrfs: migrate various end io functions to folios
btrfs: migrate subpage code to folio interfaces
btrfs: migrate get_eb_page_index() and get_eb_offset_in_page() to folios
btrfs: don't double put our subpage reference in alloc_extent_buffer
btrfs: cleanup metadata page pointer usage
...
This commit is contained in:
@@ -27,7 +27,7 @@ static bool check_setget_bounds(const struct extent_buffer *eb,
|
||||
void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb)
|
||||
{
|
||||
token->eb = eb;
|
||||
token->kaddr = page_address(eb->pages[0]);
|
||||
token->kaddr = folio_address(eb->folios[0]);
|
||||
token->offset = 0;
|
||||
}
|
||||
|
||||
@@ -50,7 +50,7 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
|
||||
* an offset into the extent buffer page array, cast to a specific type. This
|
||||
* gives us all the type checking.
|
||||
*
|
||||
* The extent buffer pages stored in the array pages do not form a contiguous
|
||||
* The extent buffer pages stored in the array folios may not form a contiguous
|
||||
* phyusical range, but the API functions assume the linear offset to the range
|
||||
* from 0 to metadata node size.
|
||||
*/
|
||||
@@ -60,28 +60,30 @@ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
|
||||
const void *ptr, unsigned long off) \
|
||||
{ \
|
||||
const unsigned long member_offset = (unsigned long)ptr + off; \
|
||||
const unsigned long idx = get_eb_page_index(member_offset); \
|
||||
const unsigned long oip = get_eb_offset_in_page(token->eb, \
|
||||
member_offset); \
|
||||
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
|
||||
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
|
||||
member_offset);\
|
||||
const int unit_size = folio_size(token->eb->folios[0]); \
|
||||
const int unit_shift = folio_shift(token->eb->folios[0]); \
|
||||
const int size = sizeof(u##bits); \
|
||||
u8 lebytes[sizeof(u##bits)]; \
|
||||
const int part = PAGE_SIZE - oip; \
|
||||
const int part = unit_size - oil; \
|
||||
\
|
||||
ASSERT(token); \
|
||||
ASSERT(token->kaddr); \
|
||||
ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
|
||||
if (token->offset <= member_offset && \
|
||||
member_offset + size <= token->offset + PAGE_SIZE) { \
|
||||
return get_unaligned_le##bits(token->kaddr + oip); \
|
||||
member_offset + size <= token->offset + unit_size) { \
|
||||
return get_unaligned_le##bits(token->kaddr + oil); \
|
||||
} \
|
||||
token->kaddr = page_address(token->eb->pages[idx]); \
|
||||
token->offset = idx << PAGE_SHIFT; \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE ) \
|
||||
return get_unaligned_le##bits(token->kaddr + oip); \
|
||||
token->kaddr = folio_address(token->eb->folios[idx]); \
|
||||
token->offset = idx << unit_shift; \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \
|
||||
return get_unaligned_le##bits(token->kaddr + oil); \
|
||||
\
|
||||
memcpy(lebytes, token->kaddr + oip, part); \
|
||||
token->kaddr = page_address(token->eb->pages[idx + 1]); \
|
||||
token->offset = (idx + 1) << PAGE_SHIFT; \
|
||||
memcpy(lebytes, token->kaddr + oil, part); \
|
||||
token->kaddr = folio_address(token->eb->folios[idx + 1]); \
|
||||
token->offset = (idx + 1) << unit_shift; \
|
||||
memcpy(lebytes + part, token->kaddr, size - part); \
|
||||
return get_unaligned_le##bits(lebytes); \
|
||||
} \
|
||||
@@ -89,19 +91,21 @@ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \
|
||||
const void *ptr, unsigned long off) \
|
||||
{ \
|
||||
const unsigned long member_offset = (unsigned long)ptr + off; \
|
||||
const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
|
||||
const unsigned long idx = get_eb_page_index(member_offset); \
|
||||
char *kaddr = page_address(eb->pages[idx]); \
|
||||
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
|
||||
const unsigned long oil = get_eb_offset_in_folio(eb, \
|
||||
member_offset);\
|
||||
const int unit_size = folio_size(eb->folios[0]); \
|
||||
char *kaddr = folio_address(eb->folios[idx]); \
|
||||
const int size = sizeof(u##bits); \
|
||||
const int part = PAGE_SIZE - oip; \
|
||||
const int part = unit_size - oil; \
|
||||
u8 lebytes[sizeof(u##bits)]; \
|
||||
\
|
||||
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) \
|
||||
return get_unaligned_le##bits(kaddr + oip); \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \
|
||||
return get_unaligned_le##bits(kaddr + oil); \
|
||||
\
|
||||
memcpy(lebytes, kaddr + oip, part); \
|
||||
kaddr = page_address(eb->pages[idx + 1]); \
|
||||
memcpy(lebytes, kaddr + oil, part); \
|
||||
kaddr = folio_address(eb->folios[idx + 1]); \
|
||||
memcpy(lebytes + part, kaddr, size - part); \
|
||||
return get_unaligned_le##bits(lebytes); \
|
||||
} \
|
||||
@@ -110,53 +114,59 @@ void btrfs_set_token_##bits(struct btrfs_map_token *token, \
|
||||
u##bits val) \
|
||||
{ \
|
||||
const unsigned long member_offset = (unsigned long)ptr + off; \
|
||||
const unsigned long idx = get_eb_page_index(member_offset); \
|
||||
const unsigned long oip = get_eb_offset_in_page(token->eb, \
|
||||
member_offset); \
|
||||
const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \
|
||||
const unsigned long oil = get_eb_offset_in_folio(token->eb, \
|
||||
member_offset);\
|
||||
const int unit_size = folio_size(token->eb->folios[0]); \
|
||||
const int unit_shift = folio_shift(token->eb->folios[0]); \
|
||||
const int size = sizeof(u##bits); \
|
||||
u8 lebytes[sizeof(u##bits)]; \
|
||||
const int part = PAGE_SIZE - oip; \
|
||||
const int part = unit_size - oil; \
|
||||
\
|
||||
ASSERT(token); \
|
||||
ASSERT(token->kaddr); \
|
||||
ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \
|
||||
if (token->offset <= member_offset && \
|
||||
member_offset + size <= token->offset + PAGE_SIZE) { \
|
||||
put_unaligned_le##bits(val, token->kaddr + oip); \
|
||||
member_offset + size <= token->offset + unit_size) { \
|
||||
put_unaligned_le##bits(val, token->kaddr + oil); \
|
||||
return; \
|
||||
} \
|
||||
token->kaddr = page_address(token->eb->pages[idx]); \
|
||||
token->offset = idx << PAGE_SHIFT; \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
|
||||
put_unaligned_le##bits(val, token->kaddr + oip); \
|
||||
token->kaddr = folio_address(token->eb->folios[idx]); \
|
||||
token->offset = idx << unit_shift; \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || \
|
||||
oil + size <= unit_size) { \
|
||||
put_unaligned_le##bits(val, token->kaddr + oil); \
|
||||
return; \
|
||||
} \
|
||||
put_unaligned_le##bits(val, lebytes); \
|
||||
memcpy(token->kaddr + oip, lebytes, part); \
|
||||
token->kaddr = page_address(token->eb->pages[idx + 1]); \
|
||||
token->offset = (idx + 1) << PAGE_SHIFT; \
|
||||
memcpy(token->kaddr + oil, lebytes, part); \
|
||||
token->kaddr = folio_address(token->eb->folios[idx + 1]); \
|
||||
token->offset = (idx + 1) << unit_shift; \
|
||||
memcpy(token->kaddr, lebytes + part, size - part); \
|
||||
} \
|
||||
void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \
|
||||
unsigned long off, u##bits val) \
|
||||
{ \
|
||||
const unsigned long member_offset = (unsigned long)ptr + off; \
|
||||
const unsigned long oip = get_eb_offset_in_page(eb, member_offset); \
|
||||
const unsigned long idx = get_eb_page_index(member_offset); \
|
||||
char *kaddr = page_address(eb->pages[idx]); \
|
||||
const unsigned long idx = get_eb_folio_index(eb, member_offset);\
|
||||
const unsigned long oil = get_eb_offset_in_folio(eb, \
|
||||
member_offset);\
|
||||
const int unit_size = folio_size(eb->folios[0]); \
|
||||
char *kaddr = folio_address(eb->folios[idx]); \
|
||||
const int size = sizeof(u##bits); \
|
||||
const int part = PAGE_SIZE - oip; \
|
||||
const int part = unit_size - oil; \
|
||||
u8 lebytes[sizeof(u##bits)]; \
|
||||
\
|
||||
ASSERT(check_setget_bounds(eb, ptr, off, size)); \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || oip + size <= PAGE_SIZE) { \
|
||||
put_unaligned_le##bits(val, kaddr + oip); \
|
||||
if (INLINE_EXTENT_BUFFER_PAGES == 1 || \
|
||||
oil + size <= unit_size) { \
|
||||
put_unaligned_le##bits(val, kaddr + oil); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
put_unaligned_le##bits(val, lebytes); \
|
||||
memcpy(kaddr + oip, lebytes, part); \
|
||||
kaddr = page_address(eb->pages[idx + 1]); \
|
||||
memcpy(kaddr + oil, lebytes, part); \
|
||||
kaddr = folio_address(eb->folios[idx + 1]); \
|
||||
memcpy(kaddr, lebytes + part, size - part); \
|
||||
}
|
||||
|
||||
|
||||
@@ -90,14 +90,14 @@ static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
|
||||
#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
|
||||
static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
|
||||
{ \
|
||||
const type *p = page_address(eb->pages[0]) + \
|
||||
const type *p = folio_address(eb->folios[0]) + \
|
||||
offset_in_page(eb->start); \
|
||||
return get_unaligned_le##bits(&p->member); \
|
||||
} \
|
||||
static inline void btrfs_set_##name(const struct extent_buffer *eb, \
|
||||
u##bits val) \
|
||||
{ \
|
||||
type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \
|
||||
type *p = folio_address(eb->folios[0]) + offset_in_page(eb->start); \
|
||||
put_unaligned_le##bits(val, &p->member); \
|
||||
}
|
||||
|
||||
|
||||
@@ -194,6 +194,12 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
|
||||
struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
|
||||
int mirror = repair_bbio->mirror_num;
|
||||
|
||||
/*
|
||||
* We can only trigger this for data bio, which doesn't support larger
|
||||
* folios yet.
|
||||
*/
|
||||
ASSERT(folio_order(page_folio(bv->bv_page)) == 0);
|
||||
|
||||
if (repair_bbio->bio.bi_status ||
|
||||
!btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
|
||||
bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);
|
||||
@@ -215,7 +221,7 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
|
||||
btrfs_repair_io_failure(fs_info, btrfs_ino(inode),
|
||||
repair_bbio->file_offset, fs_info->sectorsize,
|
||||
repair_bbio->saved_iter.bi_sector << SECTOR_SHIFT,
|
||||
bv->bv_page, bv->bv_offset, mirror);
|
||||
page_folio(bv->bv_page), bv->bv_offset, mirror);
|
||||
} while (mirror != fbio->bbio->mirror_num);
|
||||
|
||||
done:
|
||||
@@ -626,7 +632,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
|
||||
/*
|
||||
* Submit bio to an async queue.
|
||||
*
|
||||
* Return true if the work has been succesfuly submitted, else false.
|
||||
* Return true if the work has been successfully submitted, else false.
|
||||
*/
|
||||
static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
|
||||
struct btrfs_io_context *bioc,
|
||||
@@ -767,8 +773,8 @@ void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num)
|
||||
* freeing the bio.
|
||||
*/
|
||||
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num)
|
||||
u64 length, u64 logical, struct folio *folio,
|
||||
unsigned int folio_offset, int mirror_num)
|
||||
{
|
||||
struct btrfs_io_stripe smap = { 0 };
|
||||
struct bio_vec bvec;
|
||||
@@ -799,7 +805,8 @@ int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
|
||||
bio_init(&bio, smap.dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
|
||||
bio.bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
|
||||
__bio_add_page(&bio, page, length, pg_offset);
|
||||
ret = bio_add_folio(&bio, folio, length, folio_offset);
|
||||
ASSERT(ret);
|
||||
ret = submit_bio_wait(&bio);
|
||||
if (ret) {
|
||||
/* try to remap that extent elsewhere? */
|
||||
|
||||
@@ -105,7 +105,7 @@ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
|
||||
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
|
||||
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
|
||||
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
||||
u64 length, u64 logical, struct page *page,
|
||||
unsigned int pg_offset, int mirror_num);
|
||||
u64 length, u64 logical, struct folio *folio,
|
||||
unsigned int folio_offset, int mirror_num);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -168,7 +168,7 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
|
||||
cache);
|
||||
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache->physical_map);
|
||||
btrfs_free_chunk_map(cache->physical_map);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
@@ -1047,7 +1047,7 @@ static int remove_block_group_item(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 group_start, struct extent_map *em)
|
||||
struct btrfs_chunk_map *map)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_path *path;
|
||||
@@ -1059,10 +1059,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
int index;
|
||||
int factor;
|
||||
struct btrfs_caching_control *caching_ctl = NULL;
|
||||
bool remove_em;
|
||||
bool remove_map;
|
||||
bool remove_rsv = false;
|
||||
|
||||
block_group = btrfs_lookup_block_group(fs_info, group_start);
|
||||
block_group = btrfs_lookup_block_group(fs_info, map->start);
|
||||
BUG_ON(!block_group);
|
||||
BUG_ON(!block_group->ro);
|
||||
|
||||
@@ -1252,7 +1252,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
* entries because we already removed them all when we called
|
||||
* btrfs_remove_free_space_cache().
|
||||
*
|
||||
* And we must not remove the extent map from the fs_info->mapping_tree
|
||||
* And we must not remove the chunk map from the fs_info->mapping_tree
|
||||
* to prevent the same logical address range and physical device space
|
||||
* ranges from being reused for a new block group. This is needed to
|
||||
* avoid races with trimming and scrub.
|
||||
@@ -1268,19 +1268,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
* in place until the extents have been discarded completely when
|
||||
* the transaction commit has completed.
|
||||
*/
|
||||
remove_em = (atomic_read(&block_group->frozen) == 0);
|
||||
remove_map = (atomic_read(&block_group->frozen) == 0);
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (remove_em) {
|
||||
struct extent_map_tree *em_tree;
|
||||
|
||||
em_tree = &fs_info->mapping_tree;
|
||||
write_lock(&em_tree->lock);
|
||||
remove_extent_mapping(em_tree, em);
|
||||
write_unlock(&em_tree->lock);
|
||||
/* once for the tree */
|
||||
free_extent_map(em);
|
||||
}
|
||||
if (remove_map)
|
||||
btrfs_remove_chunk_map(fs_info, map);
|
||||
|
||||
out:
|
||||
/* Once for the lookup reference */
|
||||
@@ -1295,15 +1287,12 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
|
||||
struct btrfs_fs_info *fs_info, const u64 chunk_offset)
|
||||
{
|
||||
struct btrfs_root *root = btrfs_block_group_root(fs_info);
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_chunk_map *map;
|
||||
unsigned int num_items;
|
||||
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, chunk_offset, 1);
|
||||
read_unlock(&em_tree->lock);
|
||||
ASSERT(em && em->start == chunk_offset);
|
||||
map = btrfs_find_chunk_map(fs_info, chunk_offset, 1);
|
||||
ASSERT(map != NULL);
|
||||
ASSERT(map->start == chunk_offset);
|
||||
|
||||
/*
|
||||
* We need to reserve 3 + N units from the metadata space info in order
|
||||
@@ -1324,9 +1313,8 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
|
||||
* more device items and remove one chunk item), but this is done at
|
||||
* btrfs_remove_chunk() through a call to check_system_chunk().
|
||||
*/
|
||||
map = em->map_lookup;
|
||||
num_items = 3 + map->num_stripes;
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
|
||||
return btrfs_start_transaction_fallback_global_rsv(root, num_items);
|
||||
}
|
||||
@@ -1927,8 +1915,7 @@ void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
|
||||
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_map *em;
|
||||
struct btrfs_chunk_map *map;
|
||||
struct btrfs_block_group_item bg;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
@@ -1938,23 +1925,20 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
slot = path->slots[0];
|
||||
leaf = path->nodes[0];
|
||||
|
||||
em_tree = &fs_info->mapping_tree;
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, key->objectid, key->offset);
|
||||
read_unlock(&em_tree->lock);
|
||||
if (!em) {
|
||||
map = btrfs_find_chunk_map(fs_info, key->objectid, key->offset);
|
||||
if (!map) {
|
||||
btrfs_err(fs_info,
|
||||
"logical %llu len %llu found bg but no related chunk",
|
||||
key->objectid, key->offset);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
if (em->start != key->objectid || em->len != key->offset) {
|
||||
if (map->start != key->objectid || map->chunk_len != key->offset) {
|
||||
btrfs_err(fs_info,
|
||||
"block group %llu len %llu mismatch with chunk %llu len %llu",
|
||||
key->objectid, key->offset, em->start, em->len);
|
||||
key->objectid, key->offset, map->start, map->chunk_len);
|
||||
ret = -EUCLEAN;
|
||||
goto out_free_em;
|
||||
goto out_free_map;
|
||||
}
|
||||
|
||||
read_extent_buffer(leaf, &bg, btrfs_item_ptr_offset(leaf, slot),
|
||||
@@ -1962,16 +1946,16 @@ static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
flags = btrfs_stack_block_group_flags(&bg) &
|
||||
BTRFS_BLOCK_GROUP_TYPE_MASK;
|
||||
|
||||
if (flags != (em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
|
||||
if (flags != (map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
|
||||
btrfs_err(fs_info,
|
||||
"block group %llu len %llu type flags 0x%llx mismatch with chunk type flags 0x%llx",
|
||||
key->objectid, key->offset, flags,
|
||||
(BTRFS_BLOCK_GROUP_TYPE_MASK & em->map_lookup->type));
|
||||
(BTRFS_BLOCK_GROUP_TYPE_MASK & map->type));
|
||||
ret = -EUCLEAN;
|
||||
}
|
||||
|
||||
out_free_em:
|
||||
free_extent_map(em);
|
||||
out_free_map:
|
||||
btrfs_free_chunk_map(map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2024,8 +2008,7 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
|
||||
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
u64 physical, u64 **logical, int *naddrs, int *stripe_len)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_chunk_map *map;
|
||||
u64 *buf;
|
||||
u64 bytenr;
|
||||
u64 data_stripe_length;
|
||||
@@ -2033,14 +2016,13 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
int i, nr = 0;
|
||||
int ret = 0;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
|
||||
if (IS_ERR(em))
|
||||
map = btrfs_get_chunk_map(fs_info, chunk_start, 1);
|
||||
if (IS_ERR(map))
|
||||
return -EIO;
|
||||
|
||||
map = em->map_lookup;
|
||||
data_stripe_length = em->orig_block_len;
|
||||
data_stripe_length = map->stripe_size;
|
||||
io_stripe_size = BTRFS_STRIPE_LEN;
|
||||
chunk_start = em->start;
|
||||
chunk_start = map->start;
|
||||
|
||||
/* For RAID5/6 adjust to a full IO stripe length */
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
@@ -2094,7 +2076,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
*naddrs = nr;
|
||||
*stripe_len = io_stripe_size;
|
||||
out:
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2199,49 +2181,47 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
*/
|
||||
static int check_chunk_block_group_mappings(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct extent_map_tree *map_tree = &fs_info->mapping_tree;
|
||||
struct extent_map *em;
|
||||
struct btrfs_block_group *bg;
|
||||
u64 start = 0;
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
read_lock(&map_tree->lock);
|
||||
struct btrfs_chunk_map *map;
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
/*
|
||||
* lookup_extent_mapping will return the first extent map
|
||||
* intersecting the range, so setting @len to 1 is enough to
|
||||
* btrfs_find_chunk_map() will return the first chunk map
|
||||
* intersecting the range, so setting @length to 1 is enough to
|
||||
* get the first chunk.
|
||||
*/
|
||||
em = lookup_extent_mapping(map_tree, start, 1);
|
||||
read_unlock(&map_tree->lock);
|
||||
if (!em)
|
||||
map = btrfs_find_chunk_map(fs_info, start, 1);
|
||||
if (!map)
|
||||
break;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, em->start);
|
||||
bg = btrfs_lookup_block_group(fs_info, map->start);
|
||||
if (!bg) {
|
||||
btrfs_err(fs_info,
|
||||
"chunk start=%llu len=%llu doesn't have corresponding block group",
|
||||
em->start, em->len);
|
||||
map->start, map->chunk_len);
|
||||
ret = -EUCLEAN;
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
break;
|
||||
}
|
||||
if (bg->start != em->start || bg->length != em->len ||
|
||||
if (bg->start != map->start || bg->length != map->chunk_len ||
|
||||
(bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) !=
|
||||
(em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
|
||||
(map->type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
|
||||
btrfs_err(fs_info,
|
||||
"chunk start=%llu len=%llu flags=0x%llx doesn't match block group start=%llu len=%llu flags=0x%llx",
|
||||
em->start, em->len,
|
||||
em->map_lookup->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
|
||||
map->start, map->chunk_len,
|
||||
map->type & BTRFS_BLOCK_GROUP_TYPE_MASK,
|
||||
bg->start, bg->length,
|
||||
bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
|
||||
ret = -EUCLEAN;
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
btrfs_put_block_group(bg);
|
||||
break;
|
||||
}
|
||||
start = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
start = map->start + map->chunk_len;
|
||||
btrfs_free_chunk_map(map);
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
return ret;
|
||||
@@ -2369,28 +2349,25 @@ error:
|
||||
|
||||
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct rb_node *node;
|
||||
int ret = 0;
|
||||
|
||||
for (node = rb_first_cached(&em_tree->map); node; node = rb_next(node)) {
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
for (node = rb_first_cached(&fs_info->mapping_tree); node; node = rb_next(node)) {
|
||||
struct btrfs_chunk_map *map;
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
map = em->map_lookup;
|
||||
bg = btrfs_create_block_group_cache(fs_info, em->start);
|
||||
map = rb_entry(node, struct btrfs_chunk_map, rb_node);
|
||||
bg = btrfs_create_block_group_cache(fs_info, map->start);
|
||||
if (!bg) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Fill dummy cache as FULL */
|
||||
bg->length = em->len;
|
||||
bg->length = map->chunk_len;
|
||||
bg->flags = map->type;
|
||||
bg->cached = BTRFS_CACHE_FINISHED;
|
||||
bg->used = em->len;
|
||||
bg->used = map->chunk_len;
|
||||
bg->flags = map->type;
|
||||
ret = btrfs_add_block_group_cache(fs_info, bg);
|
||||
/*
|
||||
@@ -2618,19 +2595,14 @@ static int insert_dev_extents(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_device *device;
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_chunk_map *map;
|
||||
u64 dev_offset;
|
||||
u64 stripe_size;
|
||||
int i;
|
||||
int ret = 0;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
map = em->map_lookup;
|
||||
stripe_size = em->orig_block_len;
|
||||
map = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
|
||||
if (IS_ERR(map))
|
||||
return PTR_ERR(map);
|
||||
|
||||
/*
|
||||
* Take the device list mutex to prevent races with the final phase of
|
||||
@@ -2647,13 +2619,13 @@ static int insert_dev_extents(struct btrfs_trans_handle *trans,
|
||||
dev_offset = map->stripes[i].physical;
|
||||
|
||||
ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
|
||||
stripe_size);
|
||||
map->stripe_size);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2910,7 +2882,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
|
||||
goto unlock_out;
|
||||
|
||||
/*
|
||||
* Skip chunk alloction if the bg is SYSTEM, this is to avoid system
|
||||
* Skip chunk allocation if the bg is SYSTEM, this is to avoid system
|
||||
* chunk allocation storm to exhaust the system chunk array. Otherwise
|
||||
* we still want to try our best to mark the block group read-only.
|
||||
*/
|
||||
@@ -4406,8 +4378,6 @@ void btrfs_freeze_block_group(struct btrfs_block_group *cache)
|
||||
void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_map *em;
|
||||
bool cleanup;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
@@ -4416,17 +4386,16 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (cleanup) {
|
||||
em_tree = &fs_info->mapping_tree;
|
||||
write_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, block_group->start,
|
||||
1);
|
||||
BUG_ON(!em); /* logic error, can't happen */
|
||||
remove_extent_mapping(em_tree, em);
|
||||
write_unlock(&em_tree->lock);
|
||||
struct btrfs_chunk_map *map;
|
||||
|
||||
/* once for us and once for the tree */
|
||||
free_extent_map(em);
|
||||
free_extent_map(em);
|
||||
map = btrfs_find_chunk_map(fs_info, block_group->start, 1);
|
||||
/* Logic error, can't happen. */
|
||||
ASSERT(map);
|
||||
|
||||
btrfs_remove_chunk_map(fs_info, map);
|
||||
|
||||
/* Once for our lookup reference. */
|
||||
btrfs_free_chunk_map(map);
|
||||
|
||||
/*
|
||||
* We may have left one free space entry and other possible
|
||||
|
||||
@@ -5,6 +5,8 @@
|
||||
|
||||
#include "free-space-cache.h"
|
||||
|
||||
struct btrfs_chunk_map;
|
||||
|
||||
enum btrfs_disk_cache_state {
|
||||
BTRFS_DC_WRITTEN,
|
||||
BTRFS_DC_ERROR,
|
||||
@@ -243,7 +245,7 @@ struct btrfs_block_group {
|
||||
u64 zone_unusable;
|
||||
u64 zone_capacity;
|
||||
u64 meta_write_pointer;
|
||||
struct map_lookup *physical_map;
|
||||
struct btrfs_chunk_map *physical_map;
|
||||
struct list_head active_bg_list;
|
||||
struct work_struct zone_finish_work;
|
||||
struct extent_buffer *last_eb;
|
||||
@@ -297,7 +299,7 @@ struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
const u64 chunk_offset);
|
||||
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 group_start, struct extent_map *em);
|
||||
struct btrfs_chunk_map *map);
|
||||
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work);
|
||||
|
||||
@@ -69,6 +69,8 @@ enum {
|
||||
BTRFS_INODE_VERITY_IN_PROGRESS,
|
||||
/* Set when this inode is a free space inode. */
|
||||
BTRFS_INODE_FREE_SPACE_INODE,
|
||||
/* Set when there are no capabilities in XATTs for the inode. */
|
||||
BTRFS_INODE_NO_CAP_XATTR,
|
||||
};
|
||||
|
||||
/* in memory btrfs inode */
|
||||
@@ -107,9 +109,11 @@ struct btrfs_inode {
|
||||
|
||||
/*
|
||||
* Keep track of where the inode has extent items mapped in order to
|
||||
* make sure the i_size adjustments are accurate
|
||||
* make sure the i_size adjustments are accurate. Not required when the
|
||||
* filesystem is NO_HOLES, the status can't be set while mounted as
|
||||
* it's a mkfs-time feature.
|
||||
*/
|
||||
struct extent_io_tree file_extent_tree;
|
||||
struct extent_io_tree *file_extent_tree;
|
||||
|
||||
/* held while logging the inode in tree-log.c */
|
||||
struct mutex log_mutex;
|
||||
@@ -487,7 +491,7 @@ struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
|
||||
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
|
||||
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 end);
|
||||
u64 start, u64 len);
|
||||
int btrfs_update_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode);
|
||||
int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
|
||||
|
||||
@@ -20,6 +20,7 @@
|
||||
#include <linux/slab.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/shrinker.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
@@ -163,13 +164,107 @@ static int compression_decompress(int type, struct list_head *ws,
|
||||
static void btrfs_free_compressed_pages(struct compressed_bio *cb)
|
||||
{
|
||||
for (unsigned int i = 0; i < cb->nr_pages; i++)
|
||||
put_page(cb->compressed_pages[i]);
|
||||
btrfs_free_compr_page(cb->compressed_pages[i]);
|
||||
kfree(cb->compressed_pages);
|
||||
}
|
||||
|
||||
static int btrfs_decompress_bio(struct compressed_bio *cb);
|
||||
|
||||
static void end_compressed_bio_read(struct btrfs_bio *bbio)
|
||||
/*
|
||||
* Global cache of last unused pages for compression/decompression.
|
||||
*/
|
||||
static struct btrfs_compr_pool {
|
||||
struct shrinker *shrinker;
|
||||
spinlock_t lock;
|
||||
struct list_head list;
|
||||
int count;
|
||||
int thresh;
|
||||
} compr_pool;
|
||||
|
||||
static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We must not read the values more than once if 'ret' gets expanded in
|
||||
* the return statement so we don't accidentally return a negative
|
||||
* number, even if the first condition finds it positive.
|
||||
*/
|
||||
ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh);
|
||||
|
||||
return ret > 0 ? ret : 0;
|
||||
}
|
||||
|
||||
static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
|
||||
{
|
||||
struct list_head remove;
|
||||
struct list_head *tmp, *next;
|
||||
int freed;
|
||||
|
||||
if (compr_pool.count == 0)
|
||||
return SHRINK_STOP;
|
||||
|
||||
INIT_LIST_HEAD(&remove);
|
||||
|
||||
/* For now, just simply drain the whole list. */
|
||||
spin_lock(&compr_pool.lock);
|
||||
list_splice_init(&compr_pool.list, &remove);
|
||||
freed = compr_pool.count;
|
||||
compr_pool.count = 0;
|
||||
spin_unlock(&compr_pool.lock);
|
||||
|
||||
list_for_each_safe(tmp, next, &remove) {
|
||||
struct page *page = list_entry(tmp, struct page, lru);
|
||||
|
||||
ASSERT(page_ref_count(page) == 1);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
return freed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Common wrappers for page allocation from compression wrappers
|
||||
*/
|
||||
struct page *btrfs_alloc_compr_page(void)
|
||||
{
|
||||
struct page *page = NULL;
|
||||
|
||||
spin_lock(&compr_pool.lock);
|
||||
if (compr_pool.count > 0) {
|
||||
page = list_first_entry(&compr_pool.list, struct page, lru);
|
||||
list_del_init(&page->lru);
|
||||
compr_pool.count--;
|
||||
}
|
||||
spin_unlock(&compr_pool.lock);
|
||||
|
||||
if (page)
|
||||
return page;
|
||||
|
||||
return alloc_page(GFP_NOFS);
|
||||
}
|
||||
|
||||
void btrfs_free_compr_page(struct page *page)
|
||||
{
|
||||
bool do_free = false;
|
||||
|
||||
spin_lock(&compr_pool.lock);
|
||||
if (compr_pool.count > compr_pool.thresh) {
|
||||
do_free = true;
|
||||
} else {
|
||||
list_add(&page->lru, &compr_pool.list);
|
||||
compr_pool.count++;
|
||||
}
|
||||
spin_unlock(&compr_pool.lock);
|
||||
|
||||
if (!do_free)
|
||||
return;
|
||||
|
||||
ASSERT(page_ref_count(page) == 1);
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = to_compressed_bio(bbio);
|
||||
blk_status_t status = bbio->bio.bi_status;
|
||||
@@ -211,8 +306,8 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
|
||||
for (i = 0; i < ret; i++) {
|
||||
struct folio *folio = fbatch.folios[i];
|
||||
|
||||
btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
|
||||
cb->start, cb->len);
|
||||
btrfs_folio_clamp_clear_writeback(fs_info, folio,
|
||||
cb->start, cb->len);
|
||||
}
|
||||
folio_batch_release(&fbatch);
|
||||
}
|
||||
@@ -242,7 +337,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
|
||||
* This also calls the writeback end hooks for the file pages so that metadata
|
||||
* and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct btrfs_bio *bbio)
|
||||
static void end_bbio_comprssed_write(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = to_compressed_bio(bbio);
|
||||
struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
|
||||
@@ -289,7 +384,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
|
||||
|
||||
cb = alloc_compressed_bio(inode, ordered->file_offset,
|
||||
REQ_OP_WRITE | write_flags,
|
||||
end_compressed_bio_write);
|
||||
end_bbio_comprssed_write);
|
||||
cb->start = ordered->file_offset;
|
||||
cb->len = ordered->num_bytes;
|
||||
cb->compressed_pages = compressed_pages;
|
||||
@@ -446,7 +541,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
* subpage::readers and to unlock the page.
|
||||
*/
|
||||
if (fs_info->sectorsize < PAGE_SIZE)
|
||||
btrfs_subpage_start_reader(fs_info, page, cur, add_size);
|
||||
btrfs_subpage_start_reader(fs_info, page_folio(page),
|
||||
cur, add_size);
|
||||
put_page(page);
|
||||
cur += add_size;
|
||||
}
|
||||
@@ -489,11 +585,11 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
|
||||
ASSERT(extent_map_is_compressed(em));
|
||||
compressed_len = em->block_len;
|
||||
|
||||
cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
|
||||
end_compressed_bio_read);
|
||||
end_bbio_comprssed_read);
|
||||
|
||||
cb->start = em->orig_start;
|
||||
em_len = em->len;
|
||||
@@ -501,7 +597,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
|
||||
|
||||
cb->len = bbio->bio.bi_iter.bi_size;
|
||||
cb->compressed_len = compressed_len;
|
||||
cb->compress_type = em->compress_type;
|
||||
cb->compress_type = extent_map_compression(em);
|
||||
cb->orig_bbio = bbio;
|
||||
|
||||
free_extent_map(em);
|
||||
@@ -513,7 +609,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
|
||||
goto out_free_bio;
|
||||
}
|
||||
|
||||
ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
|
||||
ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages, 0);
|
||||
if (ret2) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto out_free_compressed_pages;
|
||||
@@ -960,15 +1056,36 @@ int __init btrfs_init_compress(void)
|
||||
offsetof(struct compressed_bio, bbio.bio),
|
||||
BIOSET_NEED_BVECS))
|
||||
return -ENOMEM;
|
||||
|
||||
compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages");
|
||||
if (!compr_pool.shrinker)
|
||||
return -ENOMEM;
|
||||
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
|
||||
btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
|
||||
zstd_init_workspace_manager();
|
||||
|
||||
spin_lock_init(&compr_pool.lock);
|
||||
INIT_LIST_HEAD(&compr_pool.list);
|
||||
compr_pool.count = 0;
|
||||
/* 128K / 4K = 32, for 8 threads is 256 pages. */
|
||||
compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8;
|
||||
compr_pool.shrinker->count_objects = btrfs_compr_pool_count;
|
||||
compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan;
|
||||
compr_pool.shrinker->batch = 32;
|
||||
compr_pool.shrinker->seeks = DEFAULT_SEEKS;
|
||||
shrinker_register(compr_pool.shrinker);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __cold btrfs_exit_compress(void)
|
||||
{
|
||||
/* For now scan drains all pages and does not touch the parameters. */
|
||||
btrfs_compr_pool_scan(NULL, NULL);
|
||||
shrinker_free(compr_pool.shrinker);
|
||||
|
||||
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
|
||||
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
|
||||
btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
|
||||
|
||||
@@ -32,6 +32,8 @@ static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
|
||||
|
||||
#define BTRFS_ZLIB_DEFAULT_LEVEL 3
|
||||
|
||||
struct page;
|
||||
|
||||
struct compressed_bio {
|
||||
/* Number of compressed pages in the array */
|
||||
unsigned int nr_pages;
|
||||
@@ -96,6 +98,9 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
|
||||
|
||||
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);
|
||||
|
||||
struct page *btrfs_alloc_compr_page(void);
|
||||
void btrfs_free_compr_page(struct page *page);
|
||||
|
||||
enum btrfs_compression_type {
|
||||
BTRFS_COMPRESS_NONE = 0,
|
||||
BTRFS_COMPRESS_ZLIB = 1,
|
||||
|
||||
@@ -370,33 +370,41 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
||||
/*
|
||||
* check if the tree block can be shared by multiple trees
|
||||
*/
|
||||
int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *buf)
|
||||
bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *buf)
|
||||
{
|
||||
const u64 buf_gen = btrfs_header_generation(buf);
|
||||
|
||||
/*
|
||||
* Tree blocks not in shareable trees and tree roots are never shared.
|
||||
* If a block was allocated after the last snapshot and the block was
|
||||
* not allocated by tree relocation, we know the block is not shared.
|
||||
*/
|
||||
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
buf != root->node &&
|
||||
(btrfs_header_generation(buf) <=
|
||||
btrfs_root_last_snapshot(&root->root_item) ||
|
||||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) {
|
||||
if (buf != root->commit_root)
|
||||
return 1;
|
||||
/*
|
||||
* An extent buffer that used to be the commit root may still be
|
||||
* shared because the tree height may have increased and it
|
||||
* became a child of a higher level root. This can happen when
|
||||
* snapshotting a subvolume created in the current transaction.
|
||||
*/
|
||||
if (btrfs_header_generation(buf) == trans->transid)
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
|
||||
return false;
|
||||
|
||||
if (buf == root->node)
|
||||
return false;
|
||||
|
||||
if (buf_gen > btrfs_root_last_snapshot(&root->root_item) &&
|
||||
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))
|
||||
return false;
|
||||
|
||||
if (buf != root->commit_root)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* An extent buffer that used to be the commit root may still be shared
|
||||
* because the tree height may have increased and it became a child of a
|
||||
* higher level root. This can happen when snapshotting a subvolume
|
||||
* created in the current transaction.
|
||||
*/
|
||||
if (buf_gen == trans->transid)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
@@ -812,7 +820,8 @@ int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
|
||||
}
|
||||
|
||||
while (low < high) {
|
||||
unsigned long oip;
|
||||
const int unit_size = folio_size(eb->folios[0]);
|
||||
unsigned long oil;
|
||||
unsigned long offset;
|
||||
struct btrfs_disk_key *tmp;
|
||||
struct btrfs_disk_key unaligned;
|
||||
@@ -820,14 +829,14 @@ int btrfs_bin_search(struct extent_buffer *eb, int first_slot,
|
||||
|
||||
mid = (low + high) / 2;
|
||||
offset = p + mid * item_size;
|
||||
oip = offset_in_page(offset);
|
||||
oil = get_eb_offset_in_folio(eb, offset);
|
||||
|
||||
if (oip + key_size <= PAGE_SIZE) {
|
||||
const unsigned long idx = get_eb_page_index(offset);
|
||||
char *kaddr = page_address(eb->pages[idx]);
|
||||
if (oil + key_size <= unit_size) {
|
||||
const unsigned long idx = get_eb_folio_index(eb, offset);
|
||||
char *kaddr = folio_address(eb->folios[idx]);
|
||||
|
||||
oip = get_eb_offset_in_page(eb, offset);
|
||||
tmp = (struct btrfs_disk_key *)(kaddr + oip);
|
||||
oil = get_eb_offset_in_folio(eb, offset);
|
||||
tmp = (struct btrfs_disk_key *)(kaddr + oil);
|
||||
} else {
|
||||
read_extent_buffer(eb, &unaligned, offset, key_size);
|
||||
tmp = &unaligned;
|
||||
|
||||
@@ -212,8 +212,6 @@ struct btrfs_root {
|
||||
|
||||
u64 last_trans;
|
||||
|
||||
u32 type;
|
||||
|
||||
u64 free_objectid;
|
||||
|
||||
struct btrfs_key defrag_progress;
|
||||
@@ -224,18 +222,15 @@ struct btrfs_root {
|
||||
|
||||
struct list_head root_list;
|
||||
|
||||
spinlock_t log_extents_lock[2];
|
||||
struct list_head logged_list[2];
|
||||
|
||||
spinlock_t inode_lock;
|
||||
/* red-black tree that keeps track of in-memory inodes */
|
||||
struct rb_root inode_tree;
|
||||
|
||||
/*
|
||||
* radix tree that keeps track of delayed nodes of every inode,
|
||||
* protected by inode_lock
|
||||
* Xarray that keeps track of delayed nodes of every inode, protected
|
||||
* by @inode_lock.
|
||||
*/
|
||||
struct radix_tree_root delayed_nodes_tree;
|
||||
struct xarray delayed_nodes;
|
||||
/*
|
||||
* right now this just gets used so that a root has its own devid
|
||||
* for stat. It may be used for more later
|
||||
@@ -561,9 +556,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *buf,
|
||||
struct extent_buffer **cow_ret, u64 new_root_objectid);
|
||||
int btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *buf);
|
||||
bool btrfs_block_can_be_shared(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *buf);
|
||||
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct btrfs_path *path, int level, int slot);
|
||||
void btrfs_extend_item(struct btrfs_trans_handle *trans,
|
||||
|
||||
@@ -775,7 +775,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
|
||||
* this em, as either we don't care about the generation, or the
|
||||
* merged extent map will be rejected anyway.
|
||||
*/
|
||||
if (em && test_bit(EXTENT_FLAG_MERGED, &em->flags) &&
|
||||
if (em && (em->flags & EXTENT_FLAG_MERGED) &&
|
||||
newer_than && em->generation >= newer_than) {
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
@@ -802,7 +802,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
|
||||
static u32 get_extent_max_capacity(const struct btrfs_fs_info *fs_info,
|
||||
const struct extent_map *em)
|
||||
{
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
|
||||
if (extent_map_is_compressed(em))
|
||||
return BTRFS_MAX_COMPRESSED;
|
||||
return fs_info->max_extent_size;
|
||||
}
|
||||
@@ -828,7 +828,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
|
||||
/* No more em or hole */
|
||||
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
|
||||
goto out;
|
||||
if (test_bit(EXTENT_FLAG_PREALLOC, &next->flags))
|
||||
if (next->flags & EXTENT_FLAG_PREALLOC)
|
||||
goto out;
|
||||
/*
|
||||
* If the next extent is at its max capacity, defragging current extent
|
||||
@@ -996,10 +996,9 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
|
||||
em->len <= inode->root->fs_info->max_inline)
|
||||
goto next;
|
||||
|
||||
/* Skip hole/delalloc/preallocated extents */
|
||||
/* Skip holes and preallocated extents. */
|
||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
em->block_start == EXTENT_MAP_DELALLOC ||
|
||||
test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
|
||||
(em->flags & EXTENT_FLAG_PREALLOC))
|
||||
goto next;
|
||||
|
||||
/* Skip older extent */
|
||||
@@ -1190,7 +1189,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
|
||||
/* Update the page status */
|
||||
for (i = start_index - first_index; i <= last_index - first_index; i++) {
|
||||
ClearPageChecked(pages[i]);
|
||||
btrfs_page_clamp_set_dirty(fs_info, pages[i], start, len);
|
||||
btrfs_folio_clamp_set_dirty(fs_info, page_folio(pages[i]), start, len);
|
||||
}
|
||||
btrfs_delalloc_release_extents(inode, len);
|
||||
extent_changeset_free(data_reserved);
|
||||
|
||||
@@ -71,7 +71,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
}
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
|
||||
node = xa_load(&root->delayed_nodes, ino);
|
||||
|
||||
if (node) {
|
||||
if (btrfs_inode->delayed_node) {
|
||||
@@ -83,9 +83,9 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
|
||||
/*
|
||||
* It's possible that we're racing into the middle of removing
|
||||
* this node from the radix tree. In this case, the refcount
|
||||
* this node from the xarray. In this case, the refcount
|
||||
* was zero and it should never go back to one. Just return
|
||||
* NULL like it was never in the radix at all; our release
|
||||
* NULL like it was never in the xarray at all; our release
|
||||
* function is in the process of removing it.
|
||||
*
|
||||
* Some implementations of refcount_inc refuse to bump the
|
||||
@@ -93,7 +93,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
* here, refcount_inc() may decide to just WARN_ONCE() instead
|
||||
* of actually bumping the refcount.
|
||||
*
|
||||
* If this node is properly in the radix, we want to bump the
|
||||
* If this node is properly in the xarray, we want to bump the
|
||||
* refcount twice, once for the inode and once for this get
|
||||
* operation.
|
||||
*/
|
||||
@@ -120,6 +120,7 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
|
||||
struct btrfs_root *root = btrfs_inode->root;
|
||||
u64 ino = btrfs_ino(btrfs_inode);
|
||||
int ret;
|
||||
void *ptr;
|
||||
|
||||
again:
|
||||
node = btrfs_get_delayed_node(btrfs_inode);
|
||||
@@ -131,26 +132,30 @@ again:
|
||||
return ERR_PTR(-ENOMEM);
|
||||
btrfs_init_delayed_node(node, root, ino);
|
||||
|
||||
/* cached in the btrfs inode and can be accessed */
|
||||
/* Cached in the inode and can be accessed. */
|
||||
refcount_set(&node->refs, 2);
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret) {
|
||||
/* Allocate and reserve the slot, from now it can return a NULL from xa_load(). */
|
||||
ret = xa_reserve(&root->delayed_nodes, ino, GFP_NOFS);
|
||||
if (ret == -ENOMEM) {
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
return ERR_PTR(ret);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
|
||||
if (ret == -EEXIST) {
|
||||
ptr = xa_load(&root->delayed_nodes, ino);
|
||||
if (ptr) {
|
||||
/* Somebody inserted it, go back and read it. */
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
radix_tree_preload_end();
|
||||
node = NULL;
|
||||
goto again;
|
||||
}
|
||||
ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
|
||||
ASSERT(xa_err(ptr) != -EINVAL);
|
||||
ASSERT(xa_err(ptr) != -ENOMEM);
|
||||
ASSERT(ptr == NULL);
|
||||
btrfs_inode->delayed_node = node;
|
||||
spin_unlock(&root->inode_lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
return node;
|
||||
}
|
||||
@@ -269,8 +274,7 @@ static void __btrfs_release_delayed_node(
|
||||
* back up. We can delete it now.
|
||||
*/
|
||||
ASSERT(refcount_read(&delayed_node->refs) == 0);
|
||||
radix_tree_delete(&root->delayed_nodes_tree,
|
||||
delayed_node->inode_id);
|
||||
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, delayed_node);
|
||||
}
|
||||
@@ -1036,14 +1040,33 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
|
||||
if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
|
||||
goto out;
|
||||
|
||||
path->slots[0]++;
|
||||
if (path->slots[0] >= btrfs_header_nritems(leaf))
|
||||
goto search;
|
||||
again:
|
||||
/*
|
||||
* Now we're going to delete the INODE_REF/EXTREF, which should be the
|
||||
* only one ref left. Check if the next item is an INODE_REF/EXTREF.
|
||||
*
|
||||
* But if we're the last item already, release and search for the last
|
||||
* INODE_REF/EXTREF.
|
||||
*/
|
||||
if (path->slots[0] + 1 >= btrfs_header_nritems(leaf)) {
|
||||
key.objectid = node->inode_id;
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
ASSERT(ret > 0);
|
||||
ASSERT(path->slots[0] > 0);
|
||||
ret = 0;
|
||||
path->slots[0]--;
|
||||
leaf = path->nodes[0];
|
||||
} else {
|
||||
path->slots[0]++;
|
||||
}
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.objectid != node->inode_id)
|
||||
goto out;
|
||||
|
||||
if (key.type != BTRFS_INODE_REF_KEY &&
|
||||
key.type != BTRFS_INODE_EXTREF_KEY)
|
||||
goto out;
|
||||
@@ -1070,22 +1093,6 @@ err_out:
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
return ret;
|
||||
|
||||
search:
|
||||
btrfs_release_path(path);
|
||||
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = -1;
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
ASSERT(ret);
|
||||
|
||||
ret = 0;
|
||||
leaf = path->nodes[0];
|
||||
path->slots[0]--;
|
||||
goto again;
|
||||
}
|
||||
|
||||
static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
|
||||
@@ -2035,34 +2042,36 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
|
||||
|
||||
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
|
||||
{
|
||||
u64 inode_id = 0;
|
||||
unsigned long index = 0;
|
||||
struct btrfs_delayed_node *delayed_nodes[8];
|
||||
int i, n;
|
||||
|
||||
while (1) {
|
||||
struct btrfs_delayed_node *node;
|
||||
int count;
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
|
||||
(void **)delayed_nodes, inode_id,
|
||||
ARRAY_SIZE(delayed_nodes));
|
||||
if (!n) {
|
||||
if (xa_empty(&root->delayed_nodes)) {
|
||||
spin_unlock(&root->inode_lock);
|
||||
break;
|
||||
return;
|
||||
}
|
||||
|
||||
inode_id = delayed_nodes[n - 1]->inode_id + 1;
|
||||
for (i = 0; i < n; i++) {
|
||||
count = 0;
|
||||
xa_for_each_start(&root->delayed_nodes, index, node, index) {
|
||||
/*
|
||||
* Don't increase refs in case the node is dead and
|
||||
* about to be removed from the tree in the loop below
|
||||
*/
|
||||
if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
|
||||
delayed_nodes[i] = NULL;
|
||||
if (refcount_inc_not_zero(&node->refs)) {
|
||||
delayed_nodes[count] = node;
|
||||
count++;
|
||||
}
|
||||
if (count >= ARRAY_SIZE(delayed_nodes))
|
||||
break;
|
||||
}
|
||||
spin_unlock(&root->inode_lock);
|
||||
index++;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
if (!delayed_nodes[i])
|
||||
continue;
|
||||
for (int i = 0; i < count; i++) {
|
||||
__btrfs_kill_delayed_node(delayed_nodes[i]);
|
||||
btrfs_release_delayed_node(delayed_nodes[i]);
|
||||
}
|
||||
|
||||
@@ -550,8 +550,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
u64 physical)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_chunk_map *map;
|
||||
u64 chunk_offset = cache->start;
|
||||
int num_extents, cur_extent;
|
||||
int i;
|
||||
@@ -567,9 +566,8 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
}
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
|
||||
ASSERT(!IS_ERR(em));
|
||||
map = em->map_lookup;
|
||||
map = btrfs_get_chunk_map(fs_info, chunk_offset, 1);
|
||||
ASSERT(!IS_ERR(map));
|
||||
|
||||
num_extents = 0;
|
||||
cur_extent = 0;
|
||||
@@ -583,7 +581,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
cur_extent = i;
|
||||
}
|
||||
|
||||
free_extent_map(em);
|
||||
btrfs_free_chunk_map(map);
|
||||
|
||||
if (num_extents > 1 && cur_extent < num_extents - 1) {
|
||||
/*
|
||||
@@ -812,25 +810,23 @@ static void btrfs_dev_replace_update_device_in_mapping_tree(
|
||||
struct btrfs_device *srcdev,
|
||||
struct btrfs_device *tgtdev)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
u64 start = 0;
|
||||
int i;
|
||||
|
||||
write_lock(&em_tree->lock);
|
||||
write_lock(&fs_info->mapping_tree_lock);
|
||||
do {
|
||||
em = lookup_extent_mapping(em_tree, start, (u64)-1);
|
||||
if (!em)
|
||||
struct btrfs_chunk_map *map;
|
||||
|
||||
map = btrfs_find_chunk_map_nolock(fs_info, start, U64_MAX);
|
||||
if (!map)
|
||||
break;
|
||||
map = em->map_lookup;
|
||||
for (i = 0; i < map->num_stripes; i++)
|
||||
if (srcdev == map->stripes[i].dev)
|
||||
map->stripes[i].dev = tgtdev;
|
||||
start = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
start = map->start + map->chunk_len;
|
||||
btrfs_free_chunk_map(map);
|
||||
} while (start);
|
||||
write_unlock(&em_tree->lock);
|
||||
write_unlock(&fs_info->mapping_tree_lock);
|
||||
}
|
||||
|
||||
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
|
||||
@@ -74,20 +74,37 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info)
|
||||
static void csum_tree_block(struct extent_buffer *buf, u8 *result)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = buf->fs_info;
|
||||
const int num_pages = num_extent_pages(buf);
|
||||
const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
|
||||
int num_pages;
|
||||
u32 first_page_part;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
char *kaddr;
|
||||
int i;
|
||||
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
crypto_shash_init(shash);
|
||||
kaddr = page_address(buf->pages[0]) + offset_in_page(buf->start);
|
||||
|
||||
if (buf->addr) {
|
||||
/* Pages are contiguous, handle them as a big one. */
|
||||
kaddr = buf->addr;
|
||||
first_page_part = fs_info->nodesize;
|
||||
num_pages = 1;
|
||||
} else {
|
||||
kaddr = folio_address(buf->folios[0]);
|
||||
first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize);
|
||||
num_pages = num_extent_pages(buf);
|
||||
}
|
||||
|
||||
crypto_shash_update(shash, kaddr + BTRFS_CSUM_SIZE,
|
||||
first_page_part - BTRFS_CSUM_SIZE);
|
||||
|
||||
/*
|
||||
* Multiple single-page folios case would reach here.
|
||||
*
|
||||
* nodesize <= PAGE_SIZE and large folio all handled by above
|
||||
* crypto_shash_update() already.
|
||||
*/
|
||||
for (i = 1; i < num_pages && INLINE_EXTENT_BUFFER_PAGES > 1; i++) {
|
||||
kaddr = page_address(buf->pages[i]);
|
||||
kaddr = folio_address(buf->folios[i]);
|
||||
crypto_shash_update(shash, kaddr, PAGE_SIZE);
|
||||
}
|
||||
memset(result, 0, BTRFS_CSUM_SIZE);
|
||||
@@ -166,20 +183,22 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
|
||||
int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
int i, num_pages = num_extent_pages(eb);
|
||||
int num_folios = num_extent_folios(eb);
|
||||
int ret = 0;
|
||||
|
||||
if (sb_rdonly(fs_info->sb))
|
||||
return -EROFS;
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
struct page *p = eb->pages[i];
|
||||
u64 start = max_t(u64, eb->start, page_offset(p));
|
||||
u64 end = min_t(u64, eb->start + eb->len, page_offset(p) + PAGE_SIZE);
|
||||
for (int i = 0; i < num_folios; i++) {
|
||||
struct folio *folio = eb->folios[i];
|
||||
u64 start = max_t(u64, eb->start, folio_pos(folio));
|
||||
u64 end = min_t(u64, eb->start + eb->len,
|
||||
folio_pos(folio) + folio_size(folio));
|
||||
u32 len = end - start;
|
||||
|
||||
ret = btrfs_repair_io_failure(fs_info, 0, start, len,
|
||||
start, p, offset_in_page(start), mirror_num);
|
||||
start, folio, offset_in_folio(folio, start),
|
||||
mirror_num);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@@ -254,15 +273,20 @@ blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
|
||||
if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
|
||||
WARN_ON_ONCE(found_start != 0);
|
||||
/*
|
||||
* If an extent_buffer is marked as EXTENT_BUFFER_ZONED_ZEROOUT, don't
|
||||
* checksum it but zero-out its content. This is done to preserve
|
||||
* ordering of I/O without unnecessarily writing out data.
|
||||
*/
|
||||
if (test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags)) {
|
||||
memzero_extent_buffer(eb, 0, eb->len);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
if (WARN_ON_ONCE(found_start != eb->start))
|
||||
return BLK_STS_IOERR;
|
||||
if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start,
|
||||
eb->len)))
|
||||
if (WARN_ON(!btrfs_folio_test_uptodate(fs_info, eb->folios[0],
|
||||
eb->start, eb->len)))
|
||||
return BLK_STS_IOERR;
|
||||
|
||||
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
|
||||
@@ -371,8 +395,8 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
}
|
||||
|
||||
csum_tree_block(eb, result);
|
||||
header_csum = page_address(eb->pages[0]) +
|
||||
get_eb_offset_in_page(eb, offsetof(struct btrfs_header, csum));
|
||||
header_csum = folio_address(eb->folios[0]) +
|
||||
get_eb_offset_in_folio(eb, offsetof(struct btrfs_header, csum));
|
||||
|
||||
if (memcmp(result, header_csum, csum_size) != 0) {
|
||||
btrfs_warn_rl(fs_info,
|
||||
@@ -639,7 +663,8 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
root->nr_delalloc_inodes = 0;
|
||||
root->nr_ordered_extents = 0;
|
||||
root->inode_tree = RB_ROOT;
|
||||
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
|
||||
/* GFP flags are compatible with XA_FLAGS_*. */
|
||||
xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
|
||||
|
||||
btrfs_init_root_block_rsv(root);
|
||||
|
||||
@@ -650,14 +675,10 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
INIT_LIST_HEAD(&root->ordered_extents);
|
||||
INIT_LIST_HEAD(&root->ordered_root);
|
||||
INIT_LIST_HEAD(&root->reloc_dirty_list);
|
||||
INIT_LIST_HEAD(&root->logged_list[0]);
|
||||
INIT_LIST_HEAD(&root->logged_list[1]);
|
||||
spin_lock_init(&root->inode_lock);
|
||||
spin_lock_init(&root->delalloc_lock);
|
||||
spin_lock_init(&root->ordered_extent_lock);
|
||||
spin_lock_init(&root->accounting_lock);
|
||||
spin_lock_init(&root->log_extents_lock[0]);
|
||||
spin_lock_init(&root->log_extents_lock[1]);
|
||||
spin_lock_init(&root->qgroup_meta_rsv_lock);
|
||||
mutex_init(&root->objectid_mutex);
|
||||
mutex_init(&root->log_mutex);
|
||||
@@ -2618,9 +2639,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
*/
|
||||
btrfs_set_super_log_root(sb, 0);
|
||||
|
||||
/* We can't trust the free space cache either */
|
||||
btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
|
||||
|
||||
btrfs_warn(fs_info, "try to load backup roots slot %d", i);
|
||||
ret = read_backup_root(fs_info, i);
|
||||
backup_index = ret;
|
||||
@@ -2724,7 +2742,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
INIT_LIST_HEAD(&fs_info->allocated_ebs);
|
||||
spin_lock_init(&fs_info->eb_leak_lock);
|
||||
#endif
|
||||
extent_map_tree_init(&fs_info->mapping_tree);
|
||||
fs_info->mapping_tree = RB_ROOT_CACHED;
|
||||
rwlock_init(&fs_info->mapping_tree_lock);
|
||||
btrfs_init_block_rsv(&fs_info->global_block_rsv,
|
||||
BTRFS_BLOCK_RSV_GLOBAL);
|
||||
btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS);
|
||||
@@ -2794,6 +2813,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
fs_info->sectorsize_bits = ilog2(4096);
|
||||
fs_info->stripesize = 4096;
|
||||
|
||||
/* Default compress algorithm when user does -o compress */
|
||||
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
|
||||
fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
|
||||
|
||||
spin_lock_init(&fs_info->swapfile_pins_lock);
|
||||
@@ -2930,17 +2952,6 @@ out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some options only have meaning at mount time and shouldn't persist across
|
||||
* remounts, or be displayed. Clear these at the end of mount and remount
|
||||
* code paths.
|
||||
*/
|
||||
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
|
||||
btrfs_clear_opt(fs_info->mount_opt, CLEAR_CACHE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Mounting logic specific to read-write file systems. Shared by open_ctree
|
||||
* and btrfs_remount when remounting from read-only to read-write.
|
||||
@@ -2953,7 +2964,11 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
|
||||
|
||||
if (btrfs_test_opt(fs_info, CLEAR_CACHE) &&
|
||||
btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) {
|
||||
rebuild_free_space_tree = true;
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
btrfs_warn(fs_info,
|
||||
"'clear_cache' option is ignored with extent tree v2");
|
||||
else
|
||||
rebuild_free_space_tree = true;
|
||||
} else if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
|
||||
!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID)) {
|
||||
btrfs_warn(fs_info, "free space tree is invalid");
|
||||
@@ -3276,13 +3291,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
|
||||
WRITE_ONCE(fs_info->fs_error, -EUCLEAN);
|
||||
|
||||
/*
|
||||
* In the long term, we'll store the compression type in the super
|
||||
* block, and it'll be used for per file compression control.
|
||||
*/
|
||||
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
|
||||
|
||||
|
||||
/* Set up fs_info before parsing mount options */
|
||||
nodesize = btrfs_super_nodesize(disk_super);
|
||||
sectorsize = btrfs_super_sectorsize(disk_super);
|
||||
@@ -3296,28 +3304,30 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
|
||||
fs_info->stripesize = stripesize;
|
||||
|
||||
ret = btrfs_parse_options(fs_info, options, sb->s_flags);
|
||||
if (ret)
|
||||
/*
|
||||
* Handle the space caching options appropriately now that we have the
|
||||
* super block loaded and validated.
|
||||
*/
|
||||
btrfs_set_free_space_cache_settings(fs_info);
|
||||
|
||||
if (!btrfs_check_options(fs_info, &fs_info->mount_opt, sb->s_flags)) {
|
||||
ret = -EINVAL;
|
||||
goto fail_alloc;
|
||||
}
|
||||
|
||||
ret = btrfs_check_features(fs_info, !sb_rdonly(sb));
|
||||
if (ret < 0)
|
||||
goto fail_alloc;
|
||||
|
||||
/*
|
||||
* At this point our mount options are validated, if we set ->max_inline
|
||||
* to something non-standard make sure we truncate it to sectorsize.
|
||||
*/
|
||||
fs_info->max_inline = min_t(u64, fs_info->max_inline, fs_info->sectorsize);
|
||||
|
||||
if (sectorsize < PAGE_SIZE) {
|
||||
struct btrfs_subpage_info *subpage_info;
|
||||
|
||||
/*
|
||||
* V1 space cache has some hardcoded PAGE_SIZE usage, and is
|
||||
* going to be deprecated.
|
||||
*
|
||||
* Force to use v2 cache for subpage case.
|
||||
*/
|
||||
btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE);
|
||||
btrfs_set_and_info(fs_info, FREE_SPACE_TREE,
|
||||
"forcing free space tree for sector size %u with page size %lu",
|
||||
sectorsize, PAGE_SIZE);
|
||||
|
||||
btrfs_warn(fs_info,
|
||||
"read-write for sector size %u with page size %lu is experimental",
|
||||
sectorsize, PAGE_SIZE);
|
||||
@@ -3494,29 +3504,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
goto fail_cleaner;
|
||||
}
|
||||
|
||||
if (!btrfs_test_opt(fs_info, NOSSD) &&
|
||||
!fs_info->fs_devices->rotating) {
|
||||
btrfs_set_and_info(fs_info, SSD, "enabling ssd optimizations");
|
||||
}
|
||||
|
||||
/*
|
||||
* For devices supporting discard turn on discard=async automatically,
|
||||
* unless it's already set or disabled. This could be turned off by
|
||||
* nodiscard for the same mount.
|
||||
*
|
||||
* The zoned mode piggy backs on the discard functionality for
|
||||
* resetting a zone. There is no reason to delay the zone reset as it is
|
||||
* fast enough. So, do not enable async discard for zoned mode.
|
||||
*/
|
||||
if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
|
||||
btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
|
||||
btrfs_test_opt(fs_info, NODISCARD)) &&
|
||||
fs_info->fs_devices->discardable &&
|
||||
!btrfs_is_zoned(fs_info)) {
|
||||
btrfs_set_and_info(fs_info, DISCARD_ASYNC,
|
||||
"auto enabling async discard");
|
||||
}
|
||||
|
||||
ret = btrfs_read_qgroup_config(fs_info);
|
||||
if (ret)
|
||||
goto fail_trans_kthread;
|
||||
@@ -3542,7 +3529,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
}
|
||||
|
||||
if (sb_rdonly(sb))
|
||||
goto clear_oneshot;
|
||||
return 0;
|
||||
|
||||
ret = btrfs_start_pre_rw_mount(fs_info);
|
||||
if (ret) {
|
||||
@@ -3570,8 +3557,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
if (test_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags))
|
||||
wake_up_process(fs_info->cleaner_kthread);
|
||||
|
||||
clear_oneshot:
|
||||
btrfs_clear_oneshot_options(fs_info);
|
||||
return 0;
|
||||
|
||||
fail_qgroup:
|
||||
@@ -3608,7 +3593,7 @@ fail_sb_buffer:
|
||||
btrfs_stop_all_workers(fs_info);
|
||||
btrfs_free_block_groups(fs_info);
|
||||
fail_alloc:
|
||||
btrfs_mapping_tree_free(&fs_info->mapping_tree);
|
||||
btrfs_mapping_tree_free(fs_info);
|
||||
|
||||
iput(fs_info->btree_inode);
|
||||
fail:
|
||||
@@ -4391,7 +4376,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
|
||||
iput(fs_info->btree_inode);
|
||||
|
||||
btrfs_mapping_tree_free(&fs_info->mapping_tree);
|
||||
btrfs_mapping_tree_free(fs_info);
|
||||
btrfs_close_devices(fs_info->fs_devices);
|
||||
}
|
||||
|
||||
|
||||
@@ -37,9 +37,6 @@ struct extent_buffer *btrfs_find_create_tree_block(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 owner_root,
|
||||
int level);
|
||||
void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *buf);
|
||||
void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_super_block *disk_sb);
|
||||
|
||||
@@ -58,12 +58,13 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
|
||||
struct extent_io_tree *tree,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
struct btrfs_inode *inode = tree->inode;
|
||||
const struct btrfs_inode *inode;
|
||||
u64 isize;
|
||||
|
||||
if (!inode)
|
||||
if (tree->owner != IO_TREE_INODE_IO)
|
||||
return;
|
||||
|
||||
inode = extent_io_tree_to_inode_const(tree);
|
||||
isize = i_size_read(&inode->vfs_inode);
|
||||
if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
|
||||
btrfs_debug_rl(inode->root->fs_info,
|
||||
@@ -78,31 +79,46 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
|
||||
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* For the file_extent_tree, we want to hold the inode lock when we lookup and
|
||||
* update the disk_i_size, but lockdep will complain because our io_tree we hold
|
||||
* the tree lock and get the inode lock when setting delalloc. These two things
|
||||
* are unrelated, so make a class for the file_extent_tree so we don't get the
|
||||
* two locking patterns mixed up.
|
||||
*/
|
||||
static struct lock_class_key file_extent_tree_class;
|
||||
|
||||
struct tree_entry {
|
||||
u64 start;
|
||||
u64 end;
|
||||
struct rb_node rb_node;
|
||||
};
|
||||
/*
|
||||
* The only tree allowed to set the inode is IO_TREE_INODE_IO.
|
||||
*/
|
||||
static bool is_inode_io_tree(const struct extent_io_tree *tree)
|
||||
{
|
||||
return tree->owner == IO_TREE_INODE_IO;
|
||||
}
|
||||
|
||||
/* Return the inode if it's valid for the given tree, otherwise NULL. */
|
||||
struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree)
|
||||
{
|
||||
if (tree->owner == IO_TREE_INODE_IO)
|
||||
return tree->inode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Read-only access to the inode. */
|
||||
const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree)
|
||||
{
|
||||
if (tree->owner == IO_TREE_INODE_IO)
|
||||
return tree->inode;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* For read-only access to fs_info. */
|
||||
const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree)
|
||||
{
|
||||
if (tree->owner == IO_TREE_INODE_IO)
|
||||
return tree->inode->root->fs_info;
|
||||
return tree->fs_info;
|
||||
}
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner)
|
||||
{
|
||||
tree->fs_info = fs_info;
|
||||
tree->state = RB_ROOT;
|
||||
spin_lock_init(&tree->lock);
|
||||
tree->inode = NULL;
|
||||
tree->fs_info = fs_info;
|
||||
tree->owner = owner;
|
||||
if (owner == IO_TREE_INODE_FILE_EXTENT)
|
||||
lockdep_set_class(&tree->lock, &file_extent_tree_class);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -329,10 +345,14 @@ static inline struct extent_state *tree_search(struct extent_io_tree *tree, u64
|
||||
return tree_search_for_insert(tree, offset, NULL, NULL);
|
||||
}
|
||||
|
||||
static void extent_io_tree_panic(struct extent_io_tree *tree, int err)
|
||||
static void extent_io_tree_panic(const struct extent_io_tree *tree,
|
||||
const struct extent_state *state,
|
||||
const char *opname,
|
||||
int err)
|
||||
{
|
||||
btrfs_panic(tree->fs_info, err,
|
||||
"locking error: extent tree was modified by another thread while locked");
|
||||
btrfs_panic(extent_io_tree_to_fs_info(tree), err,
|
||||
"extent io tree error on %s state start %llu end %llu",
|
||||
opname, state->start, state->end);
|
||||
}
|
||||
|
||||
static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *state)
|
||||
@@ -341,8 +361,9 @@ static void merge_prev_state(struct extent_io_tree *tree, struct extent_state *s
|
||||
|
||||
prev = prev_state(state);
|
||||
if (prev && prev->end == state->start - 1 && prev->state == state->state) {
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode, state, prev);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
|
||||
state, prev);
|
||||
state->start = prev->start;
|
||||
rb_erase(&prev->rb_node, &tree->state);
|
||||
RB_CLEAR_NODE(&prev->rb_node);
|
||||
@@ -356,8 +377,9 @@ static void merge_next_state(struct extent_io_tree *tree, struct extent_state *s
|
||||
|
||||
next = next_state(state);
|
||||
if (next && next->start == state->end + 1 && next->state == state->state) {
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode, state, next);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_merge_delalloc_extent(extent_io_tree_to_inode(tree),
|
||||
state, next);
|
||||
state->end = next->end;
|
||||
rb_erase(&next->rb_node, &tree->state);
|
||||
RB_CLEAR_NODE(&next->rb_node);
|
||||
@@ -390,8 +412,8 @@ static void set_state_bits(struct extent_io_tree *tree,
|
||||
u32 bits_to_set = bits & ~EXTENT_CTLBITS;
|
||||
int ret;
|
||||
|
||||
if (tree->inode)
|
||||
btrfs_set_delalloc_extent(tree->inode, state, bits);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_set_delalloc_extent(extent_io_tree_to_inode(tree), state, bits);
|
||||
|
||||
ret = add_extent_changeset(state, bits_to_set, changeset, 1);
|
||||
BUG_ON(ret < 0);
|
||||
@@ -436,9 +458,10 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
|
||||
if (state->end < entry->start) {
|
||||
if (try_merge && end == entry->start &&
|
||||
state->state == entry->state) {
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode,
|
||||
state, entry);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_merge_delalloc_extent(
|
||||
extent_io_tree_to_inode(tree),
|
||||
state, entry);
|
||||
entry->start = state->start;
|
||||
merge_prev_state(tree, entry);
|
||||
state->state = 0;
|
||||
@@ -448,9 +471,10 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
|
||||
} else if (state->end > entry->end) {
|
||||
if (try_merge && entry->end == start &&
|
||||
state->state == entry->state) {
|
||||
if (tree->inode)
|
||||
btrfs_merge_delalloc_extent(tree->inode,
|
||||
state, entry);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_merge_delalloc_extent(
|
||||
extent_io_tree_to_inode(tree),
|
||||
state, entry);
|
||||
entry->end = state->end;
|
||||
merge_next_state(tree, entry);
|
||||
state->state = 0;
|
||||
@@ -458,9 +482,6 @@ static struct extent_state *insert_state(struct extent_io_tree *tree,
|
||||
}
|
||||
node = &(*node)->rb_right;
|
||||
} else {
|
||||
btrfs_err(tree->fs_info,
|
||||
"found node %llu %llu on insert of %llu %llu",
|
||||
entry->start, entry->end, state->start, state->end);
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
}
|
||||
@@ -505,8 +526,9 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
|
||||
struct rb_node *parent = NULL;
|
||||
struct rb_node **node;
|
||||
|
||||
if (tree->inode)
|
||||
btrfs_split_delalloc_extent(tree->inode, orig, split);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_split_delalloc_extent(extent_io_tree_to_inode(tree), orig,
|
||||
split);
|
||||
|
||||
prealloc->start = orig->start;
|
||||
prealloc->end = split - 1;
|
||||
@@ -553,8 +575,9 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
|
||||
u32 bits_to_clear = bits & ~EXTENT_CTLBITS;
|
||||
int ret;
|
||||
|
||||
if (tree->inode)
|
||||
btrfs_clear_delalloc_extent(tree->inode, state, bits);
|
||||
if (is_inode_io_tree(tree))
|
||||
btrfs_clear_delalloc_extent(extent_io_tree_to_inode(tree), state,
|
||||
bits);
|
||||
|
||||
ret = add_extent_changeset(state, bits_to_clear, changeset, 0);
|
||||
BUG_ON(ret < 0);
|
||||
@@ -695,7 +718,7 @@ hit_next:
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
|
||||
prealloc = NULL;
|
||||
if (err)
|
||||
@@ -717,7 +740,7 @@ hit_next:
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, end + 1);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
|
||||
if (wake)
|
||||
wake_up(&state->wq);
|
||||
@@ -939,6 +962,8 @@ int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
struct extent_state *state;
|
||||
int ret = 1;
|
||||
|
||||
ASSERT(!btrfs_fs_incompat(extent_io_tree_to_fs_info(tree), NO_HOLES));
|
||||
|
||||
spin_lock(&tree->lock);
|
||||
state = find_first_extent_bit_state(tree, start, bits);
|
||||
if (state) {
|
||||
@@ -1152,7 +1177,7 @@ hit_next:
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
|
||||
prealloc = NULL;
|
||||
if (err)
|
||||
@@ -1200,7 +1225,7 @@ hit_next:
|
||||
inserted_state = insert_state(tree, prealloc, bits, changeset);
|
||||
if (IS_ERR(inserted_state)) {
|
||||
err = PTR_ERR(inserted_state);
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, prealloc, "insert", err);
|
||||
}
|
||||
|
||||
cache_state(inserted_state, cached_state);
|
||||
@@ -1228,7 +1253,7 @@ hit_next:
|
||||
goto search_again;
|
||||
err = split_state(tree, state, prealloc, end + 1);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
|
||||
set_state_bits(tree, prealloc, bits, changeset);
|
||||
cache_state(prealloc, cached_state);
|
||||
@@ -1382,7 +1407,7 @@ hit_next:
|
||||
}
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
prealloc = NULL;
|
||||
if (err)
|
||||
goto out;
|
||||
@@ -1430,7 +1455,7 @@ hit_next:
|
||||
inserted_state = insert_state(tree, prealloc, bits, NULL);
|
||||
if (IS_ERR(inserted_state)) {
|
||||
err = PTR_ERR(inserted_state);
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, prealloc, "insert", err);
|
||||
}
|
||||
cache_state(inserted_state, cached_state);
|
||||
if (inserted_state == prealloc)
|
||||
@@ -1453,7 +1478,7 @@ hit_next:
|
||||
|
||||
err = split_state(tree, state, prealloc, end + 1);
|
||||
if (err)
|
||||
extent_io_tree_panic(tree, err);
|
||||
extent_io_tree_panic(tree, state, "split", err);
|
||||
|
||||
set_state_bits(tree, prealloc, bits, NULL);
|
||||
cache_state(prealloc, cached_state);
|
||||
|
||||
@@ -87,9 +87,17 @@ enum {
|
||||
|
||||
struct extent_io_tree {
|
||||
struct rb_root state;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
/* Inode associated with this tree, or NULL. */
|
||||
struct btrfs_inode *inode;
|
||||
/*
|
||||
* The fs_info is needed for trace points, a tree attached to an inode
|
||||
* needs the inode.
|
||||
*
|
||||
* owner == IO_TREE_INODE_IO - then inode is valid and fs_info can be
|
||||
* accessed as inode->root->fs_info
|
||||
*/
|
||||
union {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct btrfs_inode *inode;
|
||||
};
|
||||
|
||||
/* Who owns this io tree, should be one of IO_TREE_* */
|
||||
u8 owner;
|
||||
@@ -112,6 +120,10 @@ struct extent_state {
|
||||
#endif
|
||||
};
|
||||
|
||||
struct btrfs_inode *extent_io_tree_to_inode(struct extent_io_tree *tree);
|
||||
const struct btrfs_inode *extent_io_tree_to_inode_const(const struct extent_io_tree *tree);
|
||||
const struct btrfs_fs_info *extent_io_tree_to_fs_info(const struct extent_io_tree *tree);
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner);
|
||||
void extent_io_tree_release(struct extent_io_tree *tree);
|
||||
|
||||
@@ -3447,6 +3447,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_ref generic_ref = { 0 };
|
||||
struct btrfs_block_group *bg;
|
||||
int ret;
|
||||
|
||||
btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
|
||||
@@ -3460,67 +3461,64 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
}
|
||||
|
||||
if (last_ref && btrfs_header_generation(buf) == trans->transid) {
|
||||
struct btrfs_block_group *cache;
|
||||
bool must_pin = false;
|
||||
if (!last_ref)
|
||||
return;
|
||||
|
||||
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
|
||||
ret = check_ref_cleanup(trans, buf->start);
|
||||
if (!ret) {
|
||||
btrfs_redirty_list_add(trans->transaction, buf);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
if (btrfs_header_generation(buf) != trans->transid)
|
||||
goto out;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, buf->start);
|
||||
|
||||
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
|
||||
pin_down_extent(trans, cache, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
if (root_id != BTRFS_TREE_LOG_OBJECTID) {
|
||||
ret = check_ref_cleanup(trans, buf->start);
|
||||
if (!ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are tree mod log users we may have recorded mod log
|
||||
* operations for this node. If we re-allocate this node we
|
||||
* could replay operations on this node that happened when it
|
||||
* existed in a completely different root. For example if it
|
||||
* was part of root A, then was reallocated to root B, and we
|
||||
* are doing a btrfs_old_search_slot(root b), we could replay
|
||||
* operations that happened when the block was part of root A,
|
||||
* giving us an inconsistent view of the btree.
|
||||
*
|
||||
* We are safe from races here because at this point no other
|
||||
* node or root points to this extent buffer, so if after this
|
||||
* check a new tree mod log user joins we will not have an
|
||||
* existing log of operations on this node that we have to
|
||||
* contend with.
|
||||
*/
|
||||
if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags))
|
||||
must_pin = true;
|
||||
|
||||
if (must_pin || btrfs_is_zoned(fs_info)) {
|
||||
btrfs_redirty_list_add(trans->transaction, buf);
|
||||
pin_down_extent(trans, cache, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
goto out;
|
||||
}
|
||||
|
||||
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
|
||||
|
||||
btrfs_add_free_space(cache, buf->start, buf->len);
|
||||
btrfs_free_reserved_bytes(cache, buf->len, 0);
|
||||
btrfs_put_block_group(cache);
|
||||
trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
|
||||
}
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, buf->start);
|
||||
|
||||
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
|
||||
pin_down_extent(trans, bg, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(bg);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are tree mod log users we may have recorded mod log
|
||||
* operations for this node. If we re-allocate this node we
|
||||
* could replay operations on this node that happened when it
|
||||
* existed in a completely different root. For example if it
|
||||
* was part of root A, then was reallocated to root B, and we
|
||||
* are doing a btrfs_old_search_slot(root b), we could replay
|
||||
* operations that happened when the block was part of root A,
|
||||
* giving us an inconsistent view of the btree.
|
||||
*
|
||||
* We are safe from races here because at this point no other
|
||||
* node or root points to this extent buffer, so if after this
|
||||
* check a new tree mod log user joins we will not have an
|
||||
* existing log of operations on this node that we have to
|
||||
* contend with.
|
||||
*/
|
||||
|
||||
if (test_bit(BTRFS_FS_TREE_MOD_LOG_USERS, &fs_info->flags)
|
||||
|| btrfs_is_zoned(fs_info)) {
|
||||
pin_down_extent(trans, bg, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(bg);
|
||||
goto out;
|
||||
}
|
||||
|
||||
WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags));
|
||||
|
||||
btrfs_add_free_space(bg, buf->start, buf->len);
|
||||
btrfs_free_reserved_bytes(bg, buf->len, 0);
|
||||
btrfs_put_block_group(bg);
|
||||
trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len);
|
||||
|
||||
out:
|
||||
if (last_ref) {
|
||||
/*
|
||||
* Deleting the buffer, clear the corrupt flag since it doesn't
|
||||
* matter anymore.
|
||||
*/
|
||||
clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Deleting the buffer, clear the corrupt flag since it doesn't
|
||||
* matter anymore.
|
||||
*/
|
||||
clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags);
|
||||
}
|
||||
|
||||
/* Can return -ENOMEM */
|
||||
@@ -5061,7 +5059,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
__btrfs_tree_lock(buf, nest);
|
||||
btrfs_clear_buffer_dirty(trans, buf);
|
||||
clear_bit(EXTENT_BUFFER_STALE, &buf->bflags);
|
||||
clear_bit(EXTENT_BUFFER_NO_CHECK, &buf->bflags);
|
||||
clear_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &buf->bflags);
|
||||
|
||||
set_extent_buffer_uptodate(buf);
|
||||
|
||||
|
||||
1051
fs/btrfs/extent_io.c
1051
fs/btrfs/extent_io.c
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user