mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
bcachefs: bcachefs_metadata_version_persistent_inode_cursors
Persistent cursors for inode allocation. A free inodes btree would add substantial overhead to inode allocation and freeing - a "next num to allocate" cursor is always going to be faster. We just need it to be persistent, to avoid scanning the inodes btree from the start on startup. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
This commit is contained in:
@@ -1063,9 +1063,6 @@ struct bch_fs {
|
||||
struct btree_node *verify_ondisk;
|
||||
struct mutex verify_lock;
|
||||
|
||||
u64 *unused_inode_hints;
|
||||
unsigned inode_shard_bits;
|
||||
|
||||
/*
|
||||
* A btree node on disk could have too many bsets for an iterator to fit
|
||||
* on the stack - have to dynamically allocate them
|
||||
|
||||
@@ -418,7 +418,8 @@ static inline void bkey_init(struct bkey *k)
|
||||
x(snapshot_tree, 31) \
|
||||
x(logged_op_truncate, 32) \
|
||||
x(logged_op_finsert, 33) \
|
||||
x(accounting, 34)
|
||||
x(accounting, 34) \
|
||||
x(inode_alloc_cursor, 35)
|
||||
|
||||
enum bch_bkey_type {
|
||||
#define x(name, nr) KEY_TYPE_##name = nr,
|
||||
@@ -682,7 +683,8 @@ struct bch_sb_field_ext {
|
||||
x(backpointer_bucket_gen, BCH_VERSION(1, 14)) \
|
||||
x(disk_accounting_big_endian, BCH_VERSION(1, 15)) \
|
||||
x(reflink_p_may_update_opts, BCH_VERSION(1, 16)) \
|
||||
x(inode_depth, BCH_VERSION(1, 17))
|
||||
x(inode_depth, BCH_VERSION(1, 17)) \
|
||||
x(persistent_inode_cursors, BCH_VERSION(1, 18))
|
||||
|
||||
enum bcachefs_metadata_version {
|
||||
bcachefs_metadata_version_min = 9,
|
||||
@@ -850,6 +852,7 @@ LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT,
|
||||
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT, struct bch_sb, flags[5], 32, 48);
|
||||
LE64_BITMASK(BCH_SB_VERSION_INCOMPAT_ALLOWED,
|
||||
struct bch_sb, flags[5], 48, 64);
|
||||
LE64_BITMASK(BCH_SB_SHARD_INUMS_NBITS, struct bch_sb, flags[6], 0, 4);
|
||||
|
||||
static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb)
|
||||
{
|
||||
@@ -1347,7 +1350,8 @@ enum btree_id_flags {
|
||||
BIT_ULL(KEY_TYPE_set)) \
|
||||
x(logged_ops, 17, 0, \
|
||||
BIT_ULL(KEY_TYPE_logged_op_truncate)| \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)) \
|
||||
BIT_ULL(KEY_TYPE_logged_op_finsert)| \
|
||||
BIT_ULL(KEY_TYPE_inode_alloc_cursor)) \
|
||||
x(rebalance_work, 18, BTREE_ID_SNAPSHOT_FIELD, \
|
||||
BIT_ULL(KEY_TYPE_set)|BIT_ULL(KEY_TYPE_cookie)) \
|
||||
x(subvolume_children, 19, 0, \
|
||||
|
||||
@@ -588,7 +588,7 @@ struct jset_entry *__bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsi
|
||||
int bch2_bkey_get_empty_slot(struct btree_trans *trans, struct btree_iter *iter,
|
||||
enum btree_id btree, struct bpos end)
|
||||
{
|
||||
bch2_trans_iter_init(trans, iter, btree, POS_MAX, BTREE_ITER_intent);
|
||||
bch2_trans_iter_init(trans, iter, btree, end, BTREE_ITER_intent);
|
||||
struct bkey_s_c k = bch2_btree_iter_peek_prev(iter);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
|
||||
@@ -799,6 +799,28 @@ void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
prt_printf(out, "generation: %u", le32_to_cpu(gen.v->bi_generation));
|
||||
}
|
||||
|
||||
int bch2_inode_alloc_cursor_validate(struct bch_fs *c, struct bkey_s_c k,
|
||||
struct bkey_validate_context from)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
bkey_fsck_err_on(k.k->p.inode != LOGGED_OPS_INUM_inode_cursors,
|
||||
c, inode_alloc_cursor_inode_bad,
|
||||
"k.p.inode bad");
|
||||
fsck_err:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch2_inode_alloc_cursor_to_text(struct printbuf *out, struct bch_fs *c,
|
||||
struct bkey_s_c k)
|
||||
{
|
||||
struct bkey_s_c_inode_alloc_cursor i = bkey_s_c_to_inode_alloc_cursor(k);
|
||||
|
||||
prt_printf(out, "idx %llu generation %llu",
|
||||
le64_to_cpu(i.v->idx),
|
||||
le64_to_cpu(i.v->gen));
|
||||
}
|
||||
|
||||
void bch2_inode_init_early(struct bch_fs *c,
|
||||
struct bch_inode_unpacked *inode_u)
|
||||
{
|
||||
@@ -859,6 +881,56 @@ static inline u32 bkey_generation(struct bkey_s_c k)
|
||||
}
|
||||
}
|
||||
|
||||
static struct bkey_i_inode_alloc_cursor *
|
||||
bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
|
||||
u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;
|
||||
|
||||
cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);
|
||||
|
||||
struct btree_iter iter;
|
||||
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter,
|
||||
BTREE_ID_logged_ops,
|
||||
POS(LOGGED_OPS_INUM_inode_cursors, cursor_idx),
|
||||
BTREE_ITER_cached);
|
||||
int ret = bkey_err(k);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
struct bkey_i_inode_alloc_cursor *cursor =
|
||||
k.k->type == KEY_TYPE_inode_alloc_cursor
|
||||
? bch2_bkey_make_mut_typed(trans, &iter, &k, 0, inode_alloc_cursor)
|
||||
: bch2_bkey_alloc(trans, &iter, 0, inode_alloc_cursor);
|
||||
ret = PTR_ERR_OR_ZERO(cursor);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
if (c->opts.inodes_32bit) {
|
||||
*min = BLOCKDEV_INODE_MAX;
|
||||
*max = INT_MAX;
|
||||
} else {
|
||||
cursor->v.bits = c->opts.shard_inode_numbers_bits;
|
||||
|
||||
unsigned bits = 63 - c->opts.shard_inode_numbers_bits;
|
||||
|
||||
*min = max(cpu << bits, (u64) INT_MAX + 1);
|
||||
*max = (cpu << bits) | ~(ULLONG_MAX << bits);
|
||||
}
|
||||
|
||||
if (le64_to_cpu(cursor->v.idx) < *min)
|
||||
cursor->v.idx = cpu_to_le64(*min);
|
||||
|
||||
if (le64_to_cpu(cursor->v.idx) >= *max) {
|
||||
cursor->v.idx = cpu_to_le64(*min);
|
||||
le32_add_cpu(&cursor->v.gen, 1);
|
||||
}
|
||||
err:
|
||||
bch2_trans_iter_exit(trans, &iter);
|
||||
return ret ? ERR_PTR(ret) : cursor;
|
||||
}
|
||||
|
||||
/*
|
||||
* This just finds an empty slot:
|
||||
*/
|
||||
@@ -867,35 +939,20 @@ int bch2_inode_create(struct btree_trans *trans,
|
||||
struct bch_inode_unpacked *inode_u,
|
||||
u32 snapshot, u64 cpu)
|
||||
{
|
||||
struct bch_fs *c = trans->c;
|
||||
struct bkey_s_c k;
|
||||
u64 min, max, start, pos, *hint;
|
||||
int ret = 0;
|
||||
unsigned bits = (c->opts.inodes_32bit ? 31 : 63);
|
||||
u64 min, max;
|
||||
struct bkey_i_inode_alloc_cursor *cursor =
|
||||
bch2_inode_alloc_cursor_get(trans, cpu, &min, &max);
|
||||
int ret = PTR_ERR_OR_ZERO(cursor);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (c->opts.shard_inode_numbers) {
|
||||
bits -= c->inode_shard_bits;
|
||||
u64 start = le64_to_cpu(cursor->v.idx);
|
||||
u64 pos = start;
|
||||
|
||||
min = (cpu << bits);
|
||||
max = (cpu << bits) | ~(ULLONG_MAX << bits);
|
||||
|
||||
min = max_t(u64, min, BLOCKDEV_INODE_MAX);
|
||||
hint = c->unused_inode_hints + cpu;
|
||||
} else {
|
||||
min = BLOCKDEV_INODE_MAX;
|
||||
max = ~(ULLONG_MAX << bits);
|
||||
hint = c->unused_inode_hints;
|
||||
}
|
||||
|
||||
start = READ_ONCE(*hint);
|
||||
|
||||
if (start >= max || start < min)
|
||||
start = min;
|
||||
|
||||
pos = start;
|
||||
bch2_trans_iter_init(trans, iter, BTREE_ID_inodes, POS(0, pos),
|
||||
BTREE_ITER_all_snapshots|
|
||||
BTREE_ITER_intent);
|
||||
struct bkey_s_c k;
|
||||
again:
|
||||
while ((k = bch2_btree_iter_peek(iter)).k &&
|
||||
!(ret = bkey_err(k)) &&
|
||||
@@ -925,6 +982,7 @@ again:
|
||||
/* Retry from start */
|
||||
pos = start = min;
|
||||
bch2_btree_iter_set_pos(iter, POS(0, pos));
|
||||
le32_add_cpu(&cursor->v.gen, 1);
|
||||
goto again;
|
||||
found_slot:
|
||||
bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
|
||||
@@ -935,9 +993,9 @@ found_slot:
|
||||
return ret;
|
||||
}
|
||||
|
||||
*hint = k.k->p.offset;
|
||||
inode_u->bi_inum = k.k->p.offset;
|
||||
inode_u->bi_generation = bkey_generation(k);
|
||||
inode_u->bi_generation = le64_to_cpu(cursor->v.gen);
|
||||
cursor->v.idx = cpu_to_le64(k.k->p.offset + 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -999,8 +1057,6 @@ int bch2_inode_rm(struct bch_fs *c, subvol_inum inum)
|
||||
{
|
||||
struct btree_trans *trans = bch2_trans_get(c);
|
||||
struct btree_iter iter = { NULL };
|
||||
struct bkey_i_inode_generation delete;
|
||||
struct bch_inode_unpacked inode_u;
|
||||
struct bkey_s_c k;
|
||||
u32 snapshot;
|
||||
int ret;
|
||||
@@ -1040,13 +1096,7 @@ retry:
|
||||
goto err;
|
||||
}
|
||||
|
||||
bch2_inode_unpack(k, &inode_u);
|
||||
|
||||
bkey_inode_generation_init(&delete.k_i);
|
||||
delete.k.p = iter.pos;
|
||||
delete.v.bi_generation = cpu_to_le32(inode_u.bi_generation + 1);
|
||||
|
||||
ret = bch2_trans_update(trans, &iter, &delete.k_i, 0) ?:
|
||||
ret = bch2_btree_delete_at(trans, &iter, 0) ?:
|
||||
bch2_trans_commit(trans, NULL, NULL,
|
||||
BCH_TRANS_COMMIT_no_enospc);
|
||||
err:
|
||||
|
||||
@@ -68,6 +68,16 @@ void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bk
|
||||
.min_val_size = 8, \
|
||||
})
|
||||
|
||||
int bch2_inode_alloc_cursor_validate(struct bch_fs *, struct bkey_s_c,
|
||||
struct bkey_validate_context);
|
||||
void bch2_inode_alloc_cursor_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
|
||||
|
||||
#define bch2_bkey_ops_inode_alloc_cursor ((struct bkey_ops) { \
|
||||
.key_validate = bch2_inode_alloc_cursor_validate, \
|
||||
.val_to_text = bch2_inode_alloc_cursor_to_text, \
|
||||
.min_val_size = 16, \
|
||||
})
|
||||
|
||||
#if 0
|
||||
typedef struct {
|
||||
u64 lo;
|
||||
|
||||
@@ -102,7 +102,8 @@ struct bch_inode_generation {
|
||||
x(bi_subvol, 32) \
|
||||
x(bi_parent_subvol, 32) \
|
||||
x(bi_nocow, 8) \
|
||||
x(bi_depth, 32)
|
||||
x(bi_depth, 32) \
|
||||
x(bi_inodes_32bit, 8)
|
||||
|
||||
/* subset of BCH_INODE_FIELDS */
|
||||
#define BCH_INODE_OPTS() \
|
||||
@@ -115,7 +116,8 @@ struct bch_inode_generation {
|
||||
x(foreground_target, 16) \
|
||||
x(background_target, 16) \
|
||||
x(erasure_code, 16) \
|
||||
x(nocow, 8)
|
||||
x(nocow, 8) \
|
||||
x(inodes_32bit, 8)
|
||||
|
||||
enum inode_opt_id {
|
||||
#define x(name, ...) \
|
||||
@@ -165,4 +167,12 @@ LE64_BITMASK(INODEv3_FIELDS_START,
|
||||
struct bch_inode_v3, bi_flags, 31, 36);
|
||||
LE64_BITMASK(INODEv3_MODE, struct bch_inode_v3, bi_flags, 36, 52);
|
||||
|
||||
struct bch_inode_alloc_cursor {
|
||||
struct bch_val v;
|
||||
__u8 bits;
|
||||
__u8 pad;
|
||||
__le32 gen;
|
||||
__le64 idx;
|
||||
};
|
||||
|
||||
#endif /* _BCACHEFS_INODE_FORMAT_H */
|
||||
|
||||
@@ -65,7 +65,8 @@ int bch2_resume_logged_ops(struct bch_fs *c)
|
||||
int ret = bch2_trans_run(c,
|
||||
for_each_btree_key_max(trans, iter,
|
||||
BTREE_ID_logged_ops,
|
||||
POS(LOGGED_OPS_INUM, 0), POS(LOGGED_OPS_INUM, U64_MAX),
|
||||
POS(LOGGED_OPS_INUM_logged_ops, 0),
|
||||
POS(LOGGED_OPS_INUM_logged_ops, U64_MAX),
|
||||
BTREE_ITER_prefetch, k,
|
||||
resume_logged_op(trans, &iter, k)));
|
||||
bch_err_fn(c, ret);
|
||||
@@ -76,7 +77,7 @@ static int __bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k)
|
||||
{
|
||||
struct btree_iter iter;
|
||||
int ret = bch2_bkey_get_empty_slot(trans, &iter,
|
||||
BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM, U64_MAX));
|
||||
BTREE_ID_logged_ops, POS(LOGGED_OPS_INUM_logged_ops, U64_MAX));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
||||
@@ -2,7 +2,10 @@
|
||||
#ifndef _BCACHEFS_LOGGED_OPS_FORMAT_H
|
||||
#define _BCACHEFS_LOGGED_OPS_FORMAT_H
|
||||
|
||||
#define LOGGED_OPS_INUM 0
|
||||
enum logged_ops_inums {
|
||||
LOGGED_OPS_INUM_logged_ops,
|
||||
LOGGED_OPS_INUM_inode_cursors,
|
||||
};
|
||||
|
||||
struct bch_logged_op_truncate {
|
||||
struct bch_val v;
|
||||
|
||||
@@ -222,14 +222,14 @@ enum fsck_err_opts {
|
||||
BCH_SB_ERASURE_CODE, false, \
|
||||
NULL, "Enable erasure coding (DO NOT USE YET)") \
|
||||
x(inodes_32bit, u8, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_FS|OPT_INODE|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_INODE_32BIT, true, \
|
||||
NULL, "Constrain inode numbers to 32 bits") \
|
||||
x(shard_inode_numbers, u8, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \
|
||||
OPT_BOOL(), \
|
||||
BCH_SB_SHARD_INUMS, true, \
|
||||
x(shard_inode_numbers_bits, u8, \
|
||||
OPT_FS|OPT_FORMAT, \
|
||||
OPT_UINT(0, 8), \
|
||||
BCH_SB_SHARD_INUMS_NBITS, 0, \
|
||||
NULL, "Shard new inode numbers by CPU id") \
|
||||
x(inodes_use_key_cache, u8, \
|
||||
OPT_FS|OPT_FORMAT|OPT_MOUNT, \
|
||||
|
||||
@@ -211,6 +211,7 @@ enum bch_fsck_flags {
|
||||
x(bkey_in_missing_snapshot, 190, 0) \
|
||||
x(inode_pos_inode_nonzero, 191, 0) \
|
||||
x(inode_pos_blockdev_range, 192, 0) \
|
||||
x(inode_alloc_cursor_inode_bad, 301, 0) \
|
||||
x(inode_unpack_error, 193, 0) \
|
||||
x(inode_str_hash_invalid, 194, 0) \
|
||||
x(inode_v3_fields_start_bad, 195, 0) \
|
||||
@@ -311,7 +312,7 @@ enum bch_fsck_flags {
|
||||
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
|
||||
x(compression_opt_not_marked_in_sb, 295, FSCK_AUTOFIX) \
|
||||
x(compression_type_not_marked_in_sb, 296, FSCK_AUTOFIX) \
|
||||
x(MAX, 301, 0)
|
||||
x(MAX, 302, 0)
|
||||
|
||||
enum bch_sb_error_id {
|
||||
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
|
||||
|
||||
@@ -460,6 +460,11 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb,
|
||||
SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);
|
||||
}
|
||||
|
||||
#ifdef __KERNEL__
|
||||
if (!BCH_SB_SHARD_INUMS_NBITS(sb))
|
||||
SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus())));
|
||||
#endif
|
||||
|
||||
for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
|
||||
const struct bch_option *opt = bch2_opt_table + opt_id;
|
||||
|
||||
|
||||
@@ -586,7 +586,6 @@ static void __bch2_fs_free(struct bch_fs *c)
|
||||
#endif
|
||||
kfree(rcu_dereference_protected(c->disk_groups, 1));
|
||||
kfree(c->journal_seq_blacklist_table);
|
||||
kfree(c->unused_inode_hints);
|
||||
|
||||
if (c->write_ref_wq)
|
||||
destroy_workqueue(c->write_ref_wq);
|
||||
@@ -872,8 +871,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
(btree_blocks(c) + 1) * 2 *
|
||||
sizeof(struct sort_iter_set);
|
||||
|
||||
c->inode_shard_bits = ilog2(roundup_pow_of_two(num_possible_cpus()));
|
||||
|
||||
if (!(c->btree_update_wq = alloc_workqueue("bcachefs",
|
||||
WQ_HIGHPRI|WQ_FREEZABLE|WQ_MEM_RECLAIM|WQ_UNBOUND, 512)) ||
|
||||
!(c->btree_io_complete_wq = alloc_workqueue("bcachefs_btree_io",
|
||||
@@ -900,9 +897,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
|
||||
!(c->online_reserved = alloc_percpu(u64)) ||
|
||||
mempool_init_kvmalloc_pool(&c->btree_bounce_pool, 1,
|
||||
c->opts.btree_node_size) ||
|
||||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048) ||
|
||||
!(c->unused_inode_hints = kcalloc(1U << c->inode_shard_bits,
|
||||
sizeof(u64), GFP_KERNEL))) {
|
||||
mempool_init_kmalloc_pool(&c->large_bkey_pool, 1, 2048)) {
|
||||
ret = -BCH_ERR_ENOMEM_fs_other_alloc;
|
||||
goto err;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user