From e764e68103c12aef161480b8da984c36ca99cfb5 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 18:46:48 -0400 Subject: [PATCH 01/18] bcachefs: Fix bad shift in bch2_read_flag_list() Signed-off-by: Kent Overstreet --- fs/bcachefs/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 42f565c76181..e0a876cbaa6b 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -222,7 +222,7 @@ u64 bch2_read_flag_list(const char *opt, const char * const list[]) break; } - ret |= 1 << flag; + ret |= BIT_ULL(flag); } kfree(d); From abaa6d4f6ab8371c5b73afb726ff1c012526e999 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 17:43:36 -0400 Subject: [PATCH 02/18] bcachefs: Fix return type of dirent_points_to_inode_nowarn() we're returning an error code now, not a bool Reported-by: Dan Carpenter Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 0d8b782b63fb..c6c98ee87ec9 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -28,8 +28,8 @@ static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, inode->bi_dir_offset == d.k->p.offset; } -static bool dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, - struct bch_inode_unpacked *inode) +static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, + struct bch_inode_unpacked *inode) { if (d.v->d_type == DT_SUBVOL ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol From 3b1425a4eb4e9750db8620c26e39390411eea185 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:31:31 -0400 Subject: [PATCH 03/18] bcachefs: Fix bch2_inode_is_open() check Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index c6c98ee87ec9..351de61c7ed1 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1106,7 +1106,7 @@ static int check_inode(struct btree_trans *trans, if (ret) goto err; } else { - if (fsck_err_on(bch2_inode_is_open(c, k.k->p), + if (fsck_err_on(!bch2_inode_is_open(c, k.k->p), trans, inode_unlinked_and_not_open, "inode %llu%u unlinked and not open", u.bi_inum, u.bi_snapshot)) { From d28786606a51620df7b7a3e7231338d9bc081656 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 2 Oct 2024 21:35:38 -0400 Subject: [PATCH 04/18] bcachefs: Fix trans_commit disk accounting revert We only are applying JSET_ENTRY_TYPE_write_buffer_keys, revert path was missed. Fixes: a3581ca35d2b ("bcachefs: Fix BCH_TRANS_COMMIT_skip_accounting_apply") Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_trans_commit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index 1a74a1a252ee..9bf471fa4361 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -832,7 +832,8 @@ revert_fs_usage: for (struct jset_entry *entry2 = trans->journal_entries; entry2 != entry; entry2 = vstruct_next(entry2)) - if (jset_entry_is_key(entry2) && entry2->start->k.type == KEY_TYPE_accounting) { + if (entry2->type == BCH_JSET_ENTRY_write_buffer_keys && + entry2->start->k.type == KEY_TYPE_accounting) { struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); From 6b63a948a73ba3df0fb3ab0c44807df344bc5bbf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Oct 2024 19:44:32 -0400 Subject: [PATCH 05/18] bcachefs: Add missing wakeup to bch2_inode_hash_remove() This fixes two different bugs: - Looser locking with the rhashtable means we need to recheck if the inode is still hashed after prepare_to_wait(), and add a corresponding wakeup after removing from the hash table. - da18ecbf0fb6 ("fs: add i_state helpers") changed the bit waitqueues used for inodes, and bcachefs wasn't updated and thus broke; this updates bcachefs to the new helper. Fixes: 112d21fd1a12 ("bcachefs: switch to rhashtable for vfs inodes hash") Signed-off-by: Kent Overstreet --- fs/bcachefs/fs.c | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 4a1bb07a2574..5bfc26d58270 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -174,22 +174,26 @@ static const struct rhashtable_params bch2_vfs_inodes_params = { .automatic_shrinking = true, }; -static void __wait_on_freeing_inode(struct inode *inode) -{ - wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); - wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); - spin_unlock(&inode->i_lock); - schedule(); - finish_wait(wq, &wait.wq_entry); -} - struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum) { return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params); } +static void __wait_on_freeing_inode(struct bch_fs *c, + struct bch_inode_info *inode, + subvol_inum inum) +{ + wait_queue_head_t *wq; + DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); + wq = inode_bit_waitqueue(&wait, &inode->v, __I_NEW); + prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + spin_unlock(&inode->v.i_lock); + + if (__bch2_inode_hash_find(c, inum) == inode) + schedule_timeout(HZ * 10); + finish_wait(wq, &wait.wq_entry); +} + static struct bch_inode_info *bch2_inode_hash_find(struct bch_fs *c, struct btree_trans *trans, subvol_inum inum) { @@ -204,10 +208,10 @@ repeat: } if ((inode->v.i_state & (I_FREEING|I_WILL_FREE))) { if (!trans) { - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); } else { bch2_trans_unlock(trans); - __wait_on_freeing_inode(&inode->v); + __wait_on_freeing_inode(c, inode, inum); int ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); @@ -232,6 +236,11 @@ static void bch2_inode_hash_remove(struct bch_fs *c, struct bch_inode_info *inod &inode->hash, bch2_vfs_inodes_params); BUG_ON(ret); inode->v.i_hash.pprev = NULL; + /* + * This pairs with the bch2_inode_hash_find() -> + * __wait_on_freeing_inode() path + */ + inode_wake_up_bit(&inode->v, __I_NEW); } } From 20826fe6b810bce3efba9ef5d74cf13ebe5f23d9 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 02:44:12 -0400 Subject: [PATCH 06/18] bcachefs: Fix reattach_inode() Ensure a copy of the lost+found inode exists in the snapshot that we're reattaching, so that we don't trigger warnings in lookup_inode_for_snapshot() later. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 351de61c7ed1..881ad5b9447f 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -354,13 +354,12 @@ static int reattach_inode(struct btree_trans *trans, if (ret) return ret; - if (S_ISDIR(inode->bi_mode)) { - lostfound.bi_nlink++; + lostfound.bi_nlink += S_ISDIR(inode->bi_mode); - ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); - if (ret) - return ret; - } + /* ensure lost+found inode is also present in inode snapshot */ + ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (ret) + return ret; dir_hash = bch2_hash_info_init(c, &lostfound); From fda7b1ffdef75cc0f4d34255e88b5894e2ce75b1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:33:08 -0400 Subject: [PATCH 07/18] bcachefs: Create lost+found in correct snapshot Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 881ad5b9447f..f0d6696c4df6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -283,11 +283,17 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, return ret; create_lostfound: + /* + * we always create lost+found in the root snapshot; we don't want + * different branches of the snapshot tree to have different lost+found + */ + snapshot = le32_to_cpu(st.root_snapshot); /* * XXX: we could have a nicer log message here if we had a nice way to * walk backpointers to print a path */ - bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); + bch_notice(c, "creating lost+found in subvol %llu snapshot %u", + root_inum.subvol, le32_to_cpu(st.root_snapshot)); u64 now = bch2_current_time(c); struct btree_iter lostfound_iter = { NULL }; From 658c82f41e8075e18b98b8705ed0cc34346f35c2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 4 Oct 2024 15:05:40 -0400 Subject: [PATCH 08/18] bcachefs: bkey errors are only AUTOFIX during read Newly generated keys, in the transaction commit path or write path, should not be AUTOFIX; those indicate bugs that we need to fail fast for. Fixes: 5612daafb764 ("bcachefs: Fix fsck warnings from bkey validation") Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 11 +++++++++-- fs/bcachefs/error.h | 9 +++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index 3a16b535b6c3..aac1960321b7 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -430,10 +430,17 @@ err: int __bch2_bkey_fsck_err(struct bch_fs *c, struct bkey_s_c k, - enum bch_fsck_flags flags, + enum bch_validate_flags validate_flags, enum bch_sb_error_id err, const char *fmt, ...) { + if (validate_flags & BCH_VALIDATE_silent) + return -BCH_ERR_fsck_delete_bkey; + + unsigned fsck_flags = 0; + if (!(validate_flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit))) + fsck_flags |= FSCK_AUTOFIX|FSCK_CAN_FIX; + struct printbuf buf = PRINTBUF; va_list args; @@ -445,7 +452,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *c, va_end(args); prt_str(&buf, ": delete?"); - int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + int ret = __bch2_fsck_err(c, NULL, fsck_flags, err, "%s", buf.buf); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 21ee7211b03e..6551ada926b6 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -167,10 +167,11 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) +enum bch_validate_flags; __printf(5, 6) int __bch2_bkey_fsck_err(struct bch_fs *, struct bkey_s_c, - enum bch_fsck_flags, + enum bch_validate_flags, enum bch_sb_error_id, const char *, ...); @@ -180,11 +181,7 @@ int __bch2_bkey_fsck_err(struct bch_fs *, */ #define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - if ((flags & BCH_VALIDATE_silent)) { \ - ret = -BCH_ERR_fsck_delete_bkey; \ - goto fsck_err; \ - } \ - int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX|FSCK_AUTOFIX,\ + int _ret = __bch2_bkey_fsck_err(c, k, flags, \ BCH_FSCK_ERR_##_err_type, \ _err_msg, ##__VA_ARGS__); \ if (_ret != -BCH_ERR_fsck_fix && \ From 492e24d7604a1b78c8af3c30984a0cffc17d6bdf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:26:02 -0400 Subject: [PATCH 09/18] bcachefs: Make sure we print error that causes fsck to bail out Signed-off-by: Kent Overstreet --- fs/bcachefs/error.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index aac1960321b7..7a79f695ba2e 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -393,6 +393,14 @@ int __bch2_fsck_err(struct bch_fs *c, !(flags & FSCK_CAN_IGNORE))) ret = -BCH_ERR_fsck_errors_not_fixed; + bool exiting = + test_bit(BCH_FS_fsck_running, &c->flags) && + (ret != -BCH_ERR_fsck_fix && + ret != -BCH_ERR_fsck_ignore); + + if (exiting) + print = true; + if (print) { if (bch2_fs_stdio_redirect(c)) bch2_print(c, "%s\n", out->buf); @@ -400,9 +408,7 @@ int __bch2_fsck_err(struct bch_fs *c, bch2_print_string_as_lines(KERN_ERR, out->buf); } - if (test_bit(BCH_FS_fsck_running, &c->flags) && - (ret != -BCH_ERR_fsck_fix && - ret != -BCH_ERR_fsck_ignore)) + if (exiting) bch_err(c, "Unable to continue, halting"); else if (suppressing) bch_err(c, "Ratelimiting new instances of previous error"); From 1bea714c532abf101e939a90b8c920ef9205cfa3 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:26:21 -0400 Subject: [PATCH 10/18] bcachefs: Mark more errors AUTOFIX Errors are getting marked as AUTOFIX once they've been (re)-tested and audited. Signed-off-by: Kent Overstreet --- fs/bcachefs/sb-errors_format.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index ed5dca5e1161..6a63d24180ca 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -129,20 +129,20 @@ enum bch_fsck_flags { x(freespace_key_wrong, 115, 0) \ x(freespace_hole_missing, 116, 0) \ x(bucket_gens_val_size_bad, 117, 0) \ - x(bucket_gens_key_wrong, 118, 0) \ - x(bucket_gens_hole_wrong, 119, 0) \ - x(bucket_gens_to_invalid_dev, 120, 0) \ - x(bucket_gens_to_invalid_buckets, 121, 0) \ - x(bucket_gens_nonzero_for_invalid_buckets, 122, 0) \ + x(bucket_gens_key_wrong, 118, FSCK_AUTOFIX) \ + x(bucket_gens_hole_wrong, 119, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_dev, 120, FSCK_AUTOFIX) \ + x(bucket_gens_to_invalid_buckets, 121, FSCK_AUTOFIX) \ + x(bucket_gens_nonzero_for_invalid_buckets, 122, FSCK_AUTOFIX) \ x(need_discard_freespace_key_to_invalid_dev_bucket, 123, 0) \ x(need_discard_freespace_key_bad, 124, 0) \ x(backpointer_bucket_offset_wrong, 125, 0) \ x(backpointer_to_missing_device, 126, 0) \ x(backpointer_to_missing_alloc, 127, 0) \ x(backpointer_to_missing_ptr, 128, 0) \ - x(lru_entry_at_time_0, 129, 0) \ - x(lru_entry_to_invalid_bucket, 130, 0) \ - x(lru_entry_bad, 131, 0) \ + x(lru_entry_at_time_0, 129, FSCK_AUTOFIX) \ + x(lru_entry_to_invalid_bucket, 130, FSCK_AUTOFIX) \ + x(lru_entry_bad, 131, FSCK_AUTOFIX) \ x(btree_ptr_val_too_big, 132, 0) \ x(btree_ptr_v2_val_too_big, 133, 0) \ x(btree_ptr_has_non_ptr, 134, 0) \ @@ -158,9 +158,9 @@ enum bch_fsck_flags { x(ptr_after_last_bucket, 144, 0) \ x(ptr_before_first_bucket, 145, 0) \ x(ptr_spans_multiple_buckets, 146, 0) \ - x(ptr_to_missing_backpointer, 147, 0) \ - x(ptr_to_missing_alloc_key, 148, 0) \ - x(ptr_to_missing_replicas_entry, 149, 0) \ + x(ptr_to_missing_backpointer, 147, FSCK_AUTOFIX) \ + x(ptr_to_missing_alloc_key, 148, FSCK_AUTOFIX) \ + x(ptr_to_missing_replicas_entry, 149, FSCK_AUTOFIX) \ x(ptr_to_missing_stripe, 150, 0) \ x(ptr_to_incorrect_stripe, 151, 0) \ x(ptr_gen_newer_than_bucket_gen, 152, 0) \ @@ -194,7 +194,7 @@ enum bch_fsck_flags { x(snapshot_skiplist_not_normalized, 180, 0) \ x(snapshot_skiplist_bad, 181, 0) \ x(snapshot_should_not_have_subvol, 182, 0) \ - x(snapshot_to_bad_snapshot_tree, 183, 0) \ + x(snapshot_to_bad_snapshot_tree, 183, FSCK_AUTOFIX) \ x(snapshot_bad_depth, 184, 0) \ x(snapshot_bad_skiplist, 185, 0) \ x(subvol_pos_bad, 186, 0) \ From 01bf5e3bd26ff8e49bf06fa4180f3eab51ab06df Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 16:40:33 -0400 Subject: [PATCH 11/18] bcachefs: minor lru fsck fixes check_lru_key() wasn't using write buffer updates for deleting bad lru entries - dating from before the lru btree used the btree write buffer. And when possibly flushing the btree write buffer (to make sure we're seeing a real inconsistency), we need to be using the modern bch2_btree_write_buffer_maybe_flush(). Signed-off-by: Kent Overstreet --- fs/bcachefs/lru.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 96f2f4f8c397..dd5df668131a 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -2,6 +2,7 @@ #include "bcachefs.h" #include "alloc_background.h" +#include "bkey_buf.h" #include "btree_iter.h" #include "btree_update.h" #include "btree_write_buffer.h" @@ -118,7 +119,7 @@ fsck_err: static int bch2_check_lru_key(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, - struct bpos *last_flushed_pos) + struct bkey_buf *last_flushed) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -136,7 +137,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) - return bch2_btree_delete_at(trans, lru_iter, 0); + return bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); ret = bkey_err(k); @@ -156,12 +157,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, if (lru_k.k->type != KEY_TYPE_set || lru_pos_time(lru_k.k->p) != idx) { - if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { - *last_flushed_pos = lru_k.k->p; - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; - goto out; - } + ret = bch2_btree_write_buffer_maybe_flush(trans, lru_k, last_flushed); + if (ret) + goto err; if (fsck_err(trans, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" @@ -171,9 +169,8 @@ static int bch2_check_lru_key(struct btree_trans *trans, lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) - ret = bch2_btree_delete_at(trans, lru_iter, 0); + ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_lru, lru_iter->pos, false); } -out: err: fsck_err: bch2_trans_iter_exit(trans, &iter); @@ -184,12 +181,18 @@ fsck_err: int bch2_check_lrus(struct bch_fs *c) { - struct bpos last_flushed_pos = POS_MIN; + struct bkey_buf last_flushed; + + bch2_bkey_buf_init(&last_flushed); + bkey_init(&last_flushed.k->k); + int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_prefetch, k, NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, - bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); + bch2_check_lru_key(trans, &iter, k, &last_flushed))); + + bch2_bkey_buf_exit(&last_flushed, c); bch_err_fn(c, ret); return ret; From 260af1562ec14353824da24fe7acc179a902558e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 1 Oct 2024 19:08:37 -0400 Subject: [PATCH 12/18] bcachefs: Kill alloc_v4.fragmentation_lru The fragmentation_lru field hasn't been needed since we reworked the LRU btrees to use the btree write buffer; previously it was used to resolve collisions, but the revised LRU btree uses the backpointer (the bucket) as part of the key. It should have been deleted at the time of the LRU rework; since it wasn't, that left places for bugs to hide, in check/repair. This fixes LRU fsck on a filesystem image helpfully provided by a user who disappeared before I could get his name for the reported-by. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 30 ++++++++++++++++++--------- fs/bcachefs/alloc_background_format.h | 2 +- fs/bcachefs/btree_gc.c | 3 --- fs/bcachefs/lru.c | 7 +++++-- fs/bcachefs/move.c | 2 +- fs/bcachefs/movinggc.c | 12 ++++++++--- fs/bcachefs/sb-errors_format.h | 4 ++-- 7 files changed, 38 insertions(+), 22 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 645b5ed4babb..4e4a448f6931 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -332,7 +332,6 @@ void bch2_alloc_v4_swab(struct bkey_s k) a->io_time[1] = swab64(a->io_time[1]); a->stripe = swab32(a->stripe); a->nr_external_backpointers = swab32(a->nr_external_backpointers); - a->fragmentation_lru = swab64(a->fragmentation_lru); a->stripe_sectors = swab32(a->stripe_sectors); bps = alloc_v4_backpointers(a); @@ -347,6 +346,7 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c { struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); + struct bch_dev *ca = c ? bch2_dev_bucket_tryget_noerror(c, k.k->p) : NULL; prt_newline(out); printbuf_indent_add(out, 2); @@ -364,9 +364,13 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); prt_printf(out, "io_time[READ] %llu\n", a->io_time[READ]); prt_printf(out, "io_time[WRITE] %llu\n", a->io_time[WRITE]); - prt_printf(out, "fragmentation %llu\n", a->fragmentation_lru); + + if (ca) + prt_printf(out, "fragmentation %llu\n", alloc_lru_idx_fragmentation(*a, ca)); prt_printf(out, "bp_start %llu\n", BCH_ALLOC_V4_BACKPOINTERS_START(a)); printbuf_indent_sub(out, 2); + + bch2_dev_put(ca); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) @@ -882,12 +886,13 @@ int bch2_trigger_alloc(struct btree_trans *trans, goto err; } - new_a->fragmentation_lru = alloc_lru_idx_fragmentation(*new_a, ca); - if (old_a->fragmentation_lru != new_a->fragmentation_lru) { + old_lru = alloc_lru_idx_fragmentation(*old_a, ca); + new_lru = alloc_lru_idx_fragmentation(*new_a, ca); + if (old_lru != new_lru) { ret = bch2_lru_change(trans, BCH_LRU_FRAGMENTATION_START, bucket_to_u64(new.k->p), - old_a->fragmentation_lru, new_a->fragmentation_lru); + old_lru, new_lru); if (ret) goto err; } @@ -1629,18 +1634,22 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget_noerror(c, alloc_k.k->p.inode); + if (!ca) + return 0; + a = bch2_alloc_to_v4(alloc_k, &a_convert); - if (a->fragmentation_lru) { + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); + if (lru_idx) { ret = bch2_lru_check_set(trans, BCH_LRU_FRAGMENTATION_START, - a->fragmentation_lru, - alloc_k, last_flushed); + lru_idx, alloc_k, last_flushed); if (ret) - return ret; + goto err; } if (a->data_type != BCH_DATA_cached) - return 0; + goto err; if (fsck_err_on(!a->io_time[READ], trans, alloc_key_cached_but_read_time_zero, @@ -1669,6 +1678,7 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans, goto err; err: fsck_err: + bch2_dev_put(ca); printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index f754a2951d8a..befdaa95c515 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -70,7 +70,7 @@ struct bch_alloc_v4 { __u32 stripe; __u32 nr_external_backpointers; /* end of fields in original version of alloc_v4 */ - __u64 fragmentation_lru; + __u64 _fragmentation_lru; /* obsolete */ __u32 stripe_sectors; __u32 pad; } __packed __aligned(8); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 660d2fa02da2..771154e3a291 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -828,8 +828,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, return ret; } - gc.fragmentation_lru = alloc_lru_idx_fragmentation(gc, ca); - if (fsck_err_on(new.data_type != gc.data_type, trans, alloc_key_data_type_wrong, "bucket %llu:%llu gen %u has wrong data_type" @@ -857,7 +855,6 @@ static int bch2_alloc_write_key(struct btree_trans *trans, copy_bucket_field(alloc_key_cached_sectors_wrong, cached_sectors); copy_bucket_field(alloc_key_stripe_wrong, stripe); copy_bucket_field(alloc_key_stripe_redundancy_wrong, stripe_redundancy); - copy_bucket_field(alloc_key_fragmentation_lru_wrong, fragmentation_lru); #undef copy_bucket_field if (!bch2_alloc_v4_cmp(*old, new)) diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index dd5df668131a..10857eccdeaf 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -133,7 +133,9 @@ static int bch2_check_lru_key(struct btree_trans *trans, u64 idx; int ret; - if (fsck_err_on(!bch2_dev_bucket_exists(c, alloc_pos), + struct bch_dev *ca = bch2_dev_bucket_tryget_noerror(c, alloc_pos); + + if (fsck_err_on(!ca, trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) @@ -151,7 +153,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, idx = alloc_lru_idx_read(*a); break; case BCH_LRU_fragmentation: - idx = a->fragmentation_lru; + idx = alloc_lru_idx_fragmentation(*a, ca); break; } @@ -174,6 +176,7 @@ static int bch2_check_lru_key(struct btree_trans *trans, err: fsck_err: bch2_trans_iter_exit(trans, &iter); + bch2_dev_put(ca); printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7d3920e03742..8c456d8b8b99 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -692,7 +692,7 @@ int bch2_evacuate_bucket(struct moving_context *ctxt, a = bch2_alloc_to_v4(k, &a_convert); dirty_sectors = bch2_bucket_sectors_dirty(*a); bucket_size = ca->mi.bucket_size; - fragmentation = a->fragmentation_lru; + fragmentation = alloc_lru_idx_fragmentation(*a, ca); ret = bch2_btree_write_buffer_tryflush(trans); bch_err_msg(c, ret, "flushing btree write buffer"); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index d86565bf07c8..d658be90f737 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -73,6 +73,7 @@ move_bucket_in_flight_add(struct buckets_in_flight *list, struct move_bucket b) static int bch2_bucket_is_movable(struct btree_trans *trans, struct move_bucket *b, u64 time) { + struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_s_c k; struct bch_alloc_v4 _a; @@ -90,14 +91,19 @@ static int bch2_bucket_is_movable(struct btree_trans *trans, if (ret) return ret; + struct bch_dev *ca = bch2_dev_tryget(c, k.k->p.inode); + if (!ca) + goto out; + a = bch2_alloc_to_v4(k, &_a); b->k.gen = a->gen; b->sectors = bch2_bucket_sectors_dirty(*a); + u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca); - ret = data_type_movable(a->data_type) && - a->fragmentation_lru && - a->fragmentation_lru <= time; + ret = lru_idx && lru_idx <= time; + bch2_dev_put(ca); +out: bch2_trans_iter_exit(trans, &iter); return ret; } diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 6a63d24180ca..b4024870b65e 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -115,8 +115,8 @@ enum bch_fsck_flags { x(alloc_key_data_type_inconsistency, 101, 0) \ x(alloc_key_to_missing_dev_bucket, 102, 0) \ x(alloc_key_cached_inconsistency, 103, 0) \ - x(alloc_key_cached_but_read_time_zero, 104, 0) \ - x(alloc_key_to_missing_lru_entry, 105, 0) \ + x(alloc_key_cached_but_read_time_zero, 104, FSCK_AUTOFIX) \ + x(alloc_key_to_missing_lru_entry, 105, FSCK_AUTOFIX) \ x(alloc_key_data_type_wrong, 106, FSCK_AUTOFIX) \ x(alloc_key_gen_wrong, 107, FSCK_AUTOFIX) \ x(alloc_key_dirty_sectors_wrong, 108, FSCK_AUTOFIX) \ From 1c6051bbd76b2767d6acef6a1d0bdf99aa319273 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 28 Sep 2024 15:27:37 -0400 Subject: [PATCH 13/18] bcachefs: Check for directories with no backpointers It's legal for regular files to have missing backpointers (due to hardlinks), and fsck should automatically add them, but for directories this is an error that should be flagged. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 22 +++++++++++++++------- fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index f0d6696c4df6..d2aa6a5bc973 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1826,6 +1826,21 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (inode_points_to_dirent(target, d)) return 0; + if (!target->bi_dir && + !target->bi_dir_offset) { + fsck_err_on(S_ISDIR(target->bi_mode), + trans, inode_dir_missing_backpointer, + "directory with missing backpointer\n%s", + (bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n "), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_dir = d.k->p.inode; + target->bi_dir_offset = d.k->p.offset; + return __bch2_fsck_write_inode(trans, target, target_snapshot); + } + if (bch2_inode_should_have_bp(target) && !fsck_err(trans, inode_wrong_backpointer, "dirent points to inode that does not point back:\n %s", @@ -1835,13 +1850,6 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, buf.buf))) goto err; - if (!target->bi_dir && - !target->bi_dir_offset) { - target->bi_dir = d.k->p.inode; - target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); - } - struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); ret = bkey_err(bp_dirent); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index b4024870b65e..fc3d31ed5975 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -219,6 +219,7 @@ enum bch_fsck_flags { x(inode_i_sectors_wrong, 204, FSCK_AUTOFIX) \ x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ + x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -295,7 +296,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 284, 0) + x(MAX, 285, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From c7da5ee2e5cc30faca49e3ea9dbecf8f6ee4f1ea Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 22:38:04 -0400 Subject: [PATCH 14/18] bcachefs: Check for unlinked inodes with dirents link count works differently in bcachefs - it's only nonzero for files with multiple hardlinks, which means we can also avoid checking it except for files that are known to have hardlinks. That means we need a few different checks instead; in particular, we don't want fsck to delet a file that has a dirent pointing to it. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 53 +++++++++++++++++++++++++--------- fs/bcachefs/sb-errors_format.h | 3 +- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index d2aa6a5bc973..09f890539ded 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1029,6 +1029,7 @@ static int check_inode(struct btree_trans *trans, bool full) { struct bch_fs *c = trans->c; + struct printbuf buf = PRINTBUF; struct bch_inode_unpacked u; bool do_update = false; int ret; @@ -1062,7 +1063,24 @@ static int check_inode(struct btree_trans *trans, trans, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -BCH_ERR_fsck_repair_unimplemented; + ret = -BCH_ERR_fsck_repair_unimplemented; + goto err_noprint; + } + + if (u.bi_dir || u.bi_dir_offset) { + ret = check_inode_dirent_inode(trans, &u, &do_update); + if (ret) + goto err; + } + + if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked), + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf))) { + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -1079,11 +1097,11 @@ static int check_inode(struct btree_trans *trans, bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; if (!bpos_eq(new_min_pos, POS_MIN)) bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); - return 0; + goto err_noprint; } if (u.bi_flags & BCH_INODE_unlinked) { @@ -1100,7 +1118,7 @@ static int check_inode(struct btree_trans *trans, */ ret = check_inode_deleted_list(trans, k.k->p); if (ret < 0) - return ret; + goto err_noprint; fsck_err_on(!ret, trans, unlinked_inode_not_on_deleted_list, @@ -1117,7 +1135,7 @@ static int check_inode(struct btree_trans *trans, u.bi_inum, u.bi_snapshot)) { ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); - return ret; + goto err_noprint; } } } @@ -1182,12 +1200,6 @@ static int check_inode(struct btree_trans *trans, do_update = true; } - if (u.bi_dir || u.bi_dir_offset) { - ret = check_inode_dirent_inode(trans, &u, &do_update); - if (ret) - goto err; - } - if (fsck_err_on(u.bi_parent_subvol && (u.bi_subvol == 0 || u.bi_subvol == BCACHEFS_ROOT_SUBVOL), @@ -1232,11 +1244,13 @@ do_update: ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) - return ret; + goto err_noprint; } err: fsck_err: bch_err_fn(c, ret); +err_noprint: + printbuf_exit(&buf); return ret; } @@ -1831,11 +1845,22 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, fsck_err_on(S_ISDIR(target->bi_mode), trans, inode_dir_missing_backpointer, "directory with missing backpointer\n%s", - (bch2_bkey_val_to_text(&buf, c, d.s_c), - prt_printf(&buf, "\n "), + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), bch2_inode_unpacked_to_text(&buf, target), buf.buf)); + fsck_err_on(target->bi_flags & BCH_INODE_unlinked, + trans, inode_unlinked_but_has_dirent, + "inode unlinked but has dirent\n%s", + (printbuf_reset(&buf), + bch2_bkey_val_to_text(&buf, c, d.s_c), + prt_printf(&buf, "\n"), + bch2_inode_unpacked_to_text(&buf, target), + buf.buf)); + + target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; return __bch2_fsck_write_inode(trans, target, target_snapshot); diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index fc3d31ed5975..4c8ff4cc17c2 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -211,6 +211,7 @@ enum bch_fsck_flags { x(inode_unlinked_but_clean, 197, 0) \ x(inode_unlinked_but_nlink_nonzero, 198, 0) \ x(inode_unlinked_and_not_open, 281, 0) \ + x(inode_unlinked_but_has_dirent, 285, 0) \ x(inode_checksum_type_invalid, 199, 0) \ x(inode_compression_type_invalid, 200, 0) \ x(inode_subvol_root_but_not_dir, 201, 0) \ @@ -296,7 +297,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 285, 0) + x(MAX, 286, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From c9306a91c3fdc9915f5408561ea432c70b03383b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 29 Sep 2024 23:38:37 -0400 Subject: [PATCH 15/18] bcachefs: Check for unlinked, non-empty dirs in check_inode() We want to check for this early so it can be reattached if necessary in check_unreachable_inodes(); better than letting it be deleted and having the children reattached, losing their filenames. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 17 +++++++++++++++++ fs/bcachefs/sb-errors_format.h | 3 ++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 09f890539ded..30eca76554f6 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1083,6 +1083,23 @@ static int check_inode(struct btree_trans *trans, do_update = true; } + if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) { + /* Check for this early so that check_unreachable_inode() will reattach it */ + + ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot); + if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty) + goto err; + + fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty, + "dir unlinked but not empty\n%s", + (printbuf_reset(&buf), + bch2_inode_unpacked_to_text(&buf, &u), + buf.buf)); + u.bi_flags &= ~BCH_INODE_unlinked; + do_update = true; + ret = 0; + } + if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { struct bpos new_min_pos; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index 4c8ff4cc17c2..4135b1ea2fec 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -221,6 +221,7 @@ enum bch_fsck_flags { x(inode_dir_wrong_nlink, 205, FSCK_AUTOFIX) \ x(inode_dir_multiple_links, 206, FSCK_AUTOFIX) \ x(inode_dir_missing_backpointer, 284, FSCK_AUTOFIX) \ + x(inode_dir_unlinked_but_not_empty, 286, FSCK_AUTOFIX) \ x(inode_multiple_links_but_nlink_0, 207, FSCK_AUTOFIX) \ x(inode_wrong_backpointer, 208, FSCK_AUTOFIX) \ x(inode_wrong_nlink, 209, FSCK_AUTOFIX) \ @@ -297,7 +298,7 @@ enum bch_fsck_flags { x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \ x(accounting_key_version_0, 282, FSCK_AUTOFIX) \ x(logged_op_but_clean, 283, FSCK_AUTOFIX) \ - x(MAX, 286, 0) + x(MAX, 287, 0) enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, From 72350ee0ea22c053f2683e50f1beba97df2ad053 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 30 Sep 2024 00:00:33 -0400 Subject: [PATCH 16/18] bcachefs: Kill snapshot arg to fsck_write_inode() It was initially believed that it would be better to be explicit about the snapshot we're updating when writing inodes in fsck; however, it turns out that passing around the snapshot separately is more error prone and we're usually updating the inode in the same snapshow we read it from. This is different from normal filesystem paths, where we do the update in the snapshot of the subvolume we're in. Signed-off-by: Kent Overstreet --- fs/bcachefs/fsck.c | 87 ++++++++++++++++++++--------------------- fs/bcachefs/inode.c | 12 ++---- fs/bcachefs/inode.h | 4 +- fs/bcachefs/subvolume.c | 3 +- 4 files changed, 51 insertions(+), 55 deletions(-) diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 30eca76554f6..b8a6ceb0cc7a 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -137,16 +137,15 @@ found: return ret; } -static int lookup_inode(struct btree_trans *trans, u64 inode_nr, - struct bch_inode_unpacked *inode, - u32 *snapshot) +static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, + struct bch_inode_unpacked *inode) { struct btree_iter iter; struct bkey_s_c k; int ret; k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, - SPOS(0, inode_nr, *snapshot), 0); + SPOS(0, inode_nr, snapshot), 0); ret = bkey_err(k); if (ret) goto err; @@ -154,8 +153,6 @@ static int lookup_inode(struct btree_trans *trans, u64 inode_nr, ret = bkey_is_inode(k.k) ? bch2_inode_unpack(k, inode) : -BCH_ERR_ENOENT_inode; - if (!ret) - *snapshot = iter.pos.snapshot; err: bch2_trans_iter_exit(trans, &iter); return ret; @@ -250,8 +247,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, struct bch_inode_unpacked root_inode; struct bch_hash_info root_hash_info; - u32 root_inode_snapshot = snapshot; - ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); + ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", root_inum.inum, le32_to_cpu(st.master_subvol)); if (ret) @@ -277,7 +273,7 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, * The bch2_check_dirents pass has already run, dangling dirents * shouldn't exist here: */ - ret = lookup_inode(trans, inum, lostfound, &snapshot); + ret = lookup_inode(trans, inum, snapshot, lostfound); bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); return ret; @@ -302,6 +298,7 @@ create_lostfound: bch2_inode_init_early(c, lostfound); bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); lostfound->bi_dir = root_inode.bi_inum; + lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); root_inode.bi_nlink++; @@ -329,9 +326,7 @@ err: return ret; } -static int reattach_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 inode_snapshot) +static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bch_fs *c = trans->c; struct bch_hash_info dir_hash; @@ -339,7 +334,7 @@ static int reattach_inode(struct btree_trans *trans, char name_buf[20]; struct qstr name; u64 dir_offset = 0; - u32 dirent_snapshot = inode_snapshot; + u32 dirent_snapshot = inode->bi_snapshot; int ret; if (inode->bi_subvol) { @@ -363,7 +358,12 @@ static int reattach_inode(struct btree_trans *trans, lostfound.bi_nlink += S_ISDIR(inode->bi_mode); /* ensure lost+found inode is also present in inode snapshot */ - ret = __bch2_fsck_write_inode(trans, &lostfound, inode_snapshot); + if (!inode->bi_subvol) { + BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); + lostfound.bi_snapshot = inode->bi_snapshot; + } + + ret = __bch2_fsck_write_inode(trans, &lostfound); if (ret) return ret; @@ -388,7 +388,7 @@ static int reattach_inode(struct btree_trans *trans, inode->bi_dir = lostfound.bi_inum; inode->bi_dir_offset = dir_offset; - return __bch2_fsck_write_inode(trans, inode, inode_snapshot); + return __bch2_fsck_write_inode(trans, inode); } static int remove_backpointer(struct btree_trans *trans, @@ -427,7 +427,7 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume if (ret) return ret; - ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); return ret; } @@ -545,8 +545,9 @@ static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); new_inode.bi_size = i_size; new_inode.bi_inum = inum; + new_inode.bi_snapshot = snapshot; - return __bch2_fsck_write_inode(trans, &new_inode, snapshot); + return __bch2_fsck_write_inode(trans, &new_inode); } struct snapshots_seen { @@ -1110,7 +1111,7 @@ static int check_inode(struct btree_trans *trans, u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) @@ -1258,7 +1259,7 @@ static int check_inode(struct btree_trans *trans, } do_update: if (do_update) { - ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); bch_err_msg(c, ret, "in fsck updating inode"); if (ret) goto err_noprint; @@ -1383,7 +1384,7 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal w->last_pos.inode, i->snapshot, i->inode.bi_sectors, i->count)) { i->inode.bi_sectors = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1825,7 +1826,7 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_ "directory %llu:%u with wrong i_nlink: got %u, should be %llu", w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { i->inode.bi_nlink = i->count; - ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); + ret = bch2_fsck_write_inode(trans, &i->inode); if (ret) break; } @@ -1846,8 +1847,7 @@ noinline_for_stack static int check_dirent_inode_dirent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct printbuf buf = PRINTBUF; @@ -1880,7 +1880,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, target->bi_flags &= ~BCH_INODE_unlinked; target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - return __bch2_fsck_write_inode(trans, target, target_snapshot); + return __bch2_fsck_write_inode(trans, target); } if (bch2_inode_should_have_bp(target) && @@ -1893,7 +1893,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, goto err; struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, - SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); + SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); ret = bkey_err(bp_dirent); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -1906,14 +1906,14 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, "inode %llu:%u has wrong backpointer:\n" "got %llu:%llu\n" "should be %llu:%llu", - target->bi_inum, target_snapshot, + target->bi_inum, target->bi_snapshot, target->bi_dir, target->bi_dir_offset, d.k->p.inode, d.k->p.offset)) { target->bi_dir = d.k->p.inode; target->bi_dir_offset = d.k->p.offset; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); goto out; } @@ -1928,7 +1928,7 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, trans, inode_dir_multiple_links, "%s %llu:%u with multiple links\n%s", S_ISDIR(target->bi_mode) ? "directory" : "subvolume", - target->bi_inum, target_snapshot, buf.buf)) { + target->bi_inum, target->bi_snapshot, buf.buf)) { ret = __remove_dirent(trans, d.k->p); goto out; } @@ -1941,10 +1941,10 @@ static int check_dirent_inode_dirent(struct btree_trans *trans, if (fsck_err_on(backpointer_exists && !target->bi_nlink, trans, inode_multiple_links_but_nlink_0, "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", - target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { + target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { target->bi_nlink++; target->bi_flags &= ~BCH_INODE_unlinked; - ret = __bch2_fsck_write_inode(trans, target, target_snapshot); + ret = __bch2_fsck_write_inode(trans, target); if (ret) goto err; } @@ -1961,15 +1961,14 @@ noinline_for_stack static int check_dirent_target(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c_dirent d, - struct bch_inode_unpacked *target, - u32 target_snapshot) + struct bch_inode_unpacked *target) { struct bch_fs *c = trans->c; struct bkey_i_dirent *n; struct printbuf buf = PRINTBUF; int ret = 0; - ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); + ret = check_dirent_inode_dirent(trans, iter, d, target); if (ret) goto err; @@ -2128,7 +2127,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * u64 target_inum = le64_to_cpu(s.v->inode); u32 target_snapshot = le32_to_cpu(s.v->snapshot); - ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); + ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root); if (ret && !bch2_err_matches(ret, ENOENT)) goto err; @@ -2144,13 +2143,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * target_inum, subvol_root.bi_parent_subvol, parent_subvol)) { subvol_root.bi_parent_subvol = parent_subvol; - ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); + subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &subvol_root); if (ret) goto err; } - ret = check_dirent_target(trans, iter, d, &subvol_root, - target_snapshot); + ret = check_dirent_target(trans, iter, d, &subvol_root); if (ret) goto err; out: @@ -2243,8 +2242,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, } darray_for_each(target->inodes, i) { - ret = check_dirent_target(trans, iter, d, - &i->inode, i->snapshot); + ret = check_dirent_target(trans, iter, d, &i->inode); if (ret) goto err; } @@ -2385,7 +2383,7 @@ static int check_root_trans(struct btree_trans *trans) goto err; } - ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); + ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode); if (ret && !bch2_err_matches(ret, ENOENT)) return ret; @@ -2398,8 +2396,9 @@ static int check_root_trans(struct btree_trans *trans) bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL); root_inode.bi_inum = inum; + root_inode.bi_snapshot = snapshot; - ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); + ret = __bch2_fsck_write_inode(trans, &root_inode); bch_err_msg(c, ret, "writing root inode"); } err: @@ -2566,7 +2565,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino (printbuf_reset(&buf), bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf))) - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); goto out; } @@ -2612,7 +2611,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino if (ret) break; - ret = reattach_inode(trans, &inode, snapshot); + ret = reattach_inode(trans, &inode); bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); } break; @@ -2842,7 +2841,7 @@ static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_ite u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], bch2_inode_nlink_get(&u), link->count)) { bch2_inode_nlink_set(&u, link->count); - ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); + ret = __bch2_fsck_write_inode(trans, &u); } fsck_err: return ret; diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 753c208896c3..19aa31c6812f 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -387,9 +387,7 @@ int bch2_inode_write_flags(struct btree_trans *trans, return bch2_trans_update(trans, iter, &inode_p->inode.k_i, flags); } -int __bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int __bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { struct bkey_inode_buf *inode_p = bch2_trans_kmalloc(trans, sizeof(*inode_p)); @@ -398,19 +396,17 @@ int __bch2_fsck_write_inode(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(inode_p, inode); - inode_p->inode.k.p.snapshot = snapshot; + inode_p->inode.k.p.snapshot = inode->bi_snapshot; return bch2_btree_insert_nonextent(trans, BTREE_ID_inodes, &inode_p->inode.k_i, BTREE_UPDATE_internal_snapshot_node); } -int bch2_fsck_write_inode(struct btree_trans *trans, - struct bch_inode_unpacked *inode, - u32 snapshot) +int bch2_fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_fsck_write_inode(trans, inode, snapshot)); + __bch2_fsck_write_inode(trans, inode)); bch_err_fn(trans->c, ret); return ret; } diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 695abd707cb6..f597d10a252d 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -112,8 +112,8 @@ static inline int bch2_inode_write(struct btree_trans *trans, return bch2_inode_write_flags(trans, iter, inode, 0); } -int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); -int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *, u32); +int __bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); +int bch2_fsck_write_inode(struct btree_trans *, struct bch_inode_unpacked *); void bch2_inode_init_early(struct bch_fs *, struct bch_inode_unpacked *); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 6845dde1b339..44210a86c367 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -102,7 +102,8 @@ static int check_subvol(struct btree_trans *trans, inode.bi_inum, inode.bi_snapshot, inode.bi_subvol, subvol.k->p.offset)) { inode.bi_subvol = subvol.k->p.offset; - ret = __bch2_fsck_write_inode(trans, &inode, le32_to_cpu(subvol.v->snapshot)); + inode.bi_snapshot = le32_to_cpu(subvol.v->snapshot); + ret = __bch2_fsck_write_inode(trans, &inode); if (ret) goto err; } From 1f73cb4d34e787b3671f1e9d527eb8cf72c05283 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 24 Sep 2024 05:33:07 -0400 Subject: [PATCH 17/18] bcachefs: Add warn param to subvol_get_snapshot, peek_inode These shouldn't always be fatal errors - logged op resume, in particular, and we want it as a parameter there. Signed-off-by: Kent Overstreet --- fs/bcachefs/inode.c | 32 +++++++++++--------------------- fs/bcachefs/inode.h | 24 ++++++++++++++++++++---- fs/bcachefs/subvolume.c | 13 ++++++++++--- fs/bcachefs/subvolume.h | 2 ++ 4 files changed, 43 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 19aa31c6812f..74d7a42ba1a2 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -327,22 +327,20 @@ int bch2_inode_unpack(struct bkey_s_c k, : bch2_inode_unpack_slowpath(k, unpacked); } -int bch2_inode_peek_nowarn(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) +int __bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags, + bool warn) { - struct bkey_s_c k; u32 snapshot; - int ret; - - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + int ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn); if (ret) return ret; - k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, - SPOS(0, inum.inum, snapshot), - flags|BTREE_ITER_cached); + struct bkey_s_c k = bch2_bkey_get_iter(trans, iter, BTREE_ID_inodes, + SPOS(0, inum.inum, snapshot), + flags|BTREE_ITER_cached); ret = bkey_err(k); if (ret) return ret; @@ -357,20 +355,12 @@ int bch2_inode_peek_nowarn(struct btree_trans *trans, return 0; err: + if (warn) + bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); bch2_trans_iter_exit(trans, iter); return ret; } -int bch2_inode_peek(struct btree_trans *trans, - struct btree_iter *iter, - struct bch_inode_unpacked *inode, - subvol_inum inum, unsigned flags) -{ - int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); - bch_err_msg(trans->c, ret, "looking up inum %llu:%llu:", inum.subvol, inum.inum); - return ret; -} - int bch2_inode_write_flags(struct btree_trans *trans, struct btree_iter *iter, struct bch_inode_unpacked *inode, diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index f597d10a252d..9c1f67705684 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -97,10 +97,26 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *, struct bkey_i *); void bch2_inode_unpacked_to_text(struct printbuf *, struct bch_inode_unpacked *); -int bch2_inode_peek_nowarn(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); -int bch2_inode_peek(struct btree_trans *, struct btree_iter *, - struct bch_inode_unpacked *, subvol_inum, unsigned); +int __bch2_inode_peek(struct btree_trans *, struct btree_iter *, + struct bch_inode_unpacked *, subvol_inum, unsigned, bool); + +static inline int bch2_inode_peek_nowarn(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, false); +} + +static inline int bch2_inode_peek(struct btree_trans *trans, + struct btree_iter *iter, + struct bch_inode_unpacked *inode, + subvol_inum inum, unsigned flags) +{ + return __bch2_inode_peek(trans, iter, inode, inum, flags, true); + int ret = bch2_inode_peek_nowarn(trans, iter, inode, inum, flags); + return ret; +} int bch2_inode_write_flags(struct btree_trans *, struct btree_iter *, struct bch_inode_unpacked *, enum btree_iter_update_trigger_flags); diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index 44210a86c367..91d8187ee168 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -332,8 +332,8 @@ int bch2_snapshot_get_subvol(struct btree_trans *trans, u32 snapshot, bch2_subvolume_get(trans, le32_to_cpu(snap.subvol), true, 0, subvol); } -int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, - u32 *snapid) +int __bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid, bool warn) { struct btree_iter iter; struct bkey_s_c_subvolume subvol; @@ -344,7 +344,8 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, BTREE_ITER_cached|BTREE_ITER_with_updates, subvolume); ret = bkey_err(subvol); - bch2_fs_inconsistent_on(bch2_err_matches(ret, ENOENT), trans->c, + + bch2_fs_inconsistent_on(warn && bch2_err_matches(ret, ENOENT), trans->c, "missing subvolume %u", subvolid); if (likely(!ret)) @@ -353,6 +354,12 @@ int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, return ret; } +int bch2_subvolume_get_snapshot(struct btree_trans *trans, u32 subvolid, + u32 *snapid) +{ + return __bch2_subvolume_get_snapshot(trans, subvolid, snapid, true); +} + static int bch2_subvolume_reparent(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k, diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index e62f876541fe..f897d106e142 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -26,6 +26,8 @@ int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, int bch2_subvol_has_children(struct btree_trans *, u32); int bch2_subvolume_get(struct btree_trans *, unsigned, bool, int, struct bch_subvolume *); +int __bch2_subvolume_get_snapshot(struct btree_trans *, u32, + u32 *, bool); int bch2_subvolume_get_snapshot(struct btree_trans *, u32, u32 *); int bch2_subvol_is_ro_trans(struct btree_trans *, u32); From 0f25eb4b60771f08fbcca878a8f7f88086d0c885 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 23 Sep 2024 22:06:58 -0400 Subject: [PATCH 18/18] bcachefs: Rework logged op error handling Initially it was thought that we just wanted to ignore errors from logged op replay, but it turns out we do need to catch -EROFS, or we'll go into an infinite loop. Signed-off-by: Kent Overstreet --- fs/bcachefs/io_misc.c | 63 +++++++++++++++++++++++++++------------- fs/bcachefs/logged_ops.c | 16 +++++----- fs/bcachefs/logged_ops.h | 2 +- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 177ed331c00b..307ed0a45184 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -224,13 +224,14 @@ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, str static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, - u64 new_i_size) + u64 new_i_size, + bool warn) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?: + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); @@ -247,10 +248,11 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); + bool warn_errors = i_sectors_delta != NULL; int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - truncate_set_isize(trans, inum, new_i_size)); + truncate_set_isize(trans, inum, new_i_size, i_sectors_delta != NULL)); if (ret) goto err; @@ -263,8 +265,8 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: - bch2_logged_op_finish(trans, op_k); - bch_err_fn(c, ret); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -288,9 +290,14 @@ int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sec * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; @@ -308,7 +315,8 @@ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, stru prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } -static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) +static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, + u64 offset, s64 len, bool warn) { struct btree_iter iter; struct bch_inode_unpacked inode_u; @@ -317,7 +325,7 @@ static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset offset <<= 9; len <<= 9; - ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); + ret = __bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent, warn); if (ret) return ret; @@ -357,12 +365,22 @@ static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; + u32 snapshot; + bool warn_errors = i_sectors_delta != NULL; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; + /* + * check for missing subvolume before fpunch, as in resume we don't want + * it to be a fatal error + */ + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, warn_errors); + if (ret) + return ret; + bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); @@ -373,7 +391,7 @@ case LOGGED_OP_FINSERT_start: if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, len) ?: + adjust_i_size(trans, inum, src_offset, len, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; @@ -396,11 +414,11 @@ case LOGGED_OP_FINSERT_shift_extents: struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); - u32 snapshot; bch2_trans_begin(trans); - ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); + ret = __bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot, + warn_errors); if (ret) goto btree_err; @@ -463,12 +481,12 @@ btree_err: if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, src_offset, shift) ?: + adjust_i_size(trans, inum, src_offset, shift, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - adjust_i_size(trans, inum, 0, 0) ?: + adjust_i_size(trans, inum, 0, 0, warn_errors) ?: bch2_logged_op_update(trans, &op->k_i)); } @@ -477,9 +495,9 @@ case LOGGED_OP_FINSERT_finish: break; } err: - bch_err_fn(c, ret); - bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); + if (warn_errors) + bch_err_fn(c, ret); return ret; } @@ -508,9 +526,14 @@ int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); - int ret = bch2_trans_run(c, - bch2_logged_op_start(trans, &op.k_i) ?: - __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); + struct btree_trans *trans = bch2_trans_get(c); + int ret = bch2_logged_op_start(trans, &op.k_i); + if (ret) + goto out; + ret = __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta); + ret = bch2_logged_op_finish(trans, &op.k_i) ?: ret; +out: + bch2_trans_put(trans); up_read(&c->snapshot_create_lock); return ret; diff --git a/fs/bcachefs/logged_ops.c b/fs/bcachefs/logged_ops.c index 6f4a4e1083c9..60e00702d1a4 100644 --- a/fs/bcachefs/logged_ops.c +++ b/fs/bcachefs/logged_ops.c @@ -34,8 +34,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) { struct bch_fs *c = trans->c; - const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type); - struct bkey_buf sk; u32 restart_count = trans->restart_count; struct printbuf buf = PRINTBUF; int ret = 0; @@ -46,13 +44,15 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter, (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); - if (!fn) - return 0; - + struct bkey_buf sk; bch2_bkey_buf_init(&sk); bch2_bkey_buf_reassemble(&sk, c, k); - fn->resume(trans, sk.k); + const struct bch_logged_op_fn *fn = logged_op_fn(sk.k->k.type); + if (fn) + fn->resume(trans, sk.k); + + ret = bch2_logged_op_finish(trans, sk.k); bch2_bkey_buf_exit(&sk, c); fsck_err: @@ -93,7 +93,7 @@ int bch2_logged_op_start(struct btree_trans *trans, struct bkey_i *k) __bch2_logged_op_start(trans, k)); } -void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) +int bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) { int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_btree_delete(trans, BTREE_ID_logged_ops, k->k.p, 0)); @@ -113,4 +113,6 @@ void bch2_logged_op_finish(struct btree_trans *trans, struct bkey_i *k) buf.buf, bch2_err_str(ret)); printbuf_exit(&buf); } + + return ret; } diff --git a/fs/bcachefs/logged_ops.h b/fs/bcachefs/logged_ops.h index 4d1e786a27a8..30ae9ef737dd 100644 --- a/fs/bcachefs/logged_ops.h +++ b/fs/bcachefs/logged_ops.h @@ -15,6 +15,6 @@ static inline int bch2_logged_op_update(struct btree_trans *trans, struct bkey_i int bch2_resume_logged_ops(struct bch_fs *); int bch2_logged_op_start(struct btree_trans *, struct bkey_i *); -void bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); +int bch2_logged_op_finish(struct btree_trans *, struct bkey_i *); #endif /* _BCACHEFS_LOGGED_OPS_H */