Merge tag 'bcachefs-2025-01-29' of git://evilpiepirate.org/bcachefs

Pull bcachefs fixes from Kent Overstreet:

 - second half of a fix for a bug that'd been causing oopses on
   filesystems using snapshots with memory pressure (key cache fills for
   snaphots btrees are tricky)

 - build fix for strange compiler configurations that double stack frame
   size

 - "journal stuck timeout" now takes into account device latency: this
   fixes some spurious warnings, and the main remaining source of SRCU
   lock hold time warnings (I'm no longer seeing this in my CI, so any
   users still seeing this should definitely ping me)

 - fix for slow/hanging unmounts (" Improve journal pin flushing")

 - some more tracepoint fixes/improvements, to chase down the "rebalance
   isn't making progress" issues

* tag 'bcachefs-2025-01-29' of git://evilpiepirate.org/bcachefs:
  bcachefs: Improve trace_move_extent_finish
  bcachefs: Fix trace_copygc
  bcachefs: Journal writes are now IOPRIO_CLASS_RT
  bcachefs: Improve journal pin flushing
  bcachefs: fix bch2_btree_node_flags
  bcachefs: rebalance, copygc enabled are runtime opts
  bcachefs: Improve decompression error messages
  bcachefs: bset_blacklisted_journal_seq is now AUTOFIX
  bcachefs: "Journal stuck" timeout now takes into account device latency
  bcachefs: Reduce stack frame size of __bch2_str_hash_check_key()
  bcachefs: Fix btree_trans_peek_key_cache()
This commit is contained in:
Linus Torvalds
2025-01-30 08:42:50 -08:00
21 changed files with 275 additions and 159 deletions

View File

@@ -24,7 +24,10 @@ do { \
} while (0)
const char * const bch2_btree_node_flags[] = {
#define x(f) #f,
"typebit",
"typebit",
"typebit",
#define x(f) [BTREE_NODE_##f] = #f,
BTREE_FLAGS()
#undef x
NULL

View File

@@ -2239,8 +2239,6 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
if (unlikely(ret))
return bkey_s_c_err(ret);
btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
k = bch2_btree_path_peek_slot(trans->paths + iter->key_cache_path, &u);
if (!k.k)
return k;
@@ -2251,6 +2249,7 @@ struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos
iter->k = u;
k.k = &iter->k;
btree_path_set_should_be_locked(trans, trans->paths + iter->key_cache_path);
return k;
}

View File

@@ -291,8 +291,10 @@ static noinline int btree_key_cache_fill(struct btree_trans *trans,
struct btree_path *ck_path,
unsigned flags)
{
if (flags & BTREE_ITER_cached_nofill)
if (flags & BTREE_ITER_cached_nofill) {
ck_path->l[0].b = NULL;
return 0;
}
struct bch_fs *c = trans->c;
struct btree_iter iter;

View File

@@ -348,7 +348,7 @@ static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
unsigned flags)
{
return bch2_journal_res_get(&trans->c->journal, &trans->journal_res,
trans->journal_u64s, flags);
trans->journal_u64s, flags, trans);
}
#define JSET_ENTRY_LOG_U64s 4

View File

@@ -4,6 +4,7 @@
#include "compress.h"
#include "error.h"
#include "extents.h"
#include "io_write.h"
#include "opts.h"
#include "super-io.h"
@@ -254,11 +255,14 @@ err:
goto out;
}
int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
struct bch_extent_crc_unpacked *crc)
int bch2_bio_uncompress_inplace(struct bch_write_op *op,
struct bio *bio)
{
struct bch_fs *c = op->c;
struct bch_extent_crc_unpacked *crc = &op->crc;
struct bbuf data = { NULL };
size_t dst_len = crc->uncompressed_size << 9;
int ret = 0;
/* bio must own its pages: */
BUG_ON(!bio->bi_vcnt);
@@ -266,17 +270,26 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
if (crc->uncompressed_size << 9 > c->opts.encoded_extent_max ||
crc->compressed_size << 9 > c->opts.encoded_extent_max) {
bch_err(c, "error rewriting existing data: extent too big");
struct printbuf buf = PRINTBUF;
bch2_write_op_error(&buf, op);
prt_printf(&buf, "error rewriting existing data: extent too big");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
return -EIO;
}
data = __bounce_alloc(c, dst_len, WRITE);
if (__bio_uncompress(c, bio, data.b, *crc)) {
if (!c->opts.no_data_io)
bch_err(c, "error rewriting existing data: decompression error");
bio_unmap_or_unbounce(c, data);
return -EIO;
if (!c->opts.no_data_io) {
struct printbuf buf = PRINTBUF;
bch2_write_op_error(&buf, op);
prt_printf(&buf, "error rewriting existing data: decompression error");
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
}
ret = -EIO;
goto err;
}
/*
@@ -293,9 +306,9 @@ int bch2_bio_uncompress_inplace(struct bch_fs *c, struct bio *bio,
crc->uncompressed_size = crc->live_size;
crc->offset = 0;
crc->csum = (struct bch_csum) { 0, 0 };
err:
bio_unmap_or_unbounce(c, data);
return 0;
return ret;
}
int bch2_bio_uncompress(struct bch_fs *c, struct bio *src,

View File

@@ -47,8 +47,8 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
}
int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
struct bch_extent_crc_unpacked *);
struct bch_write_op;
int bch2_bio_uncompress_inplace(struct bch_write_op *, struct bio *);
int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
struct bvec_iter, struct bch_extent_crc_unpacked);
unsigned bch2_bio_compress(struct bch_fs *, struct bio *, size_t *,

View File

@@ -91,15 +91,28 @@ static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struc
return true;
}
static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
static noinline void trace_move_extent_finish2(struct data_update *u,
struct bkey_i *new,
struct bkey_i *insert)
{
if (trace_move_extent_finish_enabled()) {
struct printbuf buf = PRINTBUF;
struct bch_fs *c = u->op.c;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, k);
trace_move_extent_finish(c, buf.buf);
printbuf_exit(&buf);
}
prt_newline(&buf);
bch2_data_update_to_text(&buf, u);
prt_newline(&buf);
prt_str_indented(&buf, "new replicas:\t");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new));
prt_newline(&buf);
prt_str_indented(&buf, "insert:\t");
bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
prt_newline(&buf);
trace_move_extent_finish(c, buf.buf);
printbuf_exit(&buf);
}
static void trace_move_extent_fail2(struct data_update *m,
@@ -372,7 +385,8 @@ restart_drop_extra_replicas:
bch2_btree_iter_set_pos(&iter, next_pos);
this_cpu_add(c->counters[BCH_COUNTER_move_extent_finish], new->k.size);
trace_move_extent_finish2(c, bkey_i_to_s_c(&new->k_i));
if (trace_move_extent_finish_enabled())
trace_move_extent_finish2(m, &new->k_i, insert);
}
err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
@@ -525,34 +539,38 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c,
struct data_update_opts *data_opts)
{
printbuf_tabstop_push(out, 20);
prt_str(out, "rewrite ptrs:\t");
prt_str_indented(out, "rewrite ptrs:\t");
bch2_prt_u64_base2(out, data_opts->rewrite_ptrs);
prt_newline(out);
prt_str(out, "kill ptrs:\t");
prt_str_indented(out, "kill ptrs:\t");
bch2_prt_u64_base2(out, data_opts->kill_ptrs);
prt_newline(out);
prt_str(out, "target:\t");
prt_str_indented(out, "target:\t");
bch2_target_to_text(out, c, data_opts->target);
prt_newline(out);
prt_str(out, "compression:\t");
prt_str_indented(out, "compression:\t");
bch2_compression_opt_to_text(out, io_opts->background_compression);
prt_newline(out);
prt_str(out, "opts.replicas:\t");
prt_str_indented(out, "opts.replicas:\t");
prt_u64(out, io_opts->data_replicas);
prt_newline(out);
prt_str(out, "extra replicas:\t");
prt_str_indented(out, "extra replicas:\t");
prt_u64(out, data_opts->extra_replicas);
}
void bch2_data_update_to_text(struct printbuf *out, struct data_update *m)
{
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
prt_newline(out);
bch2_data_update_opts_to_text(out, m->op.c, &m->op.opts, &m->data_opts);
prt_newline(out);
prt_str_indented(out, "old key:\t");
bch2_bkey_val_to_text(out, m->op.c, bkey_i_to_s_c(m->k.k));
}
int bch2_extent_drop_ptrs(struct btree_trans *trans,

View File

@@ -20,6 +20,7 @@
#include "extents.h"
#include "fsck.h"
#include "inode.h"
#include "journal_reclaim.h"
#include "super.h"
#include <linux/console.h>

View File

@@ -406,7 +406,7 @@ static void __bch2_write_op_error(struct printbuf *out, struct bch_write_op *op,
op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
}
static void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
{
__bch2_write_op_error(out, op, op->pos.offset);
}
@@ -873,7 +873,7 @@ static enum prep_encoded_ret {
if (bch2_crc_cmp(op->crc.csum, csum) && !c->opts.no_data_io)
return PREP_ENCODED_CHECKSUM_ERR;
if (bch2_bio_uncompress_inplace(c, bio, &op->crc))
if (bch2_bio_uncompress_inplace(op, bio))
return PREP_ENCODED_ERR;
}

View File

@@ -20,6 +20,8 @@ static inline void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw
void bch2_submit_wbio_replicas(struct bch_write_bio *, struct bch_fs *,
enum bch_data_type, const struct bkey_i *, bool);
void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op);
#define BCH_WRITE_FLAGS() \
x(ALLOC_NOWAIT) \
x(CACHED) \

View File

@@ -113,11 +113,10 @@ journal_seq_to_buf(struct journal *j, u64 seq)
static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
{
unsigned i;
for (i = 0; i < ARRAY_SIZE(p->list); i++)
INIT_LIST_HEAD(&p->list[i]);
INIT_LIST_HEAD(&p->flushed);
for (unsigned i = 0; i < ARRAY_SIZE(p->unflushed); i++)
INIT_LIST_HEAD(&p->unflushed[i]);
for (unsigned i = 0; i < ARRAY_SIZE(p->flushed); i++)
INIT_LIST_HEAD(&p->flushed[i]);
atomic_set(&p->count, count);
p->devs.nr = 0;
}
@@ -601,6 +600,16 @@ out:
: -BCH_ERR_journal_res_get_blocked;
}
static unsigned max_dev_latency(struct bch_fs *c)
{
u64 nsecs = 0;
for_each_rw_member(c, ca)
nsecs = max(nsecs, ca->io_latency[WRITE].stats.max_duration);
return nsecs_to_jiffies(nsecs);
}
/*
* Essentially the entry function to the journaling code. When bcachefs is doing
* a btree insert, it calls this function to get the current journal write.
@@ -612,17 +621,31 @@ out:
* btree node write locks.
*/
int bch2_journal_res_get_slowpath(struct journal *j, struct journal_res *res,
unsigned flags)
unsigned flags,
struct btree_trans *trans)
{
int ret;
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
HZ * 10))
HZ))
return ret;
if (trans)
bch2_trans_unlock_long(trans);
struct bch_fs *c = container_of(j, struct bch_fs, journal);
int remaining_wait = max(max_dev_latency(c) * 2, HZ * 10);
remaining_wait = max(0, remaining_wait - HZ);
if (closure_wait_event_timeout(&j->async_wait,
(ret = __journal_res_get(j, res, flags)) != -BCH_ERR_journal_res_get_blocked ||
(flags & JOURNAL_RES_GET_NONBLOCK),
remaining_wait))
return ret;
struct printbuf buf = PRINTBUF;
bch2_journal_debug_to_text(&buf, j);
bch_err(c, "Journal stuck? Waited for 10 seconds...\n%s",
@@ -727,7 +750,7 @@ recheck_need_open:
* livelock:
*/
sched_annotate_sleep();
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
@@ -848,7 +871,7 @@ out:
static int __bch2_journal_meta(struct journal *j)
{
struct journal_res res = {};
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0);
int ret = bch2_journal_res_get(j, &res, jset_u64s(0), 0, NULL);
if (ret)
return ret;
@@ -1602,54 +1625,3 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
__bch2_journal_debug_to_text(out, j);
spin_unlock(&j->lock);
}
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
spin_lock(&j->lock);
if (!test_bit(JOURNAL_running, &j->flags)) {
spin_unlock(&j->lock);
return true;
}
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
spin_unlock(&j->lock);
return true;
}
out->atomic++;
pin_list = journal_seq_pin(j, *seq);
prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
printbuf_indent_add(out, 2);
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->list); i++)
list_for_each_entry(pin, &pin_list->list[i], list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
if (!list_empty(&pin_list->flushed))
prt_printf(out, "flushed:\n");
list_for_each_entry(pin, &pin_list->flushed, list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
printbuf_indent_sub(out, 2);
--out->atomic;
spin_unlock(&j->lock);
return false;
}
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
{
u64 seq = 0;
while (!bch2_journal_seq_pins_to_text(out, j, &seq))
seq++;
}

View File

@@ -312,7 +312,7 @@ static inline void bch2_journal_res_put(struct journal *j,
}
int bch2_journal_res_get_slowpath(struct journal *, struct journal_res *,
unsigned);
unsigned, struct btree_trans *);
/* First bits for BCH_WATERMARK: */
enum journal_res_flags {
@@ -368,7 +368,8 @@ static inline int journal_res_get_fast(struct journal *j,
}
static inline int bch2_journal_res_get(struct journal *j, struct journal_res *res,
unsigned u64s, unsigned flags)
unsigned u64s, unsigned flags,
struct btree_trans *trans)
{
int ret;
@@ -380,7 +381,7 @@ static inline int bch2_journal_res_get(struct journal *j, struct journal_res *re
if (journal_res_get_fast(j, res, flags))
goto out;
ret = bch2_journal_res_get_slowpath(j, res, flags);
ret = bch2_journal_res_get_slowpath(j, res, flags, trans);
if (ret)
return ret;
out:
@@ -429,8 +430,6 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *, u
void __bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_debug_to_text(struct printbuf *, struct journal *);
void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
unsigned nr);

View File

@@ -17,6 +17,7 @@
#include "sb-clean.h"
#include "trace.h"
#include <linux/ioprio.h>
#include <linux/string_choices.h>
void bch2_journal_pos_from_member_info_set(struct bch_fs *c)
@@ -1763,6 +1764,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
bio->bi_iter.bi_sector = ptr->offset;
bio->bi_end_io = journal_write_endio;
bio->bi_private = ca;
bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 0);
BUG_ON(bio->bi_iter.bi_sector == ca->prev_journal_sector);
ca->prev_journal_sector = bio->bi_iter.bi_sector;

View File

@@ -327,8 +327,10 @@ void bch2_journal_reclaim_fast(struct journal *j)
popped = true;
}
if (popped)
if (popped) {
bch2_journal_space_available(j);
__closure_wake_up(&j->reclaim_flush_wait);
}
}
bool __bch2_journal_pin_put(struct journal *j, u64 seq)
@@ -362,6 +364,9 @@ static inline bool __journal_pin_drop(struct journal *j,
pin->seq = 0;
list_del_init(&pin->list);
if (j->reclaim_flush_wait.list.first)
__closure_wake_up(&j->reclaim_flush_wait);
/*
* Unpinning a journal entry may make journal_next_bucket() succeed, if
* writing a new last_seq will now make another bucket available:
@@ -383,11 +388,11 @@ static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
fn == bch2_btree_node_flush1)
return JOURNAL_PIN_btree;
return JOURNAL_PIN_TYPE_btree;
else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_key_cache;
return JOURNAL_PIN_TYPE_key_cache;
else
return JOURNAL_PIN_other;
return JOURNAL_PIN_TYPE_other;
}
static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
@@ -406,7 +411,12 @@ static inline void bch2_journal_pin_set_locked(struct journal *j, u64 seq,
atomic_inc(&pin_list->count);
pin->seq = seq;
pin->flush = flush_fn;
list_add(&pin->list, &pin_list->list[type]);
if (list_empty(&pin_list->unflushed[type]) &&
j->reclaim_flush_wait.list.first)
__closure_wake_up(&j->reclaim_flush_wait);
list_add(&pin->list, &pin_list->unflushed[type]);
}
void bch2_journal_pin_copy(struct journal *j,
@@ -499,16 +509,15 @@ journal_get_next_pin(struct journal *j,
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *ret = NULL;
unsigned i;
fifo_for_each_entry_ptr(pin_list, &j->pin, *seq) {
if (*seq > seq_to_flush && !allowed_above_seq)
break;
for (i = 0; i < JOURNAL_PIN_NR; i++)
if ((((1U << i) & allowed_below_seq) && *seq <= seq_to_flush) ||
((1U << i) & allowed_above_seq)) {
ret = list_first_entry_or_null(&pin_list->list[i],
for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
if (((BIT(i) & allowed_below_seq) && *seq <= seq_to_flush) ||
(BIT(i) & allowed_above_seq)) {
ret = list_first_entry_or_null(&pin_list->unflushed[i],
struct journal_entry_pin, list);
if (ret)
return ret;
@@ -544,8 +553,8 @@ static size_t journal_flush_pins(struct journal *j,
}
if (min_key_cache) {
allowed_above |= 1U << JOURNAL_PIN_key_cache;
allowed_below |= 1U << JOURNAL_PIN_key_cache;
allowed_above |= BIT(JOURNAL_PIN_TYPE_key_cache);
allowed_below |= BIT(JOURNAL_PIN_TYPE_key_cache);
}
cond_resched();
@@ -553,7 +562,9 @@ static size_t journal_flush_pins(struct journal *j,
j->last_flushed = jiffies;
spin_lock(&j->lock);
pin = journal_get_next_pin(j, seq_to_flush, allowed_below, allowed_above, &seq);
pin = journal_get_next_pin(j, seq_to_flush,
allowed_below,
allowed_above, &seq);
if (pin) {
BUG_ON(j->flush_in_progress);
j->flush_in_progress = pin;
@@ -576,7 +587,7 @@ static size_t journal_flush_pins(struct journal *j,
spin_lock(&j->lock);
/* Pin might have been dropped or rearmed: */
if (likely(!err && !j->flush_in_progress_dropped))
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed);
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
j->flush_in_progress = NULL;
j->flush_in_progress_dropped = false;
spin_unlock(&j->lock);
@@ -816,10 +827,41 @@ int bch2_journal_reclaim_start(struct journal *j)
return 0;
}
static bool journal_pins_still_flushing(struct journal *j, u64 seq_to_flush,
unsigned types)
{
struct journal_entry_pin_list *pin_list;
u64 seq;
spin_lock(&j->lock);
fifo_for_each_entry_ptr(pin_list, &j->pin, seq) {
if (seq > seq_to_flush)
break;
for (unsigned i = 0; i < JOURNAL_PIN_TYPE_NR; i++)
if ((BIT(i) & types) &&
(!list_empty(&pin_list->unflushed[i]) ||
!list_empty(&pin_list->flushed[i]))) {
spin_unlock(&j->lock);
return true;
}
}
spin_unlock(&j->lock);
return false;
}
static bool journal_flush_pins_or_still_flushing(struct journal *j, u64 seq_to_flush,
unsigned types)
{
return journal_flush_pins(j, seq_to_flush, types, 0, 0, 0) ||
journal_pins_still_flushing(j, seq_to_flush, types);
}
static int journal_flush_done(struct journal *j, u64 seq_to_flush,
bool *did_work)
{
int ret;
int ret = 0;
ret = bch2_journal_error(j);
if (ret)
@@ -827,12 +869,18 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
if (journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_key_cache)|
(1U << JOURNAL_PIN_other), 0, 0, 0) ||
journal_flush_pins(j, seq_to_flush,
(1U << JOURNAL_PIN_btree), 0, 0, 0))
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
BIT(JOURNAL_PIN_TYPE_key_cache)|
BIT(JOURNAL_PIN_TYPE_other))) {
*did_work = true;
goto unlock;
}
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
BIT(JOURNAL_PIN_TYPE_btree))) {
*did_work = true;
goto unlock;
}
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
@@ -847,6 +895,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
!fifo_used(&j->pin);
spin_unlock(&j->lock);
unlock:
mutex_unlock(&j->reclaim_lock);
return ret;
@@ -860,7 +909,7 @@ bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush)
if (!test_bit(JOURNAL_running, &j->flags))
return false;
closure_wait_event(&j->async_wait,
closure_wait_event(&j->reclaim_flush_wait,
journal_flush_done(j, seq_to_flush, &did_work));
return did_work;
@@ -926,3 +975,54 @@ err:
return ret;
}
bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64 *seq)
{
struct journal_entry_pin_list *pin_list;
struct journal_entry_pin *pin;
spin_lock(&j->lock);
if (!test_bit(JOURNAL_running, &j->flags)) {
spin_unlock(&j->lock);
return true;
}
*seq = max(*seq, j->pin.front);
if (*seq >= j->pin.back) {
spin_unlock(&j->lock);
return true;
}
out->atomic++;
pin_list = journal_seq_pin(j, *seq);
prt_printf(out, "%llu: count %u\n", *seq, atomic_read(&pin_list->count));
printbuf_indent_add(out, 2);
prt_printf(out, "unflushed:\n");
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->unflushed); i++)
list_for_each_entry(pin, &pin_list->unflushed[i], list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
prt_printf(out, "flushed:\n");
for (unsigned i = 0; i < ARRAY_SIZE(pin_list->flushed); i++)
list_for_each_entry(pin, &pin_list->flushed[i], list)
prt_printf(out, "\t%px %ps\n", pin, pin->flush);
printbuf_indent_sub(out, 2);
--out->atomic;
spin_unlock(&j->lock);
return false;
}
void bch2_journal_pins_to_text(struct printbuf *out, struct journal *j)
{
u64 seq = 0;
while (!bch2_journal_seq_pins_to_text(out, j, &seq))
seq++;
}

View File

@@ -78,4 +78,7 @@ static inline bool bch2_journal_flush_all_pins(struct journal *j)
int bch2_journal_flush_device_pins(struct journal *, int);
void bch2_journal_pins_to_text(struct printbuf *, struct journal *);
bool bch2_journal_seq_pins_to_text(struct printbuf *, struct journal *, u64 *);
#endif /* _BCACHEFS_JOURNAL_RECLAIM_H */

View File

@@ -53,15 +53,15 @@ struct journal_buf {
*/
enum journal_pin_type {
JOURNAL_PIN_btree,
JOURNAL_PIN_key_cache,
JOURNAL_PIN_other,
JOURNAL_PIN_NR,
JOURNAL_PIN_TYPE_btree,
JOURNAL_PIN_TYPE_key_cache,
JOURNAL_PIN_TYPE_other,
JOURNAL_PIN_TYPE_NR,
};
struct journal_entry_pin_list {
struct list_head list[JOURNAL_PIN_NR];
struct list_head flushed;
struct list_head unflushed[JOURNAL_PIN_TYPE_NR];
struct list_head flushed[JOURNAL_PIN_TYPE_NR];
atomic_t count;
struct bch_devs_list devs;
};
@@ -226,6 +226,7 @@ struct journal {
/* Used when waiting because the journal was full */
wait_queue_head_t wait;
struct closure_waitlist async_wait;
struct closure_waitlist reclaim_flush_wait;
struct delayed_work write_work;
struct workqueue_struct *wq;

View File

@@ -215,7 +215,8 @@ static int bch2_copygc(struct moving_context *ctxt,
};
move_buckets buckets = { 0 };
struct move_bucket_in_flight *f;
u64 moved = atomic64_read(&ctxt->stats->sectors_moved);
u64 sectors_seen = atomic64_read(&ctxt->stats->sectors_seen);
u64 sectors_moved = atomic64_read(&ctxt->stats->sectors_moved);
int ret = 0;
ret = bch2_copygc_get_buckets(ctxt, buckets_in_flight, &buckets);
@@ -245,7 +246,6 @@ static int bch2_copygc(struct moving_context *ctxt,
*did_work = true;
}
err:
darray_exit(&buckets);
/* no entries in LRU btree found, or got to end: */
if (bch2_err_matches(ret, ENOENT))
@@ -254,8 +254,11 @@ err:
if (ret < 0 && !bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "from bch2_move_data()");
moved = atomic64_read(&ctxt->stats->sectors_moved) - moved;
trace_and_count(c, copygc, c, moved, 0, 0, 0);
sectors_seen = atomic64_read(&ctxt->stats->sectors_seen) - sectors_seen;
sectors_moved = atomic64_read(&ctxt->stats->sectors_moved) - sectors_moved;
trace_and_count(c, copygc, c, buckets.nr, sectors_seen, sectors_moved);
darray_exit(&buckets);
return ret;
}

View File

@@ -476,13 +476,13 @@ enum fsck_err_opts {
NULL, "Enable nocow mode: enables runtime locking in\n"\
"data move path needed if nocow will ever be in use\n")\
x(copygc_enabled, u8, \
OPT_FS|OPT_MOUNT, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable copygc: disable for debugging, or to\n"\
"quiet the system when doing performance testing\n")\
x(rebalance_enabled, u8, \
OPT_FS|OPT_MOUNT, \
OPT_FS|OPT_MOUNT|OPT_RUNTIME, \
OPT_BOOL(), \
BCH2_NO_SB_OPT, true, \
NULL, "Enable rebalance: disable for debugging, or to\n"\

View File

@@ -57,7 +57,7 @@ enum bch_fsck_flags {
x(bset_wrong_sector_offset, 44, 0) \
x(bset_empty, 45, 0) \
x(bset_bad_seq, 46, 0) \
x(bset_blacklisted_journal_seq, 47, 0) \
x(bset_blacklisted_journal_seq, 47, FSCK_AUTOFIX) \
x(first_bset_blacklisted_journal_seq, 48, FSCK_AUTOFIX) \
x(btree_node_bad_btree, 49, 0) \
x(btree_node_bad_level, 50, 0) \

View File

@@ -31,11 +31,11 @@ static int bch2_dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dir
}
}
static int fsck_rename_dirent(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c_dirent old)
static noinline int fsck_rename_dirent(struct btree_trans *trans,
struct snapshots_seen *s,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c_dirent old)
{
struct qstr old_name = bch2_dirent_get_name(old);
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
@@ -71,11 +71,11 @@ static int fsck_rename_dirent(struct btree_trans *trans,
return bch2_fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
}
static int hash_pick_winner(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c k1,
struct bkey_s_c k2)
static noinline int hash_pick_winner(struct btree_trans *trans,
const struct bch_hash_desc desc,
struct bch_hash_info *hash_info,
struct bkey_s_c k1,
struct bkey_s_c k2)
{
if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
!memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
@@ -142,8 +142,8 @@ fsck_err:
* All versions of the same inode in different snapshots must have the same hash
* seed/type: verify that the hash info we're using matches the root
*/
static int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
static noinline int check_inode_hash_info_matches_root(struct btree_trans *trans, u64 inum,
struct bch_hash_info *hash_info)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;

Some files were not shown because too many files have changed in this diff Show More