Merge tag 'for-5.19/drivers-2022-06-02' of git://git.kernel.dk/linux-block

Pull more block driver updates from Jens Axboe:
 "A collection of stragglers that were late on sending in their changes
  and just followup fixes.

   - NVMe fixes pull request via Christoph:
       - set controller enable bit in a separate write (Niklas Cassel)
       - disable namespace identifiers for the MAXIO MAP1001 (Christoph)
       - fix a comment typo (Julia Lawall)

   - MD fixes pull request via Song:
       - Remove uses of bdevname (Christoph Hellwig)
       - Bug fixes (Guoqing Jiang, and Xiao Ni)

   - bcache fixes series (Coly)

   - null_blk zoned write fix (Damien)

   - nbd fixes (Yu, Zhang)

   - Fix for loop partition scanning (Christoph)"

* tag 'for-5.19/drivers-2022-06-02' of git://git.kernel.dk/linux-block: (23 commits)
  block: null_blk: Fix null_zone_write()
  nvmet: fix typo in comment
  nvme: set controller enable bit in a separate write
  nvme-pci: disable namespace identifiers for the MAXIO MAP1001
  bcache: avoid unnecessary soft lockup in kworker update_writeback_rate()
  nbd: use pr_err to output error message
  nbd: fix possible overflow on 'first_minor' in nbd_dev_add()
  nbd: fix io hung while disconnecting device
  nbd: don't clear 'NBD_CMD_INFLIGHT' flag if request is not completed
  nbd: fix race between nbd_alloc_config() and module removal
  nbd: call genl_unregister_family() first in nbd_cleanup()
  md: bcache: check the return value of kzalloc() in detached_dev_do_request()
  bcache: memset on stack variables in bch_btree_check() and bch_sectors_dirty_init()
  block, loop: support partitions without scanning
  bcache: avoid journal no-space deadlock by reserving 1 journal bucket
  bcache: remove incremental dirty sector counting for bch_sectors_dirty_init()
  bcache: improve multithreaded bch_sectors_dirty_init()
  bcache: improve multithreaded bch_btree_check()
  md: fix double free of io_acct_set bioset
  md: Don't set mddev private to NULL in raid0 pers->free
  ...
Author: Linus Torvalds
Date:   2022-06-03 10:25:56 -07:00

 30 files changed, 394 insertions(+), 390 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
@@ -385,6 +385,8 @@ int disk_scan_partitions(struct gendisk *disk, fmode_t mode)
 
 	if (disk->flags & (GENHD_FL_NO_PART | GENHD_FL_HIDDEN))
 		return -EINVAL;
+	if (test_bit(GD_SUPPRESS_PART_SCAN, &disk->state))
+		return -EINVAL;
 	if (disk->open_partitions)
 		return -EBUSY;

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
@@ -1102,7 +1102,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
 		lo->lo_flags |= LO_FLAGS_PARTSCAN;
 	partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
 	if (partscan)
-		lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
+		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 
 	loop_global_unlock(lo, is_loop);
 	if (partscan)
@@ -1198,7 +1198,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
 	 */
 	lo->lo_flags = 0;
 	if (!part_shift)
-		lo->lo_disk->flags |= GENHD_FL_NO_PART;
+		set_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 	mutex_lock(&lo->lo_mutex);
 	lo->lo_state = Lo_unbound;
 	mutex_unlock(&lo->lo_mutex);
@@ -1308,7 +1308,7 @@ out_unfreeze:
 	if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
 	     !(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
-		lo->lo_disk->flags &= ~GENHD_FL_NO_PART;
+		clear_bit(GD_SUPPRESS_PART_SCAN, &lo->lo_disk->state);
 		partscan = true;
 	}
 out_unlock:
@@ -2011,7 +2011,7 @@ static int loop_add(int i)
 	 * userspace tools. Parameters like this in general should be avoided.
 	 */
 	if (!part_shift)
-		disk->flags |= GENHD_FL_NO_PART;
+		set_bit(GD_SUPPRESS_PART_SCAN, &disk->state);
 	mutex_init(&lo->lo_mutex);
 	lo->lo_number = i;
 	spin_lock_init(&lo->lo_lock);
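Together with the genhd.c hunk above, this separates "this disk may not have partitions" (GENHD_FL_NO_PART) from "do not scan this disk automatically" (the new GD_SUPPRESS_PART_SCAN state bit): a loop device configured without LO_FLAGS_PARTSCAN keeps partition support, it just is not scanned. For illustration, a minimal userspace sketch of configuring a loop device that way (hypothetical device and image names, error handling trimmed; LOOP_CONFIGURE needs Linux 5.8+ UAPI headers):

/*
 * Sketch only: attach disk.img to /dev/loop0 without requesting an
 * automatic partition scan. After this commit, partitions can still
 * be created on the device later (e.g. via BLKPG).
 */
#include <fcntl.h>
#include <linux/loop.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

int main(void)
{
	int loop_fd = open("/dev/loop0", O_RDWR);
	int backing_fd = open("disk.img", O_RDWR);
	struct loop_config cfg;

	if (loop_fd < 0 || backing_fd < 0) {
		perror("open");
		return 1;
	}
	memset(&cfg, 0, sizeof(cfg));
	cfg.fd = backing_fd;
	/* cfg.info.lo_flags deliberately lacks LO_FLAGS_PARTSCAN:
	 * no automatic scan, but partitions are not forbidden. */
	if (ioctl(loop_fd, LOOP_CONFIGURE, &cfg) < 0) {
		perror("LOOP_CONFIGURE");
		return 1;
	}
	puts("loop0 configured without partition scanning");
	return 0;
}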

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
@@ -403,13 +403,14 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	if (!mutex_trylock(&cmd->lock))
 		return BLK_EH_RESET_TIMER;
 
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		mutex_unlock(&cmd->lock);
 		return BLK_EH_DONE;
 	}
 
 	if (!refcount_inc_not_zero(&nbd->config_refs)) {
 		cmd->status = BLK_STS_TIMEOUT;
+		__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 		mutex_unlock(&cmd->lock);
 		goto done;
 	}
@@ -478,6 +479,7 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
 	dev_err_ratelimited(nbd_to_dev(nbd), "Connection timed out\n");
 	set_bit(NBD_RT_TIMEDOUT, &config->runtime_flags);
 	cmd->status = BLK_STS_IOERR;
+	__clear_bit(NBD_CMD_INFLIGHT, &cmd->flags);
 	mutex_unlock(&cmd->lock);
 	sock_shutdown(nbd);
 	nbd_config_put(nbd);
@@ -745,7 +747,7 @@ static struct nbd_cmd *nbd_handle_reply(struct nbd_device *nbd, int index,
 	cmd = blk_mq_rq_to_pdu(req);
 	mutex_lock(&cmd->lock);
-	if (!__test_and_clear_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
+	if (!test_bit(NBD_CMD_INFLIGHT, &cmd->flags)) {
 		dev_err(disk_to_dev(nbd->disk), "Suspicious reply %d (status %u flags %lu)",
 			tag, cmd->status, cmd->flags);
 		ret = -ENOENT;
@@ -854,8 +856,16 @@ static void recv_work(struct work_struct *work)
 		}
 
 		rq = blk_mq_rq_from_pdu(cmd);
-		if (likely(!blk_should_fake_timeout(rq->q)))
-			blk_mq_complete_request(rq);
+		if (likely(!blk_should_fake_timeout(rq->q))) {
+			bool complete;
+
+			mutex_lock(&cmd->lock);
+			complete = __test_and_clear_bit(NBD_CMD_INFLIGHT,
+							&cmd->flags);
+			mutex_unlock(&cmd->lock);
+			if (complete)
+				blk_mq_complete_request(rq);
+		}
 		percpu_ref_put(&q->q_usage_counter);
 	}
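These hunks turn NBD_CMD_INFLIGHT into an ownership token: both the reply path (recv_work()) and the timeout handler test and clear it under cmd->lock, and only the side that actually clears the bit completes the request, so blk_mq_complete_request() cannot run twice for one command. A rough userspace model of that handshake (invented names, a POSIX mutex standing in for cmd->lock):

/*
 * Two paths race to "complete" one command; the flag, tested and
 * cleared under a mutex, guarantees exactly one of them wins.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cmd {
	pthread_mutex_t lock;
	bool inflight;
};

static void *try_complete(void *arg)
{
	struct cmd *cmd = arg;
	bool won;

	pthread_mutex_lock(&cmd->lock);
	won = cmd->inflight;		/* __test_and_clear_bit() */
	cmd->inflight = false;
	pthread_mutex_unlock(&cmd->lock);

	if (won)
		printf("thread %lu completes the request\n",
		       (unsigned long)pthread_self());
	return NULL;
}

int main(void)
{
	struct cmd cmd = { PTHREAD_MUTEX_INITIALIZER, true };
	pthread_t a, b;

	pthread_create(&a, NULL, try_complete, &cmd);	/* "recv_work" */
	pthread_create(&b, NULL, try_complete, &cmd);	/* "timeout" */
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;	/* exactly one "completes" line is printed */
}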
@@ -1419,7 +1429,7 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
 static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
 				 struct block_device *bdev)
 {
-	sock_shutdown(nbd);
+	nbd_clear_sock(nbd);
 	__invalidate_device(bdev, true);
 	nbd_bdev_reset(nbd);
 	if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
@@ -1518,15 +1528,20 @@ static struct nbd_config *nbd_alloc_config(void)
 {
 	struct nbd_config *config;
 
+	if (!try_module_get(THIS_MODULE))
+		return ERR_PTR(-ENODEV);
+
 	config = kzalloc(sizeof(struct nbd_config), GFP_NOFS);
-	if (!config)
-		return NULL;
+	if (!config) {
+		module_put(THIS_MODULE);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	atomic_set(&config->recv_threads, 0);
 	init_waitqueue_head(&config->recv_wq);
 	init_waitqueue_head(&config->conn_wait);
 	config->blksize_bits = NBD_DEF_BLKSIZE_BITS;
 	atomic_set(&config->live_connections, 0);
-	try_module_get(THIS_MODULE);
+
 	return config;
 }
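nbd_alloc_config() now pins the module before allocating and reports failure through the kernel's ERR_PTR() convention instead of a bare NULL, letting callers distinguish -ENODEV (module going away) from -ENOMEM. A stand-alone sketch of that idiom, with the <linux/err.h> macros re-implemented locally so it compiles in userspace (alloc_config() is a stand-in, not the real function):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO	4095
#define ERR_PTR(err)	((void *)(intptr_t)(err))
#define PTR_ERR(ptr)	((long)(intptr_t)(ptr))
#define IS_ERR(ptr)	((uintptr_t)(ptr) >= (uintptr_t)-MAX_ERRNO)

struct config { int dummy; };

static struct config *alloc_config(int module_alive)
{
	struct config *c;

	if (!module_alive)		/* try_module_get() failed */
		return ERR_PTR(-ENODEV);
	c = malloc(sizeof(*c));
	if (!c)				/* kzalloc() failed */
		return ERR_PTR(-ENOMEM);	/* (would module_put() first) */
	return c;
}

int main(void)
{
	struct config *c = alloc_config(0);

	if (IS_ERR(c)) {
		printf("alloc_config failed: %ld\n", PTR_ERR(c));
		return 1;
	}
	free(c);
	return 0;
}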
@@ -1553,12 +1568,13 @@ static int nbd_open(struct block_device *bdev, fmode_t mode)
 			mutex_unlock(&nbd->config_lock);
 			goto out;
 		}
-		config = nbd->config = nbd_alloc_config();
-		if (!config) {
-			ret = -ENOMEM;
+		config = nbd_alloc_config();
+		if (IS_ERR(config)) {
+			ret = PTR_ERR(config);
 			mutex_unlock(&nbd->config_lock);
 			goto out;
 		}
+		nbd->config = config;
 		refcount_set(&nbd->config_refs, 1);
 		refcount_inc(&nbd->refs);
 		mutex_unlock(&nbd->config_lock);
@@ -1798,17 +1814,7 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs)
 	refcount_set(&nbd->refs, 0);
 	INIT_LIST_HEAD(&nbd->list);
 	disk->major = NBD_MAJOR;
-
-	/* Too big first_minor can cause duplicate creation of
-	 * sysfs files/links, since index << part_shift might overflow, or
-	 * MKDEV() expect that the max bits of first_minor is 20.
-	 */
 	disk->first_minor = index << part_shift;
-	if (disk->first_minor < index || disk->first_minor > MINORMASK) {
-		err = -EINVAL;
-		goto out_free_work;
-	}
-
 	disk->minors = 1 << part_shift;
 	disk->fops = &nbd_fops;
 	disk->private_data = nbd;
@@ -1913,14 +1919,25 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info)
 	if (!netlink_capable(skb, CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (info->attrs[NBD_ATTR_INDEX])
+	if (info->attrs[NBD_ATTR_INDEX]) {
 		index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
+
+		/*
+		 * Too big first_minor can cause duplicate creation of
+		 * sysfs files/links, since index << part_shift might overflow, or
+		 * MKDEV() expect that the max bits of first_minor is 20.
+		 */
+		if (index < 0 || index > MINORMASK >> part_shift) {
+			pr_err("illegal input index %d\n", index);
+			return -EINVAL;
+		}
+	}
 	if (!info->attrs[NBD_ATTR_SOCKETS]) {
-		printk(KERN_ERR "nbd: must specify at least one socket\n");
+		pr_err("must specify at least one socket\n");
 		return -EINVAL;
 	}
 	if (!info->attrs[NBD_ATTR_SIZE_BYTES]) {
-		printk(KERN_ERR "nbd: must specify a size in bytes for the device\n");
+		pr_err("must specify a size in bytes for the device\n");
 		return -EINVAL;
 	}
 again:
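The bound check moved from nbd_dev_add() into nbd_genl_connect() rejects an oversized index before it is ever shifted: MKDEV() packs the minor number into 20 bits (MINORMASK), so comparing index against MINORMASK >> part_shift avoids the overflow rather than detecting it afterwards. A small stand-alone demonstration of the arithmetic (values chosen for illustration; the macros mirror <linux/kdev_t.h>):

#include <stdio.h>

#define MINORBITS	20
#define MINORMASK	((1U << MINORBITS) - 1)

static int index_ok(int index, int part_shift)
{
	/* Same test as nbd_genl_connect(): reject before shifting. */
	return index >= 0 && index <= (int)(MINORMASK >> part_shift);
}

int main(void)
{
	int part_shift = 5;			/* e.g. max_part = 31 */

	printf("index 100   -> %s\n",
	       index_ok(100, part_shift) ? "ok" : "rejected");
	printf("index 1<<16 -> %s\n",
	       index_ok(1 << 16, part_shift) ? "ok" : "rejected");
	/* (1 << 16) << 5 = 1 << 21 > MINORMASK: would collide in MKDEV() */
	return 0;
}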
@@ -1956,7 +1973,7 @@ again:
 			nbd_put(nbd);
 			if (index == -1)
 				goto again;
-			printk(KERN_ERR "nbd: nbd%d already in use\n", index);
+			pr_err("nbd%d already in use\n", index);
 			return -EBUSY;
 		}
 		if (WARN_ON(nbd->config)) {
@@ -1964,13 +1981,14 @@ again:
 		nbd_put(nbd);
 		return -EINVAL;
 	}
-	config = nbd->config = nbd_alloc_config();
-	if (!nbd->config) {
+	config = nbd_alloc_config();
+	if (IS_ERR(config)) {
 		mutex_unlock(&nbd->config_lock);
 		nbd_put(nbd);
-		printk(KERN_ERR "nbd: couldn't allocate config\n");
-		return -ENOMEM;
+		pr_err("couldn't allocate config\n");
+		return PTR_ERR(config);
 	}
+	nbd->config = config;
 	refcount_set(&nbd->config_refs, 1);
 	set_bit(NBD_RT_BOUND, &config->runtime_flags);
@@ -2023,7 +2041,7 @@ again:
 			struct nlattr *socks[NBD_SOCK_MAX+1];
 
 			if (nla_type(attr) != NBD_SOCK_ITEM) {
-				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+				pr_err("socks must be embedded in a SOCK_ITEM attr\n");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -2032,7 +2050,7 @@ again:
 							  nbd_sock_policy,
 							  info->extack);
 			if (ret != 0) {
-				printk(KERN_ERR "nbd: error processing sock list\n");
+				pr_err("error processing sock list\n");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -2104,7 +2122,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 		return -EPERM;
 
 	if (!info->attrs[NBD_ATTR_INDEX]) {
-		printk(KERN_ERR "nbd: must specify an index to disconnect\n");
+		pr_err("must specify an index to disconnect\n");
 		return -EINVAL;
 	}
 	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2112,14 +2130,12 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 	nbd = idr_find(&nbd_index_idr, index);
 	if (!nbd) {
 		mutex_unlock(&nbd_index_mutex);
-		printk(KERN_ERR "nbd: couldn't find device at index %d\n",
-		       index);
+		pr_err("couldn't find device at index %d\n", index);
 		return -EINVAL;
 	}
 	if (!refcount_inc_not_zero(&nbd->refs)) {
 		mutex_unlock(&nbd_index_mutex);
-		printk(KERN_ERR "nbd: device at index %d is going down\n",
-		       index);
+		pr_err("device at index %d is going down\n", index);
 		return -EINVAL;
 	}
 	mutex_unlock(&nbd_index_mutex);
@@ -2144,7 +2160,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 		return -EPERM;
 
 	if (!info->attrs[NBD_ATTR_INDEX]) {
-		printk(KERN_ERR "nbd: must specify a device to reconfigure\n");
+		pr_err("must specify a device to reconfigure\n");
 		return -EINVAL;
 	}
 	index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]);
@@ -2152,8 +2168,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	nbd = idr_find(&nbd_index_idr, index);
 	if (!nbd) {
 		mutex_unlock(&nbd_index_mutex);
-		printk(KERN_ERR "nbd: couldn't find a device at index %d\n",
-		       index);
+		pr_err("couldn't find a device at index %d\n", index);
 		return -EINVAL;
 	}
 	if (nbd->backend) {
@@ -2174,8 +2189,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 	}
 	if (!refcount_inc_not_zero(&nbd->refs)) {
 		mutex_unlock(&nbd_index_mutex);
-		printk(KERN_ERR "nbd: device at index %d is going down\n",
-		       index);
+		pr_err("device at index %d is going down\n", index);
 		return -EINVAL;
 	}
 	mutex_unlock(&nbd_index_mutex);
@@ -2239,7 +2253,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 			struct nlattr *socks[NBD_SOCK_MAX+1];
 
 			if (nla_type(attr) != NBD_SOCK_ITEM) {
-				printk(KERN_ERR "nbd: socks must be embedded in a SOCK_ITEM attr\n");
+				pr_err("socks must be embedded in a SOCK_ITEM attr\n");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -2248,7 +2262,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
 							  nbd_sock_policy,
 							  info->extack);
 			if (ret != 0) {
-				printk(KERN_ERR "nbd: error processing sock list\n");
+				pr_err("error processing sock list\n");
 				ret = -EINVAL;
 				goto out;
 			}
@@ -2465,7 +2479,7 @@ static int __init nbd_init(void)
 	BUILD_BUG_ON(sizeof(struct nbd_request) != 28);
 
 	if (max_part < 0) {
-		printk(KERN_ERR "nbd: max_part must be >= 0\n");
+		pr_err("max_part must be >= 0\n");
 		return -EINVAL;
 	}
@@ -2528,6 +2542,12 @@ static void __exit nbd_cleanup(void)
 	struct nbd_device *nbd;
 	LIST_HEAD(del_list);
 
+	/*
+	 * Unregister netlink interface prior to waiting
+	 * for the completion of netlink commands.
+	 */
+	genl_unregister_family(&nbd_genl_family);
+
 	nbd_dbg_close();
 
 	mutex_lock(&nbd_index_mutex);
@@ -2537,8 +2557,11 @@ static void __exit nbd_cleanup(void)
 	while (!list_empty(&del_list)) {
 		nbd = list_first_entry(&del_list, struct nbd_device, list);
 		list_del_init(&nbd->list);
+		if (refcount_read(&nbd->config_refs))
+			pr_err("possibly leaking nbd_config (ref %d)\n",
+					refcount_read(&nbd->config_refs));
 		if (refcount_read(&nbd->refs) != 1)
-			printk(KERN_ERR "nbd: possibly leaking a device\n");
+			pr_err("possibly leaking a device\n");
 		nbd_put(nbd);
 	}
@@ -2546,7 +2569,6 @@ static void __exit nbd_cleanup(void)
 	destroy_workqueue(nbd_del_wq);
 	idr_destroy(&nbd_index_idr);
-	genl_unregister_family(&nbd_genl_family);
 	unregister_blkdev(NBD_MAJOR, "nbd");
 }

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
@@ -77,12 +77,6 @@ enum {
 	NULL_IRQ_TIMER	= 2,
 };
 
-enum {
-	NULL_Q_BIO	= 0,
-	NULL_Q_RQ	= 1,
-	NULL_Q_MQ	= 2,
-};
-
 static bool g_virt_boundary = false;
 module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
 MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");

diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
@@ -60,6 +60,13 @@ struct nullb_zone {
 	unsigned int capacity;
 };
 
+/* Queue modes */
+enum {
+	NULL_Q_BIO	= 0,
+	NULL_Q_RQ	= 1,
+	NULL_Q_MQ	= 2,
+};
+
 struct nullb_device {
 	struct nullb *nullb;
 	struct config_item item;

diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
@@ -398,10 +398,10 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
 	 */
 	if (append) {
 		sector = zone->wp;
-		if (cmd->bio)
-			cmd->bio->bi_iter.bi_sector = sector;
-		else
+		if (dev->queue_mode == NULL_Q_MQ)
 			cmd->rq->__sector = sector;
+		else
+			cmd->bio->bi_iter.bi_sector = sector;
 	} else if (sector != zone->wp) {
 		ret = BLK_STS_IOERR;
 		goto unlock;
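cmd->rq and cmd->bio share storage in struct nullb_cmd, so the old `if (cmd->bio)` test could treat a blk-mq request as a bio; dispatching on dev->queue_mode (the enum just moved into null_blk.h) selects the member that is actually live. The surrounding logic is zone append: the device, not the submitter, chooses the write location and must report it back. A toy model of that write-pointer bookkeeping (invented types, not the null_blk internals):

#include <stdio.h>

struct zone {
	unsigned long long start;
	unsigned long long wp;		/* write pointer */
	unsigned long long capacity;	/* in sectors */
};

/* Returns the sector actually written, or -1 if the zone is full. */
static long long zone_append(struct zone *z, unsigned int nr_sectors)
{
	unsigned long long sector = z->wp;

	if (sector + nr_sectors > z->start + z->capacity)
		return -1;
	z->wp += nr_sectors;		/* device advances the pointer */
	return (long long)sector;	/* submitter learns the location */
}

int main(void)
{
	struct zone z = { .start = 0, .wp = 0, .capacity = 128 };
	long long s1 = zone_append(&z, 8);
	long long s2 = zone_append(&z, 8);

	printf("first append at %lld, second at %lld\n", s1, s2);
	return 0;
}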

diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
@@ -395,6 +395,13 @@ struct cached_dev {
 	atomic_t		io_errors;
 	unsigned int		error_limit;
 	unsigned int		offline_seconds;
+
+	/*
+	 * Retry to update writeback_rate if contention happens for
+	 * down_read(dc->writeback_lock) in update_writeback_rate()
+	 */
+#define BCH_WBRATE_UPDATE_MAX_SKIPS	15
+	unsigned int		rate_update_retry;
 };
 
 enum alloc_reserve {

diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
@@ -2006,8 +2006,7 @@ int bch_btree_check(struct cache_set *c)
 	int i;
 	struct bkey *k = NULL;
 	struct btree_iter iter;
-	struct btree_check_state *check_state;
-	char name[32];
+	struct btree_check_state check_state;
 
 	/* check and mark root node keys */
 	for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
@@ -2018,63 +2017,59 @@ int bch_btree_check(struct cache_set *c)
 	if (c->root->level == 0)
 		return 0;
 
-	check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
-	if (!check_state)
-		return -ENOMEM;
-
-	check_state->c = c;
-	check_state->total_threads = bch_btree_chkthread_nr();
-	check_state->key_idx = 0;
-	spin_lock_init(&check_state->idx_lock);
-	atomic_set(&check_state->started, 0);
-	atomic_set(&check_state->enough, 0);
-	init_waitqueue_head(&check_state->wait);
+	memset(&check_state, 0, sizeof(struct btree_check_state));
+	check_state.c = c;
+	check_state.total_threads = bch_btree_chkthread_nr();
+	check_state.key_idx = 0;
+	spin_lock_init(&check_state.idx_lock);
+	atomic_set(&check_state.started, 0);
+	atomic_set(&check_state.enough, 0);
+	init_waitqueue_head(&check_state.wait);
 
 	rw_lock(0, c->root, c->root->level);
 	/*
 	 * Run multiple threads to check btree nodes in parallel,
-	 * if check_state->enough is non-zero, it means current
+	 * if check_state.enough is non-zero, it means current
 	 * running check threads are enough, unncessary to create
 	 * more.
 	 */
-	for (i = 0; i < check_state->total_threads; i++) {
-		/* fetch latest check_state->enough earlier */
+	for (i = 0; i < check_state.total_threads; i++) {
+		/* fetch latest check_state.enough earlier */
 		smp_mb__before_atomic();
-		if (atomic_read(&check_state->enough))
+		if (atomic_read(&check_state.enough))
 			break;
 
-		check_state->infos[i].result = 0;
-		check_state->infos[i].state = check_state;
-		snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
-		atomic_inc(&check_state->started);
+		check_state.infos[i].result = 0;
+		check_state.infos[i].state = &check_state;
 
-		check_state->infos[i].thread =
+		check_state.infos[i].thread =
 			kthread_run(bch_btree_check_thread,
-				    &check_state->infos[i],
-				    name);
-		if (IS_ERR(check_state->infos[i].thread)) {
+				    &check_state.infos[i],
+				    "bch_btrchk[%d]", i);
+		if (IS_ERR(check_state.infos[i].thread)) {
 			pr_err("fails to run thread bch_btrchk[%d]\n", i);
 			for (--i; i >= 0; i--)
-				kthread_stop(check_state->infos[i].thread);
+				kthread_stop(check_state.infos[i].thread);
 			ret = -ENOMEM;
 			goto out;
 		}
+		atomic_inc(&check_state.started);
 	}
 
 	/*
 	 * Must wait for all threads to stop.
 	 */
-	wait_event_interruptible(check_state->wait,
-				 atomic_read(&check_state->started) == 0);
+	wait_event(check_state.wait, atomic_read(&check_state.started) == 0);
 
-	for (i = 0; i < check_state->total_threads; i++) {
-		if (check_state->infos[i].result) {
-			ret = check_state->infos[i].result;
+	for (i = 0; i < check_state.total_threads; i++) {
+		if (check_state.infos[i].result) {
+			ret = check_state.infos[i].result;
 			goto out;
 		}
 	}
 
 out:
-	kfree(check_state);
 	rw_unlock(0, c->root);
 	return ret;
 }
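check_state moves from kzalloc() to the caller's stack, which removes an allocation-failure path but makes two companion changes essential: the final wait becomes an uninterruptible wait_event(), since the function must not return (and free the frame) while checker threads still hold pointers into it, and BCH_BTR_CHKTHREAD_MAX shrinks from 64 to 12 in btree.h below so the embedded infos[] array stays stack-sized. A compact pthread rendering of the pattern (illustrative names only, assuming nothing beyond POSIX threads):

#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

struct check_state {
	pthread_mutex_t idx_lock;
	int key_idx;			/* next unit of work to claim */
	int results[NR_WORKERS];
};

static void *worker(void *arg)
{
	struct check_state *state = arg;
	int idx;

	pthread_mutex_lock(&state->idx_lock);
	idx = state->key_idx++;		/* claim a work item */
	pthread_mutex_unlock(&state->idx_lock);
	state->results[idx] = idx * idx;	/* "check" it */
	return NULL;
}

int check_all(void)
{
	struct check_state state = { .idx_lock = PTHREAD_MUTEX_INITIALIZER };
	pthread_t tids[NR_WORKERS];
	int i;

	for (i = 0; i < NR_WORKERS; i++)
		pthread_create(&tids[i], NULL, worker, &state);
	/* Must wait for all threads: state lives on this stack frame. */
	for (i = 0; i < NR_WORKERS; i++)
		pthread_join(tids[i], NULL);
	for (i = 0; i < NR_WORKERS; i++)
		printf("result[%d] = %d\n", i, state.results[i]);
	return 0;
}

int main(void) { return check_all(); }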

diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
@@ -226,7 +226,7 @@ struct btree_check_info {
 	int				result;
 };
 
-#define BCH_BTR_CHKTHREAD_MAX	64
+#define BCH_BTR_CHKTHREAD_MAX	12
 struct btree_check_state {
 	struct cache_set		*c;
 	int				total_threads;

diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
@@ -405,6 +405,11 @@ err:
 	return ret;
 }
 
+void bch_journal_space_reserve(struct journal *j)
+{
+	j->do_reserve = true;
+}
+
 /* Journalling */
 
 static void btree_flush_write(struct cache_set *c)
@@ -621,12 +626,30 @@ static void do_journal_discard(struct cache *ca)
 	}
 }
 
+static unsigned int free_journal_buckets(struct cache_set *c)
+{
+	struct journal *j = &c->journal;
+	struct cache *ca = c->cache;
+	struct journal_device *ja = &c->cache->journal;
+	unsigned int n;
+
+	/* In case njournal_buckets is not power of 2 */
+	if (ja->cur_idx >= ja->discard_idx)
+		n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx;
+	else
+		n = ja->discard_idx - ja->cur_idx;
+
+	if (n > (1 + j->do_reserve))
+		return n - (1 + j->do_reserve);
+
+	return 0;
+}
+
 static void journal_reclaim(struct cache_set *c)
 {
 	struct bkey *k = &c->journal.key;
 	struct cache *ca = c->cache;
 	uint64_t last_seq;
-	unsigned int next;
 	struct journal_device *ja = &ca->journal;
 	atomic_t p __maybe_unused;
@@ -649,12 +672,10 @@ static void journal_reclaim(struct cache_set *c)
 	if (c->journal.blocks_free)
 		goto out;
 
-	next = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
-	/* No space available on this device */
-	if (next == ja->discard_idx)
+	if (!free_journal_buckets(c))
 		goto out;
 
-	ja->cur_idx = next;
+	ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets;
 	k->ptr[0] = MAKE_PTR(0,
 			bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
 			ca->sb.nr_this_dev);
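free_journal_buckets() counts the slots between cur_idx and discard_idx on the journal ring, adding njournal_buckets on wraparound so non-power-of-2 ring sizes work, then holds back one bucket always and a second once do_reserve is set; journal_reclaim() refuses to advance when nothing usable is left, which keeps a reserved bucket available to escape the journal no-space deadlock. The arithmetic, lifted into a stand-alone program for experimentation (the struct is invented for this sketch; field names mirror the bcache code):

#include <stdbool.h>
#include <stdio.h>

struct ring {
	unsigned int nbuckets;		/* ca->sb.njournal_buckets */
	unsigned int cur_idx;		/* next bucket to write */
	unsigned int discard_idx;	/* first not-yet-reclaimed bucket */
	bool do_reserve;		/* hold one extra bucket back */
};

static unsigned int free_journal_buckets(const struct ring *r)
{
	unsigned int n;

	/* In case nbuckets is not a power of 2 */
	if (r->cur_idx >= r->discard_idx)
		n = r->nbuckets + r->discard_idx - r->cur_idx;
	else
		n = r->discard_idx - r->cur_idx;

	if (n > (1 + r->do_reserve))
		return n - (1 + r->do_reserve);
	return 0;
}

int main(void)
{
	struct ring r = { .nbuckets = 6, .cur_idx = 4, .discard_idx = 2 };

	/* wrapped: 6 + 2 - 4 = 4 raw slots */
	r.do_reserve = false;
	printf("without reserve: %u usable\n", free_journal_buckets(&r));
	r.do_reserve = true;
	printf("with reserve:    %u usable\n", free_journal_buckets(&r));
	return 0;
}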

diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h
@@ -105,6 +105,7 @@ struct journal {
 	spinlock_t		lock;
 	spinlock_t		flush_write_lock;
 	bool			btree_flushing;
+	bool			do_reserve;
 	/* used when waiting because the journal was full */
 	struct closure_waitlist	wait;
 	struct closure		io;
@@ -182,5 +183,6 @@ int bch_journal_replay(struct cache_set *c, struct list_head *list);
 
 void bch_journal_free(struct cache_set *c);
 int bch_journal_alloc(struct cache_set *c);
+void bch_journal_space_reserve(struct journal *j);
 
 #endif /* _BCACHE_JOURNAL_H */

diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
@@ -1105,6 +1105,12 @@ static void detached_dev_do_request(struct bcache_device *d, struct bio *bio,
 	 * which would call closure_get(&dc->disk.cl)
 	 */
 	ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+	if (!ddip) {
+		bio->bi_status = BLK_STS_RESOURCE;
+		bio->bi_end_io(bio);
+		return;
+	}
+
 	ddip->d = d;
 	/* Count on the bcache device */
 	ddip->orig_bdev = orig_bdev;

diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
@@ -2127,6 +2127,7 @@ static int run_cache_set(struct cache_set *c)
 
 	flash_devs_run(c);
 
+	bch_journal_space_reserve(&c->journal);
 	set_bit(CACHE_SET_RUNNING, &c->flags);
 	return 0;
 err:

diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
@@ -235,19 +235,27 @@ static void update_writeback_rate(struct work_struct *work)
 		return;
 	}
 
-	if (atomic_read(&dc->has_dirty) && dc->writeback_percent) {
-		/*
-		 * If the whole cache set is idle, set_at_max_writeback_rate()
-		 * will set writeback rate to a max number. Then it is
-		 * unncessary to update writeback rate for an idle cache set
-		 * in maximum writeback rate number(s).
-		 */
-		if (!set_at_max_writeback_rate(c, dc)) {
-			down_read(&dc->writeback_lock);
+	/*
+	 * If the whole cache set is idle, set_at_max_writeback_rate()
+	 * will set writeback rate to a max number. Then it is
+	 * unncessary to update writeback rate for an idle cache set
+	 * in maximum writeback rate number(s).
+	 */
+	if (atomic_read(&dc->has_dirty) && dc->writeback_percent &&
+	    !set_at_max_writeback_rate(c, dc)) {
+		do {
+			if (!down_read_trylock((&dc->writeback_lock))) {
+				dc->rate_update_retry++;
+				if (dc->rate_update_retry <=
+				    BCH_WBRATE_UPDATE_MAX_SKIPS)
+					break;
+
+				down_read(&dc->writeback_lock);
+				dc->rate_update_retry = 0;
+			}
+
 			__update_writeback_rate(dc);
 			update_gc_after_writeback(c);
 			up_read(&dc->writeback_lock);
-		}
+		} while (0);
 	}
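update_writeback_rate() runs from a kworker, and blocking in down_read(&dc->writeback_lock) there for long stretches is what produced the soft-lockup reports; the rework tries the lock first, skips up to BCH_WBRATE_UPDATE_MAX_SKIPS periodic updates while it stays contended, and only then falls back to a blocking down_read(), so the update is skipped under contention but cannot be deferred forever. A rough userspace model of that bounded-skip pattern (a pthread rwlock standing in for the rw-semaphore):

#include <pthread.h>
#include <stdio.h>

#define MAX_SKIPS 15	/* plays the role of BCH_WBRATE_UPDATE_MAX_SKIPS */

static pthread_rwlock_t writeback_lock = PTHREAD_RWLOCK_INITIALIZER;
static int rate_update_retry;

static void update_rate_once(void)
{
	if (pthread_rwlock_tryrdlock(&writeback_lock) != 0) {
		/* Contended: skip this update, but only so many times. */
		if (++rate_update_retry <= MAX_SKIPS)
			return;
		pthread_rwlock_rdlock(&writeback_lock);	/* now block */
		rate_update_retry = 0;
	}
	puts("writeback rate recomputed");	/* __update_writeback_rate() */
	pthread_rwlock_unlock(&writeback_lock);
}

int main(void)
{
	update_rate_once();	/* uncontended: updates immediately */
	return 0;
}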
@@ -805,13 +813,11 @@ static int bch_writeback_thread(void *arg)
 /* Init */
 
 #define INIT_KEYS_EACH_TIME	500000
-#define INIT_KEYS_SLEEP_MS	100
 
 struct sectors_dirty_init {
 	struct btree_op	op;
 	unsigned int	inode;
 	size_t		count;
-	struct bkey	start;
 };
 
 static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -827,11 +833,8 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
 					     KEY_START(k), KEY_SIZE(k));
 
 	op->count++;
-	if (atomic_read(&b->c->search_inflight) &&
-	    !(op->count % INIT_KEYS_EACH_TIME)) {
-		bkey_copy_key(&op->start, k);
-		return -EAGAIN;
-	}
+	if (!(op->count % INIT_KEYS_EACH_TIME))
+		cond_resched();
 
 	return MAP_CONTINUE;
 }
@@ -846,24 +849,16 @@ static int bch_root_node_dirty_init(struct cache_set *c,
 	bch_btree_op_init(&op.op, -1);
 	op.inode = d->id;
 	op.count = 0;
-	op.start = KEY(op.inode, 0, 0);
-
-	do {
-		ret = bcache_btree(map_keys_recurse,
-				   k,
-				   c->root,
-				   &op.op,
-				   &op.start,
-				   sectors_dirty_init_fn,
-				   0);
-		if (ret == -EAGAIN)
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
-		else if (ret < 0) {
-			pr_warn("sectors dirty init failed, ret=%d!\n", ret);
-			break;
-		}
-	} while (ret == -EAGAIN);
+	ret = bcache_btree(map_keys_recurse,
+			   k,
+			   c->root,
+			   &op.op,
+			   &KEY(op.inode, 0, 0),
+			   sectors_dirty_init_fn,
+			   0);
+	if (ret < 0)
+		pr_warn("sectors dirty init failed, ret=%d!\n", ret);
 
 	return ret;
 }
@@ -907,7 +902,6 @@ static int bch_dirty_init_thread(void *arg)
 			goto out;
 		}
 		skip_nr--;
-		cond_resched();
 	}
 
 	if (p) {
@@ -917,7 +911,6 @@ static int bch_dirty_init_thread(void *arg)
 
 		p = NULL;
 		prev_idx = cur_idx;
-		cond_resched();
 	}
 
 out:
@@ -948,67 +941,56 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 	struct btree_iter iter;
 	struct sectors_dirty_init op;
 	struct cache_set *c = d->c;
-	struct bch_dirty_init_state *state;
-	char name[32];
+	struct bch_dirty_init_state state;
 
 	/* Just count root keys if no leaf node */
+	rw_lock(0, c->root, c->root->level);
 	if (c->root->level == 0) {
 		bch_btree_op_init(&op.op, -1);
 		op.inode = d->id;
 		op.count = 0;
-		op.start = KEY(op.inode, 0, 0);
 
 		for_each_key_filter(&c->root->keys,
 				    k, &iter, bch_ptr_invalid)
 			sectors_dirty_init_fn(&op.op, c->root, k);
+
+		rw_unlock(0, c->root);
 		return;
 	}
 
-	state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
-	if (!state) {
-		pr_warn("sectors dirty init failed: cannot allocate memory\n");
-		return;
-	}
+	memset(&state, 0, sizeof(struct bch_dirty_init_state));
+	state.c = c;
+	state.d = d;
+	state.total_threads = bch_btre_dirty_init_thread_nr();
+	state.key_idx = 0;
+	spin_lock_init(&state.idx_lock);
+	atomic_set(&state.started, 0);
+	atomic_set(&state.enough, 0);
+	init_waitqueue_head(&state.wait);
 
-	state->c = c;
-	state->d = d;
-	state->total_threads = bch_btre_dirty_init_thread_nr();
-	state->key_idx = 0;
-	spin_lock_init(&state->idx_lock);
-	atomic_set(&state->started, 0);
-	atomic_set(&state->enough, 0);
-	init_waitqueue_head(&state->wait);
-
-	for (i = 0; i < state->total_threads; i++) {
-		/* Fetch latest state->enough earlier */
+	for (i = 0; i < state.total_threads; i++) {
+		/* Fetch latest state.enough earlier */
 		smp_mb__before_atomic();
-		if (atomic_read(&state->enough))
+		if (atomic_read(&state.enough))
 			break;
 
-		state->infos[i].state = state;
-		atomic_inc(&state->started);
-		snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
-
-		state->infos[i].thread =
-			kthread_run(bch_dirty_init_thread,
-				    &state->infos[i],
-				    name);
-		if (IS_ERR(state->infos[i].thread)) {
+		state.infos[i].state = &state;
+		state.infos[i].thread =
+			kthread_run(bch_dirty_init_thread, &state.infos[i],
+				    "bch_dirtcnt[%d]", i);
+		if (IS_ERR(state.infos[i].thread)) {
 			pr_err("fails to run thread bch_dirty_init[%d]\n", i);
 			for (--i; i >= 0; i--)
-				kthread_stop(state->infos[i].thread);
+				kthread_stop(state.infos[i].thread);
 			goto out;
 		}
+		atomic_inc(&state.started);
 	}
 
-	/*
-	 * Must wait for all threads to stop.
-	 */
-	wait_event_interruptible(state->wait,
-				 atomic_read(&state->started) == 0);
-
 out:
-	kfree(state);
+	/* Must wait for all threads to stop. */
+	wait_event(state.wait, atomic_read(&state.started) == 0);
+	rw_unlock(0, c->root);
 }
@@ -1032,6 +1014,9 @@ void bch_cached_dev_writeback_init(struct cached_dev *dc)
 	dc->writeback_rate_fp_term_high = 1000;
 	dc->writeback_rate_i_term_inverse = 10000;
 
+	/* For dc->writeback_lock contention in update_writeback_rate() */
+	dc->rate_update_retry = 0;
+
 	WARN_ON(test_and_clear_bit(BCACHE_DEV_WB_RUNNING, &dc->disk.flags));
 	INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
 }

diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
@@ -20,7 +20,7 @@
 #define BCH_WRITEBACK_FRAGMENT_THRESHOLD_MID 57
 #define BCH_WRITEBACK_FRAGMENT_THRESHOLD_HIGH 64
 
-#define BCH_DIRTY_INIT_THRD_MAX	64
+#define BCH_DIRTY_INIT_THRD_MAX	12
 /*
  * 14 (16384ths) is chosen here as something that each backing device
  * should be a reasonable fraction of the share, and not to blow up

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
@@ -3725,7 +3725,7 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
 	if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
 		if (mddev->sync_thread) {
 			set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-			md_reap_sync_thread(mddev);
+			md_reap_sync_thread(mddev, false);
 		}
 	} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
 		return -EBUSY;

diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
@@ -206,7 +206,6 @@ static void linear_free(struct mddev *mddev, void *priv)
 
 static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 {
-	char b[BDEVNAME_SIZE];
 	struct dev_info *tmp_dev;
 	sector_t start_sector, end_sector, data_offset;
 	sector_t bio_sector = bio->bi_iter.bi_sector;
@@ -256,10 +255,10 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio)
 	return true;
 
 out_of_bounds:
-	pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
+	pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
 	       mdname(mddev),
 	       (unsigned long long)bio->bi_iter.bi_sector,
-	       bdevname(tmp_dev->rdev->bdev, b),
+	       tmp_dev->rdev->bdev,
 	       (unsigned long long)tmp_dev->rdev->sectors,
 	       (unsigned long long)start_sector);
 	bio_io_error(bio);

diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c
@@ -87,10 +87,9 @@ static void multipath_end_request(struct bio *bio)
 		/*
 		 * oops, IO error:
 		 */
-		char b[BDEVNAME_SIZE];
 		md_error (mp_bh->mddev, rdev);
-		pr_info("multipath: %s: rescheduling sector %llu\n",
-			bdevname(rdev->bdev,b),
+		pr_info("multipath: %pg: rescheduling sector %llu\n",
+			rdev->bdev,
 			(unsigned long long)bio->bi_iter.bi_sector);
 		multipath_reschedule_retry(mp_bh);
 	} else
@@ -154,7 +153,6 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
 static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
 {
 	struct mpconf *conf = mddev->private;
-	char b[BDEVNAME_SIZE];
 
 	if (conf->raid_disks - mddev->degraded <= 1) {
 		/*
@@ -177,9 +175,9 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
 	}
 	set_bit(Faulty, &rdev->flags);
 	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
-	pr_err("multipath: IO failure on %s, disabling IO path.\n"
+	pr_err("multipath: IO failure on %pg, disabling IO path.\n"
 	       "multipath: Operation continuing on %d IO paths.\n",
-	       bdevname(rdev->bdev, b),
+	       rdev->bdev,
 	       conf->raid_disks - mddev->degraded);
 }
 
@@ -197,12 +195,11 @@ static void print_multipath_conf (struct mpconf *conf)
 		 conf->raid_disks);
 
 	for (i = 0; i < conf->raid_disks; i++) {
-		char b[BDEVNAME_SIZE];
 		tmp = conf->multipaths + i;
 		if (tmp->rdev)
-			pr_debug(" disk%d, o:%d, dev:%s\n",
+			pr_debug(" disk%d, o:%d, dev:%pg\n",
 				 i,!test_bit(Faulty, &tmp->rdev->flags),
-				 bdevname(tmp->rdev->bdev,b));
+				 tmp->rdev->bdev);
 	}
 }

(one file's diff suppressed because it is too large; from the surrounding changes this is the drivers/md/md.c portion, which carries the md_reap_sync_thread() signature change and the bdevname() removals)

diff --git a/drivers/md/md.h b/drivers/md/md.h
@@ -719,7 +719,7 @@ extern struct md_thread *md_register_thread(
 extern void md_unregister_thread(struct md_thread **threadp);
 extern void md_wakeup_thread(struct md_thread *thread);
 extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev);
+extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
 extern int mddev_init_writes_pending(struct mddev *mddev);
 extern bool md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);

(some files were not shown because too many files changed in this diff)