mirror of
https://github.com/Dasharo/linux.git
synced 2026-03-06 15:25:10 -08:00
Merge tag 'for-5.7/drivers-2020-03-29' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: - floppy driver cleanup series from Willy - NVMe updates and fixes (Various) - null_blk trace improvements (Chaitanya) - bcache fixes (Coly) - md fixes (via Song) - loop block size change optimizations (Martijn) - scnprintf() use (Takashi) * tag 'for-5.7/drivers-2020-03-29' of git://git.kernel.dk/linux-block: (81 commits) null_blk: add trace in null_blk_zoned.c null_blk: add tracepoint helpers for zoned mode block: add a zone condition debug helper nvme: cleanup namespace identifier reporting in nvme_init_ns_head nvme: rename __nvme_find_ns_head to nvme_find_ns_head nvme: refactor nvme_identify_ns_descs error handling nvme-tcp: Add warning on state change failure at nvme_tcp_setup_ctrl nvme-rdma: Add warning on state change failure at nvme_rdma_setup_ctrl nvme: Fix controller creation races with teardown flow nvme: Make nvme_uninit_ctrl symmetric to nvme_init_ctrl nvme: Fix ctrl use-after-free during sysfs deletion nvme-pci: Re-order nvme_pci_free_ctrl nvme: Remove unused return code from nvme_delete_ctrl_sync nvme: Use nvme_state_terminal helper nvme: release ida resources nvme: Add compat_ioctl handler for NVME_IOCTL_SUBMIT_IO nvmet-tcp: optimize tcp stack TX when data digest is used nvme-fabrics: Use scnprintf() for avoiding potential buffer overflow nvme-multipath: do not reset on unknown status nvmet-rdma: allocate RW ctxs according to mdts ...
This commit is contained in:
@@ -8,16 +8,18 @@
|
||||
*/
|
||||
#ifndef __ASM_ARM_FLOPPY_H
|
||||
#define __ASM_ARM_FLOPPY_H
|
||||
#if 0
|
||||
#include <mach/floppy.h>
|
||||
#endif
|
||||
|
||||
#define fd_outb(val,port) \
|
||||
do { \
|
||||
if ((port) == (u32)FD_DOR) \
|
||||
fd_setdor((val)); \
|
||||
else \
|
||||
outb((val),(port)); \
|
||||
#define fd_outb(val,port) \
|
||||
do { \
|
||||
int new_val = (val); \
|
||||
if (((port) & 7) == FD_DOR) { \
|
||||
if (new_val & 0xf0) \
|
||||
new_val = (new_val & 0x0c) | \
|
||||
floppy_selects[new_val & 3]; \
|
||||
else \
|
||||
new_val &= 0x0c; \
|
||||
} \
|
||||
outb(new_val, (port)); \
|
||||
} while(0)
|
||||
|
||||
#define fd_inb(port) inb((port))
|
||||
@@ -53,69 +55,7 @@ static inline int fd_dma_setup(void *data, unsigned int length,
|
||||
* to a non-zero track, and then restoring it to track 0. If an error occurs,
|
||||
* then there is no floppy drive present. [to be put back in again]
|
||||
*/
|
||||
static unsigned char floppy_selects[2][4] =
|
||||
{
|
||||
{ 0x10, 0x21, 0x23, 0x33 },
|
||||
{ 0x10, 0x21, 0x23, 0x33 }
|
||||
};
|
||||
|
||||
#define fd_setdor(dor) \
|
||||
do { \
|
||||
int new_dor = (dor); \
|
||||
if (new_dor & 0xf0) \
|
||||
new_dor = (new_dor & 0x0c) | floppy_selects[fdc][new_dor & 3]; \
|
||||
else \
|
||||
new_dor &= 0x0c; \
|
||||
outb(new_dor, FD_DOR); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* Someday, we'll automatically detect which drives are present...
|
||||
*/
|
||||
static inline void fd_scandrives (void)
|
||||
{
|
||||
#if 0
|
||||
int floppy, drive_count;
|
||||
|
||||
fd_disable_irq();
|
||||
raw_cmd = &default_raw_cmd;
|
||||
raw_cmd->flags = FD_RAW_SPIN | FD_RAW_NEED_SEEK;
|
||||
raw_cmd->track = 0;
|
||||
raw_cmd->rate = ?;
|
||||
drive_count = 0;
|
||||
for (floppy = 0; floppy < 4; floppy ++) {
|
||||
current_drive = drive_count;
|
||||
/*
|
||||
* Turn on floppy motor
|
||||
*/
|
||||
if (start_motor(redo_fd_request))
|
||||
continue;
|
||||
/*
|
||||
* Set up FDC
|
||||
*/
|
||||
fdc_specify();
|
||||
/*
|
||||
* Tell FDC to recalibrate
|
||||
*/
|
||||
output_byte(FD_RECALIBRATE);
|
||||
LAST_OUT(UNIT(floppy));
|
||||
/* wait for command to complete */
|
||||
if (!successful) {
|
||||
int i;
|
||||
for (i = drive_count; i < 3; i--)
|
||||
floppy_selects[fdc][i] = floppy_selects[fdc][i + 1];
|
||||
floppy_selects[fdc][3] = 0;
|
||||
floppy -= 1;
|
||||
} else
|
||||
drive_count++;
|
||||
}
|
||||
#else
|
||||
floppy_selects[0][0] = 0x10;
|
||||
floppy_selects[0][1] = 0x21;
|
||||
floppy_selects[0][2] = 0x23;
|
||||
floppy_selects[0][3] = 0x33;
|
||||
#endif
|
||||
}
|
||||
static unsigned char floppy_selects[4] = { 0x10, 0x21, 0x23, 0x33 };
|
||||
|
||||
#define FDC1 (0x3f0)
|
||||
|
||||
@@ -135,9 +75,7 @@ static inline void fd_scandrives (void)
|
||||
*/
|
||||
static void driveswap(int *ints, int dummy, int dummy2)
|
||||
{
|
||||
floppy_selects[0][0] ^= floppy_selects[0][1];
|
||||
floppy_selects[0][1] ^= floppy_selects[0][0];
|
||||
floppy_selects[0][0] ^= floppy_selects[0][1];
|
||||
swap(floppy_selects[0], floppy_selects[1]);
|
||||
}
|
||||
|
||||
#define EXTRA_FLOPPY_PARAMS ,{ "driveswap", &driveswap, NULL, 0, 0 }
|
||||
|
||||
@@ -628,6 +628,9 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev,
|
||||
printk(KERN_NOTICE "%s: Warning: Device %s is misaligned\n",
|
||||
top, bottom);
|
||||
}
|
||||
|
||||
t->backing_dev_info->io_pages =
|
||||
t->limits.max_sectors >> (PAGE_SHIFT - 9);
|
||||
}
|
||||
EXPORT_SYMBOL(disk_stack_limits);
|
||||
|
||||
|
||||
@@ -20,6 +20,38 @@
|
||||
|
||||
#include "blk.h"
|
||||
|
||||
#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
|
||||
static const char *const zone_cond_name[] = {
|
||||
ZONE_COND_NAME(NOT_WP),
|
||||
ZONE_COND_NAME(EMPTY),
|
||||
ZONE_COND_NAME(IMP_OPEN),
|
||||
ZONE_COND_NAME(EXP_OPEN),
|
||||
ZONE_COND_NAME(CLOSED),
|
||||
ZONE_COND_NAME(READONLY),
|
||||
ZONE_COND_NAME(FULL),
|
||||
ZONE_COND_NAME(OFFLINE),
|
||||
};
|
||||
#undef ZONE_COND_NAME
|
||||
|
||||
/**
|
||||
* blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
|
||||
* @zone_cond: BLK_ZONE_COND_XXX.
|
||||
*
|
||||
* Description: Centralize block layer function to convert BLK_ZONE_COND_XXX
|
||||
* into string format. Useful in the debugging and tracing zone conditions. For
|
||||
* invalid BLK_ZONE_COND_XXX it returns string "UNKNOWN".
|
||||
*/
|
||||
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
|
||||
{
|
||||
static const char *zone_cond_str = "UNKNOWN";
|
||||
|
||||
if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
|
||||
zone_cond_str = zone_cond_name[zone_cond];
|
||||
|
||||
return zone_cond_str;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
|
||||
|
||||
static inline sector_t blk_zone_start(struct request_queue *q,
|
||||
sector_t sector)
|
||||
{
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
# Rewritten to use lists instead of if-statements.
|
||||
#
|
||||
|
||||
# needed for trace events
|
||||
ccflags-y += -I$(src)
|
||||
|
||||
obj-$(CONFIG_MAC_FLOPPY) += swim3.o
|
||||
obj-$(CONFIG_BLK_DEV_SWIM) += swim_mod.o
|
||||
obj-$(CONFIG_BLK_DEV_FD) += floppy.o
|
||||
@@ -39,6 +42,9 @@ obj-$(CONFIG_ZRAM) += zram/
|
||||
|
||||
obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o
|
||||
null_blk-objs := null_blk_main.o
|
||||
ifeq ($(CONFIG_BLK_DEV_ZONED), y)
|
||||
null_blk-$(CONFIG_TRACING) += null_blk_trace.o
|
||||
endif
|
||||
null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o
|
||||
|
||||
skd-y := skd_main.o
|
||||
|
||||
@@ -87,9 +87,9 @@ static ssize_t aoedisk_show_netif(struct device *dev,
|
||||
if (*nd == NULL)
|
||||
return snprintf(page, PAGE_SIZE, "none\n");
|
||||
for (p = page; nd < ne; nd++)
|
||||
p += snprintf(p, PAGE_SIZE - (p-page), "%s%s",
|
||||
p += scnprintf(p, PAGE_SIZE - (p-page), "%s%s",
|
||||
p == page ? "" : ",", (*nd)->name);
|
||||
p += snprintf(p, PAGE_SIZE - (p-page), "\n");
|
||||
p += scnprintf(p, PAGE_SIZE - (p-page), "\n");
|
||||
return p-page;
|
||||
}
|
||||
/* firmware version */
|
||||
|
||||
@@ -3413,22 +3413,11 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
|
||||
* the meta-data super block. This function sets MD_DIRTY, and starts a
|
||||
* timer that ensures that within five seconds you have to call drbd_md_sync().
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
void drbd_md_mark_dirty_(struct drbd_device *device, unsigned int line, const char *func)
|
||||
{
|
||||
if (!test_and_set_bit(MD_DIRTY, &device->flags)) {
|
||||
mod_timer(&device->md_sync_timer, jiffies + HZ);
|
||||
device->last_md_mark_dirty.line = line;
|
||||
device->last_md_mark_dirty.func = func;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void drbd_md_mark_dirty(struct drbd_device *device)
|
||||
{
|
||||
if (!test_and_set_bit(MD_DIRTY, &device->flags))
|
||||
mod_timer(&device->md_sync_timer, jiffies + 5*HZ);
|
||||
}
|
||||
#endif
|
||||
|
||||
void drbd_uuid_move_history(struct drbd_device *device) __must_hold(local)
|
||||
{
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -214,7 +214,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
|
||||
* LO_FLAGS_READ_ONLY, both are set from kernel, and losetup
|
||||
* will get updated by ioctl(LOOP_GET_STATUS)
|
||||
*/
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
if (lo->lo_state == Lo_bound)
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
lo->use_dio = use_dio;
|
||||
if (use_dio) {
|
||||
blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
@@ -223,7 +224,8 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
|
||||
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
|
||||
}
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
if (lo->lo_state == Lo_bound)
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -1539,16 +1541,16 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
|
||||
if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
|
||||
return -EINVAL;
|
||||
|
||||
if (lo->lo_queue->limits.logical_block_size != arg) {
|
||||
sync_blockdev(lo->lo_device);
|
||||
kill_bdev(lo->lo_device);
|
||||
}
|
||||
if (lo->lo_queue->limits.logical_block_size == arg)
|
||||
return 0;
|
||||
|
||||
sync_blockdev(lo->lo_device);
|
||||
kill_bdev(lo->lo_device);
|
||||
|
||||
blk_mq_freeze_queue(lo->lo_queue);
|
||||
|
||||
/* kill_bdev should have truncated all the pages */
|
||||
if (lo->lo_queue->limits.logical_block_size != arg &&
|
||||
lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
if (lo->lo_device->bd_inode->i_mapping->nrpages) {
|
||||
err = -EAGAIN;
|
||||
pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
|
||||
__func__, lo->lo_number, lo->lo_file_name,
|
||||
|
||||
@@ -395,16 +395,19 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
||||
}
|
||||
config = nbd->config;
|
||||
|
||||
if (config->num_connections > 1) {
|
||||
if (config->num_connections > 1 ||
|
||||
(config->num_connections == 1 && nbd->tag_set.timeout)) {
|
||||
dev_err_ratelimited(nbd_to_dev(nbd),
|
||||
"Connection timed out, retrying (%d/%d alive)\n",
|
||||
atomic_read(&config->live_connections),
|
||||
config->num_connections);
|
||||
/*
|
||||
* Hooray we have more connections, requeue this IO, the submit
|
||||
* path will put it on a real connection.
|
||||
* path will put it on a real connection. Or if only one
|
||||
* connection is configured, the submit path will wait until
|
||||
* a new connection is reconfigured or until the dead timeout expires.
|
||||
*/
|
||||
if (config->socks && config->num_connections > 1) {
|
||||
if (config->socks) {
|
||||
if (cmd->index < config->num_connections) {
|
||||
struct nbd_sock *nsock =
|
||||
config->socks[cmd->index];
|
||||
@@ -431,12 +434,22 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
||||
* Userspace sets timeout=0 to disable socket disconnection,
|
||||
* so just warn and reset the timer.
|
||||
*/
|
||||
struct nbd_sock *nsock = config->socks[cmd->index];
|
||||
cmd->retries++;
|
||||
dev_info(nbd_to_dev(nbd), "Possible stuck request %p: control (%s@%llu,%uB). Runtime %u seconds\n",
|
||||
req, nbdcmd_to_ascii(req_to_nbd_cmd_type(req)),
|
||||
(unsigned long long)blk_rq_pos(req) << 9,
|
||||
blk_rq_bytes(req), (req->timeout / HZ) * cmd->retries);
|
||||
|
||||
mutex_lock(&nsock->tx_lock);
|
||||
if (cmd->cookie != nsock->cookie) {
|
||||
nbd_requeue_cmd(cmd);
|
||||
mutex_unlock(&nsock->tx_lock);
|
||||
mutex_unlock(&cmd->lock);
|
||||
nbd_config_put(nbd);
|
||||
return BLK_EH_DONE;
|
||||
}
|
||||
mutex_unlock(&nsock->tx_lock);
|
||||
mutex_unlock(&cmd->lock);
|
||||
nbd_config_put(nbd);
|
||||
return BLK_EH_RESET_TIMER;
|
||||
@@ -741,14 +754,12 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
|
||||
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
|
||||
result);
|
||||
/*
|
||||
* If we've disconnected or we only have 1
|
||||
* connection then we need to make sure we
|
||||
* If we've disconnected, we need to make sure we
|
||||
* complete this request, otherwise error out
|
||||
* and let the timeout stuff handle resubmitting
|
||||
* this request onto another connection.
|
||||
*/
|
||||
if (nbd_disconnected(config) ||
|
||||
config->num_connections <= 1) {
|
||||
if (nbd_disconnected(config)) {
|
||||
cmd->status = BLK_STS_IOERR;
|
||||
goto out;
|
||||
}
|
||||
@@ -825,7 +836,7 @@ static int find_fallback(struct nbd_device *nbd, int index)
|
||||
|
||||
if (config->num_connections <= 1) {
|
||||
dev_err_ratelimited(disk_to_dev(nbd->disk),
|
||||
"Attempted send on invalid socket\n");
|
||||
"Dead connection, failed to find a fallback\n");
|
||||
return new_index;
|
||||
}
|
||||
|
||||
|
||||
@@ -97,14 +97,21 @@ module_param_named(home_node, g_home_node, int, 0444);
|
||||
MODULE_PARM_DESC(home_node, "Home node for the device");
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
|
||||
/*
|
||||
* For more details about fault injection, please refer to
|
||||
* Documentation/fault-injection/fault-injection.rst.
|
||||
*/
|
||||
static char g_timeout_str[80];
|
||||
module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
|
||||
MODULE_PARM_DESC(timeout, "Fault injection. timeout=<interval>,<probability>,<space>,<times>");
|
||||
|
||||
static char g_requeue_str[80];
|
||||
module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
|
||||
MODULE_PARM_DESC(requeue, "Fault injection. requeue=<interval>,<probability>,<space>,<times>");
|
||||
|
||||
static char g_init_hctx_str[80];
|
||||
module_param_string(init_hctx, g_init_hctx_str, sizeof(g_init_hctx_str), 0444);
|
||||
MODULE_PARM_DESC(init_hctx, "Fault injection to fail hctx init. init_hctx=<interval>,<probability>,<space>,<times>");
|
||||
#endif
|
||||
|
||||
static int g_queue_mode = NULL_Q_MQ;
|
||||
@@ -615,6 +622,7 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
|
||||
if (tag != -1U) {
|
||||
cmd = &nq->cmds[tag];
|
||||
cmd->tag = tag;
|
||||
cmd->error = BLK_STS_OK;
|
||||
cmd->nq = nq;
|
||||
if (nq->dev->irqmode == NULL_IRQ_TIMER) {
|
||||
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
|
||||
@@ -1395,6 +1403,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
cmd->timer.function = null_cmd_timer_expired;
|
||||
}
|
||||
cmd->rq = bd->rq;
|
||||
cmd->error = BLK_STS_OK;
|
||||
cmd->nq = nq;
|
||||
|
||||
blk_mq_start_request(bd->rq);
|
||||
|
||||
21
drivers/block/null_blk_trace.c
Normal file
21
drivers/block/null_blk_trace.c
Normal file
@@ -0,0 +1,21 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* null_blk trace related helpers.
|
||||
*
|
||||
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
|
||||
*/
|
||||
#include "null_blk_trace.h"
|
||||
|
||||
/*
|
||||
* Helper to use for all null_blk traces to extract disk name.
|
||||
*/
|
||||
const char *nullb_trace_disk_name(struct trace_seq *p, char *name)
|
||||
{
|
||||
const char *ret = trace_seq_buffer_ptr(p);
|
||||
|
||||
if (name && *name)
|
||||
trace_seq_printf(p, "disk=%s, ", name);
|
||||
trace_seq_putc(p, 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
79
drivers/block/null_blk_trace.h
Normal file
79
drivers/block/null_blk_trace.h
Normal file
@@ -0,0 +1,79 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* null_blk device driver tracepoints.
|
||||
*
|
||||
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
|
||||
*/
|
||||
|
||||
#undef TRACE_SYSTEM
|
||||
#define TRACE_SYSTEM nullb
|
||||
|
||||
#if !defined(_TRACE_NULLB_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_NULLB_H
|
||||
|
||||
#include <linux/tracepoint.h>
|
||||
#include <linux/trace_seq.h>
|
||||
|
||||
#include "null_blk.h"
|
||||
|
||||
const char *nullb_trace_disk_name(struct trace_seq *p, char *name);
|
||||
|
||||
#define __print_disk_name(name) nullb_trace_disk_name(p, name)
|
||||
|
||||
#ifndef TRACE_HEADER_MULTI_READ
|
||||
static inline void __assign_disk_name(char *name, struct gendisk *disk)
|
||||
{
|
||||
if (disk)
|
||||
memcpy(name, disk->disk_name, DISK_NAME_LEN);
|
||||
else
|
||||
memset(name, 0, DISK_NAME_LEN);
|
||||
}
|
||||
#endif
|
||||
|
||||
TRACE_EVENT(nullb_zone_op,
|
||||
TP_PROTO(struct nullb_cmd *cmd, unsigned int zone_no,
|
||||
unsigned int zone_cond),
|
||||
TP_ARGS(cmd, zone_no, zone_cond),
|
||||
TP_STRUCT__entry(
|
||||
__array(char, disk, DISK_NAME_LEN)
|
||||
__field(enum req_opf, op)
|
||||
__field(unsigned int, zone_no)
|
||||
__field(unsigned int, zone_cond)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->op = req_op(cmd->rq);
|
||||
__entry->zone_no = zone_no;
|
||||
__entry->zone_cond = zone_cond;
|
||||
__assign_disk_name(__entry->disk, cmd->rq->rq_disk);
|
||||
),
|
||||
TP_printk("%s req=%-15s zone_no=%u zone_cond=%-10s",
|
||||
__print_disk_name(__entry->disk),
|
||||
blk_op_str(__entry->op),
|
||||
__entry->zone_no,
|
||||
blk_zone_cond_str(__entry->zone_cond))
|
||||
);
|
||||
|
||||
TRACE_EVENT(nullb_report_zones,
|
||||
TP_PROTO(struct nullb *nullb, unsigned int nr_zones),
|
||||
TP_ARGS(nullb, nr_zones),
|
||||
TP_STRUCT__entry(
|
||||
__array(char, disk, DISK_NAME_LEN)
|
||||
__field(unsigned int, nr_zones)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->nr_zones = nr_zones;
|
||||
__assign_disk_name(__entry->disk, nullb->disk);
|
||||
),
|
||||
TP_printk("%s nr_zones=%u",
|
||||
__print_disk_name(__entry->disk), __entry->nr_zones)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_NULLB_H */
|
||||
|
||||
#undef TRACE_INCLUDE_PATH
|
||||
#define TRACE_INCLUDE_PATH .
|
||||
#undef TRACE_INCLUDE_FILE
|
||||
#define TRACE_INCLUDE_FILE null_blk_trace
|
||||
|
||||
/* This part must be outside protection */
|
||||
#include <trace/define_trace.h>
|
||||
@@ -2,6 +2,9 @@
|
||||
#include <linux/vmalloc.h>
|
||||
#include "null_blk.h"
|
||||
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include "null_blk_trace.h"
|
||||
|
||||
/* zone_size in MBs to sectors. */
|
||||
#define ZONE_SIZE_SHIFT 11
|
||||
|
||||
@@ -80,6 +83,8 @@ int null_report_zones(struct gendisk *disk, sector_t sector,
|
||||
return 0;
|
||||
|
||||
nr_zones = min(nr_zones, dev->nr_zones - first_zone);
|
||||
trace_nullb_report_zones(nullb, nr_zones);
|
||||
|
||||
for (i = 0; i < nr_zones; i++) {
|
||||
/*
|
||||
* Stacked DM target drivers will remap the zone information by
|
||||
@@ -148,6 +153,8 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector,
|
||||
/* Invalid zone condition */
|
||||
return BLK_STS_IOERR;
|
||||
}
|
||||
|
||||
trace_nullb_zone_op(cmd, zno, zone->cond);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
@@ -155,7 +162,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
||||
sector_t sector)
|
||||
{
|
||||
struct nullb_device *dev = cmd->nq->dev;
|
||||
struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)];
|
||||
unsigned int zone_no = null_zone_no(dev, sector);
|
||||
struct blk_zone *zone = &dev->zones[zone_no];
|
||||
size_t i;
|
||||
|
||||
switch (op) {
|
||||
@@ -203,6 +211,8 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
|
||||
default:
|
||||
return BLK_STS_NOTSUPP;
|
||||
}
|
||||
|
||||
trace_nullb_zone_op(cmd, zone_no, zone->cond);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
|
||||
@@ -80,7 +80,7 @@ struct dma_tracker {
|
||||
struct dma_tracker_list {
|
||||
spinlock_t lock;
|
||||
int head;
|
||||
struct dma_tracker list[0];
|
||||
struct dma_tracker list[];
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ static ssize_t pblk_sysfs_luns_show(struct pblk *pblk, char *page)
|
||||
active = 0;
|
||||
up(&rlun->wr_sem);
|
||||
}
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"pblk: pos:%d, ch:%d, lun:%d - %d\n",
|
||||
i,
|
||||
rlun->bppa.a.ch,
|
||||
@@ -120,7 +120,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
||||
struct nvm_addrf_12 *ppaf = (struct nvm_addrf_12 *)&pblk->addrf;
|
||||
struct nvm_addrf_12 *gppaf = (struct nvm_addrf_12 *)&geo->addrf;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
sz = scnprintf(page, PAGE_SIZE,
|
||||
"g:(b:%d)blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->blk_offset, ppaf->blk_len,
|
||||
@@ -130,7 +130,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
||||
ppaf->pln_offset, ppaf->pln_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"d:blk:%d/%d,pg:%d/%d,lun:%d/%d,ch:%d/%d,pl:%d/%d,sec:%d/%d\n",
|
||||
gppaf->blk_offset, gppaf->blk_len,
|
||||
gppaf->pg_offset, gppaf->pg_len,
|
||||
@@ -142,7 +142,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
||||
struct nvm_addrf *ppaf = &pblk->addrf;
|
||||
struct nvm_addrf *gppaf = &geo->addrf;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
sz = scnprintf(page, PAGE_SIZE,
|
||||
"pblk:(s:%d)ch:%d/%d,lun:%d/%d,chk:%d/%d/sec:%d/%d\n",
|
||||
pblk->addrf_len,
|
||||
ppaf->ch_offset, ppaf->ch_len,
|
||||
@@ -150,7 +150,7 @@ static ssize_t pblk_sysfs_ppaf(struct pblk *pblk, char *page)
|
||||
ppaf->chk_offset, ppaf->chk_len,
|
||||
ppaf->sec_offset, ppaf->sec_len);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"device:ch:%d/%d,lun:%d/%d,chk:%d/%d,sec:%d/%d\n",
|
||||
gppaf->ch_offset, gppaf->ch_len,
|
||||
gppaf->lun_offset, gppaf->lun_len,
|
||||
@@ -278,11 +278,11 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
||||
pblk_err(pblk, "corrupted free line list:%d/%d\n",
|
||||
nr_free_lines, free_line_cnt);
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE - sz,
|
||||
sz = scnprintf(page, PAGE_SIZE - sz,
|
||||
"line: nluns:%d, nblks:%d, nsecs:%d\n",
|
||||
geo->all_luns, lm->blk_per_line, lm->sec_per_line);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"lines:d:%d,l:%d-f:%d,m:%d/%d,c:%d,b:%d,co:%d(d:%d,l:%d)t:%d\n",
|
||||
cur_data, cur_log,
|
||||
nr_free_lines,
|
||||
@@ -292,12 +292,12 @@ static ssize_t pblk_sysfs_lines(struct pblk *pblk, char *page)
|
||||
d_line_cnt, l_line_cnt,
|
||||
l_mg->nr_lines);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"GC: full:%d, high:%d, mid:%d, low:%d, empty:%d, werr: %d, queue:%d\n",
|
||||
gc_full, gc_high, gc_mid, gc_low, gc_empty, gc_werr,
|
||||
atomic_read(&pblk->gc.read_inflight_gc));
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"data (%d) cur:%d, left:%d, vsc:%d, s:%d, map:%d/%d (%d)\n",
|
||||
cur_data, cur_sec, msecs, vsc, sec_in_line,
|
||||
map_weight, lm->sec_per_line,
|
||||
@@ -313,19 +313,19 @@ static ssize_t pblk_sysfs_lines_info(struct pblk *pblk, char *page)
|
||||
struct pblk_line_meta *lm = &pblk->lm;
|
||||
ssize_t sz = 0;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE - sz,
|
||||
sz = scnprintf(page, PAGE_SIZE - sz,
|
||||
"smeta - len:%d, secs:%d\n",
|
||||
lm->smeta_len, lm->smeta_sec);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"emeta - len:%d, sec:%d, bb_start:%d\n",
|
||||
lm->emeta_len[0], lm->emeta_sec[0],
|
||||
lm->emeta_bb);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"bitmap lengths: sec:%d, blk:%d, lun:%d\n",
|
||||
lm->sec_bitmap_len,
|
||||
lm->blk_bitmap_len,
|
||||
lm->lun_bitmap_len);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"blk_line:%d, sec_line:%d, sec_blk:%d\n",
|
||||
lm->blk_per_line,
|
||||
lm->sec_per_line,
|
||||
@@ -344,12 +344,12 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
|
||||
{
|
||||
int sz;
|
||||
|
||||
sz = snprintf(page, PAGE_SIZE,
|
||||
sz = scnprintf(page, PAGE_SIZE,
|
||||
"user:%lld gc:%lld pad:%lld WA:",
|
||||
user, gc, pad);
|
||||
|
||||
if (!user) {
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "NaN\n");
|
||||
} else {
|
||||
u64 wa_int;
|
||||
u32 wa_frac;
|
||||
@@ -358,7 +358,7 @@ static ssize_t pblk_get_write_amp(u64 user, u64 gc, u64 pad,
|
||||
wa_int = div64_u64(wa_int, user);
|
||||
wa_int = div_u64_rem(wa_int, 100000, &wa_frac);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "%llu.%05u\n",
|
||||
wa_int, wa_frac);
|
||||
}
|
||||
|
||||
@@ -401,9 +401,9 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
|
||||
total = atomic64_read(&pblk->nr_flush) - pblk->nr_flush_rst;
|
||||
if (!total) {
|
||||
for (i = 0; i < (buckets + 1); i++)
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz,
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz,
|
||||
"%d:0 ", i);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
@@ -411,7 +411,7 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
|
||||
for (i = 0; i < buckets; i++)
|
||||
total_buckets += atomic64_read(&pblk->pad_dist[i]);
|
||||
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "0:%lld%% ",
|
||||
bucket_percentage(total - total_buckets, total));
|
||||
|
||||
for (i = 0; i < buckets; i++) {
|
||||
@@ -419,10 +419,10 @@ static ssize_t pblk_sysfs_get_padding_dist(struct pblk *pblk, char *page)
|
||||
|
||||
p = bucket_percentage(atomic64_read(&pblk->pad_dist[i]),
|
||||
total);
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "%d:%lld%% ",
|
||||
i + 1, p);
|
||||
}
|
||||
sz += snprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
sz += scnprintf(page + sz, PAGE_SIZE - sz, "\n");
|
||||
|
||||
return sz;
|
||||
}
|
||||
|
||||
@@ -101,64 +101,6 @@
|
||||
|
||||
#define insert_lock(s, b) ((b)->level <= (s)->lock)
|
||||
|
||||
/*
|
||||
* These macros are for recursing down the btree - they handle the details of
|
||||
* locking and looking up nodes in the cache for you. They're best treated as
|
||||
* mere syntax when reading code that uses them.
|
||||
*
|
||||
* op->lock determines whether we take a read or a write lock at a given depth.
|
||||
* If you've got a read lock and find that you need a write lock (i.e. you're
|
||||
* going to have to split), set op->lock and return -EINTR; btree_root() will
|
||||
* call you again and you'll have the correct lock.
|
||||
*/
|
||||
|
||||
/**
|
||||
* btree - recurse down the btree on a specified key
|
||||
* @fn: function to call, which will be passed the child node
|
||||
* @key: key to recurse on
|
||||
* @b: parent btree node
|
||||
* @op: pointer to struct btree_op
|
||||
*/
|
||||
#define btree(fn, key, b, op, ...) \
|
||||
({ \
|
||||
int _r, l = (b)->level - 1; \
|
||||
bool _w = l <= (op)->lock; \
|
||||
struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
|
||||
_w, b); \
|
||||
if (!IS_ERR(_child)) { \
|
||||
_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
|
||||
rw_unlock(_w, _child); \
|
||||
} else \
|
||||
_r = PTR_ERR(_child); \
|
||||
_r; \
|
||||
})
|
||||
|
||||
/**
|
||||
* btree_root - call a function on the root of the btree
|
||||
* @fn: function to call, which will be passed the child node
|
||||
* @c: cache set
|
||||
* @op: pointer to struct btree_op
|
||||
*/
|
||||
#define btree_root(fn, c, op, ...) \
|
||||
({ \
|
||||
int _r = -EINTR; \
|
||||
do { \
|
||||
struct btree *_b = (c)->root; \
|
||||
bool _w = insert_lock(op, _b); \
|
||||
rw_lock(_w, _b, _b->level); \
|
||||
if (_b == (c)->root && \
|
||||
_w == insert_lock(op, _b)) { \
|
||||
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
|
||||
} \
|
||||
rw_unlock(_w, _b); \
|
||||
bch_cannibalize_unlock(c); \
|
||||
if (_r == -EINTR) \
|
||||
schedule(); \
|
||||
} while (_r == -EINTR); \
|
||||
\
|
||||
finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
|
||||
_r; \
|
||||
})
|
||||
|
||||
static inline struct bset *write_block(struct btree *b)
|
||||
{
|
||||
@@ -1848,7 +1790,7 @@ static void bch_btree_gc(struct cache_set *c)
|
||||
|
||||
/* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
|
||||
do {
|
||||
ret = btree_root(gc_root, c, &op, &writes, &stats);
|
||||
ret = bcache_btree_root(gc_root, c, &op, &writes, &stats);
|
||||
closure_sync(&writes);
|
||||
cond_resched();
|
||||
|
||||
@@ -1946,7 +1888,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
||||
}
|
||||
|
||||
if (p)
|
||||
ret = btree(check_recurse, p, b, op);
|
||||
ret = bcache_btree(check_recurse, p, b, op);
|
||||
|
||||
p = k;
|
||||
} while (p && !ret);
|
||||
@@ -1955,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int bch_btree_check_thread(void *arg)
|
||||
{
|
||||
int ret;
|
||||
struct btree_check_info *info = arg;
|
||||
struct btree_check_state *check_state = info->state;
|
||||
struct cache_set *c = check_state->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
int i, n;
|
||||
|
||||
k = p = NULL;
|
||||
i = n = 0;
|
||||
cur_idx = prev_idx = 0;
|
||||
ret = 0;
|
||||
|
||||
/* root node keys are checked before thread created */
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
while (k) {
|
||||
/*
|
||||
* Fetch a root node key index, skip the keys which
|
||||
* should be fetched by other threads, then check the
|
||||
* sub-tree indexed by the fetched key.
|
||||
*/
|
||||
spin_lock(&check_state->idx_lock);
|
||||
cur_idx = check_state->key_idx;
|
||||
check_state->key_idx++;
|
||||
spin_unlock(&check_state->idx_lock);
|
||||
|
||||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
p = k;
|
||||
else {
|
||||
/*
|
||||
* No more keys to check in root node,
|
||||
* current checking threads are enough,
|
||||
* stop creating more.
|
||||
*/
|
||||
atomic_set(&check_state->enough, 1);
|
||||
/* Update check_state->enough earlier */
|
||||
smp_mb__after_atomic();
|
||||
goto out;
|
||||
}
|
||||
skip_nr--;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (p) {
|
||||
struct btree_op op;
|
||||
|
||||
btree_node_prefetch(c->root, p);
|
||||
c->gc_stats.nodes++;
|
||||
bch_btree_op_init(&op, 0);
|
||||
ret = bcache_btree(check_recurse, p, c->root, &op);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
p = NULL;
|
||||
prev_idx = cur_idx;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
out:
|
||||
info->result = ret;
|
||||
/* update check_state->started among all CPUs */
|
||||
smp_mb__before_atomic();
|
||||
if (atomic_dec_and_test(&check_state->started))
|
||||
wake_up(&check_state->wait);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int bch_btree_chkthread_nr(void)
|
||||
{
|
||||
int n = num_online_cpus()/2;
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
else if (n > BCH_BTR_CHKTHREAD_MAX)
|
||||
n = BCH_BTR_CHKTHREAD_MAX;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
int bch_btree_check(struct cache_set *c)
|
||||
{
|
||||
struct btree_op op;
|
||||
int ret = 0;
|
||||
int i;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct btree_check_state *check_state;
|
||||
char name[32];
|
||||
|
||||
bch_btree_op_init(&op, SHRT_MAX);
|
||||
/* check and mark root node keys */
|
||||
for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
|
||||
bch_initial_mark_key(c, c->root->level, k);
|
||||
|
||||
return btree_root(check_recurse, c, &op);
|
||||
bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
|
||||
|
||||
if (c->root->level == 0)
|
||||
return 0;
|
||||
|
||||
check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
|
||||
if (!check_state)
|
||||
return -ENOMEM;
|
||||
|
||||
check_state->c = c;
|
||||
check_state->total_threads = bch_btree_chkthread_nr();
|
||||
check_state->key_idx = 0;
|
||||
spin_lock_init(&check_state->idx_lock);
|
||||
atomic_set(&check_state->started, 0);
|
||||
atomic_set(&check_state->enough, 0);
|
||||
init_waitqueue_head(&check_state->wait);
|
||||
|
||||
/*
|
||||
* Run multiple threads to check btree nodes in parallel,
|
||||
* if check_state->enough is non-zero, it means current
|
||||
* running check threads are enough, unncessary to create
|
||||
* more.
|
||||
*/
|
||||
for (i = 0; i < check_state->total_threads; i++) {
|
||||
/* fetch latest check_state->enough earlier */
|
||||
smp_mb__before_atomic();
|
||||
if (atomic_read(&check_state->enough))
|
||||
break;
|
||||
|
||||
check_state->infos[i].result = 0;
|
||||
check_state->infos[i].state = check_state;
|
||||
snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
|
||||
atomic_inc(&check_state->started);
|
||||
|
||||
check_state->infos[i].thread =
|
||||
kthread_run(bch_btree_check_thread,
|
||||
&check_state->infos[i],
|
||||
name);
|
||||
if (IS_ERR(check_state->infos[i].thread)) {
|
||||
pr_err("fails to run thread bch_btrchk[%d]", i);
|
||||
for (--i; i >= 0; i--)
|
||||
kthread_stop(check_state->infos[i].thread);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
wait_event_interruptible(check_state->wait,
|
||||
atomic_read(&check_state->started) == 0 ||
|
||||
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
|
||||
|
||||
for (i = 0; i < check_state->total_threads; i++) {
|
||||
if (check_state->infos[i].result) {
|
||||
ret = check_state->infos[i].result;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(check_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void bch_initial_gc_finish(struct cache_set *c)
|
||||
@@ -2401,7 +2506,7 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
|
||||
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys,
|
||||
bch_ptr_bad))) {
|
||||
ret = btree(map_nodes_recurse, k, b,
|
||||
ret = bcache_btree(map_nodes_recurse, k, b,
|
||||
op, from, fn, flags);
|
||||
from = NULL;
|
||||
|
||||
@@ -2419,10 +2524,10 @@ static int bch_btree_map_nodes_recurse(struct btree *b, struct btree_op *op,
|
||||
/*
 * Run bch_btree_map_nodes_recurse() on the root of @c's btree through
 * bcache_btree_root(), forwarding @op, @from, @fn and @flags unchanged,
 * and return whatever that evaluates to.
 */
int __bch_btree_map_nodes(struct btree_op *op, struct cache_set *c,
			  struct bkey *from, btree_map_nodes_fn *fn, int flags)
{
	return bcache_btree_root(map_nodes_recurse, c, op, from, fn, flags);
}
|
||||
|
||||
static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
struct bkey *from, btree_map_keys_fn *fn,
|
||||
int flags)
|
||||
{
|
||||
@@ -2435,7 +2540,8 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
while ((k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad))) {
|
||||
ret = !b->level
|
||||
? fn(op, b, k)
|
||||
: btree(map_keys_recurse, k, b, op, from, fn, flags);
|
||||
: bcache_btree(map_keys_recurse, k,
|
||||
b, op, from, fn, flags);
|
||||
from = NULL;
|
||||
|
||||
if (ret != MAP_CONTINUE)
|
||||
@@ -2452,7 +2558,7 @@ static int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
/*
 * Run bch_btree_map_keys_recurse() on the root of @c's btree through
 * bcache_btree_root(), forwarding @op, @from, @fn and @flags unchanged,
 * and return whatever that evaluates to.
 */
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
		       struct bkey *from, btree_map_keys_fn *fn, int flags)
{
	return bcache_btree_root(map_keys_recurse, c, op, from, fn, flags);
}
|
||||
|
||||
/* Keybuf code */
|
||||
|
||||
@@ -145,6 +145,9 @@ struct btree {
|
||||
struct bio *bio;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
#define BTREE_FLAG(flag) \
|
||||
static inline bool btree_node_ ## flag(struct btree *b) \
|
||||
{ return test_bit(BTREE_NODE_ ## flag, &b->flags); } \
|
||||
@@ -216,6 +219,25 @@ struct btree_op {
|
||||
unsigned int insert_collision:1;
|
||||
};
|
||||
|
||||
struct btree_check_state;
|
||||
/* Per-thread slot used by bch_btree_check() for one btree check thread. */
struct btree_check_info {
|
||||
/* shared state; bch_btree_check() points every slot at the same object */
struct btree_check_state *state;
|
||||
/* value returned by kthread_run() for this slot (may be an ERR_PTR) */
struct task_struct *thread;
|
||||
/* stored by the thread before it finishes; non-zero is reported to the caller */
int result;
|
||||
};
|
||||
|
||||
#define BCH_BTR_CHKTHREAD_MAX 64
|
||||
/* State shared between bch_btree_check() and the check threads it starts. */
struct btree_check_state {
|
||||
/* cache set whose btree is being checked */
struct cache_set *c;
|
||||
/* upper bound on the number of threads bch_btree_check() starts */
int total_threads;
|
||||
/* next root-node key index to hand out; read and incremented under idx_lock */
int key_idx;
|
||||
spinlock_t idx_lock;
|
||||
/* incremented before each thread is started, decremented when a thread finishes */
atomic_t started;
|
||||
/* set to 1 by a thread that ran out of root keys; stops creation of more threads */
atomic_t enough;
|
||||
/* woken by the thread that drops 'started' to zero */
wait_queue_head_t wait;
|
||||
/* one slot per potential thread */
struct btree_check_info infos[BCH_BTR_CHKTHREAD_MAX];
|
||||
};
|
||||
|
||||
static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level)
|
||||
{
|
||||
memset(op, 0, sizeof(struct btree_op));
|
||||
@@ -284,6 +306,65 @@ static inline void force_wake_up_gc(struct cache_set *c)
|
||||
wake_up_gc(c);
|
||||
}
|
||||
|
||||
/*
|
||||
* These macros are for recursing down the btree - they handle the details of
|
||||
* locking and looking up nodes in the cache for you. They're best treated as
|
||||
* mere syntax when reading code that uses them.
|
||||
*
|
||||
* op->lock determines whether we take a read or a write lock at a given depth.
|
||||
* If you've got a read lock and find that you need a write lock (i.e. you're
|
||||
* going to have to split), set op->lock and return -EINTR; bcache_btree_root() will
|
||||
* call you again and you'll have the correct lock.
|
||||
*/
|
||||
|
||||
/**
 * bcache_btree - recurse down the btree on a specified key
 * @fn:		function to call, which will be passed the child node
 * @key:	key to recurse on
 * @b:		parent btree node
 * @op:		pointer to struct btree_op
 *
 * The child node one level below @b is looked up with bch_btree_node_get();
 * it is requested write locked when its level is <= (op)->lock. On success
 * bch_btree_<fn>() is called on the child and the child is unlocked again.
 *
 * Evaluates to the value returned by bch_btree_<fn>(), or to PTR_ERR() of
 * the lookup result when bch_btree_node_get() returned an error pointer.
 *
 * All locals carry a leading underscore so that they cannot capture an
 * identifier used in one of the macro arguments.
 */
#define bcache_btree(fn, key, b, op, ...)				\
({									\
	int _r, _l = (b)->level - 1;					\
	bool _w = _l <= (op)->lock;					\
	struct btree *_child = bch_btree_node_get((b)->c, op, key, _l,	\
						  _w, b);		\
	if (!IS_ERR(_child)) {						\
		_r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__);	\
		rw_unlock(_w, _child);					\
	} else								\
		_r = PTR_ERR(_child);					\
	_r;								\
})
|
||||
|
||||
/**
 * bcache_btree_root - call a function on the root of the btree
 * @fn:		function to call, which will be passed the root node
 * @c:		cache set
 * @op:		pointer to struct btree_op
 *
 * Each pass reads (c)->root, locks it in the mode chosen by
 * insert_lock(op, root) and calls bch_btree_<fn>() on it, but only if the
 * root pointer and the lock mode are still the same once the lock is held.
 * The root is then unlocked and bch_cannibalize_unlock() is called.
 *
 * The result starts out as -EINTR, so a pass that skipped the call is
 * retried just like a call that returned -EINTR; schedule() is called
 * before every retry. finish_wait() is called on (op)->wait before the
 * macro evaluates to the final result.
 */
|
||||
#define bcache_btree_root(fn, c, op, ...) \
|
||||
({ \
|
||||
int _r = -EINTR; \
|
||||
do { \
|
||||
struct btree *_b = (c)->root; \
|
||||
bool _w = insert_lock(op, _b); \
|
||||
rw_lock(_w, _b, _b->level); \
|
||||
if (_b == (c)->root && \
|
||||
_w == insert_lock(op, _b)) { \
|
||||
_r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
|
||||
} \
|
||||
rw_unlock(_w, _b); \
|
||||
bch_cannibalize_unlock(c); \
|
||||
if (_r == -EINTR) \
|
||||
schedule(); \
|
||||
} while (_r == -EINTR); \
|
||||
\
|
||||
finish_wait(&(c)->btree_cache_wait, &(op)->wait); \
|
||||
_r; \
|
||||
})
|
||||
|
||||
#define MAP_DONE 0
|
||||
#define MAP_CONTINUE 1
|
||||
|
||||
@@ -314,6 +395,9 @@ typedef int (btree_map_keys_fn)(struct btree_op *op, struct btree *b,
|
||||
struct bkey *k);
|
||||
int bch_btree_map_keys(struct btree_op *op, struct cache_set *c,
|
||||
struct bkey *from, btree_map_keys_fn *fn, int flags);
|
||||
int bch_btree_map_keys_recurse(struct btree *b, struct btree_op *op,
|
||||
struct bkey *from, btree_map_keys_fn *fn,
|
||||
int flags);
|
||||
|
||||
typedef bool (keybuf_pred_fn)(struct keybuf *buf, struct bkey *k);
|
||||
|
||||
|
||||
@@ -154,7 +154,7 @@ static ssize_t bch_snprint_string_list(char *buf,
|
||||
size_t i;
|
||||
|
||||
for (i = 0; list[i]; i++)
|
||||
out += snprintf(out, buf + size - out,
|
||||
out += scnprintf(out, buf + size - out,
|
||||
i == selected ? "[%s] " : "%s ", list[i]);
|
||||
|
||||
out[-1] = '\n';
|
||||
|
||||
@@ -183,7 +183,7 @@ static void update_writeback_rate(struct work_struct *work)
|
||||
*/
|
||||
set_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
|
||||
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
|
||||
smp_mb();
|
||||
smp_mb__after_atomic();
|
||||
|
||||
/*
|
||||
* CACHE_SET_IO_DISABLE might be set via sysfs interface,
|
||||
@@ -193,7 +193,7 @@ static void update_writeback_rate(struct work_struct *work)
|
||||
test_bit(CACHE_SET_IO_DISABLE, &c->flags)) {
|
||||
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
|
||||
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
|
||||
smp_mb();
|
||||
smp_mb__after_atomic();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -229,7 +229,7 @@ static void update_writeback_rate(struct work_struct *work)
|
||||
*/
|
||||
clear_bit(BCACHE_DEV_RATE_DW_RUNNING, &dc->disk.flags);
|
||||
/* paired with where BCACHE_DEV_RATE_DW_RUNNING is tested */
|
||||
smp_mb();
|
||||
smp_mb__after_atomic();
|
||||
}
|
||||
|
||||
static unsigned int writeback_delay(struct cached_dev *dc,
|
||||
@@ -785,7 +785,9 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
|
||||
return MAP_CONTINUE;
|
||||
}
|
||||
|
||||
void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
static int bch_root_node_dirty_init(struct cache_set *c,
|
||||
struct bcache_device *d,
|
||||
struct bkey *k)
|
||||
{
|
||||
struct sectors_dirty_init op;
|
||||
int ret;
|
||||
@@ -796,8 +798,13 @@ void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
op.start = KEY(op.inode, 0, 0);
|
||||
|
||||
do {
|
||||
ret = bch_btree_map_keys(&op.op, d->c, &op.start,
|
||||
sectors_dirty_init_fn, 0);
|
||||
ret = bcache_btree(map_keys_recurse,
|
||||
k,
|
||||
c->root,
|
||||
&op.op,
|
||||
&op.start,
|
||||
sectors_dirty_init_fn,
|
||||
0);
|
||||
if (ret == -EAGAIN)
|
||||
schedule_timeout_interruptible(
|
||||
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
|
||||
@@ -806,6 +813,151 @@ void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
break;
|
||||
}
|
||||
} while (ret == -EAGAIN);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int bch_dirty_init_thread(void *arg)
|
||||
{
|
||||
struct dirty_init_thrd_info *info = arg;
|
||||
struct bch_dirty_init_state *state = info->state;
|
||||
struct cache_set *c = state->c;
|
||||
struct btree_iter iter;
|
||||
struct bkey *k, *p;
|
||||
int cur_idx, prev_idx, skip_nr;
|
||||
int i;
|
||||
|
||||
k = p = NULL;
|
||||
i = 0;
|
||||
cur_idx = prev_idx = 0;
|
||||
|
||||
bch_btree_iter_init(&c->root->keys, &iter, NULL);
|
||||
k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
|
||||
BUG_ON(!k);
|
||||
|
||||
p = k;
|
||||
|
||||
while (k) {
|
||||
spin_lock(&state->idx_lock);
|
||||
cur_idx = state->key_idx;
|
||||
state->key_idx++;
|
||||
spin_unlock(&state->idx_lock);
|
||||
|
||||
skip_nr = cur_idx - prev_idx;
|
||||
|
||||
while (skip_nr) {
|
||||
k = bch_btree_iter_next_filter(&iter,
|
||||
&c->root->keys,
|
||||
bch_ptr_bad);
|
||||
if (k)
|
||||
p = k;
|
||||
else {
|
||||
atomic_set(&state->enough, 1);
|
||||
/* Update state->enough earlier */
|
||||
smp_mb__after_atomic();
|
||||
goto out;
|
||||
}
|
||||
skip_nr--;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (p) {
|
||||
if (bch_root_node_dirty_init(c, state->d, p) < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
p = NULL;
|
||||
prev_idx = cur_idx;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
out:
|
||||
/* In order to wake up state->wait in time */
|
||||
smp_mb__before_atomic();
|
||||
if (atomic_dec_and_test(&state->started))
|
||||
wake_up(&state->wait);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bch_btre_dirty_init_thread_nr(void)
|
||||
{
|
||||
int n = num_online_cpus()/2;
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
else if (n > BCH_DIRTY_INIT_THRD_MAX)
|
||||
n = BCH_DIRTY_INIT_THRD_MAX;
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
void bch_sectors_dirty_init(struct bcache_device *d)
|
||||
{
|
||||
int i;
|
||||
struct bkey *k = NULL;
|
||||
struct btree_iter iter;
|
||||
struct sectors_dirty_init op;
|
||||
struct cache_set *c = d->c;
|
||||
struct bch_dirty_init_state *state;
|
||||
char name[32];
|
||||
|
||||
/* Just count root keys if no leaf node */
|
||||
if (c->root->level == 0) {
|
||||
bch_btree_op_init(&op.op, -1);
|
||||
op.inode = d->id;
|
||||
op.count = 0;
|
||||
op.start = KEY(op.inode, 0, 0);
|
||||
|
||||
for_each_key_filter(&c->root->keys,
|
||||
k, &iter, bch_ptr_invalid)
|
||||
sectors_dirty_init_fn(&op.op, c->root, k);
|
||||
return;
|
||||
}
|
||||
|
||||
state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
|
||||
if (!state) {
|
||||
pr_warn("sectors dirty init failed: cannot allocate memory");
|
||||
return;
|
||||
}
|
||||
|
||||
state->c = c;
|
||||
state->d = d;
|
||||
state->total_threads = bch_btre_dirty_init_thread_nr();
|
||||
state->key_idx = 0;
|
||||
spin_lock_init(&state->idx_lock);
|
||||
atomic_set(&state->started, 0);
|
||||
atomic_set(&state->enough, 0);
|
||||
init_waitqueue_head(&state->wait);
|
||||
|
||||
for (i = 0; i < state->total_threads; i++) {
|
||||
/* Fetch latest state->enough earlier */
|
||||
smp_mb__before_atomic();
|
||||
if (atomic_read(&state->enough))
|
||||
break;
|
||||
|
||||
state->infos[i].state = state;
|
||||
atomic_inc(&state->started);
|
||||
snprintf(name, sizeof(name), "bch_dirty_init[%d]", i);
|
||||
|
||||
state->infos[i].thread =
|
||||
kthread_run(bch_dirty_init_thread,
|
||||
&state->infos[i],
|
||||
name);
|
||||
if (IS_ERR(state->infos[i].thread)) {
|
||||
pr_err("fails to run thread bch_dirty_init[%d]", i);
|
||||
for (--i; i >= 0; i--)
|
||||
kthread_stop(state->infos[i].thread);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
wait_event_interruptible(state->wait,
|
||||
atomic_read(&state->started) == 0 ||
|
||||
test_bit(CACHE_SET_IO_DISABLE, &c->flags));
|
||||
|
||||
out:
|
||||
kfree(state);
|
||||
}
|
||||
|
||||
void bch_cached_dev_writeback_init(struct cached_dev *dc)
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#define BCH_AUTO_GC_DIRTY_THRESHOLD 50
|
||||
|
||||
#define BCH_DIRTY_INIT_THRD_MAX 64
|
||||
/*
|
||||
* 14 (16384ths) is chosen here as something that each backing device
|
||||
* should be a reasonable fraction of the share, and not to blow up
|
||||
@@ -23,6 +24,24 @@
|
||||
*/
|
||||
#define WRITEBACK_SHARE_SHIFT 14
|
||||
|
||||
struct bch_dirty_init_state;
|
||||
/* Per-thread slot used by bch_sectors_dirty_init() for one worker thread. */
struct dirty_init_thrd_info {
|
||||
/* shared state; every slot is pointed at the same object */
struct bch_dirty_init_state *state;
|
||||
/* value returned by kthread_run() for this slot (may be an ERR_PTR) */
struct task_struct *thread;
|
||||
};
|
||||
|
||||
/* State shared between bch_sectors_dirty_init() and the threads it starts. */
struct bch_dirty_init_state {
|
||||
/* cache set of the device (d->c) */
struct cache_set *c;
|
||||
/* device passed to bch_sectors_dirty_init() */
struct bcache_device *d;
|
||||
/* upper bound on the number of threads that get started */
int total_threads;
|
||||
/* next root-node key index to hand out; read and incremented under idx_lock */
int key_idx;
|
||||
spinlock_t idx_lock;
|
||||
/* incremented before each thread is started, decremented when a thread finishes */
atomic_t started;
|
||||
/* set to 1 by a thread that ran out of root keys; stops creation of more threads */
atomic_t enough;
|
||||
/* woken by the thread that drops 'started' to zero */
wait_queue_head_t wait;
|
||||
/* one slot per potential thread */
struct dirty_init_thrd_info infos[BCH_DIRTY_INIT_THRD_MAX];
|
||||
};
|
||||
|
||||
static inline uint64_t bcache_dev_sectors_dirty(struct bcache_device *d)
|
||||
{
|
||||
uint64_t i, ret = 0;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user