Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block updates and fixes from Jens Axboe:

 - NVMe updates and fixes that missed the first pull request. This
   includes bug fixes, and support for autonomous power management.

 - Fix from Christoph for missing clear of the request payload, causing
   a problem with (at least) the storvsc driver.

 - Further fixes for the queue/bdi life time issues from Jan.

 - The Kconfig mq scheduler update from me.

 - Fixing a use-after-free in dm-rq, spotted by Bart, introduced in
   this merge window.

 - Three fixes for nbd from Josef.

 - Bug fix from Omar, fixing a bug in sas transport code that oopses
   when bsg ioctls were used.

 - Improvements to the queue restart and tag wait from Omar.

 - Set of fixes for the sed/opal code from Scott.

 - Three trivial patches to cciss from Tobin.

* 'for-linus' of git://git.kernel.dk/linux-block: (41 commits)
  dm-rq: don't dereference request payload after ending request
  blk-mq-sched: separate mark hctx and queue restart operations
  blk-mq: use sbq wait queues instead of restart for driver tags
  block/sed-opal: Propagate original error message to userland.
  nvme/pci: re-check security protocol support after reset
  block/sed-opal: Introduce free_opal_dev to free the structure and clean up state
  nvme: detect NVMe controller in recent MacBooks
  nvme-rdma: add support for host_traddr
  nvmet-rdma: Fix error handling
  nvmet-rdma: use nvme cm status helper
  nvme-rdma: move nvme cm status helper to .h file
  nvme-fc: don't bother to validate ioccsz and iorcsz
  nvme/pci: No special case for queue busy on IO
  nvme/core: Fix race kicking freed request_queue
  nvme/pci: Disable on removal when disconnected
  nvme: Enable autonomous power state transitions
  nvme: Add a quirk mechanism that uses identify_ctrl
  nvme: make nvmf_register_transport require a create_ctrl callback
  nvme: Use CNS as 8-bit field and avoid endianness conversion
  nvme: add semicolon in nvme_command setting
  ...
block/Kconfig.iosched
@@ -69,50 +69,6 @@ config MQ_IOSCHED_DEADLINE
 	---help---
 	  MQ version of the deadline IO scheduler.
 
-config MQ_IOSCHED_NONE
-	bool
-	default y
-
-choice
-	prompt "Default single-queue blk-mq I/O scheduler"
-	default DEFAULT_SQ_NONE
-	help
-	  Select the I/O scheduler which will be used by default for blk-mq
-	  managed block devices with a single queue.
-
-config DEFAULT_SQ_DEADLINE
-	bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-config DEFAULT_SQ_NONE
-	bool "None"
-
-endchoice
-
-config DEFAULT_SQ_IOSCHED
-	string
-	default "mq-deadline" if DEFAULT_SQ_DEADLINE
-	default "none" if DEFAULT_SQ_NONE
-
-choice
-	prompt "Default multi-queue blk-mq I/O scheduler"
-	default DEFAULT_MQ_NONE
-	help
-	  Select the I/O scheduler which will be used by default for blk-mq
-	  managed block devices with multiple queues.
-
-config DEFAULT_MQ_DEADLINE
-	bool "MQ Deadline" if MQ_IOSCHED_DEADLINE=y
-
-config DEFAULT_MQ_NONE
-	bool "None"
-
-endchoice
-
-config DEFAULT_MQ_IOSCHED
-	string
-	default "mq-deadline" if DEFAULT_MQ_DEADLINE
-	default "none" if DEFAULT_MQ_NONE
-
 endmenu
 
 endif
block/blk-mq-sched.c (+8 -21)
@@ -205,7 +205,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
 	 * needing a restart in that case.
 	 */
 	if (!list_empty(&rq_list)) {
-		blk_mq_sched_mark_restart(hctx);
+		blk_mq_sched_mark_restart_hctx(hctx);
 		did_work = blk_mq_dispatch_rq_list(hctx, &rq_list);
 	} else if (!has_sched_dispatch) {
 		blk_mq_flush_busy_ctxs(hctx, &rq_list);
@@ -331,20 +331,16 @@ static void blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
 
 void blk_mq_sched_restart_queues(struct blk_mq_hw_ctx *hctx)
 {
-	struct request_queue *q = hctx->queue;
 	unsigned int i;
 
-	if (test_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-		if (test_and_clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags)) {
-			queue_for_each_hw_ctx(q, hctx, i)
-				blk_mq_sched_restart_hctx(hctx);
-		}
-	} else {
+	if (!(hctx->flags & BLK_MQ_F_TAG_SHARED))
 		blk_mq_sched_restart_hctx(hctx);
+	else {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+			return;
+
+		clear_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			blk_mq_sched_restart_hctx(hctx);
 	}
 }
 
@@ -498,15 +494,6 @@ int blk_mq_sched_init(struct request_queue *q)
 {
 	int ret;
 
-#if defined(CONFIG_DEFAULT_SQ_NONE)
-	if (q->nr_hw_queues == 1)
-		return 0;
-#endif
-#if defined(CONFIG_DEFAULT_MQ_NONE)
-	if (q->nr_hw_queues > 1)
-		return 0;
-#endif
-
 	mutex_lock(&q->sysfs_lock);
 	ret = elevator_init(q, NULL);
 	mutex_unlock(&q->sysfs_lock);
 
block/blk-mq-sched.h (+18 -8)
@@ -122,17 +122,27 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-static inline void blk_mq_sched_mark_restart(struct blk_mq_hw_ctx *hctx)
+/*
+ * Mark a hardware queue as needing a restart.
+ */
+static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 {
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-			struct request_queue *q = hctx->queue;
+}
 
-			if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
-				set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
-		}
-	}
+/*
+ * Mark a hardware queue and the request queue it belongs to as needing a
+ * restart.
+ */
+static inline void blk_mq_sched_mark_restart_queue(struct blk_mq_hw_ctx *hctx)
+{
+	struct request_queue *q = hctx->queue;
+
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	if (!test_bit(QUEUE_FLAG_RESTART, &q->queue_flags))
+		set_bit(QUEUE_FLAG_RESTART, &q->queue_flags);
 }
 
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
block/blk-mq.c (+55 -9)
@@ -904,6 +904,44 @@ static bool reorder_tags_to_front(struct list_head *list)
 	return first != NULL;
 }
 
+static int blk_mq_dispatch_wake(wait_queue_t *wait, unsigned mode, int flags,
+				void *key)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
+
+	list_del(&wait->task_list);
+	clear_bit_unlock(BLK_MQ_S_TAG_WAITING, &hctx->state);
+	blk_mq_run_hw_queue(hctx, true);
+	return 1;
+}
+
+static bool blk_mq_dispatch_wait_add(struct blk_mq_hw_ctx *hctx)
+{
+	struct sbq_wait_state *ws;
+
+	/*
+	 * The TAG_WAITING bit serves as a lock protecting hctx->dispatch_wait.
+	 * The thread which wins the race to grab this bit adds the hardware
+	 * queue to the wait queue.
+	 */
+	if (test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state) ||
+	    test_and_set_bit_lock(BLK_MQ_S_TAG_WAITING, &hctx->state))
+		return false;
+
+	init_waitqueue_func_entry(&hctx->dispatch_wait, blk_mq_dispatch_wake);
+	ws = bt_wait_ptr(&hctx->tags->bitmap_tags, hctx);
+
+	/*
+	 * As soon as this returns, it's no longer safe to fiddle with
+	 * hctx->dispatch_wait, since a completion can wake up the wait queue
+	 * and unlock the bit.
+	 */
+	add_wait_queue(&ws->wait, &hctx->dispatch_wait);
+	return true;
+}
+
 bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 {
 	struct request_queue *q = hctx->queue;
@@ -931,15 +969,22 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 				continue;
 
 			/*
-			 * We failed getting a driver tag. Mark the queue(s)
-			 * as needing a restart. Retry getting a tag again,
-			 * in case the needed IO completed right before we
-			 * marked the queue as needing a restart.
+			 * The initial allocation attempt failed, so we need to
+			 * rerun the hardware queue when a tag is freed.
 			 */
-			blk_mq_sched_mark_restart(hctx);
-			if (!blk_mq_get_driver_tag(rq, &hctx, false))
+			if (blk_mq_dispatch_wait_add(hctx)) {
+				/*
+				 * It's possible that a tag was freed in the
+				 * window between the allocation failure and
+				 * adding the hardware queue to the wait queue.
+				 */
+				if (!blk_mq_get_driver_tag(rq, &hctx, false))
+					break;
+			} else {
 				break;
+			}
 		}
 
 		list_del_init(&rq->queuelist);
 
 		bd.rq = rq;
@@ -995,10 +1040,11 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 		 *
 		 * blk_mq_run_hw_queue() already checks the STOPPED bit
 		 *
-		 * If RESTART is set, then let completion restart the queue
-		 * instead of potentially looping here.
+		 * If RESTART or TAG_WAITING is set, then let completion restart
+		 * the queue instead of potentially looping here.
 		 */
-		if (!blk_mq_sched_needs_restart(hctx))
+		if (!blk_mq_sched_needs_restart(hctx) &&
+		    !test_bit(BLK_MQ_S_TAG_WAITING, &hctx->state))
 			blk_mq_run_hw_queue(hctx, true);
 	}
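The two hunks above amount to a one-bit lock: BLK_MQ_S_TAG_WAITING guards hctx->dispatch_wait, and whoever wins the test_and_set_bit_lock() race gets to register the wait entry; the completion side removes the entry, releases the bit, and reruns the queue. A rough userspace sketch of that pattern using C11 atomics (toy code, not the kernel's bitops or wait-queue API):

/* Minimal illustration of the TAG_WAITING idea with C11 atomics. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TAG_WAITING (1u << 0)

static atomic_uint state;

/* Returns true if the caller won the race and may register the waiter. */
static bool dispatch_wait_add(void)
{
	/* Cheap test first, then the atomic read-modify-write, mirroring
	 * test_bit() followed by test_and_set_bit_lock(). */
	if (atomic_load_explicit(&state, memory_order_relaxed) & TAG_WAITING)
		return false;
	if (atomic_fetch_or_explicit(&state, TAG_WAITING,
				     memory_order_acquire) & TAG_WAITING)
		return false;

	/* Winner: it is now safe to set up the single wait entry and
	 * publish it to the wait queue. */
	return true;
}

/* Completion side: release the bit so the dispatcher can retry. */
static void dispatch_wake(void)
{
	atomic_fetch_and_explicit(&state, ~TAG_WAITING, memory_order_release);
}

int main(void)
{
	printf("first add: %d\n", dispatch_wait_add());  /* 1: won the bit */
	printf("second add: %d\n", dispatch_wait_add()); /* 0: bit held */
	dispatch_wake();
	printf("after wake: %d\n", dispatch_wait_add()); /* 1: won again */
	return 0;
}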
block/elevator.c (+13 -6)
@@ -220,17 +220,24 @@ int elevator_init(struct request_queue *q, char *name)
 	}
 
 	if (!e) {
-		if (q->mq_ops && q->nr_hw_queues == 1)
-			e = elevator_get(CONFIG_DEFAULT_SQ_IOSCHED, false);
-		else if (q->mq_ops)
-			e = elevator_get(CONFIG_DEFAULT_MQ_IOSCHED, false);
-		else
+		/*
+		 * For blk-mq devices, we default to using mq-deadline,
+		 * if available, for single queue devices. If deadline
+		 * isn't available OR we have multiple queues, default
+		 * to "none".
+		 */
+		if (q->mq_ops) {
+			if (q->nr_hw_queues == 1)
+				e = elevator_get("mq-deadline", false);
+			if (!e)
+				return 0;
+		} else
 			e = elevator_get(CONFIG_DEFAULT_IOSCHED, false);
 
 		if (!e) {
 			printk(KERN_ERR
 				"Default I/O scheduler not found. " \
-				"Using noop/none.\n");
+				"Using noop.\n");
 			e = elevator_get("noop", false);
 		}
 	}
block/genhd.c (+2 -2)
@@ -669,14 +669,14 @@ void del_gendisk(struct gendisk *disk)
 	disk_part_iter_init(&piter, disk,
 			     DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
 	while ((part = disk_part_iter_next(&piter))) {
-		bdev_unhash_inode(MKDEV(disk->major,
-					disk->first_minor + part->partno));
 		invalidate_partition(disk, part->partno);
+		bdev_unhash_inode(part_devt(part));
 		delete_partition(disk, part->partno);
 	}
 	disk_part_iter_exit(&piter);
 
 	invalidate_partition(disk, 0);
+	bdev_unhash_inode(disk_devt(disk));
 	set_capacity(disk, 0);
 	disk->flags &= ~GENHD_FL_UP;
+257 -320  (file diff suppressed because it is too large)
+88 -94  (file diff suppressed because it is too large)
drivers/block/nbd.c (+153 -164)
@@ -96,6 +96,10 @@ static int max_part;
 static struct workqueue_struct *recv_workqueue;
 static int part_shift;
 
+static int nbd_dev_dbg_init(struct nbd_device *nbd);
+static void nbd_dev_dbg_close(struct nbd_device *nbd);
+
+
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
 	return disk_to_dev(nbd->disk);
@@ -120,7 +124,7 @@ static const char *nbdcmd_to_ascii(int cmd)
 
 static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 {
-	bdev->bd_inode->i_size = 0;
+	bd_set_size(bdev, 0);
 	set_capacity(nbd->disk, 0);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 
@@ -129,29 +133,20 @@ static int nbd_size_clear(struct nbd_device *nbd, struct block_device *bdev)
 
 static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev)
 {
-	if (!nbd_is_connected(nbd))
-		return;
-
-	bdev->bd_inode->i_size = nbd->bytesize;
+	blk_queue_logical_block_size(nbd->disk->queue, nbd->blksize);
+	blk_queue_physical_block_size(nbd->disk->queue, nbd->blksize);
+	bd_set_size(bdev, nbd->bytesize);
 	set_capacity(nbd->disk, nbd->bytesize >> 9);
 	kobject_uevent(&nbd_to_dev(nbd)->kobj, KOBJ_CHANGE);
 }
 
-static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
+static void nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
 			loff_t blocksize, loff_t nr_blocks)
 {
-	int ret;
-
-	ret = set_blocksize(bdev, blocksize);
-	if (ret)
-		return ret;
-
 	nbd->blksize = blocksize;
 	nbd->bytesize = blocksize * nr_blocks;
-
-	nbd_size_update(nbd, bdev);
-
-	return 0;
+	if (nbd_is_connected(nbd))
+		nbd_size_update(nbd, bdev);
 }
 
 static void nbd_end_request(struct nbd_cmd *cmd)
@@ -571,10 +566,17 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_MQ_RQ_QUEUE_OK;
 }
 
-static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock)
+static int nbd_add_socket(struct nbd_device *nbd, struct block_device *bdev,
+			  unsigned long arg)
 {
+	struct socket *sock;
 	struct nbd_sock **socks;
 	struct nbd_sock *nsock;
+	int err;
+
+	sock = sockfd_lookup(arg, &err);
+	if (!sock)
+		return err;
 
 	if (!nbd->task_setup)
 		nbd->task_setup = current;
@@ -598,26 +600,20 @@
 	nsock->sock = sock;
 	socks[nbd->num_connections++] = nsock;
 
+	if (max_part)
+		bdev->bd_invalidated = 1;
 	return 0;
 }
 
+/* Reset all properties of an NBD device */
 static void nbd_reset(struct nbd_device *nbd)
 {
-	int i;
-
-	for (i = 0; i < nbd->num_connections; i++)
-		kfree(nbd->socks[i]);
-	kfree(nbd->socks);
-	nbd->socks = NULL;
 	nbd->runtime_flags = 0;
 	nbd->blksize = 1024;
 	nbd->bytesize = 0;
 	set_capacity(nbd->disk, 0);
 	nbd->flags = 0;
 	nbd->tag_set.timeout = 0;
-	nbd->num_connections = 0;
-	nbd->task_setup = NULL;
 	queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
 }
 
@@ -659,81 +655,143 @@ static void send_disconnects(struct nbd_device *nbd)
 	}
 }
 
-static int nbd_dev_dbg_init(struct nbd_device *nbd);
-static void nbd_dev_dbg_close(struct nbd_device *nbd);
+static int nbd_disconnect(struct nbd_device *nbd, struct block_device *bdev)
+{
+	dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
+	if (!nbd->socks)
+		return -EINVAL;
+
+	mutex_unlock(&nbd->config_lock);
+	fsync_bdev(bdev);
+	mutex_lock(&nbd->config_lock);
+
+	/* Check again after getting mutex back. */
+	if (!nbd->socks)
+		return -EINVAL;
+
+	if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
+			      &nbd->runtime_flags))
+		send_disconnects(nbd);
+	return 0;
+}
+
+static int nbd_clear_sock(struct nbd_device *nbd, struct block_device *bdev)
+{
+	sock_shutdown(nbd);
+	nbd_clear_que(nbd);
+	kill_bdev(bdev);
+	nbd_bdev_reset(bdev);
+	/*
+	 * We want to give the run thread a chance to wait for everybody
+	 * to clean up and then do it's own cleanup.
+	 */
+	if (!test_bit(NBD_RUNNING, &nbd->runtime_flags) &&
+	    nbd->num_connections) {
+		int i;
+
+		for (i = 0; i < nbd->num_connections; i++)
+			kfree(nbd->socks[i]);
+		kfree(nbd->socks);
+		nbd->socks = NULL;
+		nbd->num_connections = 0;
+	}
+	nbd->task_setup = NULL;
+
+	return 0;
+}
+
+static int nbd_start_device(struct nbd_device *nbd, struct block_device *bdev)
+{
+	struct recv_thread_args *args;
+	int num_connections = nbd->num_connections;
+	int error = 0, i;
+
+	if (nbd->task_recv)
+		return -EBUSY;
+	if (!nbd->socks)
+		return -EINVAL;
+	if (num_connections > 1 &&
+	    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
+		dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
+		error = -EINVAL;
+		goto out_err;
+	}
+
+	set_bit(NBD_RUNNING, &nbd->runtime_flags);
+	blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
+	args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
+	if (!args) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+	nbd->task_recv = current;
+	mutex_unlock(&nbd->config_lock);
+
+	nbd_parse_flags(nbd, bdev);
+
+	error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
+	if (error) {
+		dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
+		goto out_recv;
+	}
+
+	nbd_size_update(nbd, bdev);
+
+	nbd_dev_dbg_init(nbd);
+	for (i = 0; i < num_connections; i++) {
+		sk_set_memalloc(nbd->socks[i]->sock->sk);
+		atomic_inc(&nbd->recv_threads);
+		INIT_WORK(&args[i].work, recv_work);
+		args[i].nbd = nbd;
+		args[i].index = i;
+		queue_work(recv_workqueue, &args[i].work);
+	}
+	wait_event_interruptible(nbd->recv_wq,
+				 atomic_read(&nbd->recv_threads) == 0);
+	for (i = 0; i < num_connections; i++)
+		flush_work(&args[i].work);
+	nbd_dev_dbg_close(nbd);
+	nbd_size_clear(nbd, bdev);
+	device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
+out_recv:
+	mutex_lock(&nbd->config_lock);
+	nbd->task_recv = NULL;
+out_err:
+	clear_bit(NBD_RUNNING, &nbd->runtime_flags);
+	nbd_clear_sock(nbd, bdev);
+
+	/* user requested, ignore socket errors */
+	if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
+		error = 0;
+	if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
+		error = -ETIMEDOUT;
+
+	nbd_reset(nbd);
+	return error;
+}
 
 /* Must be called with config_lock held */
 static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		       unsigned int cmd, unsigned long arg)
 {
 	switch (cmd) {
-	case NBD_DISCONNECT: {
-		dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
-		if (!nbd->socks)
-			return -EINVAL;
-
-		mutex_unlock(&nbd->config_lock);
-		fsync_bdev(bdev);
-		mutex_lock(&nbd->config_lock);
-
-		/* Check again after getting mutex back. */
-		if (!nbd->socks)
-			return -EINVAL;
-
-		if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED,
-				      &nbd->runtime_flags))
-			send_disconnects(nbd);
-		return 0;
-	}
-
+	case NBD_DISCONNECT:
+		return nbd_disconnect(nbd, bdev);
 	case NBD_CLEAR_SOCK:
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
-		/*
-		 * We want to give the run thread a chance to wait for everybody
-		 * to clean up and then do it's own cleanup.
-		 */
-		if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) {
-			int i;
-
-			for (i = 0; i < nbd->num_connections; i++)
-				kfree(nbd->socks[i]);
-			kfree(nbd->socks);
-			nbd->socks = NULL;
-			nbd->num_connections = 0;
-			nbd->task_setup = NULL;
-		}
-		return 0;
-
-	case NBD_SET_SOCK: {
-		int err;
-		struct socket *sock = sockfd_lookup(arg, &err);
-
-		if (!sock)
-			return err;
-
-		err = nbd_add_socket(nbd, sock);
-		if (!err && max_part)
-			bdev->bd_invalidated = 1;
-
-		return err;
-	}
-
-	case NBD_SET_BLKSIZE: {
-		loff_t bsize = div_s64(nbd->bytesize, arg);
-
-		return nbd_size_set(nbd, bdev, arg, bsize);
-	}
-
+		return nbd_clear_sock(nbd, bdev);
+	case NBD_SET_SOCK:
+		return nbd_add_socket(nbd, bdev, arg);
+	case NBD_SET_BLKSIZE:
+		nbd_size_set(nbd, bdev, arg,
+			     div_s64(nbd->bytesize, arg));
+		return 0;
 	case NBD_SET_SIZE:
-		return nbd_size_set(nbd, bdev, nbd->blksize,
-				    div_s64(arg, nbd->blksize));
-
+		nbd_size_set(nbd, bdev, nbd->blksize,
+			     div_s64(arg, nbd->blksize));
+		return 0;
 	case NBD_SET_SIZE_BLOCKS:
-		return nbd_size_set(nbd, bdev, nbd->blksize, arg);
-
+		nbd_size_set(nbd, bdev, nbd->blksize, arg);
+		return 0;
 	case NBD_SET_TIMEOUT:
 		nbd->tag_set.timeout = arg * HZ;
 		return 0;
@@ -741,85 +799,14 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_FLAGS:
 		nbd->flags = arg;
 		return 0;
-
-	case NBD_DO_IT: {
-		struct recv_thread_args *args;
-		int num_connections = nbd->num_connections;
-		int error = 0, i;
-
-		if (nbd->task_recv)
-			return -EBUSY;
-		if (!nbd->socks)
-			return -EINVAL;
-		if (num_connections > 1 &&
-		    !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) {
-			dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n");
-			error = -EINVAL;
-			goto out_err;
-		}
-
-		set_bit(NBD_RUNNING, &nbd->runtime_flags);
-		blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections);
-		args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL);
-		if (!args) {
-			error = -ENOMEM;
-			goto out_err;
-		}
-		nbd->task_recv = current;
-		mutex_unlock(&nbd->config_lock);
-
-		nbd_parse_flags(nbd, bdev);
-
-		error = device_create_file(disk_to_dev(nbd->disk), &pid_attr);
-		if (error) {
-			dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n");
-			goto out_recv;
-		}
-
-		nbd_size_update(nbd, bdev);
-
-		nbd_dev_dbg_init(nbd);
-		for (i = 0; i < num_connections; i++) {
-			sk_set_memalloc(nbd->socks[i]->sock->sk);
-			atomic_inc(&nbd->recv_threads);
-			INIT_WORK(&args[i].work, recv_work);
-			args[i].nbd = nbd;
-			args[i].index = i;
-			queue_work(recv_workqueue, &args[i].work);
-		}
-		wait_event_interruptible(nbd->recv_wq,
-					 atomic_read(&nbd->recv_threads) == 0);
-		for (i = 0; i < num_connections; i++)
-			flush_work(&args[i].work);
-		nbd_dev_dbg_close(nbd);
-		nbd_size_clear(nbd, bdev);
-		device_remove_file(disk_to_dev(nbd->disk), &pid_attr);
-out_recv:
-		mutex_lock(&nbd->config_lock);
-		nbd->task_recv = NULL;
-out_err:
-		sock_shutdown(nbd);
-		nbd_clear_que(nbd);
-		kill_bdev(bdev);
-		nbd_bdev_reset(bdev);
-
-		/* user requested, ignore socket errors */
-		if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
-			error = 0;
-		if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
-			error = -ETIMEDOUT;
-
-		nbd_reset(nbd);
-		return error;
-	}
-
+	case NBD_DO_IT:
+		return nbd_start_device(nbd, bdev);
 	case NBD_CLEAR_QUE:
 		/*
 		 * This is for compatibility only. The queue is always cleared
 		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
 		 */
		return 0;
-
 	case NBD_PRINT_DEBUG:
 		/*
 		 * For compatibility only, we no longer keep a list of
@@ -1134,8 +1121,10 @@ static int __init nbd_init(void)
 	if (!recv_workqueue)
 		return -ENOMEM;
 
-	if (register_blkdev(NBD_MAJOR, "nbd"))
+	if (register_blkdev(NBD_MAJOR, "nbd")) {
+		destroy_workqueue(recv_workqueue);
 		return -EIO;
+	}
 
 	nbd_dbg_init();
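nbd_start_device() above follows a common fan-out/fan-in shape: allocate one argument slot per connection, queue one worker per connection, then wait for the count of live receivers to drop to zero. A minimal userspace analogue with POSIX threads standing in for the kernel workqueue (illustrative only, build with -lpthread):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct recv_args {
	int index;          /* which connection this worker services */
};

static void *recv_work(void *p)
{
	struct recv_args *args = p;

	printf("worker %d: receiving replies...\n", args->index);
	return NULL;
}

int main(void)
{
	int num_connections = 4;
	struct recv_args *args;
	pthread_t *threads;
	int i;

	/* kcalloc() analogue: one args slot per connection. */
	args = calloc(num_connections, sizeof(*args));
	threads = calloc(num_connections, sizeof(*threads));
	if (!args || !threads)
		return 1;

	for (i = 0; i < num_connections; i++) {
		args[i].index = i;
		pthread_create(&threads[i], NULL, recv_work, &args[i]);
	}

	/* wait_event_interruptible() + flush_work() analogue: block until
	 * every receiver has finished before tearing anything down. */
	for (i = 0; i < num_connections; i++)
		pthread_join(threads[i], NULL);

	free(threads);
	free(args);
	return 0;
}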
drivers/md/dm-rq.c (+4 -2)
@@ -328,13 +328,15 @@ static void dm_softirq_done(struct request *rq)
 	int rw;
 
 	if (!clone) {
-		rq_end_stats(tio->md, rq);
+		struct mapped_device *md = tio->md;
+
+		rq_end_stats(md, rq);
 		rw = rq_data_dir(rq);
 		if (!rq->q->mq_ops)
 			blk_end_request_all(rq, tio->error);
 		else
 			blk_mq_end_request(rq, tio->error);
-		rq_completed(tio->md, rw, false);
+		rq_completed(md, rw, false);
 		return;
 	}
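The fix caches tio->md in a local before ending the request, because ending the request can free the structure tio points into. A toy illustration of that use-after-free class (hypothetical types, not the dm-rq ones):

#include <stdio.h>
#include <stdlib.h>

struct mapped_device { int completions; };
struct tio { struct mapped_device *md; int error; };

static void end_request(struct tio *tio)
{
	free(tio);              /* the request payload is gone after this */
}

static void softirq_done(struct tio *tio)
{
	struct mapped_device *md = tio->md;  /* save before freeing */

	end_request(tio);
	/* Touching tio->md here would be a use-after-free; md is safe. */
	md->completions++;
}

int main(void)
{
	struct mapped_device md = { 0 };
	struct tio *tio = malloc(sizeof(*tio));

	if (!tio)
		return 1;
	tio->md = &md;
	softirq_done(tio);
	printf("completions: %d\n", md.completions);
	return 0;
}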
drivers/nvme/host/core.c (+250 -7)
@@ -26,6 +26,7 @@
 #include <linux/ptrace.h>
 #include <linux/nvme_ioctl.h>
 #include <linux/t10-pi.h>
+#include <linux/pm_qos.h>
 #include <scsi/sg.h>
 #include <asm/unaligned.h>
 
@@ -56,6 +57,11 @@ EXPORT_SYMBOL_GPL(nvme_max_retries);
 static int nvme_char_major;
 module_param(nvme_char_major, int, 0);
 
+static unsigned long default_ps_max_latency_us = 25000;
+module_param(default_ps_max_latency_us, ulong, 0644);
+MODULE_PARM_DESC(default_ps_max_latency_us,
+		 "max power saving latency for new devices; use PM QOS to change per device");
+
 static LIST_HEAD(nvme_ctrl_list);
 static DEFINE_SPINLOCK(dev_list_lock);
 
@@ -560,7 +566,7 @@ int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
 
 	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
 	c.identify.opcode = nvme_admin_identify;
-	c.identify.cns = cpu_to_le32(NVME_ID_CNS_CTRL);
+	c.identify.cns = NVME_ID_CNS_CTRL;
 
 	*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
 	if (!*id)
@@ -578,7 +584,7 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *ns_list)
 	struct nvme_command c = { };
 
 	c.identify.opcode = nvme_admin_identify;
-	c.identify.cns = cpu_to_le32(NVME_ID_CNS_NS_ACTIVE_LIST);
+	c.identify.cns = NVME_ID_CNS_NS_ACTIVE_LIST;
 	c.identify.nsid = cpu_to_le32(nsid);
 	return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000);
 }
@@ -590,8 +596,9 @@ int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid,
 	int error;
 
 	/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
-	c.identify.opcode = nvme_admin_identify,
-	c.identify.nsid = cpu_to_le32(nsid),
+	c.identify.opcode = nvme_admin_identify;
+	c.identify.nsid = cpu_to_le32(nsid);
+	c.identify.cns = NVME_ID_CNS_NS;
 
 	*id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL);
 	if (!*id)
@@ -1251,6 +1258,176 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
 	blk_queue_write_cache(q, vwc, vwc);
 }
 
+static void nvme_configure_apst(struct nvme_ctrl *ctrl)
+{
+	/*
+	 * APST (Autonomous Power State Transition) lets us program a
+	 * table of power state transitions that the controller will
+	 * perform automatically.  We configure it with a simple
+	 * heuristic: we are willing to spend at most 2% of the time
+	 * transitioning between power states.  Therefore, when running
+	 * in any given state, we will enter the next lower-power
+	 * non-operational state after waiting 100 * (enlat + exlat)
+	 * microseconds, as long as that state's total latency is under
+	 * the requested maximum latency.
+	 *
+	 * We will not autonomously enter any non-operational state for
+	 * which the total latency exceeds ps_max_latency_us.  Users
+	 * can set ps_max_latency_us to zero to turn off APST.
+	 */
+
+	unsigned apste;
+	struct nvme_feat_auto_pst *table;
+	int ret;
+
+	/*
+	 * If APST isn't supported or if we haven't been initialized yet,
+	 * then don't do anything.
+	 */
+	if (!ctrl->apsta)
+		return;
+
+	if (ctrl->npss > 31) {
+		dev_warn(ctrl->device, "NPSS is invalid; not using APST\n");
+		return;
+	}
+
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
+	if (!table)
+		return;
+
+	if (ctrl->ps_max_latency_us == 0) {
+		/* Turn off APST. */
+		apste = 0;
+	} else {
+		__le64 target = cpu_to_le64(0);
+		int state;
+
+		/*
+		 * Walk through all states from lowest- to highest-power.
+		 * According to the spec, lower-numbered states use more
+		 * power.  NPSS, despite the name, is the index of the
+		 * lowest-power state, not the number of states.
+		 */
+		for (state = (int)ctrl->npss; state >= 0; state--) {
+			u64 total_latency_us, transition_ms;
+
+			if (target)
+				table->entries[state] = target;
+
+			/*
+			 * Is this state a useful non-operational state for
+			 * higher-power states to autonomously transition to?
+			 */
+			if (!(ctrl->psd[state].flags &
+			      NVME_PS_FLAGS_NON_OP_STATE))
+				continue;
+
+			total_latency_us =
+				(u64)le32_to_cpu(ctrl->psd[state].entry_lat) +
+				+ le32_to_cpu(ctrl->psd[state].exit_lat);
+			if (total_latency_us > ctrl->ps_max_latency_us)
+				continue;
+
+			/*
+			 * This state is good.  Use it as the APST idle
+			 * target for higher power states.
+			 */
+			transition_ms = total_latency_us + 19;
+			do_div(transition_ms, 20);
+			if (transition_ms > (1 << 24) - 1)
+				transition_ms = (1 << 24) - 1;
+
+			target = cpu_to_le64((state << 3) |
+					     (transition_ms << 8));
+		}
+
+		apste = 1;
+	}
+
+	ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste,
+				table, sizeof(*table), NULL);
+	if (ret)
+		dev_err(ctrl->device, "failed to set APST feature (%d)\n", ret);
+
+	kfree(table);
+}
+
+static void nvme_set_latency_tolerance(struct device *dev, s32 val)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	u64 latency;
+
+	switch (val) {
+	case PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT:
+	case PM_QOS_LATENCY_ANY:
+		latency = U64_MAX;
+		break;
+
+	default:
+		latency = val;
+	}
+
+	if (ctrl->ps_max_latency_us != latency) {
+		ctrl->ps_max_latency_us = latency;
+		nvme_configure_apst(ctrl);
+	}
+}
+
+struct nvme_core_quirk_entry {
+	/*
+	 * NVMe model and firmware strings are padded with spaces.  For
+	 * simplicity, strings in the quirk table are padded with NULLs
+	 * instead.
+	 */
+	u16 vid;
+	const char *mn;
+	const char *fr;
+	unsigned long quirks;
+};
+
+static const struct nvme_core_quirk_entry core_quirks[] = {
+	/*
+	 * Seen on a Samsung "SM951 NVMe SAMSUNG 256GB": using APST causes
+	 * the controller to go out to lunch.  It dies when the watchdog
+	 * timer reads CSTS and gets 0xffffffff.
+	 */
+	{
+		.vid = 0x144d,
+		.fr = "BXW75D0Q",
+		.quirks = NVME_QUIRK_NO_APST,
+	},
+};
+
+/* match is null-terminated but idstr is space-padded. */
+static bool string_matches(const char *idstr, const char *match, size_t len)
+{
+	size_t matchlen;
+
+	if (!match)
+		return true;
+
+	matchlen = strlen(match);
+	WARN_ON_ONCE(matchlen > len);
+
+	if (memcmp(idstr, match, matchlen))
+		return false;
+
+	for (; matchlen < len; matchlen++)
+		if (idstr[matchlen] != ' ')
+			return false;
+
+	return true;
+}
+
+static bool quirk_matches(const struct nvme_id_ctrl *id,
+			  const struct nvme_core_quirk_entry *q)
+{
+	return q->vid == le16_to_cpu(id->vid) &&
+		string_matches(id->mn, q->mn, sizeof(id->mn)) &&
+		string_matches(id->fr, q->fr, sizeof(id->fr));
+}
+
 /*
  * Initialize the cached copies of the Identify data and various controller
  * register in our nvme_ctrl structure.  This should be called as soon as
@@ -1262,6 +1439,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	u64 cap;
 	int ret, page_shift;
 	u32 max_hw_sectors;
+	u8 prev_apsta;
 
 	ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs);
 	if (ret) {
@@ -1285,6 +1463,24 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		return -EIO;
 	}
 
+	if (!ctrl->identified) {
+		/*
+		 * Check for quirks.  Quirk can depend on firmware version,
+		 * so, in principle, the set of quirks present can change
+		 * across a reset.  As a possible future enhancement, we
+		 * could re-scan for quirks every time we reinitialize
+		 * the device, but we'd have to make sure that the driver
+		 * behaves intelligently if the quirks change.
+		 */
+
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(core_quirks); i++) {
+			if (quirk_matches(id, &core_quirks[i]))
+				ctrl->quirks |= core_quirks[i].quirks;
+		}
+	}
+
 	ctrl->oacs = le16_to_cpu(id->oacs);
 	ctrl->vid = le16_to_cpu(id->vid);
 	ctrl->oncs = le16_to_cpup(&id->oncs);
@@ -1305,6 +1501,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->sgls = le32_to_cpu(id->sgls);
 	ctrl->kas = le16_to_cpu(id->kas);
 
+	ctrl->npss = id->npss;
+	prev_apsta = ctrl->apsta;
+	ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta;
+	memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
+
 	if (ctrl->ops->is_fabrics) {
 		ctrl->icdoff = le16_to_cpu(id->icdoff);
 		ctrl->ioccsz = le32_to_cpu(id->ioccsz);
@@ -1328,6 +1529,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	}
 
 	kfree(id);
 
+	if (ctrl->apsta && !prev_apsta)
+		dev_pm_qos_expose_latency_tolerance(ctrl->device);
+	else if (!ctrl->apsta && prev_apsta)
+		dev_pm_qos_hide_latency_tolerance(ctrl->device);
+
+	nvme_configure_apst(ctrl);
+
+	ctrl->identified = true;
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(nvme_init_identify);
@@ -1577,6 +1788,29 @@ static ssize_t nvme_sysfs_show_transport(struct device *dev,
 }
 static DEVICE_ATTR(transport, S_IRUGO, nvme_sysfs_show_transport, NULL);
 
+static ssize_t nvme_sysfs_show_state(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	static const char *const state_name[] = {
+		[NVME_CTRL_NEW]		= "new",
+		[NVME_CTRL_LIVE]	= "live",
+		[NVME_CTRL_RESETTING]	= "resetting",
+		[NVME_CTRL_RECONNECTING]= "reconnecting",
+		[NVME_CTRL_DELETING]	= "deleting",
+		[NVME_CTRL_DEAD]	= "dead",
+	};
+
+	if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
+	    state_name[ctrl->state])
+		return sprintf(buf, "%s\n", state_name[ctrl->state]);
+
+	return sprintf(buf, "unknown state\n");
+}
+
+static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL);
+
 static ssize_t nvme_sysfs_show_subsysnqn(struct device *dev,
 					 struct device_attribute *attr,
 					 char *buf)
@@ -1609,6 +1843,7 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_transport.attr,
 	&dev_attr_subsysnqn.attr,
 	&dev_attr_address.attr,
+	&dev_attr_state.attr,
 	NULL
 };
 
@@ -2065,6 +2300,14 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	list_add_tail(&ctrl->node, &nvme_ctrl_list);
 	spin_unlock(&dev_list_lock);
 
+	/*
+	 * Initialize latency tolerance controls.  The sysfs files won't
+	 * be visible to userspace unless the device actually supports APST.
+	 */
+	ctrl->device->power.set_latency_tolerance = nvme_set_latency_tolerance;
+	dev_pm_qos_update_user_latency_tolerance(ctrl->device,
+		min(default_ps_max_latency_us, (unsigned long)S32_MAX));
+
 	return 0;
 out_release_instance:
 	nvme_release_instance(ctrl);
@@ -2090,9 +2333,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
 		 * Revalidating a dead namespace sets capacity to 0. This will
 		 * end buffered writers dirtying pages that can't be synced.
 		 */
-		if (ns->disk && !test_and_set_bit(NVME_NS_DEAD, &ns->flags))
-			revalidate_disk(ns->disk);
-
+		if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
+			continue;
+		revalidate_disk(ns->disk);
 		blk_set_queue_dying(ns->queue);
 		blk_mq_abort_requeue_list(ns->queue);
 		blk_mq_start_stopped_hw_queues(ns->queue, true);
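For the APST table built above, each 64-bit entry packs the target power state into bits 3..7 and the idle time (in milliseconds, capped at 2^24 - 1) into bits 8..31; the idle time is (enlat + exlat + 19) / 20 milliseconds, roughly fifty times the combined transition latency. A standalone sketch of just that arithmetic (not the driver code):

#include <stdint.h>
#include <stdio.h>

static uint64_t apst_entry(unsigned state, uint64_t total_latency_us)
{
	/* Round up, then convert to ms: total_us / 20 ms == 50x in us. */
	uint64_t transition_ms = (total_latency_us + 19) / 20;

	if (transition_ms > (1u << 24) - 1)
		transition_ms = (1u << 24) - 1;
	/* ITPS in bits 3..7, ITPT (ms) in bits 8..31. */
	return ((uint64_t)state << 3) | (transition_ms << 8);
}

int main(void)
{
	/* e.g. a state with enlat + exlat = 5000us -> 250ms idle timeout */
	printf("entry = 0x%llx\n",
	       (unsigned long long)apst_entry(4, 5000));
	return 0;
}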
drivers/nvme/host/fabrics.c
@@ -480,11 +480,16 @@ EXPORT_SYMBOL_GPL(nvmf_connect_io_queue);
  * being implemented to the common NVMe fabrics library. Part of
  * the overall init sequence of starting up a fabrics driver.
  */
-void nvmf_register_transport(struct nvmf_transport_ops *ops)
+int nvmf_register_transport(struct nvmf_transport_ops *ops)
 {
+	if (!ops->create_ctrl)
+		return -EINVAL;
+
 	mutex_lock(&nvmf_transports_mutex);
 	list_add_tail(&ops->entry, &nvmf_transports);
 	mutex_unlock(&nvmf_transports_mutex);
+
+	return 0;
 }
 EXPORT_SYMBOL_GPL(nvmf_register_transport);
drivers/nvme/host/fabrics.h
@@ -128,7 +128,7 @@ int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
 int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
 int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
 int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
-void nvmf_register_transport(struct nvmf_transport_ops *ops);
+int nvmf_register_transport(struct nvmf_transport_ops *ops);
 void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 const char *nvmf_get_subsysnqn(struct nvme_ctrl *ctrl);
drivers/nvme/host/fc.c (+1 -14)
@@ -2353,18 +2353,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	/* sanity checks */
 
-	/* FC-NVME supports 64-byte SQE only */
-	if (ctrl->ctrl.ioccsz != 4) {
-		dev_err(ctrl->ctrl.device, "ioccsz %d is not supported!\n",
-				ctrl->ctrl.ioccsz);
-		goto out_remove_admin_queue;
-	}
-	/* FC-NVME supports 16-byte CQE only */
-	if (ctrl->ctrl.iorcsz != 1) {
-		dev_err(ctrl->ctrl.device, "iorcsz %d is not supported!\n",
-				ctrl->ctrl.iorcsz);
-		goto out_remove_admin_queue;
-	}
 	/* FC-NVME does not have other data in the capsule */
 	if (ctrl->ctrl.icdoff) {
 		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
@@ -2562,8 +2550,7 @@ static int __init nvme_fc_init_module(void)
 	if (!nvme_fc_wq)
 		return -ENOMEM;
 
-	nvmf_register_transport(&nvme_fc_transport);
-	return 0;
+	return nvmf_register_transport(&nvme_fc_transport);
 }
 
 static void __exit nvme_fc_exit_module(void)
drivers/nvme/host/nvme.h
@@ -78,6 +78,11 @@ enum nvme_quirks {
 	 * readiness, which is done by reading the NVME_CSTS_RDY bit.
 	 */
 	NVME_QUIRK_DELAY_BEFORE_CHK_RDY		= (1 << 3),
+
+	/*
+	 * APST should not be used.
+	 */
+	NVME_QUIRK_NO_APST			= (1 << 4),
 };
 
 /*
@@ -112,6 +117,7 @@ enum nvme_ctrl_state {
 
 struct nvme_ctrl {
 	enum nvme_ctrl_state state;
+	bool identified;
 	spinlock_t lock;
 	const struct nvme_ctrl_ops *ops;
 	struct request_queue *admin_q;
@@ -147,13 +153,19 @@ struct nvme_ctrl {
 	u32 vs;
 	u32 sgls;
 	u16 kas;
+	u8 npss;
+	u8 apsta;
 	unsigned int kato;
 	bool subsystem;
 	unsigned long quirks;
+	struct nvme_id_power_state psd[32];
 	struct work_struct scan_work;
 	struct work_struct async_event_work;
 	struct delayed_work ka_work;
 
+	/* Power saving configuration */
+	u64 ps_max_latency_us;
+
 	/* Fabrics only */
 	u16 sqsize;
 	u32 ioccsz;
drivers/nvme/host/pci.c (+15 -12)
@@ -613,10 +613,7 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 	spin_lock_irq(&nvmeq->q_lock);
 	if (unlikely(nvmeq->cq_vector < 0)) {
-		if (ns && !test_bit(NVME_NS_DEAD, &ns->flags))
-			ret = BLK_MQ_RQ_QUEUE_BUSY;
-		else
-			ret = BLK_MQ_RQ_QUEUE_ERROR;
+		ret = BLK_MQ_RQ_QUEUE_ERROR;
 		spin_unlock_irq(&nvmeq->q_lock);
 		goto out_cleanup_iod;
 	}
@@ -1739,7 +1736,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
 	if (dev->ctrl.admin_q)
 		blk_put_queue(dev->ctrl.admin_q);
 	kfree(dev->queues);
-	kfree(dev->ctrl.opal_dev);
+	free_opal_dev(dev->ctrl.opal_dev);
 	kfree(dev);
 }
 
@@ -1789,14 +1786,17 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	if ((dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) && !dev->ctrl.opal_dev) {
-		dev->ctrl.opal_dev =
-			init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
+		if (!dev->ctrl.opal_dev)
+			dev->ctrl.opal_dev =
+				init_opal_dev(&dev->ctrl, &nvme_sec_submit);
+		else if (was_suspend)
+			opal_unlock_from_suspend(dev->ctrl.opal_dev);
+	} else {
+		free_opal_dev(dev->ctrl.opal_dev);
+		dev->ctrl.opal_dev = NULL;
 	}
 
-	if (was_suspend)
-		opal_unlock_from_suspend(dev->ctrl.opal_dev);
-
 	result = nvme_setup_io_queues(dev);
 	if (result)
 		goto out;
@@ -2001,8 +2001,10 @@ static void nvme_remove(struct pci_dev *pdev)
 
 	pci_set_drvdata(pdev, NULL);
 
-	if (!pci_device_is_present(pdev))
+	if (!pci_device_is_present(pdev)) {
+		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
 		nvme_dev_disable(dev, false);
+	}
 
 	flush_work(&dev->reset_work);
 	nvme_uninit_ctrl(&dev->ctrl);
@@ -2121,6 +2123,7 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, nvme_id_table);
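The reset-path change above gives the cached Opal state a clear lifecycle: allocate it once when security support is reported, reuse it (unlocking after a suspend) on later resets, and free it if support disappears. A toy sketch of that pattern with stand-in functions (not the sed-opal API):

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct opal_dev { int dummy; };

static struct opal_dev *init_opal_dev(void)
{
	return calloc(1, sizeof(struct opal_dev));
}
static void free_opal_dev(struct opal_dev *d) { free(d); } /* NULL-safe */
static void opal_unlock_from_suspend(struct opal_dev *d) { (void)d; }

static struct opal_dev *opal;

static void reset_work(bool sec_supported, bool was_suspend)
{
	if (sec_supported) {
		if (!opal)
			opal = init_opal_dev();
		else if (was_suspend)
			opal_unlock_from_suspend(opal);
	} else {
		/* Security support vanished across the reset: drop the
		 * cached state instead of leaking or reusing it. */
		free_opal_dev(opal);
		opal = NULL;
	}
}

int main(void)
{
	reset_work(true, false);   /* first reset: allocate */
	reset_work(true, true);    /* resume: unlock with existing state */
	reset_work(false, false);  /* support gone: free and forget */
	printf("opal = %p\n", (void *)opal);
	return 0;
}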
drivers/nvme/host/rdma.c (+22 -26)
@@ -42,28 +42,6 @@
 
 #define NVME_RDMA_MAX_INLINE_SEGMENTS	1
 
-static const char *const nvme_rdma_cm_status_strs[] = {
-	[NVME_RDMA_CM_INVALID_LEN]	= "invalid length",
-	[NVME_RDMA_CM_INVALID_RECFMT]	= "invalid record format",
-	[NVME_RDMA_CM_INVALID_QID]	= "invalid queue ID",
-	[NVME_RDMA_CM_INVALID_HSQSIZE]	= "invalid host SQ size",
-	[NVME_RDMA_CM_INVALID_HRQSIZE]	= "invalid host RQ size",
-	[NVME_RDMA_CM_NO_RSC]		= "resource not found",
-	[NVME_RDMA_CM_INVALID_IRD]	= "invalid IRD",
-	[NVME_RDMA_CM_INVALID_ORD]	= "Invalid ORD",
-};
-
-static const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
-{
-	size_t index = status;
-
-	if (index < ARRAY_SIZE(nvme_rdma_cm_status_strs) &&
-	    nvme_rdma_cm_status_strs[index])
-		return nvme_rdma_cm_status_strs[index];
-	else
-		return "unrecognized reason";
-};
-
 /*
  * We handle AEN commands ourselves and don't even let the
  * block layer know about them.
@@ -155,6 +133,10 @@ struct nvme_rdma_ctrl {
 		struct sockaddr addr;
 		struct sockaddr_in addr_in;
 	};
+	union {
+		struct sockaddr src_addr;
+		struct sockaddr_in src_addr_in;
+	};
 
 	struct nvme_ctrl	ctrl;
 };
@@ -567,6 +549,7 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 		int idx, size_t queue_size)
 {
 	struct nvme_rdma_queue *queue;
+	struct sockaddr *src_addr = NULL;
 	int ret;
 
 	queue = &ctrl->queues[idx];
@@ -589,7 +572,10 @@ static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl,
 	}
 
 	queue->cm_error = -ETIMEDOUT;
-	ret = rdma_resolve_addr(queue->cm_id, NULL, &ctrl->addr,
+	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+		src_addr = &ctrl->src_addr;
+
+	ret = rdma_resolve_addr(queue->cm_id, src_addr, &ctrl->addr,
 			NVME_RDMA_CONNECT_TIMEOUT_MS);
 	if (ret) {
 		dev_info(ctrl->ctrl.device,
@@ -1905,6 +1891,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
 		goto out_free_ctrl;
 	}
 
+	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
+		ret = nvme_rdma_parse_ipaddr(&ctrl->src_addr_in,
+				opts->host_traddr);
+		if (ret) {
+			pr_err("malformed src IP address passed: %s\n",
+			       opts->host_traddr);
+			goto out_free_ctrl;
+		}
+	}
+
 	if (opts->mask & NVMF_OPT_TRSVCID) {
 		u16 port;
 
@@ -2016,7 +2012,8 @@ out_free_ctrl:
 static struct nvmf_transport_ops nvme_rdma_transport = {
 	.name		= "rdma",
 	.required_opts	= NVMF_OPT_TRADDR,
-	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY,
+	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
+			  NVMF_OPT_HOST_TRADDR,
 	.create_ctrl	= nvme_rdma_create_ctrl,
 };
 
@@ -2063,8 +2060,7 @@ static int __init nvme_rdma_init_module(void)
 		return ret;
 	}
 
-	nvmf_register_transport(&nvme_rdma_transport);
-	return 0;
+	return nvmf_register_transport(&nvme_rdma_transport);
 }
 
 static void __exit nvme_rdma_cleanup_module(void)
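host_traddr ends up as the optional source address handed to rdma_resolve_addr(). The plain-sockets analogue of that option is binding a chosen local address before connect(), as in this self-contained sketch (the port and addresses are arbitrary examples):

#include <arpa/inet.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int connect_from(const char *src_ip, const char *dst_ip, int dst_port)
{
	struct sockaddr_in src = { 0 }, dst = { 0 };
	int fd = socket(AF_INET, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;

	if (src_ip) {   /* optional, like NVMF_OPT_HOST_TRADDR */
		src.sin_family = AF_INET;
		inet_pton(AF_INET, src_ip, &src.sin_addr);
		if (bind(fd, (struct sockaddr *)&src, sizeof(src)) < 0) {
			close(fd);
			return -1;
		}
	}

	dst.sin_family = AF_INET;
	dst.sin_port = htons(dst_port);
	inet_pton(AF_INET, dst_ip, &dst.sin_addr);
	if (connect(fd, (struct sockaddr *)&dst, sizeof(dst)) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	/* Fails unless something is listening on 127.0.0.1:4420. */
	int fd = connect_from(NULL, "127.0.0.1", 4420);

	printf("fd = %d\n", fd);
	if (fd >= 0)
		close(fd);
	return 0;
}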
drivers/nvme/target/admin-cmd.c
@@ -41,7 +41,7 @@ static u16 nvmet_get_smart_log_nsid(struct nvmet_req *req,
 	ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->get_log_page.nsid);
 	if (!ns) {
 		status = NVME_SC_INVALID_NS;
-		pr_err("nvmet : Counld not find namespace id : %d\n",
+		pr_err("nvmet : Could not find namespace id : %d\n",
 				le32_to_cpu(req->cmd->get_log_page.nsid));
 		goto out;
 	}
@@ -509,7 +509,7 @@ int nvmet_parse_admin_cmd(struct nvmet_req *req)
 		break;
 	case nvme_admin_identify:
 		req->data_len = 4096;
-		switch (le32_to_cpu(cmd->identify.cns)) {
+		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_NS:
 			req->execute = nvmet_execute_identify_ns;
 			return 0;
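The cns change works because NVMe 1.2 narrowed CNS to a single byte (CDW10 bits 7:0), with a controller ID occupying bits 31:16; switching on le32_to_cpu() of the whole dword only happened to match while the upper bytes were zero. A standalone illustration (the struct below is a simplified stand-in, not the kernel's nvme_identify layout):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct identify_dw10 {            /* bytes 40..43 of the submission entry */
	uint8_t  cns;             /* Controller or Namespace Structure */
	uint8_t  rsvd;
	uint16_t ctrlid;          /* little-endian on the wire */
};

int main(void)
{
	/* cns = 1, ctrlid = 0x1234 */
	unsigned char wire[4] = { 0x01, 0x00, 0x34, 0x12 };
	struct identify_dw10 id;
	uint32_t as_dword;

	memcpy(&id, wire, sizeof(id));
	memcpy(&as_dword, wire, sizeof(as_dword));

	/* Correct: read the byte directly. */
	printf("cns = %u\n", id.cns);
	/* Treating the same four bytes as one 32-bit field stops matching
	 * the CNS value as soon as the controller ID bytes are nonzero. */
	printf("all four bytes as one value = 0x%08x\n", as_dword);
	return 0;
}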
drivers/nvme/target/core.c
@@ -17,6 +17,7 @@
 #include "nvmet.h"
 
 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
+static DEFINE_IDA(cntlid_ida);
 
 /*
  * This read/write semaphore is used to synchronize access to configuration
@@ -749,7 +750,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!ctrl->sqs)
 		goto out_free_cqs;
 
-	ret = ida_simple_get(&subsys->cntlid_ida,
+	ret = ida_simple_get(&cntlid_ida,
 			     NVME_CNTLID_MIN, NVME_CNTLID_MAX,
 			     GFP_KERNEL);
 	if (ret < 0) {
@@ -819,7 +820,7 @@ static void nvmet_ctrl_free(struct kref *ref)
 	flush_work(&ctrl->async_event_work);
 	cancel_work_sync(&ctrl->fatal_err_work);
 
-	ida_simple_remove(&subsys->cntlid_ida, ctrl->cntlid);
+	ida_simple_remove(&cntlid_ida, ctrl->cntlid);
 	nvmet_subsys_put(subsys);
 
 	kfree(ctrl->sqs);
@@ -918,9 +919,6 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	mutex_init(&subsys->lock);
 	INIT_LIST_HEAD(&subsys->namespaces);
 	INIT_LIST_HEAD(&subsys->ctrls);
-
-	ida_init(&subsys->cntlid_ida);
-
 	INIT_LIST_HEAD(&subsys->hosts);
 
 	return subsys;
@@ -933,7 +931,6 @@ static void nvmet_subsys_free(struct kref *ref)
 
 	WARN_ON_ONCE(!list_empty(&subsys->namespaces));
 
-	ida_destroy(&subsys->cntlid_ida);
 	kfree(subsys->subsysnqn);
 	kfree(subsys);
 }
@@ -976,6 +973,7 @@ static void __exit nvmet_exit(void)
 {
 	nvmet_exit_configfs();
 	nvmet_exit_discovery();
+	ida_destroy(&cntlid_ida);
 
 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
 	BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
drivers/nvme/target/discovery.c
@@ -186,14 +186,14 @@ int nvmet_parse_discovery_cmd(struct nvmet_req *req)
 	}
 	case nvme_admin_identify:
 		req->data_len = 4096;
-		switch (le32_to_cpu(cmd->identify.cns)) {
+		switch (cmd->identify.cns) {
 		case NVME_ID_CNS_CTRL:
 			req->execute =
 				nvmet_execute_identify_disc_ctrl;
 			return 0;
 		default:
 			pr_err("nvmet: unsupported identify cns %d\n",
-			       le32_to_cpu(cmd->identify.cns));
+			       cmd->identify.cns);
 			return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
 		}
 	default:
Some files were not shown because too many files have changed in this diff.