remove the lightnvm subsystem

Lightnvm supports the OCSSD 1.x and 2.0 specs which were early attempts
to produce Open Channel SSDs and never made it into the NVMe spec
proper.  They have since been superceeded by NVMe enhancements such
as ZNS support.  Remove the support per the deprecation schedule.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20210812132308.38486-1-hch@lst.de
Reviewed-by: Matias Bjørling <mb@lightnvm.io>
Reviewed-by: Javier González <javier@javigon.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Christoph Hellwig
2021-08-12 15:23:08 +02:00
committed by Jens Axboe
parent 6e4df4c648
commit 9ea9b9c483
30 changed files with 1 additions and 13678 deletions

View File

@@ -85,7 +85,6 @@ available subsections can be seen below.
io-mapping
io_ordering
generic-counter
lightnvm-pblk
memory-devices/index
men-chameleon-bus
ntb

View File

@@ -1,21 +0,0 @@
pblk: Physical Block Device Target
==================================
pblk implements a fully associative, host-based FTL that exposes a traditional
block I/O interface. Its primary responsibilities are:
- Map logical addresses onto physical addresses (4KB granularity) in a
logical-to-physical (L2P) table.
- Maintain the integrity and consistency of the L2P table as well as its
recovery from normal tear down and power outage.
- Deal with controller- and media-specific constrains.
- Handle I/O errors.
- Implement garbage collection.
- Maintain consistency across the I/O stack during synchronization points.
For more information please refer to:
http://lightnvm.io
which maintains updated FAQs, manual pages, technical documentation, tools,
contacts, etc.

View File

@@ -160,7 +160,6 @@ Code Seq# Include File Comments
'K' all linux/kd.h
'L' 00-1F linux/loop.h conflict!
'L' 10-1F drivers/scsi/mpt3sas/mpt3sas_ctl.h conflict!
'L' 20-2F linux/lightnvm.h
'L' E0-FF linux/ppdd.h encrypted disk device driver
<http://linux01.gwdg.de/~alatham/ppdd.html>
'M' all linux/soundcard.h conflict!

View File

@@ -10609,15 +10609,6 @@ F: LICENSES/
F: scripts/spdxcheck-test.sh
F: scripts/spdxcheck.py
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
L: linux-block@vger.kernel.org
S: Maintained
W: http://github/OpenChannelSSD
F: drivers/lightnvm/
F: include/linux/lightnvm.h
F: include/uapi/linux/lightnvm.h
LINEAR RANGES HELPERS
M: Mark Brown <broonie@kernel.org>
R: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>

View File

@@ -51,8 +51,6 @@ source "drivers/net/Kconfig"
source "drivers/isdn/Kconfig"
source "drivers/lightnvm/Kconfig"
# input before char - char/joystick depends on it. As does USB.
source "drivers/input/Kconfig"

View File

@@ -70,7 +70,6 @@ obj-$(CONFIG_FB_I810) += video/fbdev/i810/
obj-$(CONFIG_FB_INTEL) += video/fbdev/intelfb/
obj-$(CONFIG_PARPORT) += parport/
obj-$(CONFIG_NVM) += lightnvm/
obj-y += base/ block/ misc/ mfd/ nfc/
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
obj-$(CONFIG_DAX) += dax/

View File

@@ -1,44 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# Open-Channel SSD NVM configuration
#
menuconfig NVM
bool "Open-Channel SSD target support (DEPRECATED)"
depends on BLOCK
help
Say Y here to get to enable Open-channel SSDs.
Open-Channel SSDs implement a set of extension to SSDs, that
exposes direct access to the underlying non-volatile memory.
If you say N, all options in this submenu will be skipped and disabled
only do this if you know what you are doing.
This code is deprecated and will be removed in Linux 5.15.
if NVM
config NVM_PBLK
tristate "Physical Block Device Open-Channel SSD target"
select CRC32
help
Allows an open-channel SSD to be exposed as a block device to the
host. The target assumes the device exposes raw flash and must be
explicitly managed by the host.
Please note the disk format is considered EXPERIMENTAL for now.
if NVM_PBLK
config NVM_PBLK_DEBUG
bool "PBlk Debug Support"
default n
help
Enables debug support for pblk. This includes extra checks, more
vocal error messages, and extra tracking fields in the pblk sysfs
entries.
endif # NVM_PBLK_DEBUG
endif # NVM

View File

@@ -1,11 +0,0 @@
# SPDX-License-Identifier: GPL-2.0
#
# Makefile for Open-Channel SSDs.
#
obj-$(CONFIG_NVM) := core.o
obj-$(CONFIG_NVM_PBLK) += pblk.o
pblk-y := pblk-init.o pblk-core.o pblk-rb.o \
pblk-write.o pblk-cache.o pblk-read.o \
pblk-gc.o pblk-recovery.o pblk-map.o \
pblk-rl.o pblk-sysfs.o

File diff suppressed because it is too large Load Diff

View File

@@ -1,137 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-cache.c - pblk's write cache
*/
#include "pblk.h"
void pblk_write_to_cache(struct pblk *pblk, struct bio *bio,
unsigned long flags)
{
struct pblk_w_ctx w_ctx;
sector_t lba = pblk_get_lba(bio);
unsigned long start_time;
unsigned int bpos, pos;
int nr_entries = pblk_get_secs(bio);
int i, ret;
start_time = bio_start_io_acct(bio);
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
ret = pblk_rb_may_write_user(&pblk->rwb, bio, nr_entries, &bpos);
switch (ret) {
case NVM_IO_REQUEUE:
io_schedule();
goto retry;
case NVM_IO_ERR:
pblk_pipeline_stop(pblk);
bio_io_error(bio);
goto out;
}
pblk_ppa_set_empty(&w_ctx.ppa);
w_ctx.flags = flags;
if (bio->bi_opf & REQ_PREFLUSH) {
w_ctx.flags |= PBLK_FLUSH_ENTRY;
pblk_write_kick(pblk);
}
if (unlikely(!bio_has_data(bio)))
goto out;
for (i = 0; i < nr_entries; i++) {
void *data = bio_data(bio);
w_ctx.lba = lba + i;
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + i);
pblk_rb_write_entry_user(&pblk->rwb, data, w_ctx, pos);
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
atomic64_add(nr_entries, &pblk->user_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_entries, &pblk->inflight_writes);
atomic_long_add(nr_entries, &pblk->req_writes);
#endif
pblk_rl_inserted(&pblk->rl, nr_entries);
out:
bio_end_io_acct(bio, start_time);
pblk_write_should_kick(pblk);
if (ret == NVM_IO_DONE)
bio_endio(bio);
}
/*
* On GC the incoming lbas are not necessarily sequential. Also, some of the
* lbas might not be valid entries, which are marked as empty by the GC thread
*/
int pblk_write_gc_to_cache(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct pblk_w_ctx w_ctx;
unsigned int bpos, pos;
void *data = gc_rq->data;
int i, valid_entries;
/* Update the write buffer head (mem) with the entries that we can
* write. The write in itself cannot fail, so there is no need to
* rollback from here on.
*/
retry:
if (!pblk_rb_may_write_gc(&pblk->rwb, gc_rq->secs_to_gc, &bpos)) {
io_schedule();
goto retry;
}
w_ctx.flags = PBLK_IOTYPE_GC;
pblk_ppa_set_empty(&w_ctx.ppa);
for (i = 0, valid_entries = 0; i < gc_rq->nr_secs; i++) {
if (gc_rq->lba_list[i] == ADDR_EMPTY)
continue;
w_ctx.lba = gc_rq->lba_list[i];
pos = pblk_rb_wrap_pos(&pblk->rwb, bpos + valid_entries);
pblk_rb_write_entry_gc(&pblk->rwb, data, w_ctx, gc_rq->line,
gc_rq->paddr_list[i], pos);
data += PBLK_EXPOSED_PAGE_SIZE;
valid_entries++;
}
WARN_ONCE(gc_rq->secs_to_gc != valid_entries,
"pblk: inconsistent GC write\n");
atomic64_add(valid_entries, &pblk->gc_wa);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(valid_entries, &pblk->inflight_writes);
atomic_long_add(valid_entries, &pblk->recov_gc_writes);
#endif
pblk_write_should_kick(pblk);
return NVM_IO_OK;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,210 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-map.c - pblk's lba-ppa mapping strategy
*
*/
#include "pblk.h"
static int pblk_map_page_data(struct pblk *pblk, unsigned int sentry,
struct ppa_addr *ppa_list,
unsigned long *lun_bitmap,
void *meta_list,
unsigned int valid_secs)
{
struct pblk_line *line = pblk_line_get_data(pblk);
struct pblk_emeta *emeta;
struct pblk_w_ctx *w_ctx;
__le64 *lba_list;
u64 paddr;
int nr_secs = pblk->min_write_pgs;
int i;
if (!line)
return -ENOSPC;
if (pblk_line_is_full(line)) {
struct pblk_line *prev_line = line;
/* If we cannot allocate a new line, make sure to store metadata
* on current line and then fail
*/
line = pblk_line_replace_data(pblk);
pblk_line_close_meta(pblk, prev_line);
if (!line) {
pblk_pipeline_stop(pblk);
return -ENOSPC;
}
}
emeta = line->emeta;
lba_list = emeta_to_lbas(pblk, emeta->buf);
paddr = pblk_alloc_page(pblk, line, nr_secs);
for (i = 0; i < nr_secs; i++, paddr++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
/* ppa to be sent to the device */
ppa_list[i] = addr_to_gen_ppa(pblk, paddr, line->id);
/* Write context for target bio completion on write buffer. Note
* that the write buffer is protected by the sync backpointer,
* and a single writer thread have access to each specific entry
* at a time. Thus, it is safe to modify the context for the
* entry we are setting up for submission without taking any
* lock or memory barrier.
*/
if (i < valid_secs) {
kref_get(&line->ref);
atomic_inc(&line->sec_to_update);
w_ctx = pblk_rb_w_ctx(&pblk->rwb, sentry + i);
w_ctx->ppa = ppa_list[i];
meta->lba = cpu_to_le64(w_ctx->lba);
lba_list[paddr] = cpu_to_le64(w_ctx->lba);
if (lba_list[paddr] != addr_empty)
line->nr_valid_lbas++;
else
atomic64_inc(&pblk->pad_wa);
} else {
lba_list[paddr] = addr_empty;
meta->lba = addr_empty;
__pblk_map_invalidate(pblk, line, paddr);
}
}
pblk_down_rq(pblk, ppa_list[0], lun_bitmap);
return 0;
}
int pblk_map_rq(struct pblk *pblk, struct nvm_rq *rqd, unsigned int sentry,
unsigned long *lun_bitmap, unsigned int valid_secs,
unsigned int off)
{
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i;
int ret;
for (i = off; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
}
return 0;
}
/* only if erase_ppa is set, acquire erase semaphore */
int pblk_map_erase_rq(struct pblk *pblk, struct nvm_rq *rqd,
unsigned int sentry, unsigned long *lun_bitmap,
unsigned int valid_secs, struct ppa_addr *erase_ppa)
{
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_meta *lm = &pblk->lm;
void *meta_list = pblk_get_meta_for_writes(pblk, rqd);
void *meta_buffer;
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
struct pblk_line *e_line, *d_line;
unsigned int map_secs;
int min = pblk->min_write_pgs;
int i, erase_lun;
int ret;
for (i = 0; i < rqd->nr_ppas; i += min) {
map_secs = (i + min > valid_secs) ? (valid_secs % min) : min;
meta_buffer = pblk_get_meta(pblk, meta_list, i);
ret = pblk_map_page_data(pblk, sentry + i, &ppa_list[i],
lun_bitmap, meta_buffer, map_secs);
if (ret)
return ret;
erase_lun = pblk_ppa_to_pos(geo, ppa_list[i]);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
spin_lock(&e_line->lock);
if (!test_bit(erase_lun, e_line->erase_bitmap)) {
set_bit(erase_lun, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = ppa_list[i];
erase_ppa->a.blk = e_line->id;
erase_ppa->a.reserved = 0;
spin_unlock(&e_line->lock);
/* Avoid evaluating e_line->left_eblks */
return pblk_map_rq(pblk, rqd, sentry, lun_bitmap,
valid_secs, i + min);
}
spin_unlock(&e_line->lock);
}
d_line = pblk_line_get_data(pblk);
/* line can change after page map. We might also be writing the
* last line.
*/
e_line = pblk_line_get_erase(pblk);
if (!e_line)
return -ENOSPC;
/* Erase blocks that are bad in this line but might not be in next */
if (unlikely(pblk_ppa_empty(*erase_ppa)) &&
bitmap_weight(d_line->blk_bitmap, lm->blk_per_line)) {
int bit = -1;
retry:
bit = find_next_bit(d_line->blk_bitmap,
lm->blk_per_line, bit + 1);
if (bit >= lm->blk_per_line)
return 0;
spin_lock(&e_line->lock);
if (test_bit(bit, e_line->erase_bitmap)) {
spin_unlock(&e_line->lock);
goto retry;
}
spin_unlock(&e_line->lock);
set_bit(bit, e_line->erase_bitmap);
atomic_dec(&e_line->left_eblks);
*erase_ppa = pblk->luns[bit].bppa; /* set ch and lun */
erase_ppa->a.blk = e_line->id;
}
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,474 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-read.c - pblk's read path
*/
#include "pblk.h"
/*
* There is no guarantee that the value read from cache has not been updated and
* resides at another location in the cache. We guarantee though that if the
* value is read from the cache, it belongs to the mapped lba. In order to
* guarantee and order between writes and reads are ordered, a flush must be
* issued.
*/
static int pblk_read_from_cache(struct pblk *pblk, struct bio *bio,
sector_t lba, struct ppa_addr ppa)
{
#ifdef CONFIG_NVM_PBLK_DEBUG
/* Callers must ensure that the ppa points to a cache address */
BUG_ON(pblk_ppa_empty(ppa));
BUG_ON(!pblk_addr_in_cache(ppa));
#endif
return pblk_rb_copy_to_bio(&pblk->rwb, bio, lba, ppa);
}
static int pblk_read_ppalist_rq(struct pblk *pblk, struct nvm_rq *rqd,
struct bio *bio, sector_t blba,
bool *from_cache)
{
void *meta_list = rqd->meta_list;
int nr_secs, i;
retry:
nr_secs = pblk_lookup_l2p_seq(pblk, rqd->ppa_list, blba, rqd->nr_ppas,
from_cache);
if (!*from_cache)
goto end;
for (i = 0; i < nr_secs; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
sector_t lba = blba + i;
if (pblk_ppa_empty(rqd->ppa_list[i])) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
meta->lba = addr_empty;
} else if (pblk_addr_in_cache(rqd->ppa_list[i])) {
/*
* Try to read from write buffer. The address is later
* checked on the write buffer to prevent retrieving
* overwritten data.
*/
if (!pblk_read_from_cache(pblk, bio, lba,
rqd->ppa_list[i])) {
if (i == 0) {
/*
* We didn't call with bio_advance()
* yet, so we can just retry.
*/
goto retry;
} else {
/*
* We already call bio_advance()
* so we cannot retry and we need
* to quit that function in order
* to allow caller to handle the bio
* splitting in the current sector
* position.
*/
nr_secs = i;
goto end;
}
}
meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
#endif
}
bio_advance(bio, PBLK_EXPOSED_PAGE_SIZE);
}
end:
if (pblk_io_aligned(pblk, nr_secs))
rqd->is_seq = 1;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(nr_secs, &pblk->inflight_reads);
#endif
return nr_secs;
}
static void pblk_read_check_seq(struct pblk *pblk, struct nvm_rq *rqd,
sector_t blba)
{
void *meta_list = rqd->meta_list;
int nr_lbas = rqd->nr_ppas;
int i;
if (!pblk_is_oob_meta_supported(pblk))
return;
for (i = 0; i < nr_lbas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk, meta_list, i);
u64 lba = le64_to_cpu(meta->lba);
if (lba == ADDR_EMPTY)
continue;
if (lba != blba + i) {
#ifdef CONFIG_NVM_PBLK_DEBUG
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
print_ppa(pblk, &ppa_list[i], "seq", i);
#endif
pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
lba, (u64)blba + i);
WARN_ON(1);
}
}
}
/*
* There can be holes in the lba list.
*/
static void pblk_read_check_rand(struct pblk *pblk, struct nvm_rq *rqd,
u64 *lba_list, int nr_lbas)
{
void *meta_lba_list = rqd->meta_list;
int i, j;
if (!pblk_is_oob_meta_supported(pblk))
return;
for (i = 0, j = 0; i < nr_lbas; i++) {
struct pblk_sec_meta *meta = pblk_get_meta(pblk,
meta_lba_list, j);
u64 lba = lba_list[i];
u64 meta_lba;
if (lba == ADDR_EMPTY)
continue;
meta_lba = le64_to_cpu(meta->lba);
if (lba != meta_lba) {
#ifdef CONFIG_NVM_PBLK_DEBUG
struct ppa_addr *ppa_list = nvm_rq_to_ppa_list(rqd);
print_ppa(pblk, &ppa_list[j], "rnd", j);
#endif
pblk_err(pblk, "corrupted read LBA (%llu/%llu)\n",
meta_lba, lba);
WARN_ON(1);
}
j++;
}
WARN_ONCE(j != rqd->nr_ppas, "pblk: corrupted random request\n");
}
static void pblk_end_user_read(struct bio *bio, int error)
{
if (error && error != NVM_RSP_WARN_HIGHECC)
bio_io_error(bio);
else
bio_endio(bio);
}
static void __pblk_end_io_read(struct pblk *pblk, struct nvm_rq *rqd,
bool put_line)
{
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *int_bio = rqd->bio;
unsigned long start_time = r_ctx->start_time;
bio_end_io_acct(int_bio, start_time);
if (rqd->error)
pblk_log_read_err(pblk, rqd);
pblk_read_check_seq(pblk, rqd, r_ctx->lba);
bio_put(int_bio);
if (put_line)
pblk_rq_to_line_put(pblk, rqd);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(rqd->nr_ppas, &pblk->sync_reads);
atomic_long_sub(rqd->nr_ppas, &pblk->inflight_reads);
#endif
pblk_free_rqd(pblk, rqd, PBLK_READ);
atomic_dec(&pblk->inflight_io);
}
static void pblk_end_io_read(struct nvm_rq *rqd)
{
struct pblk *pblk = rqd->private;
struct pblk_g_ctx *r_ctx = nvm_rq_to_pdu(rqd);
struct bio *bio = (struct bio *)r_ctx->private;
pblk_end_user_read(bio, rqd->error);
__pblk_end_io_read(pblk, rqd, true);
}
static void pblk_read_rq(struct pblk *pblk, struct nvm_rq *rqd, struct bio *bio,
sector_t lba, bool *from_cache)
{
struct pblk_sec_meta *meta = pblk_get_meta(pblk, rqd->meta_list, 0);
struct ppa_addr ppa;
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->inflight_reads);
#endif
retry:
if (pblk_ppa_empty(ppa)) {
__le64 addr_empty = cpu_to_le64(ADDR_EMPTY);
meta->lba = addr_empty;
return;
}
/* Try to read from write buffer. The address is later checked on the
* write buffer to prevent retrieving overwritten data.
*/
if (pblk_addr_in_cache(ppa)) {
if (!pblk_read_from_cache(pblk, bio, lba, ppa)) {
pblk_lookup_l2p_seq(pblk, &ppa, lba, 1, from_cache);
goto retry;
}
meta->lba = cpu_to_le64(lba);
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->cache_reads);
#endif
} else {
rqd->ppa_addr = ppa;
}
}
void pblk_submit_read(struct pblk *pblk, struct bio *bio)
{
sector_t blba = pblk_get_lba(bio);
unsigned int nr_secs = pblk_get_secs(bio);
bool from_cache;
struct pblk_g_ctx *r_ctx;
struct nvm_rq *rqd;
struct bio *int_bio, *split_bio;
unsigned long start_time;
start_time = bio_start_io_acct(bio);
rqd = pblk_alloc_rqd(pblk, PBLK_READ);
rqd->opcode = NVM_OP_PREAD;
rqd->nr_ppas = nr_secs;
rqd->private = pblk;
rqd->end_io = pblk_end_io_read;
r_ctx = nvm_rq_to_pdu(rqd);
r_ctx->start_time = start_time;
r_ctx->lba = blba;
if (pblk_alloc_rqd_meta(pblk, rqd)) {
bio_io_error(bio);
pblk_free_rqd(pblk, rqd, PBLK_READ);
return;
}
/* Clone read bio to deal internally with:
* -read errors when reading from drive
* -bio_advance() calls during cache reads
*/
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
if (nr_secs > 1)
nr_secs = pblk_read_ppalist_rq(pblk, rqd, int_bio, blba,
&from_cache);
else
pblk_read_rq(pblk, rqd, int_bio, blba, &from_cache);
split_retry:
r_ctx->private = bio; /* original bio */
rqd->bio = int_bio; /* internal bio */
if (from_cache && nr_secs == rqd->nr_ppas) {
/* All data was read from cache, we can complete the IO. */
pblk_end_user_read(bio, 0);
atomic_inc(&pblk->inflight_io);
__pblk_end_io_read(pblk, rqd, false);
} else if (nr_secs != rqd->nr_ppas) {
/* The read bio request could be partially filled by the write
* buffer, but there are some holes that need to be read from
* the drive. In order to handle this, we will use block layer
* mechanism to split this request in to smaller ones and make
* a chain of it.
*/
split_bio = bio_split(bio, nr_secs * NR_PHY_IN_LOG, GFP_KERNEL,
&pblk_bio_set);
bio_chain(split_bio, bio);
submit_bio_noacct(bio);
/* New bio contains first N sectors of the previous one, so
* we can continue to use existing rqd, but we need to shrink
* the number of PPAs in it. New bio is also guaranteed that
* it contains only either data from cache or from drive, newer
* mix of them.
*/
bio = split_bio;
rqd->nr_ppas = nr_secs;
if (rqd->nr_ppas == 1)
rqd->ppa_addr = rqd->ppa_list[0];
/* Recreate int_bio - existing might have some needed internal
* fields modified already.
*/
bio_put(int_bio);
int_bio = bio_clone_fast(bio, GFP_KERNEL, &pblk_bio_set);
goto split_retry;
} else if (pblk_submit_io(pblk, rqd, NULL)) {
/* Submitting IO to drive failed, let's report an error */
rqd->error = -ENODEV;
pblk_end_io_read(rqd);
}
}
static int read_ppalist_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_line *line, u64 *lba_list,
u64 *paddr_list_gc, unsigned int nr_secs)
{
struct ppa_addr ppa_list_l2p[NVM_MAX_VLBA];
struct ppa_addr ppa_gc;
int valid_secs = 0;
int i;
pblk_lookup_l2p_rand(pblk, ppa_list_l2p, lba_list, nr_secs);
for (i = 0; i < nr_secs; i++) {
if (lba_list[i] == ADDR_EMPTY)
continue;
ppa_gc = addr_to_gen_ppa(pblk, paddr_list_gc[i], line->id);
if (!pblk_ppa_comp(ppa_list_l2p[i], ppa_gc)) {
paddr_list_gc[i] = lba_list[i] = ADDR_EMPTY;
continue;
}
rqd->ppa_list[valid_secs++] = ppa_list_l2p[i];
}
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(valid_secs, &pblk->inflight_reads);
#endif
return valid_secs;
}
static int read_rq_gc(struct pblk *pblk, struct nvm_rq *rqd,
struct pblk_line *line, sector_t lba,
u64 paddr_gc)
{
struct ppa_addr ppa_l2p, ppa_gc;
int valid_secs = 0;
if (lba == ADDR_EMPTY)
goto out;
/* logic error: lba out-of-bounds */
if (lba >= pblk->capacity) {
WARN(1, "pblk: read lba out of bounds\n");
goto out;
}
spin_lock(&pblk->trans_lock);
ppa_l2p = pblk_trans_map_get(pblk, lba);
spin_unlock(&pblk->trans_lock);
ppa_gc = addr_to_gen_ppa(pblk, paddr_gc, line->id);
if (!pblk_ppa_comp(ppa_l2p, ppa_gc))
goto out;
rqd->ppa_addr = ppa_l2p;
valid_secs = 1;
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_inc(&pblk->inflight_reads);
#endif
out:
return valid_secs;
}
int pblk_submit_read_gc(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
struct nvm_rq rqd;
int ret = NVM_IO_OK;
memset(&rqd, 0, sizeof(struct nvm_rq));
ret = pblk_alloc_rqd_meta(pblk, &rqd);
if (ret)
return ret;
if (gc_rq->nr_secs > 1) {
gc_rq->secs_to_gc = read_ppalist_rq_gc(pblk, &rqd, gc_rq->line,
gc_rq->lba_list,
gc_rq->paddr_list,
gc_rq->nr_secs);
if (gc_rq->secs_to_gc == 1)
rqd.ppa_addr = rqd.ppa_list[0];
} else {
gc_rq->secs_to_gc = read_rq_gc(pblk, &rqd, gc_rq->line,
gc_rq->lba_list[0],
gc_rq->paddr_list[0]);
}
if (!(gc_rq->secs_to_gc))
goto out;
rqd.opcode = NVM_OP_PREAD;
rqd.nr_ppas = gc_rq->secs_to_gc;
if (pblk_submit_io_sync(pblk, &rqd, gc_rq->data)) {
ret = -EIO;
goto err_free_dma;
}
pblk_read_check_rand(pblk, &rqd, gc_rq->lba_list, gc_rq->nr_secs);
atomic_dec(&pblk->inflight_io);
if (rqd.error) {
atomic_long_inc(&pblk->read_failed_gc);
#ifdef CONFIG_NVM_PBLK_DEBUG
pblk_print_failed_rqd(pblk, &rqd, rqd.error);
#endif
}
#ifdef CONFIG_NVM_PBLK_DEBUG
atomic_long_add(gc_rq->secs_to_gc, &pblk->sync_reads);
atomic_long_add(gc_rq->secs_to_gc, &pblk->recov_gc_reads);
atomic_long_sub(gc_rq->secs_to_gc, &pblk->inflight_reads);
#endif
out:
pblk_free_rqd_meta(pblk, &rqd);
return ret;
err_free_dma:
pblk_free_rqd_meta(pblk, &rqd);
return ret;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,254 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2016 CNEX Labs
* Initial release: Javier Gonzalez <javier@cnexlabs.com>
* Matias Bjorling <matias@cnexlabs.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* pblk-rl.c - pblk's rate limiter for user I/O
*
*/
#include "pblk.h"
static void pblk_rl_kick_u_timer(struct pblk_rl *rl)
{
mod_timer(&rl->u_timer, jiffies + msecs_to_jiffies(5000));
}
int pblk_rl_is_limit(struct pblk_rl *rl)
{
int rb_space;
rb_space = atomic_read(&rl->rb_space);
return (rb_space == 0);
}
int pblk_rl_user_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_user_cnt = atomic_read(&rl->rb_user_cnt);
int rb_space = atomic_read(&rl->rb_space);
if (unlikely(rb_space >= 0) && (rb_space - nr_entries < 0))
return NVM_IO_ERR;
if (rb_user_cnt >= rl->rb_user_max)
return NVM_IO_REQUEUE;
return NVM_IO_OK;
}
void pblk_rl_inserted(struct pblk_rl *rl, int nr_entries)
{
int rb_space = atomic_read(&rl->rb_space);
if (unlikely(rb_space >= 0))
atomic_sub(nr_entries, &rl->rb_space);
}
int pblk_rl_gc_may_insert(struct pblk_rl *rl, int nr_entries)
{
int rb_gc_cnt = atomic_read(&rl->rb_gc_cnt);
int rb_user_active;
/* If there is no user I/O let GC take over space on the write buffer */
rb_user_active = READ_ONCE(rl->rb_user_active);
return (!(rb_gc_cnt >= rl->rb_gc_max && rb_user_active));
}
void pblk_rl_user_in(struct pblk_rl *rl, int nr_entries)
{
atomic_add(nr_entries, &rl->rb_user_cnt);
/* Release user I/O state. Protect from GC */
smp_store_release(&rl->rb_user_active, 1);
pblk_rl_kick_u_timer(rl);
}
void pblk_rl_werr_line_in(struct pblk_rl *rl)
{
atomic_inc(&rl->werr_lines);
}
void pblk_rl_werr_line_out(struct pblk_rl *rl)
{
atomic_dec(&rl->werr_lines);
}
void pblk_rl_gc_in(struct pblk_rl *rl, int nr_entries)
{
atomic_add(nr_entries, &rl->rb_gc_cnt);
}
void pblk_rl_out(struct pblk_rl *rl, int nr_user, int nr_gc)
{
atomic_sub(nr_user, &rl->rb_user_cnt);
atomic_sub(nr_gc, &rl->rb_gc_cnt);
}
unsigned long pblk_rl_nr_free_blks(struct pblk_rl *rl)
{
return atomic_read(&rl->free_blocks);
}
unsigned long pblk_rl_nr_user_free_blks(struct pblk_rl *rl)
{
return atomic_read(&rl->free_user_blocks);
}
static void __pblk_rl_update_rates(struct pblk_rl *rl,
unsigned long free_blocks)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
int max = rl->rb_budget;
int werr_gc_needed = atomic_read(&rl->werr_lines);
if (free_blocks >= rl->high) {
if (werr_gc_needed) {
/* Allocate a small budget for recovering
* lines with write errors
*/
rl->rb_gc_max = 1 << rl->rb_windows_pw;
rl->rb_user_max = max - rl->rb_gc_max;
rl->rb_state = PBLK_RL_WERR;
} else {
rl->rb_user_max = max;
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_OFF;
}
} else if (free_blocks < rl->high) {
int shift = rl->high_pw - rl->rb_windows_pw;
int user_windows = free_blocks >> shift;
int user_max = user_windows << ilog2(NVM_MAX_VLBA);
rl->rb_user_max = user_max;
rl->rb_gc_max = max - user_max;
if (free_blocks <= rl->rsv_blocks) {
rl->rb_user_max = 0;
rl->rb_gc_max = max;
}
/* In the worst case, we will need to GC lines in the low list
* (high valid sector count). If there are lines to GC on high
* or mid lists, these will be prioritized
*/
rl->rb_state = PBLK_RL_LOW;
}
if (rl->rb_state != PBLK_RL_OFF)
pblk_gc_should_start(pblk);
else
pblk_gc_should_stop(pblk);
}
void pblk_rl_update_rates(struct pblk_rl *rl)
{
__pblk_rl_update_rates(rl, pblk_rl_nr_user_free_blks(rl));
}
void pblk_rl_free_lines_inc(struct pblk_rl *rl, struct pblk_line *line)
{
int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_add(blk_in_line, &rl->free_blocks);
free_blocks = atomic_add_return(blk_in_line, &rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
}
void pblk_rl_free_lines_dec(struct pblk_rl *rl, struct pblk_line *line,
bool used)
{
int blk_in_line = atomic_read(&line->blk_in_line);
int free_blocks;
atomic_sub(blk_in_line, &rl->free_blocks);
if (used)
free_blocks = atomic_sub_return(blk_in_line,
&rl->free_user_blocks);
else
free_blocks = atomic_read(&rl->free_user_blocks);
__pblk_rl_update_rates(rl, free_blocks);
}
int pblk_rl_high_thrs(struct pblk_rl *rl)
{
return rl->high;
}
int pblk_rl_max_io(struct pblk_rl *rl)
{
return rl->rb_max_io;
}
static void pblk_rl_u_timer(struct timer_list *t)
{
struct pblk_rl *rl = from_timer(rl, t, u_timer);
/* Release user I/O state. Protect from GC */
smp_store_release(&rl->rb_user_active, 0);
}
void pblk_rl_free(struct pblk_rl *rl)
{
del_timer(&rl->u_timer);
}
void pblk_rl_init(struct pblk_rl *rl, int budget, int threshold)
{
struct pblk *pblk = container_of(rl, struct pblk, rl);
struct nvm_tgt_dev *dev = pblk->dev;
struct nvm_geo *geo = &dev->geo;
struct pblk_line_mgmt *l_mg = &pblk->l_mg;
struct pblk_line_meta *lm = &pblk->lm;
int sec_meta, blk_meta;
unsigned int rb_windows;
/* Consider sectors used for metadata */
sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
blk_meta = DIV_ROUND_UP(sec_meta, geo->clba);
rl->high = pblk->op_blks - blk_meta - lm->blk_per_line;
rl->high_pw = get_count_order(rl->high);
rl->rsv_blocks = pblk_get_min_chks(pblk);
/* This will always be a power-of-2 */
rb_windows = budget / NVM_MAX_VLBA;
rl->rb_windows_pw = get_count_order(rb_windows);
/* To start with, all buffer is available to user I/O writers */
rl->rb_budget = budget;
rl->rb_user_max = budget;
rl->rb_gc_max = 0;
rl->rb_state = PBLK_RL_HIGH;
/* Maximize I/O size and ansure that back threshold is respected */
if (threshold)
rl->rb_max_io = budget - pblk->min_write_pgs_data - threshold;
else
rl->rb_max_io = budget - pblk->min_write_pgs_data - 1;
atomic_set(&rl->rb_user_cnt, 0);
atomic_set(&rl->rb_gc_cnt, 0);
atomic_set(&rl->rb_space, -1);
atomic_set(&rl->werr_lines, 0);
timer_setup(&rl->u_timer, pblk_rl_u_timer, 0);
rl->rb_user_active = 0;
rl->rb_gc_active = 0;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,145 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM pblk
#if !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_PBLK_H
#include <linux/tracepoint.h>
struct ppa_addr;
#define show_chunk_flags(state) __print_flags(state, "", \
{ NVM_CHK_ST_FREE, "FREE", }, \
{ NVM_CHK_ST_CLOSED, "CLOSED", }, \
{ NVM_CHK_ST_OPEN, "OPEN", }, \
{ NVM_CHK_ST_OFFLINE, "OFFLINE", })
#define show_line_state(state) __print_symbolic(state, \
{ PBLK_LINESTATE_NEW, "NEW", }, \
{ PBLK_LINESTATE_FREE, "FREE", }, \
{ PBLK_LINESTATE_OPEN, "OPEN", }, \
{ PBLK_LINESTATE_CLOSED, "CLOSED", }, \
{ PBLK_LINESTATE_GC, "GC", }, \
{ PBLK_LINESTATE_BAD, "BAD", }, \
{ PBLK_LINESTATE_CORRUPT, "CORRUPT" })
#define show_pblk_state(state) __print_symbolic(state, \
{ PBLK_STATE_RUNNING, "RUNNING", }, \
{ PBLK_STATE_STOPPING, "STOPPING", }, \
{ PBLK_STATE_RECOVERING, "RECOVERING", }, \
{ PBLK_STATE_STOPPED, "STOPPED" })
#define show_chunk_erase_state(state) __print_symbolic(state, \
{ PBLK_CHUNK_RESET_START, "START", }, \
{ PBLK_CHUNK_RESET_DONE, "OK", }, \
{ PBLK_CHUNK_RESET_FAILED, "FAILED" })
TRACE_EVENT(pblk_chunk_reset,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_erase_state((int)__entry->state))
);
TRACE_EVENT(pblk_chunk_state,
TP_PROTO(const char *name, struct ppa_addr *ppa, int state),
TP_ARGS(name, ppa, state),
TP_STRUCT__entry(
__string(name, name)
__field(u64, ppa)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->ppa = ppa->ppa;
__entry->state = state;
),
TP_printk("dev=%s grp=%llu pu=%llu chk=%llu state=%s", __get_str(name),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.grp),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.pu),
(u64)(((struct ppa_addr *)(&__entry->ppa))->m.chk),
show_chunk_flags((int)__entry->state))
);
TRACE_EVENT(pblk_line_state,
TP_PROTO(const char *name, int line, int state),
TP_ARGS(name, line, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, line)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->line = line;
__entry->state = state;
),
TP_printk("dev=%s line=%d state=%s", __get_str(name),
(int)__entry->line,
show_line_state((int)__entry->state))
);
TRACE_EVENT(pblk_state,
TP_PROTO(const char *name, int state),
TP_ARGS(name, state),
TP_STRUCT__entry(
__string(name, name)
__field(int, state)
),
TP_fast_assign(
__assign_str(name, name);
__entry->state = state;
),
TP_printk("dev=%s state=%s", __get_str(name),
show_pblk_state((int)__entry->state))
);
#endif /* !defined(_TRACE_PBLK_H) || defined(TRACE_HEADER_MULTI_READ) */
/* This part must be outside protection */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ../../drivers/lightnvm
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE pblk-trace
#include <trace/define_trace.h>

Some files were not shown because too many files have changed in this diff Show More