You've already forked linux-apfs
mirror of
https://github.com/linux-apfs/linux-apfs.git
synced 2026-05-01 15:00:59 -07:00
Merge tag 'libnvdimm-for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm
Pull libnvdimm updates from Dan Williams:
"The bulk of this has been in multiple -next releases. There were a few
late breaking fixes and small features that got added in the last
couple days, but the whole set has received a build success
notification from the kbuild robot.
Change summary:
- Region media error reporting: A libnvdimm region device is the
parent to one or more namespaces. To date, media errors have been
reported via the "badblocks" attribute attached to pmem block
devices for namespaces in "raw" or "memory" mode. Given that
namespaces can be in "device-dax" or "btt-sector" mode this new
interface reports media errors generically, i.e. independent of
namespace modes or state.
This subsequently allows userspace tooling to craft "ACPI 6.1
Section 9.20.7.6 Function Index 4 - Clear Uncorrectable Error"
requests and submit them via the ioctl path for NVDIMM root bus
devices.
- Introduce 'struct dax_device' and 'struct dax_operations': Prompted
by a request from Linus and feedback from Christoph this allows for
dax capable drivers to publish their own custom dax operations.
This fixes the broken assumption that all dax operations are
related to a persistent memory device, and makes it easier for
other architectures and platforms to add customized persistent
memory support.
- 'libnvdimm' core updates: A new "deep_flush" sysfs attribute is
available for storage appliance applications to manually trigger
memory controllers to drain write-pending buffers that would
otherwise be flushed automatically by the platform ADR
(asynchronous-DRAM-refresh) mechanism at a power loss event.
Support for "locked" DIMMs is included to prevent namespaces from
surfacing when the namespace label data area is locked. Finally,
fixes for various reported deadlocks and crashes, also tagged for
-stable.
- ACPI / nfit driver updates: General updates of the nfit driver to
add DSM command overrides, ACPI 6.1 health state flags support, DSM
payload debug available by default, and various fixes.
Acknowledgements that came after the branch was pushed:
- commit 565851c972 "device-dax: fix sysfs attribute deadlock":
Tested-by: Yi Zhang <yizhan@redhat.com>
- commit 23f4984483 "libnvdimm: rework region badblocks clearing"
Tested-by: Toshi Kani <toshi.kani@hpe.com>"
* tag 'libnvdimm-for-4.12' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (52 commits)
libnvdimm, pfn: fix 'npfns' vs section alignment
libnvdimm: handle locked label storage areas
libnvdimm: convert NDD_ flags to use bitops, introduce NDD_LOCKED
brd: fix uninitialized use of brd->dax_dev
block, dax: use correct format string in bdev_dax_supported
device-dax: fix sysfs attribute deadlock
libnvdimm: restore "libnvdimm: band aid btt vs clear poison locking"
libnvdimm: fix nvdimm_bus_lock() vs device_lock() ordering
libnvdimm: rework region badblocks clearing
acpi, nfit: kill ACPI_NFIT_DEBUG
libnvdimm: fix clear length of nvdimm_forget_poison()
libnvdimm, pmem: fix a NULL pointer BUG in nd_pmem_notify
libnvdimm, region: sysfs trigger for nvdimm_flush()
libnvdimm: fix phys_addr for nvdimm_clear_poison
x86, dax, pmem: remove indirection around memcpy_from_pmem()
block: remove block_device_operations ->direct_access()
block, dax: convert bdev_dax_supported() to dax_direct_access()
filesystem-dax: convert to dax_direct_access()
Revert "block: use DAX for partition table reads"
ext2, ext4, xfs: retrieve dax_device for iomap operations
...
This commit is contained in:
@@ -284,6 +284,7 @@ config CPM2
|
||||
config AXON_RAM
|
||||
tristate "Axon DDR2 memory device driver"
|
||||
depends on PPC_IBM_CELL_BLADE && BLOCK
|
||||
select DAX
|
||||
default m
|
||||
help
|
||||
It registers one block device per Axon's DDR2 memory bank found
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/fs.h>
|
||||
@@ -62,6 +63,7 @@ static int azfs_major, azfs_minor;
|
||||
struct axon_ram_bank {
|
||||
struct platform_device *device;
|
||||
struct gendisk *disk;
|
||||
struct dax_device *dax_dev;
|
||||
unsigned int irq_id;
|
||||
unsigned long ph_addr;
|
||||
unsigned long io_addr;
|
||||
@@ -137,25 +139,32 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
|
||||
return BLK_QC_T_NONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* axon_ram_direct_access - direct_access() method for block device
|
||||
* @device, @sector, @data: see block_device_operations method
|
||||
*/
|
||||
static const struct block_device_operations axon_ram_devops = {
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static long
|
||||
axon_ram_direct_access(struct block_device *device, sector_t sector,
|
||||
void **kaddr, pfn_t *pfn, long size)
|
||||
__axon_ram_direct_access(struct axon_ram_bank *bank, pgoff_t pgoff, long nr_pages,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct axon_ram_bank *bank = device->bd_disk->private_data;
|
||||
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
|
||||
resource_size_t offset = pgoff * PAGE_SIZE;
|
||||
|
||||
*kaddr = (void *) bank->io_addr + offset;
|
||||
*pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
|
||||
return bank->size - offset;
|
||||
return (bank->size - offset) / PAGE_SIZE;
|
||||
}
|
||||
|
||||
static const struct block_device_operations axon_ram_devops = {
|
||||
.owner = THIS_MODULE,
|
||||
.direct_access = axon_ram_direct_access
|
||||
static long
|
||||
axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct axon_ram_bank *bank = dax_get_private(dax_dev);
|
||||
|
||||
return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static const struct dax_operations axon_ram_dax_ops = {
|
||||
.direct_access = axon_ram_dax_direct_access,
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -219,6 +228,7 @@ static int axon_ram_probe(struct platform_device *device)
|
||||
goto failed;
|
||||
}
|
||||
|
||||
|
||||
bank->disk->major = azfs_major;
|
||||
bank->disk->first_minor = azfs_minor;
|
||||
bank->disk->fops = &axon_ram_devops;
|
||||
@@ -227,6 +237,13 @@ static int axon_ram_probe(struct platform_device *device)
|
||||
sprintf(bank->disk->disk_name, "%s%d",
|
||||
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
|
||||
|
||||
bank->dax_dev = alloc_dax(bank, bank->disk->disk_name,
|
||||
&axon_ram_dax_ops);
|
||||
if (!bank->dax_dev) {
|
||||
rc = -ENOMEM;
|
||||
goto failed;
|
||||
}
|
||||
|
||||
bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
|
||||
if (bank->disk->queue == NULL) {
|
||||
dev_err(&device->dev, "Cannot register disk queue\n");
|
||||
@@ -278,6 +295,8 @@ failed:
|
||||
del_gendisk(bank->disk);
|
||||
put_disk(bank->disk);
|
||||
}
|
||||
kill_dax(bank->dax_dev);
|
||||
put_dax(bank->dax_dev);
|
||||
device->dev.platform_data = NULL;
|
||||
if (bank->io_addr != 0)
|
||||
iounmap((void __iomem *) bank->io_addr);
|
||||
@@ -300,6 +319,8 @@ axon_ram_remove(struct platform_device *device)
|
||||
|
||||
device_remove_file(&device->dev, &dev_attr_ecc);
|
||||
free_irq(bank->irq_id, device);
|
||||
kill_dax(bank->dax_dev);
|
||||
put_dax(bank->dax_dev);
|
||||
del_gendisk(bank->disk);
|
||||
put_disk(bank->disk);
|
||||
iounmap((void __iomem *) bank->io_addr);
|
||||
|
||||
@@ -44,11 +44,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
|
||||
BUG();
|
||||
}
|
||||
|
||||
static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n)
|
||||
{
|
||||
return memcpy_mcsafe(dst, src, n);
|
||||
}
|
||||
|
||||
/**
|
||||
* arch_wb_cache_pmem - write back a cache range with CLWB
|
||||
* @vaddr: virtual start address
|
||||
|
||||
@@ -79,6 +79,7 @@ int strcmp(const char *cs, const char *ct);
|
||||
#define memset(s, c, n) __memset(s, c, n)
|
||||
#endif
|
||||
|
||||
#define __HAVE_ARCH_MEMCPY_MCSAFE 1
|
||||
__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt);
|
||||
DECLARE_STATIC_KEY_FALSE(mcsafe_key);
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ menuconfig BLOCK
|
||||
default y
|
||||
select SBITMAP
|
||||
select SRCU
|
||||
select DAX
|
||||
help
|
||||
Provide block layer support for the kernel.
|
||||
|
||||
|
||||
@@ -16,7 +16,6 @@
|
||||
#include <linux/kmod.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/blktrace_api.h>
|
||||
|
||||
#include "partitions/check.h"
|
||||
@@ -630,24 +629,12 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n)
|
||||
{
|
||||
struct address_space *mapping = bdev->bd_inode->i_mapping;
|
||||
|
||||
return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)),
|
||||
NULL);
|
||||
}
|
||||
|
||||
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
|
||||
{
|
||||
struct address_space *mapping = bdev->bd_inode->i_mapping;
|
||||
struct page *page;
|
||||
|
||||
/* don't populate page cache for dax capable devices */
|
||||
if (IS_DAX(bdev->bd_inode))
|
||||
page = read_dax_sector(bdev, n);
|
||||
else
|
||||
page = read_pagecache_sector(bdev, n);
|
||||
|
||||
page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL);
|
||||
if (!IS_ERR(page)) {
|
||||
if (PageError(page))
|
||||
goto fail;
|
||||
|
||||
+1
-1
@@ -71,7 +71,7 @@ obj-$(CONFIG_PARPORT) += parport/
|
||||
obj-$(CONFIG_NVM) += lightnvm/
|
||||
obj-y += base/ block/ misc/ mfd/ nfc/
|
||||
obj-$(CONFIG_LIBNVDIMM) += nvdimm/
|
||||
obj-$(CONFIG_DEV_DAX) += dax/
|
||||
obj-$(CONFIG_DAX) += dax/
|
||||
obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
|
||||
obj-$(CONFIG_NUBUS) += nubus/
|
||||
obj-y += macintosh/
|
||||
|
||||
@@ -12,15 +12,3 @@ config ACPI_NFIT
|
||||
|
||||
To compile this driver as a module, choose M here:
|
||||
the module will be called nfit.
|
||||
|
||||
config ACPI_NFIT_DEBUG
|
||||
bool "NFIT DSM debug"
|
||||
depends on ACPI_NFIT
|
||||
depends on DYNAMIC_DEBUG
|
||||
default n
|
||||
help
|
||||
Enabling this option causes the nfit driver to dump the
|
||||
input and output buffers of _DSM operations on the ACPI0012
|
||||
device and its children. This can be very verbose, so leave
|
||||
it disabled unless you are debugging a hardware / firmware
|
||||
issue.
|
||||
|
||||
+166
-69
@@ -49,7 +49,16 @@ MODULE_PARM_DESC(scrub_overflow_abort,
|
||||
static bool disable_vendor_specific;
|
||||
module_param(disable_vendor_specific, bool, S_IRUGO);
|
||||
MODULE_PARM_DESC(disable_vendor_specific,
|
||||
"Limit commands to the publicly specified set\n");
|
||||
"Limit commands to the publicly specified set");
|
||||
|
||||
static unsigned long override_dsm_mask;
|
||||
module_param(override_dsm_mask, ulong, S_IRUGO);
|
||||
MODULE_PARM_DESC(override_dsm_mask, "Bitmask of allowed NVDIMM DSM functions");
|
||||
|
||||
static int default_dsm_family = -1;
|
||||
module_param(default_dsm_family, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(default_dsm_family,
|
||||
"Try this DSM type first when identifying NVDIMM family");
|
||||
|
||||
LIST_HEAD(acpi_descs);
|
||||
DEFINE_MUTEX(acpi_desc_lock);
|
||||
@@ -175,14 +184,29 @@ static int xlat_bus_status(void *buf, unsigned int cmd, u32 status)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xlat_nvdimm_status(void *buf, unsigned int cmd, u32 status)
|
||||
{
|
||||
switch (cmd) {
|
||||
case ND_CMD_GET_CONFIG_SIZE:
|
||||
if (status >> 16 & ND_CONFIG_LOCKED)
|
||||
return -EACCES;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* all other non-zero status results in an error */
|
||||
if (status)
|
||||
return -EIO;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int xlat_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd,
|
||||
u32 status)
|
||||
{
|
||||
if (!nvdimm)
|
||||
return xlat_bus_status(buf, cmd, status);
|
||||
if (status)
|
||||
return -EIO;
|
||||
return 0;
|
||||
return xlat_nvdimm_status(buf, cmd, status);
|
||||
}
|
||||
|
||||
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
@@ -259,14 +283,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
in_buf.buffer.length = call_pkg->nd_size_in;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
|
||||
dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
|
||||
__func__, dimm_name, cmd, func,
|
||||
in_buf.buffer.length);
|
||||
print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
|
||||
dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
|
||||
__func__, dimm_name, cmd, func, in_buf.buffer.length);
|
||||
print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4,
|
||||
in_buf.buffer.pointer,
|
||||
min_t(u32, 256, in_buf.buffer.length), true);
|
||||
}
|
||||
|
||||
out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj);
|
||||
if (!out_obj) {
|
||||
@@ -298,13 +319,11 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
|
||||
dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
|
||||
dimm_name, cmd_name, out_obj->buffer.length);
|
||||
print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
|
||||
4, out_obj->buffer.pointer, min_t(u32, 128,
|
||||
out_obj->buffer.length), true);
|
||||
}
|
||||
dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
|
||||
cmd_name, out_obj->buffer.length);
|
||||
print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
|
||||
out_obj->buffer.pointer,
|
||||
min_t(u32, 128, out_obj->buffer.length), true);
|
||||
|
||||
for (i = 0, offset = 0; i < desc->out_num; i++) {
|
||||
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
|
||||
@@ -448,9 +467,9 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
|
||||
INIT_LIST_HEAD(&nfit_memdev->list);
|
||||
memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
|
||||
list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
|
||||
dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d\n",
|
||||
dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
|
||||
__func__, memdev->device_handle, memdev->range_index,
|
||||
memdev->region_index);
|
||||
memdev->region_index, memdev->flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -729,28 +748,38 @@ static void nfit_mem_init_bdw(struct acpi_nfit_desc *acpi_desc,
|
||||
}
|
||||
}
|
||||
|
||||
static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
|
||||
static int __nfit_mem_init(struct acpi_nfit_desc *acpi_desc,
|
||||
struct acpi_nfit_system_address *spa)
|
||||
{
|
||||
struct nfit_mem *nfit_mem, *found;
|
||||
struct nfit_memdev *nfit_memdev;
|
||||
int type = nfit_spa_type(spa);
|
||||
int type = spa ? nfit_spa_type(spa) : 0;
|
||||
|
||||
switch (type) {
|
||||
case NFIT_SPA_DCR:
|
||||
case NFIT_SPA_PM:
|
||||
break;
|
||||
default:
|
||||
return 0;
|
||||
if (spa)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This loop runs in two modes, when a dimm is mapped the loop
|
||||
* adds memdev associations to an existing dimm, or creates a
|
||||
* dimm. In the unmapped dimm case this loop sweeps for memdev
|
||||
* instances with an invalid / zero range_index and adds those
|
||||
* dimms without spa associations.
|
||||
*/
|
||||
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
|
||||
struct nfit_flush *nfit_flush;
|
||||
struct nfit_dcr *nfit_dcr;
|
||||
u32 device_handle;
|
||||
u16 dcr;
|
||||
|
||||
if (nfit_memdev->memdev->range_index != spa->range_index)
|
||||
if (spa && nfit_memdev->memdev->range_index != spa->range_index)
|
||||
continue;
|
||||
if (!spa && nfit_memdev->memdev->range_index)
|
||||
continue;
|
||||
found = NULL;
|
||||
dcr = nfit_memdev->memdev->region_index;
|
||||
@@ -835,14 +864,15 @@ static int nfit_mem_dcr_init(struct acpi_nfit_desc *acpi_desc,
|
||||
break;
|
||||
}
|
||||
nfit_mem_init_bdw(acpi_desc, nfit_mem, spa);
|
||||
} else {
|
||||
} else if (type == NFIT_SPA_PM) {
|
||||
/*
|
||||
* A single dimm may belong to multiple SPA-PM
|
||||
* ranges, record at least one in addition to
|
||||
* any SPA-DCR range.
|
||||
*/
|
||||
nfit_mem->memdev_pmem = nfit_memdev->memdev;
|
||||
}
|
||||
} else
|
||||
nfit_mem->memdev_dcr = nfit_memdev->memdev;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@@ -866,6 +896,8 @@ static int nfit_mem_cmp(void *priv, struct list_head *_a, struct list_head *_b)
|
||||
static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
|
||||
{
|
||||
struct nfit_spa *nfit_spa;
|
||||
int rc;
|
||||
|
||||
|
||||
/*
|
||||
* For each SPA-DCR or SPA-PMEM address range find its
|
||||
@@ -876,13 +908,20 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
|
||||
* BDWs are optional.
|
||||
*/
|
||||
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
|
||||
int rc;
|
||||
|
||||
rc = nfit_mem_dcr_init(acpi_desc, nfit_spa->spa);
|
||||
rc = __nfit_mem_init(acpi_desc, nfit_spa->spa);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a DIMM has failed to be mapped into SPA there will be no
|
||||
* SPA entries above. Find and register all the unmapped DIMMs
|
||||
* for reporting and recovery purposes.
|
||||
*/
|
||||
rc = __nfit_mem_init(acpi_desc, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
list_sort(NULL, &acpi_desc->dimms, nfit_mem_cmp);
|
||||
|
||||
return 0;
|
||||
@@ -1237,12 +1276,14 @@ static ssize_t flags_show(struct device *dev,
|
||||
{
|
||||
u16 flags = to_nfit_memdev(dev)->flags;
|
||||
|
||||
return sprintf(buf, "%s%s%s%s%s\n",
|
||||
return sprintf(buf, "%s%s%s%s%s%s%s\n",
|
||||
flags & ACPI_NFIT_MEM_SAVE_FAILED ? "save_fail " : "",
|
||||
flags & ACPI_NFIT_MEM_RESTORE_FAILED ? "restore_fail " : "",
|
||||
flags & ACPI_NFIT_MEM_FLUSH_FAILED ? "flush_fail " : "",
|
||||
flags & ACPI_NFIT_MEM_NOT_ARMED ? "not_armed " : "",
|
||||
flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "");
|
||||
flags & ACPI_NFIT_MEM_HEALTH_OBSERVED ? "smart_event " : "",
|
||||
flags & ACPI_NFIT_MEM_MAP_FAILED ? "map_fail " : "",
|
||||
flags & ACPI_NFIT_MEM_HEALTH_ENABLED ? "smart_notify " : "");
|
||||
}
|
||||
static DEVICE_ATTR_RO(flags);
|
||||
|
||||
@@ -1290,8 +1331,16 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
|
||||
struct device *dev = container_of(kobj, struct device, kobj);
|
||||
struct nvdimm *nvdimm = to_nvdimm(dev);
|
||||
|
||||
if (!to_nfit_dcr(dev))
|
||||
if (!to_nfit_dcr(dev)) {
|
||||
/* Without a dcr only the memdev attributes can be surfaced */
|
||||
if (a == &dev_attr_handle.attr || a == &dev_attr_phys_id.attr
|
||||
|| a == &dev_attr_flags.attr
|
||||
|| a == &dev_attr_family.attr
|
||||
|| a == &dev_attr_dsm_mask.attr)
|
||||
return a->mode;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (a == &dev_attr_format1.attr && num_nvdimm_formats(nvdimm) <= 1)
|
||||
return 0;
|
||||
return a->mode;
|
||||
@@ -1368,6 +1417,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
unsigned long dsm_mask;
|
||||
const u8 *uuid;
|
||||
int i;
|
||||
int family = -1;
|
||||
|
||||
/* nfit test assumes 1:1 relationship between commands and dsms */
|
||||
nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en;
|
||||
@@ -1398,11 +1448,14 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
|
||||
*/
|
||||
for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_MSFT; i++)
|
||||
if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1))
|
||||
break;
|
||||
if (family < 0 || i == default_dsm_family)
|
||||
family = i;
|
||||
|
||||
/* limit the supported commands to those that are publicly documented */
|
||||
nfit_mem->family = i;
|
||||
if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
|
||||
nfit_mem->family = family;
|
||||
if (override_dsm_mask && !disable_vendor_specific)
|
||||
dsm_mask = override_dsm_mask;
|
||||
else if (nfit_mem->family == NVDIMM_FAMILY_INTEL) {
|
||||
dsm_mask = 0x3fe;
|
||||
if (disable_vendor_specific)
|
||||
dsm_mask &= ~(1 << ND_CMD_VENDOR);
|
||||
@@ -1462,6 +1515,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
|
||||
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
|
||||
struct acpi_nfit_flush_address *flush;
|
||||
unsigned long flags = 0, cmd_mask;
|
||||
struct nfit_memdev *nfit_memdev;
|
||||
u32 device_handle;
|
||||
u16 mem_flags;
|
||||
|
||||
@@ -1473,11 +1527,22 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
|
||||
}
|
||||
|
||||
if (nfit_mem->bdw && nfit_mem->memdev_pmem)
|
||||
flags |= NDD_ALIASING;
|
||||
set_bit(NDD_ALIASING, &flags);
|
||||
|
||||
/* collate flags across all memdevs for this dimm */
|
||||
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
|
||||
struct acpi_nfit_memory_map *dimm_memdev;
|
||||
|
||||
dimm_memdev = __to_nfit_memdev(nfit_mem);
|
||||
if (dimm_memdev->device_handle
|
||||
!= nfit_memdev->memdev->device_handle)
|
||||
continue;
|
||||
dimm_memdev->flags |= nfit_memdev->memdev->flags;
|
||||
}
|
||||
|
||||
mem_flags = __to_nfit_memdev(nfit_mem)->flags;
|
||||
if (mem_flags & ACPI_NFIT_MEM_NOT_ARMED)
|
||||
flags |= NDD_UNARMED;
|
||||
set_bit(NDD_UNARMED, &flags);
|
||||
|
||||
rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
|
||||
if (rc)
|
||||
@@ -1507,12 +1572,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
|
||||
if ((mem_flags & ACPI_NFIT_MEM_FAILED_MASK) == 0)
|
||||
continue;
|
||||
|
||||
dev_info(acpi_desc->dev, "%s flags:%s%s%s%s\n",
|
||||
dev_info(acpi_desc->dev, "%s flags:%s%s%s%s%s\n",
|
||||
nvdimm_name(nvdimm),
|
||||
mem_flags & ACPI_NFIT_MEM_SAVE_FAILED ? " save_fail" : "",
|
||||
mem_flags & ACPI_NFIT_MEM_RESTORE_FAILED ? " restore_fail":"",
|
||||
mem_flags & ACPI_NFIT_MEM_FLUSH_FAILED ? " flush_fail" : "",
|
||||
mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "");
|
||||
mem_flags & ACPI_NFIT_MEM_NOT_ARMED ? " not_armed" : "",
|
||||
mem_flags & ACPI_NFIT_MEM_MAP_FAILED ? " map_fail" : "");
|
||||
|
||||
}
|
||||
|
||||
@@ -1783,8 +1849,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
|
||||
mmio_flush_range((void __force *)
|
||||
mmio->addr.aperture + offset, c);
|
||||
|
||||
memcpy_from_pmem(iobuf + copied,
|
||||
mmio->addr.aperture + offset, c);
|
||||
memcpy(iobuf + copied, mmio->addr.aperture + offset, c);
|
||||
}
|
||||
|
||||
copied += c;
|
||||
@@ -2525,6 +2590,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
|
||||
acpi_nfit_register_region(acpi_desc, nfit_spa);
|
||||
}
|
||||
}
|
||||
acpi_desc->init_complete = 1;
|
||||
|
||||
list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
|
||||
acpi_nfit_async_scrub(acpi_desc, nfit_spa);
|
||||
@@ -2547,7 +2613,8 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
queue_work(nfit_wq, &acpi_desc->work);
|
||||
if (!acpi_desc->cancel)
|
||||
queue_work(nfit_wq, &acpi_desc->work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2593,32 +2660,11 @@ static int acpi_nfit_desc_init_scrub_attr(struct acpi_nfit_desc *acpi_desc)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void acpi_nfit_destruct(void *data)
|
||||
static void acpi_nfit_unregister(void *data)
|
||||
{
|
||||
struct acpi_nfit_desc *acpi_desc = data;
|
||||
struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
|
||||
|
||||
/*
|
||||
* Destruct under acpi_desc_lock so that nfit_handle_mce does not
|
||||
* race teardown
|
||||
*/
|
||||
mutex_lock(&acpi_desc_lock);
|
||||
acpi_desc->cancel = 1;
|
||||
/*
|
||||
* Bounce the nvdimm bus lock to make sure any in-flight
|
||||
* acpi_nfit_ars_rescan() submissions have had a chance to
|
||||
* either submit or see ->cancel set.
|
||||
*/
|
||||
device_lock(bus_dev);
|
||||
device_unlock(bus_dev);
|
||||
|
||||
flush_workqueue(nfit_wq);
|
||||
if (acpi_desc->scrub_count_state)
|
||||
sysfs_put(acpi_desc->scrub_count_state);
|
||||
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
|
||||
acpi_desc->nvdimm_bus = NULL;
|
||||
list_del(&acpi_desc->list);
|
||||
mutex_unlock(&acpi_desc_lock);
|
||||
}
|
||||
|
||||
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
|
||||
@@ -2636,7 +2682,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
|
||||
if (!acpi_desc->nvdimm_bus)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = devm_add_action_or_reset(dev, acpi_nfit_destruct,
|
||||
rc = devm_add_action_or_reset(dev, acpi_nfit_unregister,
|
||||
acpi_desc);
|
||||
if (rc)
|
||||
return rc;
|
||||
@@ -2728,6 +2774,13 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
|
||||
device_lock(dev);
|
||||
device_unlock(dev);
|
||||
|
||||
/* bounce the init_mutex to make init_complete valid */
|
||||
mutex_lock(&acpi_desc->init_mutex);
|
||||
if (acpi_desc->cancel || acpi_desc->init_complete) {
|
||||
mutex_unlock(&acpi_desc->init_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Scrub work could take 10s of seconds, userspace may give up so we
|
||||
* need to be interruptible while waiting.
|
||||
@@ -2735,6 +2788,7 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
|
||||
INIT_WORK_ONSTACK(&flush.work, flush_probe);
|
||||
COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
|
||||
queue_work(nfit_wq, &flush.work);
|
||||
mutex_unlock(&acpi_desc->init_mutex);
|
||||
|
||||
rc = wait_for_completion_interruptible(&flush.cmp);
|
||||
cancel_work_sync(&flush.work);
|
||||
@@ -2771,10 +2825,12 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
|
||||
if (work_busy(&acpi_desc->work))
|
||||
return -EBUSY;
|
||||
|
||||
if (acpi_desc->cancel)
|
||||
return 0;
|
||||
|
||||
mutex_lock(&acpi_desc->init_mutex);
|
||||
if (acpi_desc->cancel) {
|
||||
mutex_unlock(&acpi_desc->init_mutex);
|
||||
return 0;
|
||||
}
|
||||
|
||||
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
|
||||
struct acpi_nfit_system_address *spa = nfit_spa->spa;
|
||||
|
||||
@@ -2818,6 +2874,40 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
|
||||
|
||||
static void acpi_nfit_put_table(void *table)
|
||||
{
|
||||
acpi_put_table(table);
|
||||
}
|
||||
|
||||
void acpi_nfit_shutdown(void *data)
|
||||
{
|
||||
struct acpi_nfit_desc *acpi_desc = data;
|
||||
struct device *bus_dev = to_nvdimm_bus_dev(acpi_desc->nvdimm_bus);
|
||||
|
||||
/*
|
||||
* Destruct under acpi_desc_lock so that nfit_handle_mce does not
|
||||
* race teardown
|
||||
*/
|
||||
mutex_lock(&acpi_desc_lock);
|
||||
list_del(&acpi_desc->list);
|
||||
mutex_unlock(&acpi_desc_lock);
|
||||
|
||||
mutex_lock(&acpi_desc->init_mutex);
|
||||
acpi_desc->cancel = 1;
|
||||
mutex_unlock(&acpi_desc->init_mutex);
|
||||
|
||||
/*
|
||||
* Bounce the nvdimm bus lock to make sure any in-flight
|
||||
* acpi_nfit_ars_rescan() submissions have had a chance to
|
||||
* either submit or see ->cancel set.
|
||||
*/
|
||||
device_lock(bus_dev);
|
||||
device_unlock(bus_dev);
|
||||
|
||||
flush_workqueue(nfit_wq);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(acpi_nfit_shutdown);
|
||||
|
||||
static int acpi_nfit_add(struct acpi_device *adev)
|
||||
{
|
||||
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
@@ -2834,6 +2924,10 @@ static int acpi_nfit_add(struct acpi_device *adev)
|
||||
dev_dbg(dev, "failed to find NFIT at startup\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
rc = devm_add_action_or_reset(dev, acpi_nfit_put_table, tbl);
|
||||
if (rc)
|
||||
return rc;
|
||||
sz = tbl->length;
|
||||
|
||||
acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
|
||||
@@ -2861,12 +2955,15 @@ static int acpi_nfit_add(struct acpi_device *adev)
|
||||
rc = acpi_nfit_init(acpi_desc, (void *) tbl
|
||||
+ sizeof(struct acpi_table_nfit),
|
||||
sz - sizeof(struct acpi_table_nfit));
|
||||
return rc;
|
||||
|
||||
if (rc)
|
||||
return rc;
|
||||
return devm_add_action_or_reset(dev, acpi_nfit_shutdown, acpi_desc);
|
||||
}
|
||||
|
||||
static int acpi_nfit_remove(struct acpi_device *adev)
|
||||
{
|
||||
/* see acpi_nfit_destruct */
|
||||
/* see acpi_nfit_unregister */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
|
||||
#define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \
|
||||
| ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \
|
||||
| ACPI_NFIT_MEM_NOT_ARMED)
|
||||
| ACPI_NFIT_MEM_NOT_ARMED | ACPI_NFIT_MEM_MAP_FAILED)
|
||||
|
||||
enum nfit_uuids {
|
||||
/* for simplicity alias the uuid index with the family id */
|
||||
@@ -163,6 +163,7 @@ struct acpi_nfit_desc {
|
||||
unsigned int scrub_count;
|
||||
unsigned int scrub_mode;
|
||||
unsigned int cancel:1;
|
||||
unsigned int init_complete:1;
|
||||
unsigned long dimm_cmd_force_en;
|
||||
unsigned long bus_cmd_force_en;
|
||||
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
|
||||
@@ -238,6 +239,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
|
||||
|
||||
const u8 *to_nfit_uuid(enum nfit_uuids id);
|
||||
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
|
||||
void acpi_nfit_shutdown(void *data);
|
||||
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
|
||||
void __acpi_nvdimm_notify(struct device *dev, u32 event);
|
||||
int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
|
||||
|
||||
@@ -323,6 +323,7 @@ config BLK_DEV_SX8
|
||||
|
||||
config BLK_DEV_RAM
|
||||
tristate "RAM block device support"
|
||||
select DAX if BLK_DEV_RAM_DAX
|
||||
---help---
|
||||
Saying Y here will allow you to use a portion of your RAM memory as
|
||||
a block device, so that you can make file systems on it, read and
|
||||
|
||||
+37
-11
@@ -21,6 +21,7 @@
|
||||
#include <linux/slab.h>
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
#include <linux/pfn_t.h>
|
||||
#include <linux/dax.h>
|
||||
#endif
|
||||
|
||||
#include <linux/uaccess.h>
|
||||
@@ -41,6 +42,9 @@ struct brd_device {
|
||||
|
||||
struct request_queue *brd_queue;
|
||||
struct gendisk *brd_disk;
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
struct dax_device *dax_dev;
|
||||
#endif
|
||||
struct list_head brd_list;
|
||||
|
||||
/*
|
||||
@@ -326,30 +330,38 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
static long brd_direct_access(struct block_device *bdev, sector_t sector,
|
||||
void **kaddr, pfn_t *pfn, long size)
|
||||
static long __brd_direct_access(struct brd_device *brd, pgoff_t pgoff,
|
||||
long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct brd_device *brd = bdev->bd_disk->private_data;
|
||||
struct page *page;
|
||||
|
||||
if (!brd)
|
||||
return -ENODEV;
|
||||
page = brd_insert_page(brd, sector);
|
||||
page = brd_insert_page(brd, PFN_PHYS(pgoff) / 512);
|
||||
if (!page)
|
||||
return -ENOSPC;
|
||||
*kaddr = page_address(page);
|
||||
*pfn = page_to_pfn_t(page);
|
||||
|
||||
return PAGE_SIZE;
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
#define brd_direct_access NULL
|
||||
|
||||
static long brd_dax_direct_access(struct dax_device *dax_dev,
|
||||
pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
struct brd_device *brd = dax_get_private(dax_dev);
|
||||
|
||||
return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
|
||||
}
|
||||
|
||||
static const struct dax_operations brd_dax_ops = {
|
||||
.direct_access = brd_dax_direct_access,
|
||||
};
|
||||
#endif
|
||||
|
||||
static const struct block_device_operations brd_fops = {
|
||||
.owner = THIS_MODULE,
|
||||
.rw_page = brd_rw_page,
|
||||
.direct_access = brd_direct_access,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -415,9 +427,6 @@ static struct brd_device *brd_alloc(int i)
|
||||
* is harmless)
|
||||
*/
|
||||
blk_queue_physical_block_size(brd->brd_queue, PAGE_SIZE);
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
|
||||
#endif
|
||||
disk = brd->brd_disk = alloc_disk(max_part);
|
||||
if (!disk)
|
||||
goto out_free_queue;
|
||||
@@ -430,8 +439,21 @@ static struct brd_device *brd_alloc(int i)
|
||||
sprintf(disk->disk_name, "ram%d", i);
|
||||
set_capacity(disk, rd_size * 2);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
|
||||
brd->dax_dev = alloc_dax(brd, disk->disk_name, &brd_dax_ops);
|
||||
if (!brd->dax_dev)
|
||||
goto out_free_inode;
|
||||
#endif
|
||||
|
||||
|
||||
return brd;
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
out_free_inode:
|
||||
kill_dax(brd->dax_dev);
|
||||
put_dax(brd->dax_dev);
|
||||
#endif
|
||||
out_free_queue:
|
||||
blk_cleanup_queue(brd->brd_queue);
|
||||
out_free_dev:
|
||||
@@ -471,6 +493,10 @@ out:
|
||||
static void brd_del_one(struct brd_device *brd)
|
||||
{
|
||||
list_del(&brd->brd_list);
|
||||
#ifdef CONFIG_BLK_DEV_RAM_DAX
|
||||
kill_dax(brd->dax_dev);
|
||||
put_dax(brd->dax_dev);
|
||||
#endif
|
||||
del_gendisk(brd->brd_disk);
|
||||
brd_free(brd);
|
||||
}
|
||||
|
||||
+8
-4
@@ -1,8 +1,13 @@
|
||||
menuconfig DEV_DAX
|
||||
menuconfig DAX
|
||||
tristate "DAX: direct access to differentiated memory"
|
||||
default m if NVDIMM_DAX
|
||||
depends on TRANSPARENT_HUGEPAGE
|
||||
select SRCU
|
||||
default m if NVDIMM_DAX
|
||||
|
||||
if DAX
|
||||
|
||||
config DEV_DAX
|
||||
tristate "Device DAX: direct access mapping device"
|
||||
depends on TRANSPARENT_HUGEPAGE
|
||||
help
|
||||
Support raw access to differentiated (persistence, bandwidth,
|
||||
latency...) memory via an mmap(2) capable character
|
||||
@@ -11,7 +16,6 @@ menuconfig DEV_DAX
|
||||
baseline memory pool. Mappings of a /dev/daxX.Y device impose
|
||||
restrictions that make the mapping behavior deterministic.
|
||||
|
||||
if DEV_DAX
|
||||
|
||||
config DEV_DAX_PMEM
|
||||
tristate "PMEM DAX: direct access to persistent memory"
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
obj-$(CONFIG_DEV_DAX) += dax.o
|
||||
obj-$(CONFIG_DAX) += dax.o
|
||||
obj-$(CONFIG_DEV_DAX) += device_dax.o
|
||||
obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
|
||||
|
||||
dax-y := super.o
|
||||
dax_pmem-y := pmem.o
|
||||
device_dax-y := device.o
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#ifndef __DAX_PRIVATE_H__
|
||||
#define __DAX_PRIVATE_H__
|
||||
|
||||
#include <linux/device.h>
|
||||
#include <linux/cdev.h>
|
||||
|
||||
/**
|
||||
* struct dax_region - mapping infrastructure for dax devices
|
||||
* @id: kernel-wide unique region for a memory range
|
||||
* @base: linear address corresponding to @res
|
||||
* @kref: to pin while other agents have a need to do lookups
|
||||
* @dev: parent device backing this region
|
||||
* @align: allocation and mapping alignment for child dax devices
|
||||
* @res: physical address range of the region
|
||||
* @pfn_flags: identify whether the pfns are paged back or not
|
||||
*/
|
||||
struct dax_region {
|
||||
int id;
|
||||
struct ida ida;
|
||||
void *base;
|
||||
struct kref kref;
|
||||
struct device *dev;
|
||||
unsigned int align;
|
||||
struct resource res;
|
||||
unsigned long pfn_flags;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct dev_dax - instance data for a subdivision of a dax region
|
||||
* @region - parent region
|
||||
* @dax_dev - core dax functionality
|
||||
* @dev - device core
|
||||
* @id - child id in the region
|
||||
* @num_resources - number of physical address extents in this device
|
||||
* @res - array of physical address ranges
|
||||
*/
|
||||
struct dev_dax {
|
||||
struct dax_region *region;
|
||||
struct dax_device *dax_dev;
|
||||
struct device dev;
|
||||
int id;
|
||||
int num_resources;
|
||||
struct resource res[0];
|
||||
};
|
||||
#endif
|
||||
+4
-11
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
||||
* Copyright(c) 2016 - 2017 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
@@ -12,14 +12,7 @@
|
||||
*/
|
||||
#ifndef __DAX_H__
|
||||
#define __DAX_H__
|
||||
struct device;
|
||||
struct dax_dev;
|
||||
struct resource;
|
||||
struct dax_region;
|
||||
void dax_region_put(struct dax_region *dax_region);
|
||||
struct dax_region *alloc_dax_region(struct device *parent,
|
||||
int region_id, struct resource *res, unsigned int align,
|
||||
void *addr, unsigned long flags);
|
||||
struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
|
||||
struct resource *res, int count);
|
||||
struct dax_device;
|
||||
struct dax_device *inode_dax(struct inode *inode);
|
||||
struct inode *dax_inode(struct dax_device *dax_dev);
|
||||
#endif /* __DAX_H__ */
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#ifndef __DEVICE_DAX_H__
|
||||
#define __DEVICE_DAX_H__
|
||||
struct device;
|
||||
struct dev_dax;
|
||||
struct resource;
|
||||
struct dax_region;
|
||||
void dax_region_put(struct dax_region *dax_region);
|
||||
struct dax_region *alloc_dax_region(struct device *parent,
|
||||
int region_id, struct resource *res, unsigned int align,
|
||||
void *addr, unsigned long flags);
|
||||
struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region,
|
||||
struct resource *res, int count);
|
||||
#endif /* __DEVICE_DAX_H__ */
|
||||
File diff suppressed because it is too large
Load Diff
+5
-5
@@ -16,7 +16,7 @@
|
||||
#include <linux/pfn_t.h>
|
||||
#include "../nvdimm/pfn.h"
|
||||
#include "../nvdimm/nd.h"
|
||||
#include "dax.h"
|
||||
#include "device-dax.h"
|
||||
|
||||
struct dax_pmem {
|
||||
struct device *dev;
|
||||
@@ -61,8 +61,8 @@ static int dax_pmem_probe(struct device *dev)
|
||||
int rc;
|
||||
void *addr;
|
||||
struct resource res;
|
||||
struct dax_dev *dax_dev;
|
||||
struct nd_pfn_sb *pfn_sb;
|
||||
struct dev_dax *dev_dax;
|
||||
struct dax_pmem *dax_pmem;
|
||||
struct nd_region *nd_region;
|
||||
struct nd_namespace_io *nsio;
|
||||
@@ -130,12 +130,12 @@ static int dax_pmem_probe(struct device *dev)
|
||||
return -ENOMEM;
|
||||
|
||||
/* TODO: support for subdividing a dax region... */
|
||||
dax_dev = devm_create_dax_dev(dax_region, &res, 1);
|
||||
dev_dax = devm_create_dev_dax(dax_region, &res, 1);
|
||||
|
||||
/* child dax_dev instances now own the lifetime of the dax_region */
|
||||
/* child dev_dax instances now own the lifetime of the dax_region */
|
||||
dax_region_put(dax_region);
|
||||
|
||||
return PTR_ERR_OR_ZERO(dax_dev);
|
||||
return PTR_ERR_OR_ZERO(dev_dax);
|
||||
}
|
||||
|
||||
static struct nd_device_driver dax_pmem_driver = {
|
||||
|
||||
@@ -0,0 +1,425 @@
|
||||
/*
|
||||
* Copyright(c) 2017 Intel Corporation. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mount.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/cdev.h>
|
||||
#include <linux/hash.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/dax.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
static int nr_dax = CONFIG_NR_DEV_DAX;
|
||||
module_param(nr_dax, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(nr_dax, "max number of dax device instances");
|
||||
|
||||
static dev_t dax_devt;
|
||||
DEFINE_STATIC_SRCU(dax_srcu);
|
||||
static struct vfsmount *dax_mnt;
|
||||
static DEFINE_IDA(dax_minor_ida);
|
||||
static struct kmem_cache *dax_cache __read_mostly;
|
||||
static struct super_block *dax_superblock __read_mostly;
|
||||
|
||||
#define DAX_HASH_SIZE (PAGE_SIZE / sizeof(struct hlist_head))
|
||||
static struct hlist_head dax_host_list[DAX_HASH_SIZE];
|
||||
static DEFINE_SPINLOCK(dax_host_lock);
|
||||
|
||||
int dax_read_lock(void)
|
||||
{
|
||||
return srcu_read_lock(&dax_srcu);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_read_lock);
|
||||
|
||||
void dax_read_unlock(int id)
|
||||
{
|
||||
srcu_read_unlock(&dax_srcu, id);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_read_unlock);
|
||||
|
||||
/**
|
||||
* struct dax_device - anchor object for dax services
|
||||
* @inode: core vfs
|
||||
* @cdev: optional character interface for "device dax"
|
||||
* @host: optional name for lookups where the device path is not available
|
||||
* @private: dax driver private data
|
||||
* @alive: !alive + rcu grace period == no new operations / mappings
|
||||
*/
|
||||
struct dax_device {
|
||||
struct hlist_node list;
|
||||
struct inode inode;
|
||||
struct cdev cdev;
|
||||
const char *host;
|
||||
void *private;
|
||||
bool alive;
|
||||
const struct dax_operations *ops;
|
||||
};
|
||||
|
||||
/**
|
||||
* dax_direct_access() - translate a device pgoff to an absolute pfn
|
||||
* @dax_dev: a dax_device instance representing the logical memory range
|
||||
* @pgoff: offset in pages from the start of the device to translate
|
||||
* @nr_pages: number of consecutive pages caller can handle relative to @pfn
|
||||
* @kaddr: output parameter that returns a virtual address mapping of pfn
|
||||
* @pfn: output parameter that returns an absolute pfn translation of @pgoff
|
||||
*
|
||||
* Return: negative errno if an error occurs, otherwise the number of
|
||||
* pages accessible at the device relative @pgoff.
|
||||
*/
|
||||
long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
|
||||
void **kaddr, pfn_t *pfn)
|
||||
{
|
||||
long avail;
|
||||
|
||||
/*
|
||||
* The device driver is allowed to sleep, in order to make the
|
||||
* memory directly accessible.
|
||||
*/
|
||||
might_sleep();
|
||||
|
||||
if (!dax_dev)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (!dax_alive(dax_dev))
|
||||
return -ENXIO;
|
||||
|
||||
if (nr_pages < 0)
|
||||
return nr_pages;
|
||||
|
||||
avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages,
|
||||
kaddr, pfn);
|
||||
if (!avail)
|
||||
return -ERANGE;
|
||||
return min(avail, nr_pages);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_direct_access);
|
||||
|
||||
bool dax_alive(struct dax_device *dax_dev)
|
||||
{
|
||||
lockdep_assert_held(&dax_srcu);
|
||||
return dax_dev->alive;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_alive);
|
||||
|
||||
static int dax_host_hash(const char *host)
|
||||
{
|
||||
return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
|
||||
* that any fault handlers or operations that might have seen
|
||||
* dax_alive(), have completed. Any operations that start after
|
||||
* synchronize_srcu() has run will abort upon seeing !dax_alive().
|
||||
*/
|
||||
void kill_dax(struct dax_device *dax_dev)
|
||||
{
|
||||
if (!dax_dev)
|
||||
return;
|
||||
|
||||
dax_dev->alive = false;
|
||||
|
||||
synchronize_srcu(&dax_srcu);
|
||||
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_del_init(&dax_dev->list);
|
||||
spin_unlock(&dax_host_lock);
|
||||
|
||||
dax_dev->private = NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kill_dax);
|
||||
|
||||
static struct inode *dax_alloc_inode(struct super_block *sb)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
|
||||
dax_dev = kmem_cache_alloc(dax_cache, GFP_KERNEL);
|
||||
return &dax_dev->inode;
|
||||
}
|
||||
|
||||
static struct dax_device *to_dax_dev(struct inode *inode)
|
||||
{
|
||||
return container_of(inode, struct dax_device, inode);
|
||||
}
|
||||
|
||||
static void dax_i_callback(struct rcu_head *head)
|
||||
{
|
||||
struct inode *inode = container_of(head, struct inode, i_rcu);
|
||||
struct dax_device *dax_dev = to_dax_dev(inode);
|
||||
|
||||
kfree(dax_dev->host);
|
||||
dax_dev->host = NULL;
|
||||
ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
|
||||
kmem_cache_free(dax_cache, dax_dev);
|
||||
}
|
||||
|
||||
static void dax_destroy_inode(struct inode *inode)
|
||||
{
|
||||
struct dax_device *dax_dev = to_dax_dev(inode);
|
||||
|
||||
WARN_ONCE(dax_dev->alive,
|
||||
"kill_dax() must be called before final iput()\n");
|
||||
call_rcu(&inode->i_rcu, dax_i_callback);
|
||||
}
|
||||
|
||||
static const struct super_operations dax_sops = {
|
||||
.statfs = simple_statfs,
|
||||
.alloc_inode = dax_alloc_inode,
|
||||
.destroy_inode = dax_destroy_inode,
|
||||
.drop_inode = generic_delete_inode,
|
||||
};
|
||||
|
||||
static struct dentry *dax_mount(struct file_system_type *fs_type,
|
||||
int flags, const char *dev_name, void *data)
|
||||
{
|
||||
return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
|
||||
}
|
||||
|
||||
static struct file_system_type dax_fs_type = {
|
||||
.name = "dax",
|
||||
.mount = dax_mount,
|
||||
.kill_sb = kill_anon_super,
|
||||
};
|
||||
|
||||
static int dax_test(struct inode *inode, void *data)
|
||||
{
|
||||
dev_t devt = *(dev_t *) data;
|
||||
|
||||
return inode->i_rdev == devt;
|
||||
}
|
||||
|
||||
static int dax_set(struct inode *inode, void *data)
|
||||
{
|
||||
dev_t devt = *(dev_t *) data;
|
||||
|
||||
inode->i_rdev = devt;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct dax_device *dax_dev_get(dev_t devt)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
struct inode *inode;
|
||||
|
||||
inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
|
||||
dax_test, dax_set, &devt);
|
||||
|
||||
if (!inode)
|
||||
return NULL;
|
||||
|
||||
dax_dev = to_dax_dev(inode);
|
||||
if (inode->i_state & I_NEW) {
|
||||
dax_dev->alive = true;
|
||||
inode->i_cdev = &dax_dev->cdev;
|
||||
inode->i_mode = S_IFCHR;
|
||||
inode->i_flags = S_DAX;
|
||||
mapping_set_gfp_mask(&inode->i_data, GFP_USER);
|
||||
unlock_new_inode(inode);
|
||||
}
|
||||
|
||||
return dax_dev;
|
||||
}
|
||||
|
||||
static void dax_add_host(struct dax_device *dax_dev, const char *host)
|
||||
{
|
||||
int hash;
|
||||
|
||||
/*
|
||||
* Unconditionally init dax_dev since it's coming from a
|
||||
* non-zeroed slab cache
|
||||
*/
|
||||
INIT_HLIST_NODE(&dax_dev->list);
|
||||
dax_dev->host = host;
|
||||
if (!host)
|
||||
return;
|
||||
|
||||
hash = dax_host_hash(host);
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_add_head(&dax_dev->list, &dax_host_list[hash]);
|
||||
spin_unlock(&dax_host_lock);
|
||||
}
|
||||
|
||||
struct dax_device *alloc_dax(void *private, const char *__host,
|
||||
const struct dax_operations *ops)
|
||||
{
|
||||
struct dax_device *dax_dev;
|
||||
const char *host;
|
||||
dev_t devt;
|
||||
int minor;
|
||||
|
||||
host = kstrdup(__host, GFP_KERNEL);
|
||||
if (__host && !host)
|
||||
return NULL;
|
||||
|
||||
minor = ida_simple_get(&dax_minor_ida, 0, nr_dax, GFP_KERNEL);
|
||||
if (minor < 0)
|
||||
goto err_minor;
|
||||
|
||||
devt = MKDEV(MAJOR(dax_devt), minor);
|
||||
dax_dev = dax_dev_get(devt);
|
||||
if (!dax_dev)
|
||||
goto err_dev;
|
||||
|
||||
dax_add_host(dax_dev, host);
|
||||
dax_dev->ops = ops;
|
||||
dax_dev->private = private;
|
||||
return dax_dev;
|
||||
|
||||
err_dev:
|
||||
ida_simple_remove(&dax_minor_ida, minor);
|
||||
err_minor:
|
||||
kfree(host);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(alloc_dax);
|
||||
|
||||
void put_dax(struct dax_device *dax_dev)
|
||||
{
|
||||
if (!dax_dev)
|
||||
return;
|
||||
iput(&dax_dev->inode);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(put_dax);
|
||||
|
||||
/**
|
||||
* dax_get_by_host() - temporary lookup mechanism for filesystem-dax
|
||||
* @host: alternate name for the device registered by a dax driver
|
||||
*/
|
||||
struct dax_device *dax_get_by_host(const char *host)
|
||||
{
|
||||
struct dax_device *dax_dev, *found = NULL;
|
||||
int hash, id;
|
||||
|
||||
if (!host)
|
||||
return NULL;
|
||||
|
||||
hash = dax_host_hash(host);
|
||||
|
||||
id = dax_read_lock();
|
||||
spin_lock(&dax_host_lock);
|
||||
hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
|
||||
if (!dax_alive(dax_dev)
|
||||
|| strcmp(host, dax_dev->host) != 0)
|
||||
continue;
|
||||
|
||||
if (igrab(&dax_dev->inode))
|
||||
found = dax_dev;
|
||||
break;
|
||||
}
|
||||
spin_unlock(&dax_host_lock);
|
||||
dax_read_unlock(id);
|
||||
|
||||
return found;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_get_by_host);
|
||||
|
||||
/**
|
||||
* inode_dax: convert a public inode into its dax_dev
|
||||
* @inode: An inode with i_cdev pointing to a dax_dev
|
||||
*
|
||||
* Note this is not equivalent to to_dax_dev() which is for private
|
||||
* internal use where we know the inode filesystem type == dax_fs_type.
|
||||
*/
|
||||
struct dax_device *inode_dax(struct inode *inode)
|
||||
{
|
||||
struct cdev *cdev = inode->i_cdev;
|
||||
|
||||
return container_of(cdev, struct dax_device, cdev);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(inode_dax);
|
||||
|
||||
struct inode *dax_inode(struct dax_device *dax_dev)
|
||||
{
|
||||
return &dax_dev->inode;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_inode);
|
||||
|
||||
void *dax_get_private(struct dax_device *dax_dev)
|
||||
{
|
||||
return dax_dev->private;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dax_get_private);
|
||||
|
||||
static void init_once(void *_dax_dev)
|
||||
{
|
||||
struct dax_device *dax_dev = _dax_dev;
|
||||
struct inode *inode = &dax_dev->inode;
|
||||
|
||||
inode_init_once(inode);
|
||||
}
|
||||
|
||||
static int __dax_fs_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
dax_cache = kmem_cache_create("dax_cache", sizeof(struct dax_device), 0,
|
||||
(SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
|
||||
SLAB_MEM_SPREAD|SLAB_ACCOUNT),
|
||||
init_once);
|
||||
if (!dax_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
rc = register_filesystem(&dax_fs_type);
|
||||
if (rc)
|
||||
goto err_register_fs;
|
||||
|
||||
dax_mnt = kern_mount(&dax_fs_type);
|
||||
if (IS_ERR(dax_mnt)) {
|
||||
rc = PTR_ERR(dax_mnt);
|
||||
goto err_mount;
|
||||
}
|
||||
dax_superblock = dax_mnt->mnt_sb;
|
||||
|
||||
return 0;
|
||||
|
||||
err_mount:
|
||||
unregister_filesystem(&dax_fs_type);
|
||||
err_register_fs:
|
||||
kmem_cache_destroy(dax_cache);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void __dax_fs_exit(void)
|
||||
{
|
||||
kern_unmount(dax_mnt);
|
||||
unregister_filesystem(&dax_fs_type);
|
||||
kmem_cache_destroy(dax_cache);
|
||||
}
|
||||
|
||||
static int __init dax_fs_init(void)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = __dax_fs_init();
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
nr_dax = max(nr_dax, 256);
|
||||
rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
|
||||
if (rc)
|
||||
__dax_fs_exit();
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void __exit dax_fs_exit(void)
|
||||
{
|
||||
unregister_chrdev_region(dax_devt, nr_dax);
|
||||
ida_destroy(&dax_minor_ida);
|
||||
__dax_fs_exit();
|
||||
}
|
||||
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
subsys_initcall(dax_fs_init);
|
||||
module_exit(dax_fs_exit);
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user