Merge tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Dan Williams:
 "libnvdimm updates for the latest ACPI and UEFI specifications. This
  pull request also includes new 'struct dax_operations' enabling to
  undo the abuse of copy_user_nocache() for copy operations to pmem.

  The dax work originally missed 4.12 to address concerns raised by Al.

  Summary:

   - Introduce the _flushcache() family of memory copy helpers and use
     them for persistent memory write operations on x86. The
     _flushcache() semantic indicates that the cache is either bypassed
     for the copy operation (movnt) or any lines dirtied by the copy
     operation are written back (clwb, clflushopt, or clflush).

   - Extend dax_operations with ->copy_from_iter() and ->flush()
     operations. These operations and other infrastructure updates allow
     all persistent memory specific dax functionality to be pushed into
     libnvdimm and the pmem driver directly. It also allows dax-specific
     sysfs attributes to be linked to a host device, for example:
     /sys/block/pmem0/dax/write_cache

   - Add support for the new NVDIMM platform/firmware mechanisms
     introduced in ACPI 6.2 and UEFI 2.7. This support includes the v1.2
     namespace label format, extensions to the address-range-scrub
     command set, new error injection commands, and a new BTT
     (block-translation-table) layout. These updates support inter-OS
     and pre-OS compatibility.

   - Fix a longstanding memory corruption bug in nfit_test.

   - Make the pmem and nvdimm-region 'badblocks' sysfs files poll(2)
     capable.

   - Miscellaneous fixes and small updates across libnvdimm and the nfit
     driver.

  Acknowledgements that came after the branch was pushed: commit
  6aa734a2f3 ("libnvdimm, region, pmem: fix 'badblocks'
  sysfs_get_dirent() reference lifetime") was reviewed by Toshi Kani
  <toshi.kani@hpe.com>"

* tag 'libnvdimm-for-4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (42 commits)
  libnvdimm, namespace: record 'lbasize' for pmem namespaces
  acpi/nfit: Issue Start ARS to retrieve existing records
  libnvdimm: New ACPI 6.2 DSM functions
  acpi, nfit: Show bus_dsm_mask in sysfs
  libnvdimm, acpi, nfit: Add bus level dsm mask for pass thru.
  acpi, nfit: Enable DSM pass thru for root functions.
  libnvdimm: passthru functions clear to send
  libnvdimm, btt: convert some info messages to warn/err
  libnvdimm, region, pmem: fix 'badblocks' sysfs_get_dirent() reference lifetime
  libnvdimm: fix the clear-error check in nsio_rw_bytes
  libnvdimm, btt: fix btt_rw_page not returning errors
  acpi, nfit: quiet invalid block-aperture-region warnings
  libnvdimm, btt: BTT updates for UEFI 2.7 format
  acpi, nfit: constify *_attribute_group
  libnvdimm, pmem: disable dax flushing when pmem is fronting a volatile region
  libnvdimm, pmem, dax: export a cache control attribute
  dax: convert to bitmask for flags
  dax: remove default copy_from_iter fallback
  libnvdimm, nfit: enable support for volatile ranges
  libnvdimm, pmem: fix persistence warning
  ...
This commit is contained in:
Linus Torvalds
2017-07-07 09:44:06 -07:00
47 changed files with 1505 additions and 461 deletions
+1 -3
View File
@@ -7680,9 +7680,7 @@ M: Ross Zwisler <ross.zwisler@linux.intel.com>
L: linux-nvdimm@lists.01.org
Q: https://patchwork.kernel.org/project/linux-nvdimm/list/
S: Supported
F: drivers/nvdimm/pmem.c
F: include/linux/pmem.h
F: arch/*/include/asm/pmem.h
F: drivers/nvdimm/pmem*
LIGHTNVM PLATFORM SUPPORT
M: Matias Bjorling <mb@lightnvm.io>
+8
View File
@@ -45,6 +45,7 @@
#include <linux/of_device.h>
#include <linux/of_platform.h>
#include <linux/pfn_t.h>
#include <linux/uio.h>
#include <asm/page.h>
#include <asm/prom.h>
@@ -163,8 +164,15 @@ axon_ram_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pa
return __axon_ram_direct_access(bank, pgoff, nr_pages, kaddr, pfn);
}
static size_t axon_ram_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static const struct dax_operations axon_ram_dax_ops = {
.direct_access = axon_ram_dax_direct_access,
.copy_from_iter = axon_ram_copy_from_iter,
};
/**
+1
View File
@@ -54,6 +54,7 @@ config X86
select ARCH_HAS_KCOV if X86_64
select ARCH_HAS_MMIO_FLUSH
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
-136
View File
@@ -1,136 +0,0 @@
/*
* Copyright(c) 2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef __ASM_X86_PMEM_H__
#define __ASM_X86_PMEM_H__
#include <linux/uaccess.h>
#include <asm/cacheflush.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#ifdef CONFIG_ARCH_HAS_PMEM_API
/**
* arch_memcpy_to_pmem - copy data to persistent memory
* @dst: destination buffer for the copy
* @src: source buffer for the copy
* @n: length of the copy in bytes
*
* Copy data to persistent memory media via non-temporal stores so that
* a subsequent pmem driver flush operation will drain posted write queues.
*/
static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
{
int rem;
/*
* We are copying between two kernel buffers, if
* __copy_from_user_inatomic_nocache() returns an error (page
* fault) we would have already reported a general protection fault
* before the WARN+BUG.
*/
rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
__func__, dst, src, rem))
BUG();
}
/**
* arch_wb_cache_pmem - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. Note that @size is internally rounded up to be cache
* line size aligned.
*/
static inline void arch_wb_cache_pmem(void *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
void *vend = addr + size;
void *p;
for (p = (void *)((unsigned long)addr & ~clflush_mask);
p < vend; p += x86_clflush_size)
clwb(p);
}
/**
* arch_copy_from_iter_pmem - copy data from an iterator to PMEM
* @addr: PMEM destination address
* @bytes: number of bytes to copy
* @i: iterator with source data
*
* Copy data from the iterator 'i' to the PMEM buffer starting at 'addr'.
*/
static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes,
struct iov_iter *i)
{
size_t len;
/* TODO: skip the write-back by always using non-temporal stores */
len = copy_from_iter_nocache(addr, bytes, i);
/*
* In the iovec case on x86_64 copy_from_iter_nocache() uses
* non-temporal stores for the bulk of the transfer, but we need
* to manually flush if the transfer is unaligned. A cached
* memory copy is used when destination or size is not naturally
* aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*
* In the non-iovec case the entire destination needs to be
* flushed.
*/
if (iter_is_iovec(i)) {
unsigned long flushed, dest = (unsigned long) addr;
if (bytes < 8) {
if (!IS_ALIGNED(dest, 4) || (bytes != 4))
arch_wb_cache_pmem(addr, bytes);
} else {
if (!IS_ALIGNED(dest, 8)) {
dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
arch_wb_cache_pmem(addr, 1);
}
flushed = dest - (unsigned long) addr;
if (bytes > flushed && !IS_ALIGNED(bytes - flushed, 8))
arch_wb_cache_pmem(addr + bytes - 1, 1);
}
} else
arch_wb_cache_pmem(addr, bytes);
return len;
}
/**
* arch_clear_pmem - zero a PMEM memory range
* @addr: virtual start address
* @size: number of bytes to zero
*
* Write zeros into the memory range starting at 'addr' for 'size' bytes.
*/
static inline void arch_clear_pmem(void *addr, size_t size)
{
memset(addr, 0, size);
arch_wb_cache_pmem(addr, size);
}
static inline void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range(addr, size);
}
#endif /* CONFIG_ARCH_HAS_PMEM_API */
#endif /* __ASM_X86_PMEM_H__ */
+5
View File
@@ -109,6 +109,11 @@ memcpy_mcsafe(void *dst, const void *src, size_t cnt)
return 0;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
void memcpy_flushcache(void *dst, const void *src, size_t cnt);
#endif
#endif /* __KERNEL__ */
#endif /* _ASM_X86_STRING_64_H */
+11
View File
@@ -171,6 +171,10 @@ unsigned long raw_copy_in_user(void __user *dst, const void __user *src, unsigne
extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);
extern long __copy_user_flushcache(void *dst, const void __user *src, unsigned size);
extern void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
size_t len);
static inline int
__copy_from_user_inatomic_nocache(void *dst, const void __user *src,
unsigned size)
@@ -179,6 +183,13 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
return __copy_user_nocache(dst, src, size, 0);
}
static inline int
__copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
{
kasan_check_write(dst, size);
return __copy_user_flushcache(dst, src, size);
}
unsigned long
copy_user_handle_tail(char *to, char *from, unsigned len);
+134
View File
@@ -7,6 +7,7 @@
*/
#include <linux/export.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
/*
* Zero Userspace
@@ -73,3 +74,136 @@ copy_user_handle_tail(char *to, char *from, unsigned len)
clac();
return len;
}
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
* clean_cache_range - write back a cache range with CLWB
* @vaddr: virtual start address
* @size: number of bytes to write back
*
* Write back a cache range using the CLWB (cache line write back)
* instruction. Note that @size is internally rounded up to be cache
* line size aligned.
*/
static void clean_cache_range(void *addr, size_t size)
{
u16 x86_clflush_size = boot_cpu_data.x86_clflush_size;
unsigned long clflush_mask = x86_clflush_size - 1;
void *vend = addr + size;
void *p;
for (p = (void *)((unsigned long)addr & ~clflush_mask);
p < vend; p += x86_clflush_size)
clwb(p);
}
void arch_wb_cache_pmem(void *addr, size_t size)
{
clean_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
long __copy_user_flushcache(void *dst, const void __user *src, unsigned size)
{
unsigned long flushed, dest = (unsigned long) dst;
long rc = __copy_user_nocache(dst, src, size, 0);
/*
* __copy_user_nocache() uses non-temporal stores for the bulk
* of the transfer, but we need to manually flush if the
* transfer is unaligned. A cached memory copy is used when
* destination or size is not naturally aligned. That is:
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*/
if (size < 8) {
if (!IS_ALIGNED(dest, 4) || size != 4)
clean_cache_range(dst, 1);
} else {
if (!IS_ALIGNED(dest, 8)) {
dest = ALIGN(dest, boot_cpu_data.x86_clflush_size);
clean_cache_range(dst, 1);
}
flushed = dest - (unsigned long) dst;
if (size > flushed && !IS_ALIGNED(size - flushed, 8))
clean_cache_range(dst + size - 1, 1);
}
return rc;
}
void memcpy_flushcache(void *_dst, const void *_src, size_t size)
{
unsigned long dest = (unsigned long) _dst;
unsigned long source = (unsigned long) _src;
/* cache copy and flush to align dest */
if (!IS_ALIGNED(dest, 8)) {
unsigned len = min_t(unsigned, size, ALIGN(dest, 8) - dest);
memcpy((void *) dest, (void *) source, len);
clean_cache_range((void *) dest, len);
dest += len;
source += len;
size -= len;
if (!size)
return;
}
/* 4x8 movnti loop */
while (size >= 32) {
asm("movq (%0), %%r8\n"
"movq 8(%0), %%r9\n"
"movq 16(%0), %%r10\n"
"movq 24(%0), %%r11\n"
"movnti %%r8, (%1)\n"
"movnti %%r9, 8(%1)\n"
"movnti %%r10, 16(%1)\n"
"movnti %%r11, 24(%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8", "r9", "r10", "r11");
dest += 32;
source += 32;
size -= 32;
}
/* 1x8 movnti loop */
while (size >= 8) {
asm("movq (%0), %%r8\n"
"movnti %%r8, (%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8");
dest += 8;
source += 8;
size -= 8;
}
/* 1x4 movnti loop */
while (size >= 4) {
asm("movl (%0), %%r8d\n"
"movnti %%r8d, (%1)\n"
:: "r" (source), "r" (dest)
: "memory", "r8");
dest += 4;
source += 4;
size -= 4;
}
/* cache copy for remaining bytes */
if (size) {
memcpy((void *) dest, (void *) source, size);
clean_cache_range((void *) dest, size);
}
}
EXPORT_SYMBOL_GPL(memcpy_flushcache);
void memcpy_page_flushcache(char *to, struct page *page, size_t offset,
size_t len)
{
char *from = kmap_atomic(page);
memcpy_flushcache(to, from + offset, len);
kunmap_atomic(from);
}
#endif
+6
View File
@@ -150,6 +150,12 @@ void clflush_cache_range(void *vaddr, unsigned int size)
}
EXPORT_SYMBOL_GPL(clflush_cache_range);
void arch_invalidate_pmem(void *addr, size_t size)
{
clflush_cache_range(addr, size);
}
EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
static void __cpa_flush_all(void *arg)
{
unsigned long cache = (unsigned long)arg;
+142 -25
View File
@@ -20,7 +20,6 @@
#include <linux/list.h>
#include <linux/acpi.h>
#include <linux/sort.h>
#include <linux/pmem.h>
#include <linux/io.h>
#include <linux/nd.h>
#include <asm/cacheflush.h>
@@ -253,6 +252,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
cmd_name = nvdimm_bus_cmd_name(cmd);
cmd_mask = nd_desc->cmd_mask;
dsm_mask = cmd_mask;
if (cmd == ND_CMD_CALL)
dsm_mask = nd_desc->bus_dsm_mask;
desc = nd_cmd_bus_desc(cmd);
guid = to_nfit_uuid(NFIT_DEV_BUS);
handle = adev->handle;
@@ -927,6 +928,17 @@ static int nfit_mem_init(struct acpi_nfit_desc *acpi_desc)
return 0;
}
static ssize_t bus_dsm_mask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
return sprintf(buf, "%#lx\n", nd_desc->bus_dsm_mask);
}
static struct device_attribute dev_attr_bus_dsm_mask =
__ATTR(dsm_mask, 0444, bus_dsm_mask_show, NULL);
static ssize_t revision_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -1031,7 +1043,7 @@ static ssize_t scrub_store(struct device *dev,
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = acpi_nfit_ars_rescan(acpi_desc);
rc = acpi_nfit_ars_rescan(acpi_desc, 0);
}
device_unlock(dev);
if (rc)
@@ -1063,10 +1075,11 @@ static struct attribute *acpi_nfit_attributes[] = {
&dev_attr_revision.attr,
&dev_attr_scrub.attr,
&dev_attr_hw_error_scrub.attr,
&dev_attr_bus_dsm_mask.attr,
NULL,
};
static struct attribute_group acpi_nfit_attribute_group = {
static const struct attribute_group acpi_nfit_attribute_group = {
.name = "nfit",
.attrs = acpi_nfit_attributes,
.is_visible = nfit_visible,
@@ -1346,7 +1359,7 @@ static umode_t acpi_nfit_dimm_attr_visible(struct kobject *kobj,
return a->mode;
}
static struct attribute_group acpi_nfit_dimm_attribute_group = {
static const struct attribute_group acpi_nfit_dimm_attribute_group = {
.name = "nfit",
.attrs = acpi_nfit_dimm_attributes,
.is_visible = acpi_nfit_dimm_attr_visible,
@@ -1608,11 +1621,23 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
acpi_desc);
}
/*
* These constants are private because there are no kernel consumers of
* these commands.
*/
enum nfit_aux_cmds {
NFIT_CMD_TRANSLATE_SPA = 5,
NFIT_CMD_ARS_INJECT_SET = 7,
NFIT_CMD_ARS_INJECT_CLEAR = 8,
NFIT_CMD_ARS_INJECT_GET = 9,
};
static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
{
struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
const guid_t *guid = to_nfit_uuid(NFIT_DEV_BUS);
struct acpi_device *adev;
unsigned long dsm_mask;
int i;
nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en;
@@ -1623,6 +1648,20 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
set_bit(i, &nd_desc->cmd_mask);
set_bit(ND_CMD_CALL, &nd_desc->cmd_mask);
dsm_mask =
(1 << ND_CMD_ARS_CAP) |
(1 << ND_CMD_ARS_START) |
(1 << ND_CMD_ARS_STATUS) |
(1 << ND_CMD_CLEAR_ERROR) |
(1 << NFIT_CMD_TRANSLATE_SPA) |
(1 << NFIT_CMD_ARS_INJECT_SET) |
(1 << NFIT_CMD_ARS_INJECT_CLEAR) |
(1 << NFIT_CMD_ARS_INJECT_GET);
for_each_set_bit(i, &dsm_mask, BITS_PER_LONG)
if (acpi_check_dsm(adev->handle, guid, 1, 1ULL << i))
set_bit(i, &nd_desc->bus_dsm_mask);
}
static ssize_t range_index_show(struct device *dev,
@@ -1640,7 +1679,7 @@ static struct attribute *acpi_nfit_region_attributes[] = {
NULL,
};
static struct attribute_group acpi_nfit_region_attribute_group = {
static const struct attribute_group acpi_nfit_region_attribute_group = {
.name = "nfit",
.attrs = acpi_nfit_region_attributes,
};
@@ -1663,12 +1702,29 @@ struct nfit_set_info {
} mapping[0];
};
struct nfit_set_info2 {
struct nfit_set_info_map2 {
u64 region_offset;
u32 serial_number;
u16 vendor_id;
u16 manufacturing_date;
u8 manufacturing_location;
u8 reserved[31];
} mapping[0];
};
static size_t sizeof_nfit_set_info(int num_mappings)
{
return sizeof(struct nfit_set_info)
+ num_mappings * sizeof(struct nfit_set_info_map);
}
static size_t sizeof_nfit_set_info2(int num_mappings)
{
return sizeof(struct nfit_set_info2)
+ num_mappings * sizeof(struct nfit_set_info_map2);
}
static int cmp_map_compat(const void *m0, const void *m1)
{
const struct nfit_set_info_map *map0 = m0;
@@ -1690,6 +1746,18 @@ static int cmp_map(const void *m0, const void *m1)
return 0;
}
static int cmp_map2(const void *m0, const void *m1)
{
const struct nfit_set_info_map2 *map0 = m0;
const struct nfit_set_info_map2 *map1 = m1;
if (map0->region_offset < map1->region_offset)
return -1;
else if (map0->region_offset > map1->region_offset)
return 1;
return 0;
}
/* Retrieve the nth entry referencing this spa */
static struct acpi_nfit_memory_map *memdev_from_spa(
struct acpi_nfit_desc *acpi_desc, u16 range_index, int n)
@@ -1707,27 +1775,31 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
struct nd_region_desc *ndr_desc,
struct acpi_nfit_system_address *spa)
{
int i, spa_type = nfit_spa_type(spa);
struct device *dev = acpi_desc->dev;
struct nd_interleave_set *nd_set;
u16 nr = ndr_desc->num_mappings;
struct nfit_set_info2 *info2;
struct nfit_set_info *info;
if (spa_type == NFIT_SPA_PM || spa_type == NFIT_SPA_VOLATILE)
/* pass */;
else
return 0;
int i;
nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
if (!nd_set)
return -ENOMEM;
ndr_desc->nd_set = nd_set;
guid_copy(&nd_set->type_guid, (guid_t *) spa->range_guid);
info = devm_kzalloc(dev, sizeof_nfit_set_info(nr), GFP_KERNEL);
if (!info)
return -ENOMEM;
info2 = devm_kzalloc(dev, sizeof_nfit_set_info2(nr), GFP_KERNEL);
if (!info2)
return -ENOMEM;
for (i = 0; i < nr; i++) {
struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nfit_set_info_map *map = &info->mapping[i];
struct nfit_set_info_map2 *map2 = &info2->mapping[i];
struct nvdimm *nvdimm = mapping->nvdimm;
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
@@ -1740,19 +1812,32 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
map->region_offset = memdev->region_offset;
map->serial_number = nfit_mem->dcr->serial_number;
map2->region_offset = memdev->region_offset;
map2->serial_number = nfit_mem->dcr->serial_number;
map2->vendor_id = nfit_mem->dcr->vendor_id;
map2->manufacturing_date = nfit_mem->dcr->manufacturing_date;
map2->manufacturing_location = nfit_mem->dcr->manufacturing_location;
}
/* v1.1 namespaces */
sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
cmp_map, NULL);
nd_set->cookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
nd_set->cookie1 = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
/* support namespaces created with the wrong sort order */
/* v1.2 namespaces */
sort(&info2->mapping[0], nr, sizeof(struct nfit_set_info_map2),
cmp_map2, NULL);
nd_set->cookie2 = nd_fletcher64(info2, sizeof_nfit_set_info2(nr), 0);
/* support v1.1 namespaces created with the wrong sort order */
sort(&info->mapping[0], nr, sizeof(struct nfit_set_info_map),
cmp_map_compat, NULL);
nd_set->altcookie = nd_fletcher64(info, sizeof_nfit_set_info(nr), 0);
ndr_desc->nd_set = nd_set;
devm_kfree(dev, info);
devm_kfree(dev, info2);
return 0;
}
@@ -1842,8 +1927,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
}
if (rw)
memcpy_to_pmem(mmio->addr.aperture + offset,
iobuf + copied, c);
memcpy_flushcache(mmio->addr.aperture + offset, iobuf + copied, c);
else {
if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
mmio_flush_range((void __force *)
@@ -1957,7 +2041,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
nfit_blk->bdw_offset = nfit_mem->bdw->offset;
mmio = &nfit_blk->mmio[BDW];
mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
nfit_mem->spa_bdw->length, ARCH_MEMREMAP_PMEM);
nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
if (!mmio->addr.base) {
dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
nvdimm_name(nvdimm));
@@ -2051,6 +2135,7 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
memset(&ars_start, 0, sizeof(ars_start));
ars_start.address = spa->address;
ars_start.length = spa->length;
ars_start.flags = acpi_desc->ars_start_flags;
if (nfit_spa_type(spa) == NFIT_SPA_PM)
ars_start.type = ND_ARS_PERSISTENT;
else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2077,6 +2162,7 @@ static int ars_continue(struct acpi_nfit_desc *acpi_desc)
ars_start.address = ars_status->restart_address;
ars_start.length = ars_status->restart_length;
ars_start.type = ars_status->type;
ars_start.flags = acpi_desc->ars_start_flags;
rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
sizeof(ars_start), &cmd_rc);
if (rc < 0)
@@ -2179,7 +2265,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
struct acpi_nfit_system_address *spa = nfit_spa->spa;
struct nd_blk_region_desc *ndbr_desc;
struct nfit_mem *nfit_mem;
int blk_valid = 0;
int blk_valid = 0, rc;
if (!nvdimm) {
dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2211,6 +2297,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
ndbr_desc = to_blk_region_desc(ndr_desc);
ndbr_desc->enable = acpi_nfit_blk_region_enable;
ndbr_desc->do_io = acpi_desc->blk_do_io;
rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
if (rc)
return rc;
nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
ndr_desc);
if (!nfit_spa->nd_region)
@@ -2229,6 +2318,13 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
nfit_spa_type(spa) == NFIT_SPA_PCD);
}
static bool nfit_spa_is_volatile(struct acpi_nfit_system_address *spa)
{
return (nfit_spa_type(spa) == NFIT_SPA_VDISK ||
nfit_spa_type(spa) == NFIT_SPA_VCD ||
nfit_spa_type(spa) == NFIT_SPA_VOLATILE);
}
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa)
{
@@ -2303,7 +2399,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
ndr_desc);
if (!nfit_spa->nd_region)
rc = -ENOMEM;
} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
} else if (nfit_spa_is_volatile(spa)) {
nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
ndr_desc);
if (!nfit_spa->nd_region)
@@ -2595,6 +2691,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
acpi_nfit_async_scrub(acpi_desc, nfit_spa);
acpi_desc->scrub_count++;
acpi_desc->ars_start_flags = 0;
if (acpi_desc->scrub_count_state)
sysfs_notify_dirent(acpi_desc->scrub_count_state);
mutex_unlock(&acpi_desc->init_mutex);
@@ -2613,6 +2710,7 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
return rc;
}
acpi_desc->ars_start_flags = 0;
if (!acpi_desc->cancel)
queue_work(nfit_wq, &acpi_desc->work);
return 0;
@@ -2817,7 +2915,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
return 0;
}
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
{
struct device *dev = acpi_desc->dev;
struct nfit_spa *nfit_spa;
@@ -2839,6 +2937,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc)
nfit_spa->ars_required = 1;
}
acpi_desc->ars_start_flags = flags;
queue_work(nfit_wq, &acpi_desc->work);
dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
mutex_unlock(&acpi_desc->init_mutex);
@@ -2967,7 +3066,7 @@ static int acpi_nfit_remove(struct acpi_device *adev)
return 0;
}
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -2975,11 +3074,6 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
acpi_status status;
int ret;
dev_dbg(dev, "%s: event: %d\n", __func__, event);
if (event != NFIT_NOTIFY_UPDATE)
return;
if (!dev->driver) {
/* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__);
@@ -3016,6 +3110,29 @@ void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
dev_err(dev, "Invalid _FIT\n");
kfree(buf.pointer);
}
static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
0 : ND_ARS_RETURN_PREV_DATA;
acpi_nfit_ars_rescan(acpi_desc, flags);
}
void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
{
dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);
switch (event) {
case NFIT_NOTIFY_UPDATE:
return acpi_nfit_update_notify(dev, handle);
case NFIT_NOTIFY_UC_MEMORY_ERROR:
return acpi_nfit_uc_error_notify(dev, handle);
default:
return;
}
}
EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+1 -1
View File
@@ -79,7 +79,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
* already in progress, just let that be the last
* authoritative one
*/
acpi_nfit_ars_rescan(acpi_desc);
acpi_nfit_ars_rescan(acpi_desc, 0);
}
break;
}
+3 -1
View File
@@ -79,6 +79,7 @@ enum {
enum nfit_root_notifiers {
NFIT_NOTIFY_UPDATE = 0x80,
NFIT_NOTIFY_UC_MEMORY_ERROR = 0x81,
};
enum nfit_dimm_notifiers {
@@ -154,6 +155,7 @@ struct acpi_nfit_desc {
struct list_head idts;
struct nvdimm_bus *nvdimm_bus;
struct device *dev;
u8 ars_start_flags;
struct nd_cmd_ars_status *ars_status;
size_t ars_status_size;
struct work_struct work;
@@ -206,7 +208,7 @@ struct nfit_blk {
extern struct list_head acpi_descs;
extern struct mutex acpi_desc_lock;
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc);
int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
#ifdef CONFIG_X86_MCE
void nfit_mce_register(void);
+8
View File
@@ -22,6 +22,7 @@
#ifdef CONFIG_BLK_DEV_RAM_DAX
#include <linux/pfn_t.h>
#include <linux/dax.h>
#include <linux/uio.h>
#endif
#include <linux/uaccess.h>
@@ -354,8 +355,15 @@ static long brd_dax_direct_access(struct dax_device *dax_dev,
return __brd_direct_access(brd, pgoff, nr_pages, kaddr, pfn);
}
static size_t brd_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
return copy_from_iter(addr, bytes, i);
}
static const struct dax_operations brd_dax_ops = {
.direct_access = brd_dax_direct_access,
.copy_from_iter = brd_dax_copy_from_iter,
};
#endif
+112 -6
View File
@@ -18,6 +18,7 @@
#include <linux/cdev.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/fs.h>
@@ -115,13 +116,20 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
EXPORT_SYMBOL_GPL(__bdev_dax_supported);
#endif
enum dax_device_flags {
/* !alive + rcu grace period == no new operations / mappings */
DAXDEV_ALIVE,
/* gate whether dax_flush() calls the low level flush routine */
DAXDEV_WRITE_CACHE,
};
/**
* struct dax_device - anchor object for dax services
* @inode: core vfs
* @cdev: optional character interface for "device dax"
* @host: optional name for lookups where the device path is not available
* @private: dax driver private data
* @alive: !alive + rcu grace period == no new operations / mappings
* @flags: state and boolean properties
*/
struct dax_device {
struct hlist_node list;
@@ -129,10 +137,75 @@ struct dax_device {
struct cdev cdev;
const char *host;
void *private;
bool alive;
unsigned long flags;
const struct dax_operations *ops;
};
static ssize_t write_cache_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
ssize_t rc;
WARN_ON_ONCE(!dax_dev);
if (!dax_dev)
return -ENXIO;
rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE,
&dax_dev->flags));
put_dax(dax_dev);
return rc;
}
static ssize_t write_cache_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
bool write_cache;
int rc = strtobool(buf, &write_cache);
struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
WARN_ON_ONCE(!dax_dev);
if (!dax_dev)
return -ENXIO;
if (rc)
len = rc;
else if (write_cache)
set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
else
clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
put_dax(dax_dev);
return len;
}
static DEVICE_ATTR_RW(write_cache);
static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
struct device *dev = container_of(kobj, typeof(*dev), kobj);
struct dax_device *dax_dev = dax_get_by_host(dev_name(dev));
WARN_ON_ONCE(!dax_dev);
if (!dax_dev)
return 0;
if (a == &dev_attr_write_cache.attr && !dax_dev->ops->flush)
return 0;
return a->mode;
}
static struct attribute *dax_attributes[] = {
&dev_attr_write_cache.attr,
NULL,
};
struct attribute_group dax_attribute_group = {
.name = "dax",
.attrs = dax_attributes,
.is_visible = dax_visible,
};
EXPORT_SYMBOL_GPL(dax_attribute_group);
/**
* dax_direct_access() - translate a device pgoff to an absolute pfn
* @dax_dev: a dax_device instance representing the logical memory range
@@ -172,10 +245,43 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
}
EXPORT_SYMBOL_GPL(dax_direct_access);
size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t bytes, struct iov_iter *i)
{
if (!dax_alive(dax_dev))
return 0;
return dax_dev->ops->copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
EXPORT_SYMBOL_GPL(dax_copy_from_iter);
void dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t size)
{
if (!dax_alive(dax_dev))
return;
if (!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))
return;
if (dax_dev->ops->flush)
dax_dev->ops->flush(dax_dev, pgoff, addr, size);
}
EXPORT_SYMBOL_GPL(dax_flush);
void dax_write_cache(struct dax_device *dax_dev, bool wc)
{
if (wc)
set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
else
clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_write_cache);
bool dax_alive(struct dax_device *dax_dev)
{
lockdep_assert_held(&dax_srcu);
return dax_dev->alive;
return test_bit(DAXDEV_ALIVE, &dax_dev->flags);
}
EXPORT_SYMBOL_GPL(dax_alive);
@@ -195,7 +301,7 @@ void kill_dax(struct dax_device *dax_dev)
if (!dax_dev)
return;
dax_dev->alive = false;
clear_bit(DAXDEV_ALIVE, &dax_dev->flags);
synchronize_srcu(&dax_srcu);
@@ -239,7 +345,7 @@ static void dax_destroy_inode(struct inode *inode)
{
struct dax_device *dax_dev = to_dax_dev(inode);
WARN_ONCE(dax_dev->alive,
WARN_ONCE(test_bit(DAXDEV_ALIVE, &dax_dev->flags),
"kill_dax() must be called before final iput()\n");
call_rcu(&inode->i_rcu, dax_i_callback);
}
@@ -291,7 +397,7 @@ static struct dax_device *dax_dev_get(dev_t devt)
dax_dev = to_dax_dev(inode);
if (inode->i_state & I_NEW) {
dax_dev->alive = true;
set_bit(DAXDEV_ALIVE, &dax_dev->flags);
inode->i_cdev = &dax_dev->cdev;
inode->i_mode = S_IFCHR;
inode->i_flags = S_DAX;
+30
View File
@@ -170,6 +170,34 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static void linear_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
size_t size)
{
struct linear_c *lc = ti->private;
struct block_device *bdev = lc->dev->bdev;
struct dax_device *dax_dev = lc->dev->dax_dev;
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
dev_sector = linear_map_sector(ti, sector);
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff))
return;
dax_flush(dax_dev, pgoff, addr, size);
}
static struct target_type linear_target = {
.name = "linear",
.version = {1, 4, 0},
@@ -183,6 +211,8 @@ static struct target_type linear_target = {
.prepare_ioctl = linear_prepare_ioctl,
.iterate_devices = linear_iterate_devices,
.direct_access = linear_dax_direct_access,
.dax_copy_from_iter = linear_dax_copy_from_iter,
.dax_flush = linear_dax_flush,
};
int __init dm_linear_init(void)
+40
View File
@@ -332,6 +332,44 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn);
}
static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff))
return 0;
return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
}
static void stripe_dax_flush(struct dm_target *ti, pgoff_t pgoff, void *addr,
size_t size)
{
sector_t dev_sector, sector = pgoff * PAGE_SECTORS;
struct stripe_c *sc = ti->private;
struct dax_device *dax_dev;
struct block_device *bdev;
uint32_t stripe;
stripe_map_sector(sc, sector, &stripe, &dev_sector);
dev_sector += sc->stripe[stripe].physical_start;
dax_dev = sc->stripe[stripe].dev->dax_dev;
bdev = sc->stripe[stripe].dev->bdev;
if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(size, PAGE_SIZE), &pgoff))
return;
dax_flush(dax_dev, pgoff, addr, size);
}
/*
* Stripe status:
*
@@ -452,6 +490,8 @@ static struct target_type stripe_target = {
.iterate_devices = stripe_iterate_devices,
.io_hints = stripe_io_hints,
.direct_access = stripe_dax_direct_access,
.dax_copy_from_iter = stripe_dax_copy_from_iter,
.dax_flush = stripe_dax_flush,
};
int __init dm_stripe_init(void)
+45
View File
@@ -19,6 +19,7 @@
#include <linux/dax.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/uio.h>
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/wait.h>
@@ -972,6 +973,48 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
return ret;
}
static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
long ret = 0;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (!ti->type->dax_copy_from_iter) {
ret = copy_from_iter(addr, bytes, i);
goto out;
}
ret = ti->type->dax_copy_from_iter(ti, pgoff, addr, bytes, i);
out:
dm_put_live_table(md, srcu_idx);
return ret;
}
static void dm_dax_flush(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
size_t size)
{
struct mapped_device *md = dax_get_private(dax_dev);
sector_t sector = pgoff * PAGE_SECTORS;
struct dm_target *ti;
int srcu_idx;
ti = dm_dax_get_live_target(md, sector, &srcu_idx);
if (!ti)
goto out;
if (ti->type->dax_flush)
ti->type->dax_flush(ti, pgoff, addr, size);
out:
dm_put_live_table(md, srcu_idx);
}
/*
* A target may call dm_accept_partial_bio only from the map routine. It is
* allowed for all bio types except REQ_PREFLUSH.
@@ -2958,6 +3001,8 @@ static const struct block_device_operations dm_blk_dops = {
static const struct dax_operations dm_dax_ops = {
.direct_access = dm_dax_direct_access,
.copy_from_iter = dm_dax_copy_from_iter,
.flush = dm_dax_flush,
};
/*
+30 -15
View File
@@ -37,8 +37,8 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets are 4K from the base of the device */
offset += SZ_4K;
/* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset;
return nvdimm_read_bytes(ndns, offset, buf, n, flags);
}
@@ -48,8 +48,8 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
struct nd_btt *nd_btt = arena->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
/* arena offsets are 4K from the base of the device */
offset += SZ_4K;
/* arena offsets may be shifted from the base of the device */
offset += arena->nd_btt->initial_offset;
return nvdimm_write_bytes(ndns, offset, buf, n, flags);
}
@@ -323,7 +323,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane,
old_ent = btt_log_get_old(log);
if (old_ent < 0 || old_ent > 1) {
dev_info(to_dev(arena),
dev_err(to_dev(arena),
"log corruption (%d): lane %d seq [%d, %d]\n",
old_ent, lane, log[0].seq, log[1].seq);
/* TODO set error state? */
@@ -576,8 +576,8 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
arena->internal_lbasize = roundup(arena->external_lbasize,
INT_LBASIZE_ALIGNMENT);
arena->nfree = BTT_DEFAULT_NFREE;
arena->version_major = 1;
arena->version_minor = 1;
arena->version_major = btt->nd_btt->version_major;
arena->version_minor = btt->nd_btt->version_minor;
if (available % BTT_PG_SIZE)
available -= (available % BTT_PG_SIZE);
@@ -684,7 +684,7 @@ static int discover_arenas(struct btt *btt)
dev_info(to_dev(arena), "No existing arenas\n");
goto out;
} else {
dev_info(to_dev(arena),
dev_err(to_dev(arena),
"Found corrupted metadata!\n");
ret = -ENODEV;
goto out;
@@ -1227,7 +1227,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
op_is_write(bio_op(bio)), iter.bi_sector);
if (err) {
dev_info(&btt->nd_btt->dev,
dev_err(&btt->nd_btt->dev,
"io error in %s sector %lld, len %d,\n",
(op_is_write(bio_op(bio))) ? "WRITE" :
"READ",
@@ -1248,10 +1248,13 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector,
struct page *page, bool is_write)
{
struct btt *btt = bdev->bd_disk->private_data;
int rc;
btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
page_endio(page, is_write, 0);
return 0;
rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
if (rc == 0)
page_endio(page, is_write, 0);
return rc;
}
@@ -1369,7 +1372,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
}
if (btt->init_state != INIT_READY && nd_region->ro) {
dev_info(dev, "%s is read-only, unable to init btt metadata\n",
dev_warn(dev, "%s is read-only, unable to init btt metadata\n",
dev_name(&nd_region->dev));
return NULL;
} else if (btt->init_state != INIT_READY) {
@@ -1424,6 +1427,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
struct nd_btt *nd_btt = to_nd_btt(ndns->claim);
struct nd_region *nd_region;
struct btt_sb *btt_sb;
struct btt *btt;
size_t rawsize;
@@ -1432,10 +1436,21 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
return -ENODEV;
}
rawsize = nvdimm_namespace_capacity(ndns) - SZ_4K;
btt_sb = devm_kzalloc(&nd_btt->dev, sizeof(*btt_sb), GFP_KERNEL);
/*
* If this returns < 0, that is ok as it just means there wasn't
* an existing BTT, and we're creating a new one. We still need to
* call this as we need the version dependent fields in nd_btt to be
* set correctly based on the holder class
*/
nd_btt_version(nd_btt, ndns, btt_sb);
rawsize = nvdimm_namespace_capacity(ndns) - nd_btt->initial_offset;
if (rawsize < ARENA_MIN_SIZE) {
dev_dbg(&nd_btt->dev, "%s must be at least %ld bytes\n",
dev_name(&ndns->dev), ARENA_MIN_SIZE + SZ_4K);
dev_name(&ndns->dev),
ARENA_MIN_SIZE + nd_btt->initial_offset);
return -ENXIO;
}
nd_region = to_nd_region(nd_btt->dev.parent);
+2
View File
@@ -184,5 +184,7 @@ struct btt {
};
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
struct btt_sb *btt_sb);
#endif
+49 -5
View File
@@ -260,20 +260,55 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
}
EXPORT_SYMBOL(nd_btt_arena_is_valid);
int nd_btt_version(struct nd_btt *nd_btt, struct nd_namespace_common *ndns,
struct btt_sb *btt_sb)
{
if (ndns->claim_class == NVDIMM_CCLASS_BTT2) {
/* Probe/setup for BTT v2.0 */
nd_btt->initial_offset = 0;
nd_btt->version_major = 2;
nd_btt->version_minor = 0;
if (nvdimm_read_bytes(ndns, 0, btt_sb, sizeof(*btt_sb), 0))
return -ENXIO;
if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
return -ENODEV;
if ((le16_to_cpu(btt_sb->version_major) != 2) ||
(le16_to_cpu(btt_sb->version_minor) != 0))
return -ENODEV;
} else {
/*
* Probe/setup for BTT v1.1 (NVDIMM_CCLASS_NONE or
* NVDIMM_CCLASS_BTT)
*/
nd_btt->initial_offset = SZ_4K;
nd_btt->version_major = 1;
nd_btt->version_minor = 1;
if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
return -ENXIO;
if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
return -ENODEV;
if ((le16_to_cpu(btt_sb->version_major) != 1) ||
(le16_to_cpu(btt_sb->version_minor) != 1))
return -ENODEV;
}
return 0;
}
EXPORT_SYMBOL(nd_btt_version);
static int __nd_btt_probe(struct nd_btt *nd_btt,
struct nd_namespace_common *ndns, struct btt_sb *btt_sb)
{
int rc;
if (!btt_sb || !ndns || !nd_btt)
return -ENODEV;
if (nvdimm_read_bytes(ndns, SZ_4K, btt_sb, sizeof(*btt_sb), 0))
return -ENXIO;
if (nvdimm_namespace_capacity(ndns) < SZ_16M)
return -ENXIO;
if (!nd_btt_arena_is_valid(nd_btt, btt_sb))
return -ENODEV;
rc = nd_btt_version(nd_btt, ndns, btt_sb);
if (rc < 0)
return rc;
nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
@@ -295,6 +330,15 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
if (ndns->force_raw)
return -ENODEV;
switch (ndns->claim_class) {
case NVDIMM_CCLASS_NONE:
case NVDIMM_CCLASS_BTT:
case NVDIMM_CCLASS_BTT2:
break;
default:
return -ENODEV;
}
nvdimm_bus_lock(&ndns->dev);
btt_dev = __nd_btt_create(nd_region, 0, NULL, ndns);
nvdimm_bus_unlock(&ndns->dev);
+10 -5
View File
@@ -38,13 +38,13 @@ static int to_nd_device_type(struct device *dev)
{
if (is_nvdimm(dev))
return ND_DEVICE_DIMM;
else if (is_nd_pmem(dev))
else if (is_memory(dev))
return ND_DEVICE_REGION_PMEM;
else if (is_nd_blk(dev))
return ND_DEVICE_REGION_BLK;
else if (is_nd_dax(dev))
return ND_DEVICE_DAX_PMEM;
else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
else if (is_nd_region(dev->parent))
return nd_region_to_nstype(to_nd_region(dev->parent));
return 0;
@@ -56,7 +56,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
* Ensure that region devices always have their numa node set as
* early as possible.
*/
if (is_nd_pmem(dev) || is_nd_blk(dev))
if (is_nd_region(dev))
set_dev_node(dev, to_nd_region(dev)->numa_node);
return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
to_nd_device_type(dev));
@@ -65,7 +65,7 @@ static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
static struct module *to_bus_provider(struct device *dev)
{
/* pin bus providers while regions are enabled */
if (is_nd_pmem(dev) || is_nd_blk(dev)) {
if (is_nd_region(dev)) {
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
return nvdimm_bus->nd_desc->module;
@@ -198,6 +198,9 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
sector = (ctx->phys - nd_region->ndr_start) / 512;
badblocks_clear(&nd_region->bb, sector, ctx->cleared / 512);
if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state);
return 0;
}
@@ -907,6 +910,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
unsigned int func = cmd;
void __user *p = (void __user *) arg;
struct device *dev = &nvdimm_bus->dev;
struct nd_cmd_pkg pkg;
@@ -972,6 +976,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
if (cmd == ND_CMD_CALL) {
func = pkg.nd_command;
dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n",
__func__, dimm_name, pkg.nd_command,
in_len, out_len, buf_len);
@@ -1020,7 +1025,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
nvdimm_bus_lock(&nvdimm_bus->dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd, buf);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;

Some files were not shown because too many files have changed in this diff Show More