Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:
 "87 patches.

  Subsystems affected by this patch series: mm (pagecache and hugetlb),
  procfs, misc, MAINTAINERS, lib, checkpatch, binfmt, kallsyms, ramfs,
  init, codafs, nilfs2, hfs, crash_dump, signals, seq_file, fork,
  sysvfs, kcov, gdb, resource, selftests, and ipc"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (87 commits)
  ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL
  ipc: check checkpoint_restore_ns_capable() to modify C/R proc files
  selftests/kselftest/runner/run_one(): allow running non-executable files
  virtio-mem: disallow mapping virtio-mem memory via /dev/mem
  kernel/resource: disallow access to exclusive system RAM regions
  kernel/resource: clean up and optimize iomem_is_exclusive()
  scripts/gdb: handle split debug for vmlinux
  kcov: replace local_irq_save() with a local_lock_t
  kcov: avoid enable+disable interrupts if !in_task()
  kcov: allocate per-CPU memory on the relevant node
  Documentation/kcov: define `ip' in the example
  Documentation/kcov: include types.h in the example
  sysv: use BUILD_BUG_ON instead of runtime check
  kernel/fork.c: unshare(): use swap() to make code cleaner
  seq_file: fix passing wrong private data
  seq_file: move seq_escape() to a header
  signal: remove duplicate include in signal.h
  crash_dump: remove duplicate include in crash_dump.h
  crash_dump: fix boolreturn.cocci warning
  hfs/hfsplus: use WARN_ON for sanity check
  ...
Author: Linus Torvalds <torvalds@linux-foundation.org>
Date:   2021-11-09 10:11:53 -08:00

131 changed files with 1178 additions and 654 deletions

diff --git a/.mailmap b/.mailmap

@@ -73,6 +73,8 @@ Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
 Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
 Christophe Ricard <christophe.ricard@gmail.com>
 Christoph Hellwig <hch@lst.de>
+Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
+Colin Ian King <colin.king@intel.com> <colin.i.king@gmail.com>
 Corey Minyard <minyard@acm.org>
 Damian Hobson-Garcia <dhobsong@igel.co.jp>
 Daniel Borkmann <daniel@iogearbox.net> <danborkmann@googlemail.com>

diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst

@@ -50,6 +50,7 @@ program using kcov:
     #include <sys/mman.h>
     #include <unistd.h>
     #include <fcntl.h>
+    #include <linux/types.h>
 
     #define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
     #define KCOV_ENABLE     _IO('c', 100)
@@ -177,6 +178,8 @@ Comparison operands collection is similar to coverage collection:
        /* Read number of comparisons collected. */
        n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);
        for (i = 0; i < n; i++) {
+               uint64_t ip;
+
                type = cover[i * KCOV_WORDS_PER_CMP + 1];
                /* arg1 and arg2 - operands of the comparison. */
                arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
@@ -251,6 +254,8 @@ selectively from different subsystems.
 
 .. code-block:: c
 
+    /* Same includes and defines as above. */
+
     struct kcov_remote_arg {
        __u32           trace_mode;
        __u32           area_size;
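
For context: taken together, the two kcov.rst fixes above make the comparison-collection example compile stand-alone. A condensed sketch of the fixed loop — KCOV_WORDS_PER_CMP and the record layout are as defined in the full example in kcov.rst; the printf formatting here is illustrative:

    #include <stdint.h>
    #include <stdio.h>
    #include <linux/types.h>                /* the include the fix adds */

    #define KCOV_WORDS_PER_CMP 4

    static void dump_cmps(const uint64_t *cover)
    {
            /* cover[0] holds the number of collected comparisons. */
            uint64_t n = __atomic_load_n(&cover[0], __ATOMIC_RELAXED);

            for (uint64_t i = 0; i < n; i++) {
                    uint64_t ip;            /* the declaration the fix adds */
                    uint64_t type = cover[i * KCOV_WORDS_PER_CMP + 1];
                    uint64_t arg1 = cover[i * KCOV_WORDS_PER_CMP + 2];
                    uint64_t arg2 = cover[i * KCOV_WORDS_PER_CMP + 3];

                    ip = cover[i * KCOV_WORDS_PER_CMP + 4];
                    printf("ip: 0x%jx type: %ju arg1: %ju arg2: %ju\n",
                           (uintmax_t)ip, (uintmax_t)type,
                           (uintmax_t)arg1, (uintmax_t)arg2);
            }
    }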

diff --git a/MAINTAINERS b/MAINTAINERS

@@ -767,7 +767,7 @@ F: drivers/crypto/allwinner/
 ALLWINNER HARDWARE SPINLOCK SUPPORT
 M: Wilken Gottwalt <wilken.gottwalt@posteo.net>
 S: Maintained
-F: Documentation/devicetree/bindings/hwlock/allwinner,sun6i-hwspinlock.yaml
+F: Documentation/devicetree/bindings/hwlock/allwinner,sun6i-a31-hwspinlock.yaml
 F: drivers/hwspinlock/sun6i_hwspinlock.c
 
 ALLWINNER THERMAL DRIVER
@@ -2783,7 +2783,7 @@ F: Documentation/devicetree/bindings/arm/toshiba.yaml
 F: Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
 F: Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
 F: Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
-F: Documentation/devicetree/bindings/pinctrl/toshiba,tmpv7700-pinctrl.yaml
+F: Documentation/devicetree/bindings/pinctrl/toshiba,visconti-pinctrl.yaml
 F: Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml
 F: arch/arm64/boot/dts/toshiba/
 F: drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
@@ -7119,6 +7119,20 @@ F: include/uapi/linux/mdio.h
 F: include/uapi/linux/mii.h
 F: net/core/of_net.c
 
+EXEC & BINFMT API
+R: Eric Biederman <ebiederm@xmission.com>
+R: Kees Cook <keescook@chromium.org>
+F: arch/alpha/kernel/binfmt_loader.c
+F: arch/x86/ia32/ia32_aout.c
+F: fs/*binfmt_*.c
+F: fs/exec.c
+F: include/linux/binfmts.h
+F: include/linux/elf.h
+F: include/uapi/linux/binfmts.h
+F: tools/testing/selftests/exec/
+N: asm/elf.h
+N: binfmt
+
 EXFAT FILE SYSTEM
 M: Namjae Jeon <linkinjeon@kernel.org>
 M: Sungjong Seo <sj1557.seo@samsung.com>
@@ -8562,7 +8576,6 @@ M: John Stultz <john.stultz@linaro.org>
 L: linux-kernel@vger.kernel.org
 S: Maintained
 F: drivers/misc/hisi_hikey_usb.c
-F: Documentation/devicetree/bindings/misc/hisilicon-hikey-usb.yaml
 
 HISILICON PMU DRIVER
 M: Shaokun Zhang <zhangshaokun@hisilicon.com>
@@ -9621,7 +9634,7 @@ INTEL KEEM BAY DRM DRIVER
 M: Anitha Chrisanthus <anitha.chrisanthus@intel.com>
 M: Edmund Dea <edmund.j.dea@intel.com>
 S: Maintained
-F: Documentation/devicetree/bindings/display/intel,kmb_display.yaml
+F: Documentation/devicetree/bindings/display/intel,keembay-display.yaml
 F: drivers/gpu/drm/kmb/
 
 INTEL KEEM BAY OCS AES/SM4 CRYPTO DRIVER

diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c

@@ -129,9 +129,7 @@ dik_show_trace(unsigned long *sp, const char *loglvl)
     extern char _stext[], _etext[];
     unsigned long tmp = *sp;
     sp++;
-    if (tmp < (unsigned long) &_stext)
-        continue;
-    if (tmp >= (unsigned long) &_etext)
+    if (!is_kernel_text(tmp))
         continue;
     printk("%s[<%lx>] %pSR\n", loglvl, tmp, (void *)tmp);
     if (i > 40) {
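
This is the first of several arch cleanups in the series (microblaze and powerpc follow below) replacing open-coded _stext/_etext range tests with the generic text helpers. A simplified sketch of the check that is_kernel_text() centralizes — not the exact kernel definition, which also handles the gate area:

    /* _stext/_etext are the linker-provided bounds of kernel text. */
    extern char _stext[], _etext[];

    static inline bool is_kernel_text_sketch(unsigned long addr)
    {
            return addr >= (unsigned long)_stext &&
                   addr <  (unsigned long)_etext;
    }

core_kernel_text(), which the powerpc hunk uses, additionally covers the init text range (_sinittext/_einittext) that the removed open-coded check tested.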

diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c

@@ -34,6 +34,7 @@
 #include <linux/mm_types.h>
 #include <linux/pgtable.h>
 #include <linux/memblock.h>
+#include <linux/kallsyms.h>
 
 #include <asm/pgalloc.h>
 #include <linux/io.h>
@@ -171,7 +172,7 @@ void __init mapin_ram(void)
     for (s = 0; s < lowmem_size; s += PAGE_SIZE) {
         f = _PAGE_PRESENT | _PAGE_ACCESSED |
                 _PAGE_SHARED | _PAGE_HWEXEC;
-        if ((char *) v < _stext || (char *) v >= _etext)
+        if (!is_kernel_text(v))
             f |= _PAGE_WRENABLE;
         else
             /* On the MicroBlaze, no user access

diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c

@@ -33,8 +33,6 @@
 
 #include <mm/mmu_decl.h>
 
-extern char etext[], _stext[], _sinittext[], _einittext[];
-
 static u8 early_fixmap_pagetable[FIXMAP_PTE_SIZE] __page_aligned_data;
 
 notrace void __init early_ioremap_init(void)
@@ -104,14 +102,13 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
 {
     unsigned long v, s;
     phys_addr_t p;
-    int ktext;
+    bool ktext;
 
     s = offset;
     v = PAGE_OFFSET + s;
     p = memstart_addr + s;
     for (; s < top; s += PAGE_SIZE) {
-        ktext = ((char *)v >= _stext && (char *)v < etext) ||
-            ((char *)v >= _sinittext && (char *)v < _einittext);
+        ktext = core_kernel_text(v);
         map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
         v += PAGE_SIZE;
         p += PAGE_SIZE;

diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c

@@ -4,10 +4,14 @@
  */
 
 #include <linux/delay.h>
+#include <linux/math.h>
 #include <linux/param.h>
 #include <linux/timex.h>
+#include <linux/types.h>
 #include <linux/export.h>
 
+#include <asm/processor.h>
+
 /*
  * This is copies from arch/arm/include/asm/delay.h
  *

diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h

@@ -9,8 +9,12 @@
#define __ASM_FACILITY_H
#include <asm/facility-defs.h>
#include <linux/minmax.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/preempt.h>
#include <asm/lowcore.h>
#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)

diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c

@@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
               (pfn >= aperture_pfn_start + aperture_page_count));
 }
 
+#ifdef CONFIG_PROC_VMCORE
+static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
+{
+    return !!gart_mem_pfn_is_ram(pfn);
+}
+
+static struct vmcore_cb gart_vmcore_cb = {
+    .pfn_is_ram = gart_oldmem_pfn_is_ram,
+};
+#endif
+
 static void __init exclude_from_core(u64 aper_base, u32 aper_order)
 {
     aperture_pfn_start = aper_base >> PAGE_SHIFT;
     aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
 #ifdef CONFIG_PROC_VMCORE
-    WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
+    register_vmcore_cb(&gart_vmcore_cb);
 #endif
 #ifdef CONFIG_PROC_KCORE
     WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));

diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c

@@ -175,7 +175,7 @@ static struct orc_entry *orc_find(unsigned long ip)
     }
 
     /* vmlinux .init slow lookup: */
-    if (init_kernel_text(ip))
+    if (is_kernel_inittext(ip))
         return __orc_find(__start_orc_unwind_ip, __start_orc_unwind,
                   __stop_orc_unwind_ip - __start_orc_unwind_ip, ip);

diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c

@@ -238,11 +238,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
     }
 }
 
-/*
- * The <linux/kallsyms.h> already defines is_kernel_text,
- * using '__' prefix not to get in conflict.
- */
-static inline int __is_kernel_text(unsigned long addr)
+static inline int is_x86_32_kernel_text(unsigned long addr)
 {
     if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end)
         return 1;
@@ -333,8 +329,8 @@ repeat:
                 addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
                     PAGE_OFFSET + PAGE_SIZE-1;
 
-                if (__is_kernel_text(addr) ||
-                    __is_kernel_text(addr2))
+                if (is_x86_32_kernel_text(addr) ||
+                    is_x86_32_kernel_text(addr2))
                     prot = PAGE_KERNEL_LARGE_EXEC;
 
                 pages_2m++;
@@ -359,7 +355,7 @@ repeat:
                  */
                 pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
 
-                if (__is_kernel_text(addr))
+                if (is_x86_32_kernel_text(addr))
                     prot = PAGE_KERNEL_EXEC;
 
                 pages_4k++;
@@ -789,7 +785,7 @@ static void mark_nxdata_nx(void)
      */
     unsigned long start = PFN_ALIGN(_etext);
     /*
-     * This comes from __is_kernel_text upper limit. Also HPAGE where used:
+     * This comes from is_x86_32_kernel_text upper limit. Also HPAGE where used:
      */
     unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start;

diff --git a/arch/x86/xen/mmu_hvm.c b/arch/x86/xen/mmu_hvm.c

@@ -9,39 +9,28 @@
 #ifdef CONFIG_PROC_VMCORE
 /*
- * This function is used in two contexts:
- * - the kdump kernel has to check whether a pfn of the crashed kernel
- *   was a ballooned page. vmcore is using this function to decide
- *   whether to access a pfn of the crashed kernel.
- * - the kexec kernel has to check whether a pfn was ballooned by the
- *   previous kernel. If the pfn is ballooned, handle it properly.
- * Returns 0 if the pfn is not backed by a RAM page, the caller may
+ * The kdump kernel has to check whether a pfn of the crashed kernel
+ * was a ballooned page. vmcore is using this function to decide
+ * whether to access a pfn of the crashed kernel.
+ * Returns "false" if the pfn is not backed by a RAM page, the caller may
  * handle the pfn special in this case.
  */
-static int xen_oldmem_pfn_is_ram(unsigned long pfn)
+static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
 {
     struct xen_hvm_get_mem_type a = {
         .domid = DOMID_SELF,
         .pfn = pfn,
     };
-    int ram;
 
-    if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
-        return -ENXIO;
-
-    switch (a.mem_type) {
-    case HVMMEM_mmio_dm:
-        ram = 0;
-        break;
-    case HVMMEM_ram_rw:
-    case HVMMEM_ram_ro:
-    default:
-        ram = 1;
-        break;
+    if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
+        pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
+        return true;
     }
-
-    return ram;
+    return a.mem_type != HVMMEM_mmio_dm;
 }
+
+static struct vmcore_cb xen_vmcore_cb = {
+    .pfn_is_ram = xen_vmcore_pfn_is_ram,
+};
 #endif
 
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
@@ -75,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
     if (is_pagetable_dying_supported())
         pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
 #ifdef CONFIG_PROC_VMCORE
-    WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
+    register_vmcore_cb(&xen_vmcore_cb);
 #endif
 }
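
The GART and Xen hunks above are the two conversions from the old register_oldmem_pfn_is_ram() hook to the vmcore_cb interface this series introduces (virtio-mem below becomes the third user). The registration pattern, condensed from the diffs — identifiers here are illustrative:

    #include <linux/crash_dump.h>

    static bool my_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
    {
            /* Return false if this pfn was not backed by RAM in the
             * crashed kernel; /proc/vmcore then avoids reading it. */
            return true;
    }

    static struct vmcore_cb my_vmcore_cb = {
            .pfn_is_ram = my_pfn_is_ram,
    };

    /* register_vmcore_cb(&my_vmcore_cb) at probe time,
     * unregister_vmcore_cb(&my_vmcore_cb) on removal. */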

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c

@@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history,
     for (i = 0; i < history->len; i++) {
         const struct drm_dp_mst_topology_ref_entry *entry =
             &history->entries[i];
-        ulong *entries;
-        uint nr_entries;
         u64 ts_nsec = entry->ts_nsec;
         u32 rem_nsec = do_div(ts_nsec, 1000000000);
 
-        nr_entries = stack_depot_fetch(entry->backtrace, &entries);
-        stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
+        stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4);
 
         drm_printf(&p, "  %d %ss (last at %5llu.%06u):\n%s",
                entry->count,
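
This file and the next three DRM files collapse the same two-step pattern; the helper added to lib/stackdepot.c by this series does the fetch and the formatting in one call. Schematically:

    /* Before: fetch the depot entries, then format them. */
    unsigned long *entries;
    unsigned int nr_entries;

    nr_entries = stack_depot_fetch(handle, &entries);
    stack_trace_snprint(buf, size, entries, nr_entries, 4);

    /* After: one call, no temporaries. */
    stack_depot_snprint(handle, buf, size, 4);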

diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c

@@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node)
 static void show_leaks(struct drm_mm *mm)
 {
     struct drm_mm_node *node;
-    unsigned long *entries;
-    unsigned int nr_entries;
     char *buf;
 
     buf = kmalloc(BUFSZ, GFP_KERNEL);
@@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm)
             continue;
         }
 
-        nr_entries = stack_depot_fetch(node->stack, &entries);
-        stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0);
+        stack_depot_snprint(node->stack, buf, BUFSZ, 0);
         DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s",
               node->start, node->size, buf);
     }

diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c

@@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma)
 static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 {
-    unsigned long *entries;
-    unsigned int nr_entries;
     char buf[512];
 
     if (!vma->node.stack) {
@@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
         return;
     }
 
-    nr_entries = stack_depot_fetch(vma->node.stack, &entries);
-    stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
+    stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0);
     DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
              vma->node.start, vma->node.size, reason, buf);
 }

diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c

@@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
     return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
 }
 
-static void __print_depot_stack(depot_stack_handle_t stack,
-                char *buf, int sz, int indent)
-{
-    unsigned long *entries;
-    unsigned int nr_entries;
-
-    nr_entries = stack_depot_fetch(stack, &entries);
-    stack_trace_snprint(buf, sz, entries, nr_entries, indent);
-}
-
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
     spin_lock_init(&rpm->debug.lock);
@@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm,
     if (!buf)
         return;
 
-    __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+    stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
     DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
 
     stack = READ_ONCE(rpm->debug.last_release);
     if (stack) {
-        __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+        stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
         DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf);
     }
 
@@ -183,12 +173,12 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p,
         return;
 
     if (dbg->last_acquire) {
-        __print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2);
+        stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2);
         drm_printf(p, "Wakeref last acquired:\n%s", buf);
     }
 
     if (dbg->last_release) {
-        __print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2);
+        stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2);
         drm_printf(p, "Wakeref last released:\n%s", buf);
     }
 
@@ -203,7 +193,7 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p,
         rep = 1;
         while (i + 1 < dbg->count && dbg->owners[i + 1] == stack)
             rep++, i++;
-        __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+        stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
         drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf);
     }

diff --git a/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h b/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h

@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
+#include <linux/bits.h>
 #include <linux/string.h>
 
 int cxd2880_convert2s_complement(u32 value, u32 bitlen);

diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig

@@ -111,6 +111,7 @@ config VIRTIO_MEM
     depends on MEMORY_HOTPLUG
     depends on MEMORY_HOTREMOVE
     depends on CONTIG_ALLOC
+    depends on EXCLUSIVE_SYSTEM_RAM
     help
       This driver provides access to virtio-mem paravirtualized memory
       devices, allowing to hotplug and hotunplug memory.

diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c

@@ -223,6 +223,9 @@ struct virtio_mem {
      * When this lock is held the pointers can't change, ONLINE and
      * OFFLINE blocks can't change the state and no subblocks will get
      * plugged/unplugged.
+     *
+     * In kdump mode, used to serialize requests, last_block_addr and
+     * last_block_plugged.
      */
     struct mutex hotplug_mutex;
     bool hotplug_active;
@@ -230,6 +233,9 @@ struct virtio_mem {
     /* An error occurred we cannot handle - stop processing requests. */
     bool broken;
 
+    /* Cached valued of is_kdump_kernel() when the device was probed. */
+    bool in_kdump;
+
     /* The driver is being removed. */
     spinlock_t removal_lock;
     bool removing;
@@ -243,6 +249,13 @@ struct virtio_mem {
     /* Memory notifier (online/offline events). */
     struct notifier_block memory_notifier;
 
+#ifdef CONFIG_PROC_VMCORE
+    /* vmcore callback for /proc/vmcore handling in kdump mode */
+    struct vmcore_cb vmcore_cb;
+    uint64_t last_block_addr;
+    bool last_block_plugged;
+#endif /* CONFIG_PROC_VMCORE */
+
     /* Next device in the list of virtio-mem devices. */
     struct list_head next;
 };
@@ -260,6 +273,8 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
 static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
                            unsigned long nr_pages);
 static void virtio_mem_retry(struct virtio_mem *vm);
+static int virtio_mem_create_resource(struct virtio_mem *vm);
+static void virtio_mem_delete_resource(struct virtio_mem *vm);
 
 /*
  * Register a virtio-mem device so it will be considered for the online_page
@@ -2291,6 +2306,12 @@ static void virtio_mem_run_wq(struct work_struct *work)
     uint64_t diff;
     int rc;
 
+    if (unlikely(vm->in_kdump)) {
+        dev_warn_once(&vm->vdev->dev,
+                  "unexpected workqueue run in kdump kernel\n");
+        return;
+    }
+
     hrtimer_cancel(&vm->retry_timer);
 
     if (vm->broken)
@@ -2392,41 +2413,11 @@ static int virtio_mem_init_vq(struct virtio_mem *vm)
     return 0;
 }
 
-static int virtio_mem_init(struct virtio_mem *vm)
+static int virtio_mem_init_hotplug(struct virtio_mem *vm)
 {
     const struct range pluggable_range = mhp_get_pluggable_range(true);
-    uint64_t sb_size, addr;
-    uint16_t node_id;
-
-    if (!vm->vdev->config->get) {
-        dev_err(&vm->vdev->dev, "config access disabled\n");
-        return -EINVAL;
-    }
-
-    /*
-     * We don't want to (un)plug or reuse any memory when in kdump. The
-     * memory is still accessible (but not mapped).
-     */
-    if (is_kdump_kernel()) {
-        dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n");
-        return -EBUSY;
-    }
-
-    /* Fetch all properties that can't change. */
-    virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
-            &vm->plugged_size);
-    virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
-            &vm->device_block_size);
-    virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
-            &node_id);
-    vm->nid = virtio_mem_translate_node_id(vm, node_id);
-    virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
-    virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
-            &vm->region_size);
-
-    /* Determine the nid for the device based on the lowest address. */
-    if (vm->nid == NUMA_NO_NODE)
-        vm->nid = memory_add_physaddr_to_nid(vm->addr);
+    uint64_t unit_pages, sb_size, addr;
+    int rc;
 
     /* bad device setup - warn only */
     if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
@@ -2496,10 +2487,6 @@ static int virtio_mem_init(struct virtio_mem *vm)
                   vm->offline_threshold);
     }
 
-    dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
-    dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
-    dev_info(&vm->vdev->dev, "device block size: 0x%llx",
-         (unsigned long long)vm->device_block_size);
     dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
          memory_block_size_bytes());
     if (vm->in_sbm)
@@ -2508,10 +2495,170 @@ static int virtio_mem_init(struct virtio_mem *vm)
     else
         dev_info(&vm->vdev->dev, "big block size: 0x%llx",
              (unsigned long long)vm->bbm.bb_size);
 
+    /* create the parent resource for all memory */
+    rc = virtio_mem_create_resource(vm);
+    if (rc)
+        return rc;
+
+    /* use a single dynamic memory group to cover the whole memory device */
+    if (vm->in_sbm)
+        unit_pages = PHYS_PFN(memory_block_size_bytes());
+    else
+        unit_pages = PHYS_PFN(vm->bbm.bb_size);
+    rc = memory_group_register_dynamic(vm->nid, unit_pages);
+    if (rc < 0)
+        goto out_del_resource;
+    vm->mgid = rc;
+
+    /*
+     * If we still have memory plugged, we have to unplug all memory first.
+     * Registering our parent resource makes sure that this memory isn't
+     * actually in use (e.g., trying to reload the driver).
+     */
+    if (vm->plugged_size) {
+        vm->unplug_all_required = true;
+        dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
+    }
+
+    /* register callbacks */
+    vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
+    rc = register_memory_notifier(&vm->memory_notifier);
+    if (rc)
+        goto out_unreg_group;
+    rc = register_virtio_mem_device(vm);
+    if (rc)
+        goto out_unreg_mem;
+
+    return 0;
+out_unreg_mem:
+    unregister_memory_notifier(&vm->memory_notifier);
+out_unreg_group:
+    memory_group_unregister(vm->mgid);
+out_del_resource:
+    virtio_mem_delete_resource(vm);
+    return rc;
+}
+
+#ifdef CONFIG_PROC_VMCORE
+static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr,
+                     uint64_t size)
+{
+    const uint64_t nb_vm_blocks = size / vm->device_block_size;
+    const struct virtio_mem_req req = {
+        .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE),
+        .u.state.addr = cpu_to_virtio64(vm->vdev, addr),
+        .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
+    };
+    int rc = -ENOMEM;
+
+    dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr,
+        addr + size - 1);
+
+    switch (virtio_mem_send_request(vm, &req)) {
+    case VIRTIO_MEM_RESP_ACK:
+        return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state);
+    case VIRTIO_MEM_RESP_ERROR:
+        rc = -EINVAL;
+        break;
+    default:
+        break;
+    }
+
+    dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc);
+    return rc;
+}
+
+static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
+                     unsigned long pfn)
+{
+    struct virtio_mem *vm = container_of(cb, struct virtio_mem,
+                         vmcore_cb);
+    uint64_t addr = PFN_PHYS(pfn);
+    bool is_ram;
+    int rc;
+
+    if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE))
+        return true;
+    if (!vm->plugged_size)
+        return false;
+
+    /*
+     * We have to serialize device requests and access to the information
+     * about the block queried last.
+     */
+    mutex_lock(&vm->hotplug_mutex);
+
+    addr = ALIGN_DOWN(addr, vm->device_block_size);
+    if (addr != vm->last_block_addr) {
+        rc = virtio_mem_send_state_request(vm, addr,
+                           vm->device_block_size);
+        /* On any kind of error, we're going to signal !ram. */
+        if (rc == VIRTIO_MEM_STATE_PLUGGED)
+            vm->last_block_plugged = true;
+        else
+            vm->last_block_plugged = false;
+        vm->last_block_addr = addr;
+    }
+
+    is_ram = vm->last_block_plugged;
+    mutex_unlock(&vm->hotplug_mutex);
+    return is_ram;
+}
+#endif /* CONFIG_PROC_VMCORE */
+
+static int virtio_mem_init_kdump(struct virtio_mem *vm)
+{
+#ifdef CONFIG_PROC_VMCORE
+    dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
+    vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
+    register_vmcore_cb(&vm->vmcore_cb);
+    return 0;
+#else /* CONFIG_PROC_VMCORE */
+    dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n");
+    return -EBUSY;
+#endif /* CONFIG_PROC_VMCORE */
+}
+
+static int virtio_mem_init(struct virtio_mem *vm)
+{
+    uint16_t node_id;
+
+    if (!vm->vdev->config->get) {
+        dev_err(&vm->vdev->dev, "config access disabled\n");
+        return -EINVAL;
+    }
+
+    /* Fetch all properties that can't change. */
+    virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
+            &vm->plugged_size);
+    virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
+            &vm->device_block_size);
+    virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
+            &node_id);
+    vm->nid = virtio_mem_translate_node_id(vm, node_id);
+    virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
+    virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
+            &vm->region_size);
+
+    /* Determine the nid for the device based on the lowest address. */
+    if (vm->nid == NUMA_NO_NODE)
+        vm->nid = memory_add_physaddr_to_nid(vm->addr);
+
+    dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
+    dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
+    dev_info(&vm->vdev->dev, "device block size: 0x%llx",
+         (unsigned long long)vm->device_block_size);
     if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
         dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
 
-    return 0;
+    /*
+     * We don't want to (un)plug or reuse any memory when in kdump. The
+     * memory is still accessible (but not exposed to Linux).
+     */
+    if (vm->in_kdump)
+        return virtio_mem_init_kdump(vm);
+    return virtio_mem_init_hotplug(vm);
 }
@@ -2525,8 +2672,10 @@ static int virtio_mem_create_resource(struct virtio_mem *vm)
     if (!name)
         return -ENOMEM;
 
+    /* Disallow mapping device memory via /dev/mem completely. */
     vm->parent_resource = __request_mem_region(vm->addr, vm->region_size,
-                           name, IORESOURCE_SYSTEM_RAM);
+                           name, IORESOURCE_SYSTEM_RAM |
+                           IORESOURCE_EXCLUSIVE);
     if (!vm->parent_resource) {
         kfree(name);
         dev_warn(&vm->vdev->dev, "could not reserve device region\n");
@@ -2571,7 +2720,6 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm)
 static int virtio_mem_probe(struct virtio_device *vdev)
 {
     struct virtio_mem *vm;
-    uint64_t unit_pages;
     int rc;
 
     BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
@@ -2590,6 +2738,7 @@ static int virtio_mem_probe(struct virtio_device *vdev)
     hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
     vm->retry_timer.function = virtio_mem_timer_expired;
     vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
+    vm->in_kdump = is_kdump_kernel();
 
     /* register the virtqueue */
     rc = virtio_mem_init_vq(vm);
@@ -2601,53 +2750,15 @@ static int virtio_mem_probe(struct virtio_device *vdev)
     if (rc)
         goto out_del_vq;
 
-    /* create the parent resource for all memory */
-    rc = virtio_mem_create_resource(vm);
-    if (rc)
-        goto out_del_vq;
-
-    /* use a single dynamic memory group to cover the whole memory device */
-    if (vm->in_sbm)
-        unit_pages = PHYS_PFN(memory_block_size_bytes());
-    else
-        unit_pages = PHYS_PFN(vm->bbm.bb_size);
-    rc = memory_group_register_dynamic(vm->nid, unit_pages);
-    if (rc < 0)
-        goto out_del_resource;
-    vm->mgid = rc;
-
-    /*
-     * If we still have memory plugged, we have to unplug all memory first.
-     * Registering our parent resource makes sure that this memory isn't
-     * actually in use (e.g., trying to reload the driver).
-     */
-    if (vm->plugged_size) {
-        vm->unplug_all_required = true;
-        dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
-    }
-
-    /* register callbacks */
-    vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
-    rc = register_memory_notifier(&vm->memory_notifier);
-    if (rc)
-        goto out_unreg_group;
-    rc = register_virtio_mem_device(vm);
-    if (rc)
-        goto out_unreg_mem;
-
     virtio_device_ready(vdev);
 
     /* trigger a config update to start processing the requested_size */
-    atomic_set(&vm->config_changed, 1);
-    queue_work(system_freezable_wq, &vm->wq);
+    if (!vm->in_kdump) {
+        atomic_set(&vm->config_changed, 1);
+        queue_work(system_freezable_wq, &vm->wq);
+    }
 
     return 0;
-out_unreg_mem:
-    unregister_memory_notifier(&vm->memory_notifier);
-out_unreg_group:
-    memory_group_unregister(vm->mgid);
-out_del_resource:
-    virtio_mem_delete_resource(vm);
 out_del_vq:
     vdev->config->del_vqs(vdev);
 out_free_vm:
@@ -2657,9 +2768,8 @@ out_free_vm:
     return rc;
 }
 
-static void virtio_mem_remove(struct virtio_device *vdev)
+static void virtio_mem_deinit_hotplug(struct virtio_mem *vm)
 {
-    struct virtio_mem *vm = vdev->priv;
     unsigned long mb_id;
     int rc;
 
@@ -2706,7 +2816,8 @@ static void virtio_mem_remove(struct virtio_device *vdev)
      * away. Warn at least.
      */
     if (virtio_mem_has_memory_added(vm)) {
-        dev_warn(&vdev->dev, "device still has system memory added\n");
+        dev_warn(&vm->vdev->dev,
+             "device still has system memory added\n");
     } else {
         virtio_mem_delete_resource(vm);
         kfree_const(vm->resource_name);
@@ -2720,6 +2831,23 @@ static void virtio_mem_remove(struct virtio_device *vdev)
     } else {
         vfree(vm->bbm.bb_states);
     }
+}
+
+static void virtio_mem_deinit_kdump(struct virtio_mem *vm)
+{
+#ifdef CONFIG_PROC_VMCORE
+    unregister_vmcore_cb(&vm->vmcore_cb);
+#endif /* CONFIG_PROC_VMCORE */
+}
+
+static void virtio_mem_remove(struct virtio_device *vdev)
+{
+    struct virtio_mem *vm = vdev->priv;
+
+    if (vm->in_kdump)
+        virtio_mem_deinit_kdump(vm);
+    else
+        virtio_mem_deinit_hotplug(vm);
 
     /* reset the device and cleanup the queues */
     vdev->config->reset(vdev);
@@ -2733,6 +2861,9 @@ static void virtio_mem_config_changed(struct virtio_device *vdev)
 {
     struct virtio_mem *vm = vdev->priv;
 
+    if (unlikely(vm->in_kdump))
+        return;
+
     atomic_set(&vm->config_changed, 1);
     virtio_mem_retry(vm);
 }
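
The IORESOURCE_EXCLUSIVE request in virtio_mem_create_resource() above is what the new EXCLUSIVE_SYSTEM_RAM Kconfig dependency enforces: per the "kernel/resource: disallow access to exclusive system RAM regions" patch in this series, ranges claimed this way can no longer be mapped through /dev/mem. Reduced to the essential call — base, size and name here are illustrative:

    struct resource *res;

    /* Claim the range as system RAM that nobody else may map. */
    res = __request_mem_region(base, size, "my-device",
                               IORESOURCE_SYSTEM_RAM | IORESOURCE_EXCLUSIVE);
    if (!res)
            return -EBUSY;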

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c

@@ -156,7 +156,7 @@ static int padzero(unsigned long elf_bss)
 #define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
 #define STACK_ROUND(sp, items) \
     (((unsigned long) (sp - items)) &~ 15UL)
-#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
+#define STACK_ALLOC(sp, len) (sp -= len)
 #endif
 
 #ifndef ELF_BASE_PLATFORM
@@ -1074,20 +1074,26 @@ out_free_interp:
         vaddr = elf_ppnt->p_vaddr;
         /*
-         * If we are loading ET_EXEC or we have already performed
-         * the ET_DYN load_addr calculations, proceed normally.
+         * The first time through the loop, load_addr_set is false:
+         * layout will be calculated. Once set, use MAP_FIXED since
+         * we know we've already safely mapped the entire region with
+         * MAP_FIXED_NOREPLACE in the once-per-binary logic following.
          */
-        if (elf_ex->e_type == ET_EXEC || load_addr_set) {
+        if (load_addr_set) {
             elf_flags |= MAP_FIXED;
+        } else if (elf_ex->e_type == ET_EXEC) {
+            /*
+             * This logic is run once for the first LOAD Program
+             * Header for ET_EXEC binaries. No special handling
+             * is needed.
+             */
+            elf_flags |= MAP_FIXED_NOREPLACE;
         } else if (elf_ex->e_type == ET_DYN) {
             /*
             * This logic is run once for the first LOAD Program
             * Header for ET_DYN binaries to calculate the
             * randomization (load_bias) for all the LOAD
-            * Program Headers, and to calculate the entire
-            * size of the ELF mapping (total_size). (Note that
-            * load_addr_set is set to true later once the
-            * initial mapping is performed.)
+            * Program Headers.
             *
             * There are effectively two types of ET_DYN
             * binaries: programs (i.e. PIE: ET_DYN with INTERP)
@@ -1108,7 +1114,7 @@ out_free_interp:
             * Therefore, programs are loaded offset from
             * ELF_ET_DYN_BASE and loaders are loaded into the
             * independently randomized mmap region (0 load_bias
-            * without MAP_FIXED).
+            * without MAP_FIXED nor MAP_FIXED_NOREPLACE).
             */
             if (interpreter) {
                 load_bias = ELF_ET_DYN_BASE;
@@ -1117,7 +1123,7 @@ out_free_interp:
                 alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
                 if (alignment)
                     load_bias &= ~(alignment - 1);
-                elf_flags |= MAP_FIXED;
+                elf_flags |= MAP_FIXED_NOREPLACE;
             } else
                 load_bias = 0;
@@ -1129,7 +1135,14 @@ out_free_interp:
             * is then page aligned.
             */
             load_bias = ELF_PAGESTART(load_bias - vaddr);
+        }
 
+        /*
+         * Calculate the entire size of the ELF mapping (total_size).
+         * (Note that load_addr_set is set to true later once the
+         * initial mapping is performed.)
+         */
+        if (!load_addr_set) {
             total_size = total_mapping_size(elf_phdata,
                             elf_ex->e_phnum);
             if (!total_size) {
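
The net effect of the binfmt_elf changes above: the first PT_LOAD mapping of a binary is now placed with MAP_FIXED_NOREPLACE, and only subsequent segments — which land inside that already-reserved region — keep MAP_FIXED. A userspace sketch of the difference between the two flags:

    #define _GNU_SOURCE
    #include <sys/mman.h>
    #include <errno.h>
    #include <stddef.h>

    /* MAP_FIXED_NOREPLACE fails with EEXIST when [hint, hint + len)
     * overlaps an existing mapping; plain MAP_FIXED would silently
     * unmap whatever was there. */
    static void *map_at_exactly(void *hint, size_t len)
    {
            void *p = mmap(hint, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                           -1, 0);
            return (p == MAP_FAILED && errno == EEXIST) ? NULL : p;
    }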
Some files were not shown because too many files have changed in this diff Show More