Merge branch 'akpm' (patches from Andrew)

Merge second patch-bomb from Andrew Morton:

 - more MM stuff:

    - Kirill's page-flags rework

    - Kirill's now-allegedly-fixed THP rework

    - MADV_FREE implementation

    - DAX feature work (msync/fsync).  This isn't quite complete but DAX
      is new and it's good enough and the guys have a handle on what
      needs to be done - I expect this to be wrapped in the next week or
      two.

  - some vsprintf maintenance work

  - various other misc bits

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (145 commits)
  printk: change recursion_bug type to bool
  lib/vsprintf: factor out %pN[F] handler as netdev_bits()
  lib/vsprintf: refactor duplicate code to special_hex_number()
  printk-formats.txt: remove unimplemented %pT
  printk: help pr_debug and pr_devel to optimize out arguments
  lib/test_printf.c: test dentry printing
  lib/test_printf.c: add test for large bitmaps
  lib/test_printf.c: account for kvasprintf tests
  lib/test_printf.c: add a few number() tests
  lib/test_printf.c: test precision quirks
  lib/test_printf.c: check for out-of-bound writes
  lib/test_printf.c: don't BUG
  lib/kasprintf.c: add sanity check to kvasprintf
  lib/vsprintf.c: warn about too large precisions and field widths
  lib/vsprintf.c: help gcc make number() smaller
  lib/vsprintf.c: expand field_width to 24 bits
  lib/vsprintf.c: eliminate potential race in string()
  lib/vsprintf.c: move string() below widen_string()
  lib/vsprintf.c: pull out padding code from dentry_name()
  printk: do cond_resched() between lines while outputting to consoles
  ...
This commit is contained in:
Linus Torvalds
2016-01-17 12:58:52 -08:00
189 changed files with 4373 additions and 2902 deletions
+6 -5
View File
@@ -161,7 +161,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
const unsigned long mmun_end = addr + PAGE_SIZE;
struct mem_cgroup *memcg;
err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg);
err = mem_cgroup_try_charge(kpage, vma->vm_mm, GFP_KERNEL, &memcg,
false);
if (err)
return err;
@@ -175,8 +176,8 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
goto unlock;
get_page(kpage);
page_add_new_anon_rmap(kpage, vma, addr);
mem_cgroup_commit_charge(kpage, memcg, false);
page_add_new_anon_rmap(kpage, vma, addr, false);
mem_cgroup_commit_charge(kpage, memcg, false, false);
lru_cache_add_active_or_unevictable(kpage, vma);
if (!PageAnon(page)) {
@@ -188,7 +189,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
ptep_clear_flush_notify(vma, addr, ptep);
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
page_remove_rmap(page, false);
if (!page_mapped(page))
try_to_free_swap(page);
pte_unmap_unlock(ptep, ptl);
@@ -199,7 +200,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
err = 0;
unlock:
mem_cgroup_cancel_charge(kpage, memcg);
mem_cgroup_cancel_charge(kpage, memcg, false);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
unlock_page(page);
return err;
+15 -50
View File
@@ -469,7 +469,8 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
struct page *page, *page_head;
struct page *page;
struct address_space *mapping;
int err, ro = 0;
/*
@@ -519,46 +520,9 @@ again:
else
err = 0;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
page_head = page;
if (unlikely(PageTail(page))) {
put_page(page);
/* serialize against __split_huge_page_splitting() */
local_irq_disable();
if (likely(__get_user_pages_fast(address, 1, !ro, &page) == 1)) {
page_head = compound_head(page);
/*
* page_head is valid pointer but we must pin
* it before taking the PG_lock and/or
* PG_compound_lock. The moment we re-enable
* irqs __split_huge_page_splitting() can
* return and the head page can be freed from
* under us. We can't take the PG_lock and/or
* PG_compound_lock on a page that could be
* freed from under us.
*/
if (page != page_head) {
get_page(page_head);
put_page(page);
}
local_irq_enable();
} else {
local_irq_enable();
goto again;
}
}
#else
page_head = compound_head(page);
if (page != page_head) {
get_page(page_head);
put_page(page);
}
#endif
lock_page(page_head);
lock_page(page);
/*
* If page_head->mapping is NULL, then it cannot be a PageAnon
* If page->mapping is NULL, then it cannot be a PageAnon
* page; but it might be the ZERO_PAGE or in the gate area or
* in a special mapping (all cases which we are happy to fail);
* or it may have been a good file page when get_user_pages_fast
@@ -570,12 +534,13 @@ again:
*
* The case we do have to guard against is when memory pressure made
* shmem_writepage move it from filecache to swapcache beneath us:
* an unlikely race, but we do need to retry for page_head->mapping.
* an unlikely race, but we do need to retry for page->mapping.
*/
if (!page_head->mapping) {
int shmem_swizzled = PageSwapCache(page_head);
unlock_page(page_head);
put_page(page_head);
mapping = compound_head(page)->mapping;
if (!mapping) {
int shmem_swizzled = PageSwapCache(page);
unlock_page(page);
put_page(page);
if (shmem_swizzled)
goto again;
return -EFAULT;
@@ -588,7 +553,7 @@ again:
* it's a read-only handle, it's expected that futexes attach to
* the object not the particular process.
*/
if (PageAnon(page_head)) {
if (PageAnon(page)) {
/*
* A RO anonymous page will never change and thus doesn't make
* sense for futex operations.
@@ -603,15 +568,15 @@ again:
key->private.address = address;
} else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = page_head->mapping->host;
key->shared.inode = mapping->host;
key->shared.pgoff = basepage_index(page);
}
get_futex_key_refs(key); /* implies MB (B) */
out:
unlock_page(page_head);
put_page(page_head);
unlock_page(page);
put_page(page);
return err;
}
@@ -639,7 +604,7 @@ static int fault_in_user_writeable(u32 __user *uaddr)
down_read(&mm->mmap_sem);
ret = fixup_user_fault(current, mm, (unsigned long)uaddr,
FAULT_FLAG_WRITE);
FAULT_FLAG_WRITE, NULL);
up_read(&mm->mmap_sem);
return ret < 0 ? ret : 0;
+211 -10
View File
@@ -10,8 +10,11 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/radix-tree.h>
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/memory_hotplug.h>
@@ -147,24 +150,127 @@ void devm_memunmap(struct device *dev, void *addr)
}
EXPORT_SYMBOL(devm_memunmap);
/*
 * phys_to_pfn_t - wrap an address in a pfn_t
 * @addr: address to convert; it is shifted right by PAGE_SHIFT
 * @flags: handed unchanged to __pfn_to_pfn_t()
 *
 * Returns whatever __pfn_to_pfn_t() builds from the shifted address and
 * @flags.
 */
pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
{
	dma_addr_t frame = addr >> PAGE_SHIFT;

	return __pfn_to_pfn_t(frame, flags);
}
EXPORT_SYMBOL(phys_to_pfn_t);
#ifdef CONFIG_ZONE_DEVICE
/*
 * In the code visible here, held around every radix_tree_insert() and
 * radix_tree_delete() on pgmap_radix.
 */
static DEFINE_MUTEX(pgmap_lock);
/*
 * Indexed by (physical address >> PA_SECTION_SHIFT); each slot holds the
 * struct page_map that registered that section.
 */
static RADIX_TREE(pgmap_radix, GFP_KERNEL);
/*
 * Section geometry derived from PA_SECTION_SHIFT.
 *
 * SECTION_SIZE is the size of one section in bytes; SECTION_MASK clears the
 * offset-within-section bits of an address.  Both replacement lists are
 * fully parenthesized so the macros expand as a single operand wherever
 * they are used.
 */
#define SECTION_MASK (~((1UL << PA_SECTION_SHIFT) - 1))
#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
/*
 * Bookkeeping for one devm_memremap_pages() mapping.  It is allocated as
 * devres data and is also the value stored in pgmap_radix.
 */
struct page_map {
/* private copy of the resource passed to devm_memremap_pages() */
struct resource res;
/* not referenced by the code visible here; pgmap.ref is what gets used */
struct percpu_ref *ref;
/* what find_dev_pagemap() returns; its res/altmap point into this struct */
struct dev_pagemap pgmap;
/* private copy of the caller's altmap, filled in only when one is given */
struct vmem_altmap altmap;
};
static void devm_memremap_pages_release(struct device *dev, void *res)
void get_zone_device_page(struct page *page)
{
struct page_map *page_map = res;
percpu_ref_get(page->pgmap->ref);
}
EXPORT_SYMBOL(get_zone_device_page);
/* pages are dead and unused, undo the arch mapping */
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
/*
 * put_zone_device_page - release a ZONE_DEVICE page's hold on its pagemap
 * @page: page whose ->pgmap is handed to put_dev_pagemap()
 */
void put_zone_device_page(struct page *page)
{
	struct dev_pagemap *pgmap = page->pgmap;

	put_dev_pagemap(pgmap);
}
EXPORT_SYMBOL(put_zone_device_page);
/*
 * pgmap_radix_release - remove the pgmap_radix entries registered for @res
 * @res: resource describing the [start, end] range to forget
 *
 * Steps through the range in SECTION_SIZE increments, beginning at
 * res->start, and deletes the radix slot of every address visited.  The
 * whole walk runs under pgmap_lock.
 *
 * NOTE(review): the walk begins at res->start, not at a section-aligned
 * address.  If res->start is not section aligned, the section that holds
 * res->end may never be visited - confirm that callers guarantee alignment.
 */
static void pgmap_radix_release(struct resource *res)
{
	resource_size_t addr;

	mutex_lock(&pgmap_lock);
	addr = res->start;
	while (addr <= res->end) {
		radix_tree_delete(&pgmap_radix, addr >> PA_SECTION_SHIFT);
		addr += SECTION_SIZE;
	}
	mutex_unlock(&pgmap_lock);
}
void *devm_memremap_pages(struct device *dev, struct resource *res)
/*
 * pfn_first - first page frame number to walk for @page_map
 * @page_map: mapping to inspect
 *
 * Returns the pfn of the resource start.  When the mapping carries an
 * altmap, the pfn is advanced by vmem_altmap_offset() for that altmap.
 */
static unsigned long pfn_first(struct page_map *page_map)
{
	struct vmem_altmap *altmap = page_map->pgmap.altmap;
	unsigned long pfn = page_map->res.start >> PAGE_SHIFT;

	if (!altmap)
		return pfn;

	return pfn + vmem_altmap_offset(altmap);
}
static unsigned long pfn_end(struct page_map *page_map)
{
const struct resource *res = &page_map->res;
return (res->start + resource_size(res)) >> PAGE_SHIFT;
}
/*
 * for_each_device_pfn - iterate over every pfn of a page_map
 * @pfn: unsigned long lvalue used as the loop cursor
 * @map: struct page_map * to walk
 *
 * Visits pfn_first(@map) up to, but not including, pfn_end(@map).  The
 * cursor argument is parenthesized so any lvalue expression is accepted.
 * Note that @map is expanded more than once and pfn_end() is re-evaluated on
 * every iteration, so @map must be free of side effects.
 */
#define for_each_device_pfn(pfn, map) \
	for ((pfn) = pfn_first(map); (pfn) < pfn_end(map); (pfn)++)
/*
 * devm_memremap_pages_release - devres release callback for
 * devm_memremap_pages()
 * @dev: device that owns the mapping; used here for diagnostics
 * @data: the struct page_map that was registered as devres data
 *
 * Warns if the mapping's percpu reference can still be taken live, removes
 * the mapping from pgmap_radix, undoes the arch memory mapping for the
 * section-rounded range, and finally warns once if the altmap still
 * reports allocated pages.
 *
 * NOTE(review): align_size rounds only the length up to SECTION_SIZE, the
 * same way the arch_add_memory() call in devm_memremap_pages() does.  With
 * an unaligned res->start the rounded span may stop short of res->end -
 * confirm whether that is intended.
 */
static void devm_memremap_pages_release(struct device *dev, void *data)
{
	struct page_map *map = data;
	struct dev_pagemap *pgmap = &map->pgmap;
	struct resource *res = &map->res;
	resource_size_t align_start, align_size;

	/* The reference should already be dead; complain when it is not. */
	if (percpu_ref_tryget_live(pgmap->ref)) {
		dev_WARN(dev, "%s: page mapping is still live!\n", __func__);
		percpu_ref_put(pgmap->ref);
	}

	pgmap_radix_release(res);

	/* pages are dead and unused, undo the arch mapping */
	align_size = ALIGN(resource_size(res), SECTION_SIZE);
	align_start = res->start & ~(SECTION_SIZE - 1);
	arch_remove_memory(align_start, align_size);

	dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
		      "%s: failed to free all reserved pages\n", __func__);
}
/*
 * find_dev_pagemap - look up the dev_pagemap registered for an address
 * @phys: physical address; its section index is the radix tree key
 *
 * The caller must hold rcu_read_lock(); a one-time warning is emitted
 * otherwise.  Returns the pagemap embedded in the matching page_map, or
 * NULL when no mapping is registered for that section.
 */
struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
	struct page_map *page_map;

	WARN_ON_ONCE(!rcu_read_lock_held());

	page_map = radix_tree_lookup(&pgmap_radix, phys >> PA_SECTION_SHIFT);
	if (!page_map)
		return NULL;

	return &page_map->pgmap;
}
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
* @res: "host memory" address range
* @ref: a live per-cpu reference count
* @altmap: optional descriptor for allocating the memmap from @res
*
* Notes:
* 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
* (or devm release event).
*
* 2/ @res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
* this is not enforced.
*/
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap)
{
int is_ram = region_intersects(res->start, resource_size(res),
"System RAM");
resource_size_t key, align_start, align_size;
struct dev_pagemap *pgmap;
struct page_map *page_map;
unsigned long pfn;
int error, nid;
if (is_ram == REGION_MIXED) {
@@ -176,25 +282,120 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
if (is_ram == REGION_INTERSECTS)
return __va(res->start);
if (altmap && !IS_ENABLED(CONFIG_SPARSEMEM_VMEMMAP)) {
dev_err(dev, "%s: altmap requires CONFIG_SPARSEMEM_VMEMMAP=y\n",
__func__);
return ERR_PTR(-ENXIO);
}
if (!ref)
return ERR_PTR(-EINVAL);
page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
return ERR_PTR(-ENOMEM);
pgmap = &page_map->pgmap;
memcpy(&page_map->res, res, sizeof(*res));
pgmap->dev = dev;
if (altmap) {
memcpy(&page_map->altmap, altmap, sizeof(*altmap));
pgmap->altmap = &page_map->altmap;
}
pgmap->ref = ref;
pgmap->res = &page_map->res;
mutex_lock(&pgmap_lock);
error = 0;
for (key = res->start; key <= res->end; key += SECTION_SIZE) {
struct dev_pagemap *dup;
rcu_read_lock();
dup = find_dev_pagemap(key);
rcu_read_unlock();
if (dup) {
dev_err(dev, "%s: %pr collides with mapping for %s\n",
__func__, res, dev_name(dup->dev));
error = -EBUSY;
break;
}
error = radix_tree_insert(&pgmap_radix, key >> PA_SECTION_SHIFT,
page_map);
if (error) {
dev_err(dev, "%s: failed: %d\n", __func__, error);
break;
}
}
mutex_unlock(&pgmap_lock);
if (error)
goto err_radix;
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();
error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
devres_free(page_map);
return ERR_PTR(error);
}
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(resource_size(res), SECTION_SIZE);
error = arch_add_memory(nid, align_start, align_size, true);
if (error)
goto err_add_memory;
for_each_device_pfn(pfn, page_map) {
struct page *page = pfn_to_page(pfn);
/* ZONE_DEVICE pages must never appear on a slab lru */
list_force_poison(&page->lru);
page->pgmap = pgmap;
}
devres_add(dev, page_map);
return __va(res->start);
err_add_memory:
err_radix:
pgmap_radix_release(res);
devres_free(page_map);
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
/*
 * vmem_altmap_offset - pfns to skip before pfn_to_page() becomes valid
 * @altmap: altmap descriptor to read
 *
 * Returns the sum of the altmap's reserve and free counts.
 */
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
	/* number of pfns from base where pfn_to_page() is valid */
	unsigned long offset = altmap->reserve + altmap->free;

	return offset;
}
/*
 * vmem_altmap_free - give pfns back to an altmap's accounting
 * @altmap: altmap descriptor to update
 * @nr_pfns: number of pfns to subtract from the allocated count
 */
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
{
	altmap->alloc = altmap->alloc - nr_pfns;
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
{
/*
* 'memmap_start' is the virtual address for the first "struct
* page" in this range of the vmemmap array. In the case of
* CONFIG_SPARSE_VMEMMAP a page_to_pfn conversion is simple
* pointer arithmetic, so we can perform this to_vmem_altmap()
* conversion without concern for the initialization state of
* the struct page fields.
*/
struct page *page = (struct page *) memmap_start;
struct dev_pagemap *pgmap;
/*
* Uncoditionally retrieve a dev_pagemap associated with the
* given physical address, this is only for use in the
* arch_{add|remove}_memory() for setting up and tearing down
* the memmap.
*/
rcu_read_lock();
pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
rcu_read_unlock();
return pgmap ? pgmap->altmap : NULL;
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
#endif /* CONFIG_ZONE_DEVICE */
+1 -2
View File
@@ -180,8 +180,7 @@ void panic(const char *fmt, ...)
* panic() is not being called from OOPS.
*/
debug_locks_off();
console_trylock();
console_unlock();
console_flush_on_panic();
if (!panic_blink)
panic_blink = no_blink;
+62 -5
View File
@@ -48,6 +48,7 @@
#include <linux/uio.h>
#include <asm/uaccess.h>
#include <asm-generic/sections.h>
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>
@@ -1660,7 +1661,7 @@ asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static int recursion_bug;
static bool recursion_bug;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len = 0;
@@ -1696,7 +1697,7 @@ asmlinkage int vprintk_emit(int facility, int level,
* it can be printed at the next appropriate moment:
*/
if (!oops_in_progress && !lockdep_recursing(current)) {
recursion_bug = 1;
recursion_bug = true;
local_irq_restore(flags);
return 0;
}
@@ -1711,7 +1712,7 @@ asmlinkage int vprintk_emit(int facility, int level,
static const char recursion_msg[] =
"BUG: recent printk recursion!";
recursion_bug = 0;
recursion_bug = false;
/* emit KERN_CRIT message */
printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0,
NULL, 0, recursion_msg,
@@ -2233,13 +2234,24 @@ void console_unlock(void)
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
bool retry;
bool do_cond_resched, retry;
if (console_suspended) {
up_console_sem();
return;
}
/*
* Console drivers are called under logbuf_lock, so
* @console_may_schedule should be cleared before; however, we may
* end up dumping a lot of lines, for example, if called from
* console registration path, and should invoke cond_resched()
* between lines if allowable. Not doing so can cause a very long
* scheduling stall on a slow console leading to RCU stall and
* softlockup warnings which exacerbate the issue with more
* messages practically incapacitating the system.
*/
do_cond_resched = console_may_schedule;
console_may_schedule = 0;
/* flush buffered message fragment immediately to console */
@@ -2311,6 +2323,9 @@ skip:
call_console_drivers(level, ext_text, ext_len, text, len);
start_critical_timings();
local_irq_restore(flags);
if (do_cond_resched)
cond_resched();
}
console_locked = 0;
@@ -2378,6 +2393,25 @@ void console_unblank(void)
console_unlock();
}
/**
 * console_flush_on_panic - flush console content on panic
 *
 * Immediately output all pending messages no matter what.
 *
 * Takes no arguments and returns nothing.  The result of console_trylock()
 * is deliberately ignored: console_unlock() is called whether or not the
 * lock was obtained.
 */
void console_flush_on_panic(void)
{
/*
 * If someone else is holding the console lock, trylock will fail
 * and may_schedule may be set. Ignore and proceed to unlock so
 * that messages are flushed out. As this can be called from any
 * context and we don't want to get preempted while flushing,
 * ensure may_schedule is cleared.
 */
console_trylock();
/* must be cleared before console_unlock() samples it for cond_resched() */
console_may_schedule = 0;
console_unlock();
}
/*
* Return the console tty driver structure and its associated index
*/
@@ -2658,13 +2692,36 @@ int unregister_console(struct console *console)
}
EXPORT_SYMBOL(unregister_console);
/*
* Some boot consoles access data that is in the init section and which will
* be discarded after the initcalls have been run. To make sure that no code
* will access this data, unregister the boot consoles in a late initcall.
*
* If for some reason, such as deferred probe or the driver being a loadable
* module, the real console hasn't registered yet at this point, there will
* be a brief interval in which no messages are logged to the console, which
* makes it difficult to diagnose problems that occur during this time.
*
* To mitigate this problem somewhat, only unregister consoles whose memory
* intersects with the init section. Note that code exists elsewhere to get
* rid of the boot console as soon as the proper console shows up, so there
* won't be side-effects from postponing the removal.
*/
static int __init printk_late_init(void)
{
struct console *con;
for_each_console(con) {
if (!keep_bootcon && con->flags & CON_BOOT) {
unregister_console(con);
/*
* Make sure to unregister boot consoles whose data
* resides in the init section before the init section
* is discarded. Boot consoles whose data will stick
* around will automatically be unregistered when the
* proper console replaces them.
*/
if (init_section_intersects(con, sizeof(*con)))
unregister_console(con);
}
}
hotcpu_notifier(console_cpu_notify, 0);
-4
View File
@@ -529,8 +529,6 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
#if defined(CONFIG_SMP) || defined(CONFIG_HOTPLUG_CPU)
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
@@ -628,5 +626,3 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
mutex_unlock(&stop_cpus_mutex);
return ret ?: done.ret;
}
#endif /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */