Commit Graph

4490 Commits

Author SHA1 Message Date
Eric Biggers cf4c950b87 once: switch to new jump label API
Switch the DO_ONCE() macro from the deprecated jump label API to the new
one.  The new one is more readable, and for DO_ONCE() it also makes the
generated code more icache-friendly: now the one-time initialization
code is placed out-of-line at the jump target, rather than at the inline
fallthrough case.

Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-09 20:26:23 -07:00
David S. Miller 53954cf8c5 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Just simple overlapping changes.

Signed-off-by: David S. Miller <davem@davemloft.net>
2017-10-05 18:19:22 -07:00
Sergey Senozhatsky 656d61ce96 lib/ratelimit.c: use deferred printk() version
printk_ratelimit() invokes ___ratelimit() which may invoke a normal
printk() (pr_warn() in this particular case) to warn about suppressed
output.  Given that printk_ratelimit() may be called from anywhere, that
pr_warn() is dangerous - it may end up deadlocking the system.  Fix
___ratelimit() by using deferred printk().

Sasha reported the following lockdep error:

 : Unregister pv shared memory for cpu 8
 : select_fallback_rq: 3 callbacks suppressed
 : process 8583 (trinity-c78) no longer affine to cpu8
 :
 : ======================================================
 : WARNING: possible circular locking dependency detected
 : 4.14.0-rc2-next-20170927+ #252 Not tainted
 : ------------------------------------------------------
 : migration/8/62 is trying to acquire lock:
 : (&port_lock_key){-.-.}, at: serial8250_console_write()
 :
 : but task is already holding lock:
 : (&rq->lock){-.-.}, at: sched_cpu_dying()
 :
 : which lock already depends on the new lock.
 :
 :
 : the existing dependency chain (in reverse order) is:
 :
 : -> #3 (&rq->lock){-.-.}:
 : __lock_acquire()
 : lock_acquire()
 : _raw_spin_lock()
 : task_fork_fair()
 : sched_fork()
 : copy_process.part.31()
 : _do_fork()
 : kernel_thread()
 : rest_init()
 : start_kernel()
 : x86_64_start_reservations()
 : x86_64_start_kernel()
 : verify_cpu()
 :
 : -> #2 (&p->pi_lock){-.-.}:
 : __lock_acquire()
 : lock_acquire()
 : _raw_spin_lock_irqsave()
 : try_to_wake_up()
 : default_wake_function()
 : woken_wake_function()
 : __wake_up_common()
 : __wake_up_common_lock()
 : __wake_up()
 : tty_wakeup()
 : tty_port_default_wakeup()
 : tty_port_tty_wakeup()
 : uart_write_wakeup()
 : serial8250_tx_chars()
 : serial8250_handle_irq.part.25()
 : serial8250_default_handle_irq()
 : serial8250_interrupt()
 : __handle_irq_event_percpu()
 : handle_irq_event_percpu()
 : handle_irq_event()
 : handle_level_irq()
 : handle_irq()
 : do_IRQ()
 : ret_from_intr()
 : native_safe_halt()
 : default_idle()
 : arch_cpu_idle()
 : default_idle_call()
 : do_idle()
 : cpu_startup_entry()
 : rest_init()
 : start_kernel()
 : x86_64_start_reservations()
 : x86_64_start_kernel()
 : verify_cpu()
 :
 : -> #1 (&tty->write_wait){-.-.}:
 : __lock_acquire()
 : lock_acquire()
 : _raw_spin_lock_irqsave()
 : __wake_up_common_lock()
 : __wake_up()
 : tty_wakeup()
 : tty_port_default_wakeup()
 : tty_port_tty_wakeup()
 : uart_write_wakeup()
 : serial8250_tx_chars()
 : serial8250_handle_irq.part.25()
 : serial8250_default_handle_irq()
 : serial8250_interrupt()
 : __handle_irq_event_percpu()
 : handle_irq_event_percpu()
 : handle_irq_event()
 : handle_level_irq()
 : handle_irq()
 : do_IRQ()
 : ret_from_intr()
 : native_safe_halt()
 : default_idle()
 : arch_cpu_idle()
 : default_idle_call()
 : do_idle()
 : cpu_startup_entry()
 : rest_init()
 : start_kernel()
 : x86_64_start_reservations()
 : x86_64_start_kernel()
 : verify_cpu()
 :
 : -> #0 (&port_lock_key){-.-.}:
 : check_prev_add()
 : __lock_acquire()
 : lock_acquire()
 : _raw_spin_lock_irqsave()
 : serial8250_console_write()
 : univ8250_console_write()
 : console_unlock()
 : vprintk_emit()
 : vprintk_default()
 : vprintk_func()
 : printk()
 : ___ratelimit()
 : __printk_ratelimit()
 : select_fallback_rq()
 : sched_cpu_dying()
 : cpuhp_invoke_callback()
 : take_cpu_down()
 : multi_cpu_stop()
 : cpu_stopper_thread()
 : smpboot_thread_fn()
 : kthread()
 : ret_from_fork()
 :
 : other info that might help us debug this:
 :
 : Chain exists of:
 :   &port_lock_key --> &p->pi_lock --> &rq->lock
 :
 :  Possible unsafe locking scenario:
 :
 :        CPU0                    CPU1
 :        ----                    ----
 :   lock(&rq->lock);
 :                                lock(&p->pi_lock);
 :                                lock(&rq->lock);
 :   lock(&port_lock_key);
 :
 :  *** DEADLOCK ***
 :
 : 4 locks held by migration/8/62:
 : #0: (&p->pi_lock){-.-.}, at: sched_cpu_dying()
 : #1: (&rq->lock){-.-.}, at: sched_cpu_dying()
 : #2: (printk_ratelimit_state.lock){....}, at: ___ratelimit()
 : #3: (console_lock){+.+.}, at: vprintk_emit()
 :
 : stack backtrace:
 : CPU: 8 PID: 62 Comm: migration/8 Not tainted 4.14.0-rc2-next-20170927+ #252
 : Call Trace:
 : dump_stack()
 : print_circular_bug()
 : check_prev_add()
 : ? add_lock_to_list.isra.26()
 : ? check_usage()
 : ? kvm_clock_read()
 : ? kvm_sched_clock_read()
 : ? sched_clock()
 : ? check_preemption_disabled()
 : __lock_acquire()
 : ? __lock_acquire()
 : ? add_lock_to_list.isra.26()
 : ? debug_check_no_locks_freed()
 : ? memcpy()
 : lock_acquire()
 : ? serial8250_console_write()
 : _raw_spin_lock_irqsave()
 : ? serial8250_console_write()
 : serial8250_console_write()
 : ? serial8250_start_tx()
 : ? lock_acquire()
 : ? memcpy()
 : univ8250_console_write()
 : console_unlock()
 : ? __down_trylock_console_sem()
 : vprintk_emit()
 : vprintk_default()
 : vprintk_func()
 : printk()
 : ? show_regs_print_info()
 : ? lock_acquire()
 : ___ratelimit()
 : __printk_ratelimit()
 : select_fallback_rq()
 : sched_cpu_dying()
 : ? sched_cpu_starting()
 : ? rcutree_dying_cpu()
 : ? sched_cpu_starting()
 : cpuhp_invoke_callback()
 : ? cpu_disable_common()
 : take_cpu_down()
 : ? trace_hardirqs_off_caller()
 : ? cpuhp_invoke_callback()
 : multi_cpu_stop()
 : ? __this_cpu_preempt_check()
 : ? cpu_stop_queue_work()
 : cpu_stopper_thread()
 : ? cpu_stop_create()
 : smpboot_thread_fn()
 : ? sort_range()
 : ? schedule()
 : ? __kthread_parkme()
 : kthread()
 : ? sort_range()
 : ? kthread_create_on_node()
 : ret_from_fork()
 : process 9121 (trinity-c78) no longer affine to cpu8
 : smpboot: CPU 8 is now offline

Link: http://lkml.kernel.org/r/20170928120405.18273-1-sergey.senozhatsky@gmail.com
Fixes: 6b1d174b0c ("ratelimit: extend to print suppressed messages on release")
Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reported-by: Sasha Levin <levinsasha928@gmail.com>
Reviewed-by: Petr Mladek <pmladek@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Borislav Petkov <bp@suse.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-10-03 17:54:26 -07:00
Eric Biggers a70e43a59d lib/idr.c: fix comment for idr_replace()
idr_replace() returns the old value on success, not 0.

Link: http://lkml.kernel.org/r/20170918162642.37511-1-ebiggers3@gmail.com
Signed-off-by: Eric Biggers <ebiggers@google.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-10-03 17:54:25 -07:00
Colin Ian King 8cb5d74828 lib/lz4: make arrays static const, reduces object code size
Don't populate the read-only arrays dec32table and dec64table on the
stack, instead make them both static const.  Makes the object code
smaller by over 10K bytes:

  Before:
     text	   data	    bss	    dec	    hex	filename
    31500	      0	      0	  31500	   7b0c	lib/lz4/lz4_decompress.o

  After:
     text	   data	    bss	    dec	    hex	filename
    20237	    176	      0	  20413	   4fbd	lib/lz4/lz4_decompress.o

(gcc version 7.2.0 x86_64)

Link: http://lkml.kernel.org/r/20170921221939.20820-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Sven Schmidt <4sschmid@informatik.uni-hamburg.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-10-03 17:54:25 -07:00
Linus Torvalds c4142ed602 Merge tag 'driver-core-4.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
Pull driver core fixes from Greg KH:
 "Here are a few small fixes for 4.14-rc4.

  The removal of DRIVER_ATTR() was almost completed by 4.14-rc1, but one
  straggler made it in through some other tree (odds are, one of
  mine...) So there's a simple removal of the last user, and then
  finally the macro is removed from the tree.

  There's a fix for old crazy udev instances that insist on reloading a
  module when it is removed from the kernel due to the new uevents for
  bind/unbind. This fixes the reported regression, hopefully some year
  in the future we can drop the workaround, once users update to the
  latest version, but I'm not holding my breath.

  And then there's a build fix for a linker warning, and a buffer
  overflow fix to match the PCI fixes you took through the PCI tree in
  the same area.

  All of these have been in linux-next for a few weeks while I've been
  traveling, sorry for the delay"

* tag 'driver-core-4.14-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core:
  driver core: remove DRIVER_ATTR
  fpga: altera-cvp: remove DRIVER_ATTR() usage
  driver core: platform: Don't read past the end of "driver_override" buffer
  base: arch_topology: fix section mismatch build warnings
  driver core: suppress sending MODALIAS in UNBIND uevents
2017-10-03 08:57:07 -07:00
David S. Miller 1f8d31d189 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2017-09-23 10:16:53 -07:00
Linus Torvalds cd4175b116 Merge branch 'parisc-4.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux
Pull parisc fixes from Helge Deller:

 - Unbreak parisc bootloader by avoiding a gcc-7 optimization to convert
   multiple byte-accesses into one word-access.

 - Add missing HWPOISON page fault handler code. I completely missed
   that when I added HWPOISON support during this merge window and it
   only showed up now with the madvise07 LTP test case.

 - Fix backtrace unwinding to stop when stack start has been reached.

 - Issue warning if initrd has been loaded into memory regions with
   broken RAM modules.

 - Fix HPMC handler (parisc hardware fault handler) to comply with
   architecture specification.

 - Avoid compiler warnings about too large frame sizes.

 - Minor init-section fixes.

* 'parisc-4.14-2' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Unbreak bootloader due to gcc-7 optimizations
  parisc: Reintroduce option to gzip-compress the kernel
  parisc: Add HWPOISON page fault handler code
  parisc: Move init_per_cpu() into init section
  parisc: Check if initrd was loaded into broken RAM
  parisc: Add PDCE_CHECK instruction to HPMC handler
  parisc: Add wrapper for pdc_instr() firmware function
  parisc: Move start_parisc() into init section
  parisc: Stop unwinding at start of stack
  parisc: Fix too large frame size warnings
2017-09-23 06:14:06 -10:00
Linus Torvalds 71aa60f67f Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Pull networking fixes from David Miller:

 1) Fix NAPI poll list corruption in enic driver, from Christian
    Lamparter.

 2) Fix route use after free, from Eric Dumazet.

 3) Fix regression in reuseaddr handling, from Josef Bacik.

 4) Assert the size of control messages in compat handling since we copy
    it in from userspace twice. From Meng Xu.

 5) SMC layer bug fixes (missing RCU locking, bad refcounting, etc.)
    from Ursula Braun.

 6) Fix races in AF_PACKET fanout handling, from Willem de Bruijn.

 7) Don't use ARRAY_SIZE on spinlock array which might have zero
    entries, from Geert Uytterhoeven.

 8) Fix miscomputation of checksum in ipv6 udp code, from Subash Abhinov
    Kasiviswanathan.

 9) Push the ipv6 header properly in ipv6 GRE tunnel driver, from Xin
    Long.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (75 commits)
  inet: fix improper empty comparison
  net: use inet6_rcv_saddr to compare sockets
  net: set tb->fast_sk_family
  net: orphan frags on stand-alone ptype in dev_queue_xmit_nit
  MAINTAINERS: update git tree locations for ieee802154 subsystem
  net: prevent dst uses after free
  net: phy: Fix truncation of large IRQ numbers in phy_attached_print()
  net/smc: no close wait in case of process shut down
  net/smc: introduce a delay
  net/smc: terminate link group if out-of-sync is received
  net/smc: longer delay for client link group removal
  net/smc: adapt send request completion notification
  net/smc: adjust net_device refcount
  net/smc: take RCU read lock for routing cache lookup
  net/smc: add receive timeout check
  net/smc: add missing dev_put
  net: stmmac: Cocci spatch "of_table"
  lan78xx: Use default values loaded from EEPROM/OTP after reset
  lan78xx: Allow EEPROM write for less than MAX_EEPROM_SIZE
  lan78xx: Fix for eeprom read/write when device auto suspend
  ...
2017-09-23 05:41:27 -10:00
Helge Deller 432654df90 parisc: Fix too large frame size warnings
The parisc architecture has larger stack frames than most other
architectures on 32-bit kernels.

Increase the maximum allowed stack frame to 1280 bytes for parisc to
avoid warnings in the do_sys_poll() and pat_memconfig() functions.

Signed-off-by: Helge Deller <deller@gmx.de>
2017-09-22 19:46:07 +02:00
Florian Westphal 411d788a23 test_rhashtable: remove initdata annotation
kbuild test robot reported a section mismatch warning w. gcc 4.x:
WARNING: lib/test_rhashtable.o(.text+0x139e):
Section mismatch in reference from the function rhltable_insert.clone.3() to the variable .init.data:rhlt

so remove this annotation.

Fixes: cdd4de372e ("test_rhashtable: add test case for rhl_table interface")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-21 20:40:55 -07:00
Petar Penkov a90bcb86ae iov_iter: fix page_copy_sane for compound pages
Issue is that if the data crosses a page boundary inside a compound
page, this check will incorrectly trigger a WARN_ON.

To fix this, compute the order using the head of the compound page and
adjust the offset to be relative to that head.

Fixes: 72e809ed81 ("iov_iter: sanity checks for copy to/from page
primitives")

Signed-off-by: Petar Penkov <ppenkov@google.com>
CC: Al Viro <viro@zeniv.linux.org.uk>
CC: Eric Dumazet <edumazet@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
2017-09-20 23:27:48 -04:00
Eric Dumazet d464e84eed kobject: factorize skb setup in kobject_uevent_net_broadcast()
We can build one skb and let it be cloned in netlink.

This is much faster, and use less memory (all clones will
share the same skb->head)

Tested:

time perf record (for f in `seq 1 3000` ; do ip netns add tast$f; done)
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 4.110 MB perf.data (~179584 samples) ]

real    0m24.227s # instead of 0m52.554s
user    0m0.329s
sys 0m23.753s # instead of 0m51.375s

    14.77%       ip  [kernel.kallsyms]  [k] __ip6addrlbl_add
    14.56%       ip  [kernel.kallsyms]  [k] netlink_broadcast_filtered
    11.65%       ip  [kernel.kallsyms]  [k] netlink_has_listeners
     6.19%       ip  [kernel.kallsyms]  [k] _raw_spin_lock_irqsave
     5.66%       ip  [kernel.kallsyms]  [k] kobject_uevent_env
     4.97%       ip  [kernel.kallsyms]  [k] memset_erms
     4.67%       ip  [kernel.kallsyms]  [k] refcount_sub_and_test
     4.41%       ip  [kernel.kallsyms]  [k] _raw_read_lock
     3.59%       ip  [kernel.kallsyms]  [k] refcount_inc_not_zero
     3.13%       ip  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
     1.55%       ip  [kernel.kallsyms]  [k] __wake_up
     1.20%       ip  [kernel.kallsyms]  [k] strlen
     1.03%       ip  [kernel.kallsyms]  [k] __wake_up_common
     0.93%       ip  [kernel.kallsyms]  [k] consume_skb
     0.92%       ip  [kernel.kallsyms]  [k] netlink_trim
     0.87%       ip  [kernel.kallsyms]  [k] insert_header
     0.63%       ip  [kernel.kallsyms]  [k] unmap_page_range

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:32:23 -07:00
Eric Dumazet 4a336a23d6 kobject: copy env blob in one go
No need to iterate over strings, just copy in one efficient memcpy() call.

Tested:
time perf record "(for f in `seq 1 3000` ; do ip netns add tast$f; done)"
[ perf record: Woken up 10 times to write data ]
[ perf record: Captured and wrote 8.224 MB perf.data (~359301 samples) ]

real    0m52.554s  # instead of 1m7.492s
user    0m0.309s
sys 0m51.375s # instead of 1m6.875s

     9.88%       ip  [kernel.kallsyms]  [k] netlink_broadcast_filtered
     8.86%       ip  [kernel.kallsyms]  [k] string
     7.37%       ip  [kernel.kallsyms]  [k] __ip6addrlbl_add
     5.68%       ip  [kernel.kallsyms]  [k] netlink_has_listeners
     5.52%       ip  [kernel.kallsyms]  [k] memcpy_erms
     4.76%       ip  [kernel.kallsyms]  [k] __alloc_skb
     4.54%       ip  [kernel.kallsyms]  [k] vsnprintf
     3.94%       ip  [kernel.kallsyms]  [k] format_decode
     3.80%       ip  [kernel.kallsyms]  [k] kmem_cache_alloc_node_trace
     3.71%       ip  [kernel.kallsyms]  [k] kmem_cache_alloc_node
     3.66%       ip  [kernel.kallsyms]  [k] kobject_uevent_env
     3.38%       ip  [kernel.kallsyms]  [k] strlen
     2.65%       ip  [kernel.kallsyms]  [k] _raw_spin_lock_irqsave
     2.20%       ip  [kernel.kallsyms]  [k] kfree
     2.09%       ip  [kernel.kallsyms]  [k] memset_erms
     2.07%       ip  [kernel.kallsyms]  [k] ___cache_free
     1.95%       ip  [kernel.kallsyms]  [k] kmem_cache_free
     1.91%       ip  [kernel.kallsyms]  [k] _raw_read_lock
     1.45%       ip  [kernel.kallsyms]  [k] ksize
     1.25%       ip  [kernel.kallsyms]  [k] _raw_spin_unlock_irqrestore
     1.00%       ip  [kernel.kallsyms]  [k] widen_string

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:32:23 -07:00
Eric Dumazet 16dff336b3 kobject: add kobject_uevent_net_broadcast()
This removes some #ifdef pollution and will ease follow up patches.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:32:23 -07:00
Florian Westphal cdd4de372e test_rhashtable: add test case for rhl_table interface
also test rhltable.  rhltable remove operations are slow as
deletions require a list walk, thus test with 1/16th of the given
entry count number to get a run duration similar to rhashtable one.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:15:47 -07:00
Florian Westphal a6359bd8dd test_rhashtable: add a check for max_size
add a test that tries to insert more than max_size elements.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:15:47 -07:00
Florian Westphal f651616e79 test_rhashtable: don't use global entries variable
pass the entries to test as an argument instead.
Followup patch will add an rhlist test case; rhlist delete opererations
are slow so we need to use a smaller number to test it.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:15:47 -07:00
Florian Westphal 7e936bd734 test_rhashtable: don't allocate huge static array
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 16:15:47 -07:00
Andreas Gruenbacher 0647169cf9 rhashtable: Documentation tweak
Clarify that rhashtable_walk_{stop,start} will not reset the iterator to
the beginning of the hash table.  Confusion between rhashtable_walk_enter
and rhashtable_walk_start has already lead to a bug.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-09-19 15:18:33 -07:00
Dmitry Torokhov 6878e7de6a driver core: suppress sending MODALIAS in UNBIND uevents
The current udev rules cause modules to be loaded on all device events save
for "remove". With the introduction of KOBJ_BIND/KOBJ_UNBIND this causes
issues, as driver modules that have devices bound to their drivers get
immediately reloaded, and it appears to the user that module unloading doe
snot work.

The standard udev matching rule is foillowing:

ENV{MODALIAS}=="?*", RUN{builtin}+="kmod load $env{MODALIAS}"

Given that MODALIAS data is not terribly useful for UNBIND event, let's zap
it from the generated uevent environment until we get userspace updated
with the correct udev rule that only loads modules on "add" event.

Reported-by: Jakub Kicinski <kubakici@wp.pl>
Tested-by: Jakub Kicinski <kubakici@wp.pl>
Fixes: 1455cf8dbf ("driver core: emit uevents when device is bound ...")
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-09-18 16:48:33 +02:00
Linus Torvalds e7cdb60fd2 Merge branch 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull zstd support from Chris Mason:
 "Nick Terrell's patch series to add zstd support to the kernel has been
  floating around for a while. After talking with Dave Sterba, Herbert
  and Phillip, we decided to send the whole thing in as one pull
  request.

  zstd is a big win in speed over zlib and in compression ratio over
  lzo, and the compression team here at FB has gotten great results
  using it in production. Nick will continue to update the kernel side
  with new improvements from the open source zstd userland code.

  Nick has a number of benchmarks for the main zstd code in his lib/zstd
  commit:

      I ran the benchmarks on a Ubuntu 14.04 VM with 2 cores and 4 GiB
      of RAM. The VM is running on a MacBook Pro with a 3.1 GHz Intel
      Core i7 processor, 16 GB of RAM, and a SSD. I benchmarked using
      `silesia.tar` [3], which is 211,988,480 B large. Run the following
      commands for the benchmark:

        sudo modprobe zstd_compress_test
        sudo mknod zstd_compress_test c 245 0
        sudo cp silesia.tar zstd_compress_test

      The time is reported by the time of the userland `cp`.
      The MB/s is computed with

        1,536,217,008 B / time(buffer size, hash)

      which includes the time to copy from userland.
      The Adjusted MB/s is computed with

        1,536,217,088 B / (time(buffer size, hash) - time(buffer size, none)).

      The memory reported is the amount of memory the compressor
      requests.

        | Method   | Size (B) | Time (s) | Ratio | MB/s    | Adj MB/s | Mem (MB) |
        |----------|----------|----------|-------|---------|----------|----------|
        | none     | 11988480 |    0.100 |     1 | 2119.88 |        - |        - |
        | zstd -1  | 73645762 |    1.044 | 2.878 |  203.05 |   224.56 |     1.23 |
        | zstd -3  | 66988878 |    1.761 | 3.165 |  120.38 |   127.63 |     2.47 |
        | zstd -5  | 65001259 |    2.563 | 3.261 |   82.71 |    86.07 |     2.86 |
        | zstd -10 | 60165346 |   13.242 | 3.523 |   16.01 |    16.13 |    13.22 |
        | zstd -15 | 58009756 |   47.601 | 3.654 |    4.45 |     4.46 |    21.61 |
        | zstd -19 | 54014593 |  102.835 | 3.925 |    2.06 |     2.06 |    60.15 |
        | zlib -1  | 77260026 |    2.895 | 2.744 |   73.23 |    75.85 |     0.27 |
        | zlib -3  | 72972206 |    4.116 | 2.905 |   51.50 |    52.79 |     0.27 |
        | zlib -6  | 68190360 |    9.633 | 3.109 |   22.01 |    22.24 |     0.27 |
        | zlib -9  | 67613382 |   22.554 | 3.135 |    9.40 |     9.44 |     0.27 |

      I benchmarked zstd decompression using the same method on the same
      machine. The benchmark file is located in the upstream zstd repo
      under `contrib/linux-kernel/zstd_decompress_test.c` [4]. The
      memory reported is the amount of memory required to decompress
      data compressed with the given compression level. If you know the
      maximum size of your input, you can reduce the memory usage of
      decompression irrespective of the compression level.

        | Method   | Time (s) | MB/s    | Adjusted MB/s | Memory (MB) |
        |----------|----------|---------|---------------|-------------|
        | none     |    0.025 | 8479.54 |             - |           - |
        | zstd -1  |    0.358 |  592.15 |        636.60 |        0.84 |
        | zstd -3  |    0.396 |  535.32 |        571.40 |        1.46 |
        | zstd -5  |    0.396 |  535.32 |        571.40 |        1.46 |
        | zstd -10 |    0.374 |  566.81 |        607.42 |        2.51 |
        | zstd -15 |    0.379 |  559.34 |        598.84 |        4.61 |
        | zstd -19 |    0.412 |  514.54 |        547.77 |        8.80 |
        | zlib -1  |    0.940 |  225.52 |        231.68 |        0.04 |
        | zlib -3  |    0.883 |  240.08 |        247.07 |        0.04 |
        | zlib -6  |    0.844 |  251.17 |        258.84 |        0.04 |
        | zlib -9  |    0.837 |  253.27 |        287.64 |        0.04 |

  I ran a long series of tests and benchmarks on the btrfs side and the
  gains are very similar to the core benchmarks Nick ran"

* 'zstd-minimal' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  squashfs: Add zstd support
  btrfs: Add zstd support
  lib: Add zstd modules
  lib: Add xxhash module
2017-09-14 17:30:49 -07:00
Michal Hocko 0ee931c4e3 mm: treewide: remove GFP_TEMPORARY allocation flag
GFP_TEMPORARY was introduced by commit e12ba74d8f ("Group short-lived
and reclaimable kernel allocations") along with __GFP_RECLAIMABLE.  It's
primary motivation was to allow users to tell that an allocation is
short lived and so the allocator can try to place such allocations close
together and prevent long term fragmentation.  As much as this sounds
like a reasonable semantic it becomes much less clear when to use the
highlevel GFP_TEMPORARY allocation flag.  How long is temporary? Can the
context holding that memory sleep? Can it take locks? It seems there is
no good answer for those questions.

The current implementation of GFP_TEMPORARY is basically GFP_KERNEL |
__GFP_RECLAIMABLE which in itself is tricky because basically none of
the existing caller provide a way to reclaim the allocated memory.  So
this is rather misleading and hard to evaluate for any benefits.

I have checked some random users and none of them has added the flag
with a specific justification.  I suspect most of them just copied from
other existing users and others just thought it might be a good idea to
use without any measuring.  This suggests that GFP_TEMPORARY just
motivates for cargo cult usage without any reasoning.

I believe that our gfp flags are quite complex already and especially
those with highlevel semantic should be clearly defined to prevent from
confusion and abuse.  Therefore I propose dropping GFP_TEMPORARY and
replace all existing users to simply use GFP_KERNEL.  Please note that
SLAB users with shrinkers will still get __GFP_RECLAIMABLE heuristic and
so they will be placed properly for memory fragmentation prevention.

I can see reasons we might want some gfp flag to reflect shorterm
allocations but I propose starting from a clear semantic definition and
only then add users with proper justification.

This was been brought up before LSF this year by Matthew [1] and it
turned out that GFP_TEMPORARY really doesn't have a clear semantic.  It
seems to be a heuristic without any measured advantage for most (if not
all) its current users.  The follow up discussion has revealed that
opinions on what might be temporary allocation differ a lot between
developers.  So rather than trying to tweak existing users into a
semantic which they haven't expected I propose to simply remove the flag
and start from scratch if we really need a semantic for short term
allocations.

[1] http://lkml.kernel.org/r/20170118054945.GD18349@bombadil.infradead.org

[akpm@linux-foundation.org: fix typo]
[akpm@linux-foundation.org: coding-style fixes]
[sfr@canb.auug.org.au: drm/i915: fix up]
  Link: http://lkml.kernel.org/r/20170816144703.378d4f4d@canb.auug.org.au
Link: http://lkml.kernel.org/r/20170728091904.14627-1-mhocko@kernel.org
Signed-off-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Neil Brown <neilb@suse.de>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-09-13 18:53:16 -07:00
Geert Uytterhoeven 8185f5708d lib/test_bitmap.c: use ULL suffix for 64-bit constants
With gcc 4.1.2:

  lib/test_bitmap.c:189: warning: integer constant is too large for `long' type
  lib/test_bitmap.c:190: warning: integer constant is too large for `long' type
  lib/test_bitmap.c:194: warning: integer constant is too large for `long' type
  lib/test_bitmap.c:195: warning: integer constant is too large for `long' type

Add the missing "ULL" suffix to fix this.

Link: http://lkml.kernel.org/r/1505040523-31230-1-git-send-email-geert@linux-m68k.org
Fixes: 60ef690018 ("bitmap: introduce BITMAP_FROM_U64()")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Yury Norov <ynorov@caviumnetworks.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-09-13 18:53:15 -07:00
Eric Biggers a47f68d6a9 idr: remove WARN_ON_ONCE() when trying to replace negative ID
IDR only supports non-negative IDs.  There used to be a 'WARN_ON_ONCE(id <
0)' in idr_replace(), but it was intentionally removed by commit
2e1c9b2867 ("idr: remove WARN_ON_ONCE() on negative IDs").

Then it was added back by commit 0a835c4f09 ("Reimplement IDR and IDA
using the radix tree").  However it seems that adding it back was a
mistake, given that some users such as drm_gem_handle_delete()
(DRM_IOCTL_GEM_CLOSE) pass in a value from userspace to idr_replace(),
allowing the WARN_ON_ONCE to be triggered.  drm_gem_handle_delete()
actually just wants idr_replace() to return an error code if the ID is
not allocated, including in the case where the ID is invalid (negative).

So once again remove the bogus WARN_ON_ONCE().

This bug was found by syzkaller, which encountered the following
warning:

    WARNING: CPU: 3 PID: 3008 at lib/idr.c:157 idr_replace+0x1d8/0x240 lib/idr.c:157
    Kernel panic - not syncing: panic_on_warn set ...

    CPU: 3 PID: 3008 Comm: syzkaller218828 Not tainted 4.13.0-rc4-next-20170811 #2
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
    Call Trace:
     fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:190
     do_trap_no_signal arch/x86/kernel/traps.c:224 [inline]
     do_trap+0x260/0x390 arch/x86/kernel/traps.c:273
     do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:310
     do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:323
     invalid_op+0x1e/0x30 arch/x86/entry/entry_64.S:930
    RIP: 0010:idr_replace+0x1d8/0x240 lib/idr.c:157
    RSP: 0018:ffff8800394bf9f8 EFLAGS: 00010297
    RAX: ffff88003c6c60c0 RBX: 1ffff10007297f43 RCX: 0000000000000000
    RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff8800394bfa78
    RBP: ffff8800394bfae0 R08: ffffffff82856487 R09: 0000000000000000
    R10: ffff8800394bf9a8 R11: ffff88006c8bae28 R12: ffffffffffffffff
    R13: ffff8800394bfab8 R14: dffffc0000000000 R15: ffff8800394bfbc8
     drm_gem_handle_delete+0x33/0xa0 drivers/gpu/drm/drm_gem.c:297
     drm_gem_close_ioctl+0xa1/0xe0 drivers/gpu/drm/drm_gem.c:671
     drm_ioctl_kernel+0x1e7/0x2e0 drivers/gpu/drm/drm_ioctl.c:729
     drm_ioctl+0x72e/0xa50 drivers/gpu/drm/drm_ioctl.c:825
     vfs_ioctl fs/ioctl.c:45 [inline]
     do_vfs_ioctl+0x1b1/0x1520 fs/ioctl.c:685
     SYSC_ioctl fs/ioctl.c:700 [inline]
     SyS_ioctl+0x8f/0xc0 fs/ioctl.c:691
     entry_SYSCALL_64_fastpath+0x1f/0xbe

Here is a C reproducer:

    #include <fcntl.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/drm.h>

    int main(void)
    {
            int cardfd = open("/dev/dri/card0", O_RDONLY);

            ioctl(cardfd, DRM_IOCTL_GEM_CLOSE,
                  &(struct drm_gem_close) { .handle = -1 } );
    }

Link: http://lkml.kernel.org/r/20170906235306.20534-1-ebiggers3@gmail.com
Fixes: 0a835c4f09 ("Reimplement IDR and IDA using the radix tree")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: <stable@vger.kernel.org> [v4.11+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-09-13 18:53:15 -07:00