From ad41706c771a038e9a334fa55216abd69b32bfdf Mon Sep 17 00:00:00 2001 From: Xiaolong Huang Date: Fri, 20 Aug 2021 03:50:34 +0800 Subject: [PATCH 001/105] net: qrtr: fix another OOB Read in qrtr_endpoint_post commit 7e78c597c3ebfd0cb329aa09a838734147e4f117 upstream. This check was incomplete, did not consider size is 0: if (len != ALIGN(size, 4) + hdrlen) goto err; if size from qrtr_hdr is 0, the result of ALIGN(size, 4) will be 0, In case of len == hdrlen and size == 0 in header this check won't fail and if (cb->type == QRTR_TYPE_NEW_SERVER) { /* Remote node endpoint can bridge other distant nodes */ const struct qrtr_ctrl_pkt *pkt = data + hdrlen; qrtr_node_assign(node, le32_to_cpu(pkt->server.node)); } will also read out of bound from data, which is hdrlen allocated block. Fixes: 194ccc88297a ("net: qrtr: Support decoding incoming v2 packets") Fixes: ad9d24c9429e ("net: qrtr: fix OOB Read in qrtr_endpoint_post") Signed-off-by: Xiaolong Huang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 6826558483f9..56cffbfa000b 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -486,7 +486,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) goto err; } - if (len != ALIGN(size, 4) + hdrlen) + if (!size || len != ALIGN(size, 4) + hdrlen) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && From 9dd6f6d89693d8f09af53d2488afad22a8a44a57 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Aug 2021 21:02:09 +0200 Subject: [PATCH 002/105] bpf: Fix ringbuf helper function compatibility commit 5b029a32cfe4600f5e10e36b41778506b90fd4de upstream. Commit 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") extended check_map_func_compatibility() by enforcing map -> helper function match, but not helper -> map type match. Due to this all of the bpf_ringbuf_*() helper functions could be used with a wrong map type such as array or hash map, leading to invalid access due to type confusion. Also, both BPF_FUNC_ringbuf_{submit,discard} have ARG_PTR_TO_ALLOC_MEM as argument and not a BPF map. Therefore, their check_map_func_compatibility() presence is incorrect since it's only for map type checking. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Ryota Shiga (Flatt Security) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1410f128c404..29d4f4e37595 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4693,8 +4693,6 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_RINGBUF: if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve && - func_id != BPF_FUNC_ringbuf_submit && - func_id != BPF_FUNC_ringbuf_discard && func_id != BPF_FUNC_ringbuf_query) goto error; break; @@ -4798,6 +4796,12 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; + case BPF_FUNC_ringbuf_output: + case BPF_FUNC_ringbuf_reserve: + case BPF_FUNC_ringbuf_query: + if (map->map_type != BPF_MAP_TYPE_RINGBUF) + goto error; + break; case BPF_FUNC_get_stackid: if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; From d81ddadabdee32732477f9f85f1c7cec3c89b00f Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Aug 2021 10:36:46 -0700 Subject: [PATCH 003/105] bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper commit b910eaaaa4b89976ef02e5d6448f3f73dc671d91 upstream. Jiri Olsa reported a bug ([1]) in kernel where cgroup local storage pointer may be NULL in bpf_get_local_storage() helper. There are two issues uncovered by this bug: (1). kprobe or tracepoint prog incorrectly sets cgroup local storage before prog run, (2). due to change from preempt_disable to migrate_disable, preemption is possible and percpu storage might be overwritten by other tasks. This issue (1) is fixed in [2]. This patch tried to address issue (2). The following shows how things can go wrong: task 1: bpf_cgroup_storage_set() for percpu local storage preemption happens task 2: bpf_cgroup_storage_set() for percpu local storage preemption happens task 1: run bpf program task 1 will effectively use the percpu local storage setting by task 2 which will be either NULL or incorrect ones. Instead of just one common local storage per cpu, this patch fixed the issue by permitting 8 local storages per cpu and each local storage is identified by a task_struct pointer. This way, we allow at most 8 nested preemption between bpf_cgroup_storage_set() and bpf_cgroup_storage_unset(). The percpu local storage slot is released (calling bpf_cgroup_storage_unset()) by the same task after bpf program finished running. bpf_test_run() is also fixed to use the new bpf_cgroup_storage_set() interface. The patch is tested on top of [2] with reproducer in [1]. Without this patch, kernel will emit error in 2-3 minutes. With this patch, after one hour, still no error. [1] https://lore.kernel.org/bpf/CAKH8qBuXCfUz=w8L+Fj74OaUpbosO29niYwTki7e3Ag044_aww@mail.gmail.com/T [2] https://lore.kernel.org/bpf/20210309185028.3763817-1-yhs@fb.com Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov Acked-by: Roman Gushchin Link: https://lore.kernel.org/bpf/20210323055146.3334476-1-yhs@fb.com Cc: # 5.10.x Signed-off-by: Stanislav Fomichev Signed-off-by: Sasha Levin --- include/linux/bpf-cgroup.h | 57 ++++++++++++++++++++++++++++++++------ include/linux/bpf.h | 15 +++++++--- kernel/bpf/helpers.c | 15 +++++++--- kernel/bpf/local_storage.c | 5 ++-- net/bpf/test_run.c | 6 +++- 5 files changed, 79 insertions(+), 19 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index ed71bd1a0825..53f14e8827cc 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -20,14 +20,25 @@ struct bpf_sock_ops_kern; struct bpf_cgroup_storage; struct ctl_table; struct ctl_table_header; +struct task_struct; #ifdef CONFIG_CGROUP_BPF extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) -DECLARE_PER_CPU(struct bpf_cgroup_storage*, - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +#define BPF_CGROUP_STORAGE_NEST_MAX 8 + +struct bpf_cgroup_storage_info { + struct task_struct *task; + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; +}; + +/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks + * to use bpf cgroup storage simultaneously. + */ +DECLARE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -156,13 +167,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type( return BPF_CGROUP_STORAGE_SHARED; } -static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) +static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage + *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { enum bpf_cgroup_storage_type stype; + int i, err = 0; - for_each_cgroup_storage_type(stype) - this_cpu_write(bpf_cgroup_storage[stype], storage[stype]); + preempt_disable(); + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) + continue; + + this_cpu_write(bpf_cgroup_storage_info[i].task, current); + for_each_cgroup_storage_type(stype) + this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], + storage[stype]); + goto out; + } + err = -EBUSY; + WARN_ON_ONCE(1); + +out: + preempt_enable(); + return err; +} + +static inline void bpf_cgroup_storage_unset(void) +{ + int i; + + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + continue; + + this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); + return; + } } struct bpf_cgroup_storage * @@ -410,8 +450,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline void bpf_cgroup_storage_set( - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {} +static inline int bpf_cgroup_storage_set( + struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } +static inline void bpf_cgroup_storage_unset(void) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c3ccb242d199..3f93a50c25ef 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1089,9 +1089,14 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (set_cg_storage) \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ - _ret &= func(_prog, ctx); \ + if (!set_cg_storage) { \ + _ret &= func(_prog, ctx); \ + } else { \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ + _ret &= func(_prog, ctx); \ + bpf_cgroup_storage_unset(); \ + } \ _item++; \ } \ _out: \ @@ -1135,8 +1140,10 @@ _out: \ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ + break; \ ret = func(_prog, ctx); \ + bpf_cgroup_storage_unset(); \ _ret &= (ret & 1); \ _cn |= (ret & 2); \ _item++; \ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index f7e99bb8c3b6..3bd7fbd8c543 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -372,8 +372,8 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { }; #ifdef CONFIG_CGROUP_BPF -DECLARE_PER_CPU(struct bpf_cgroup_storage*, - bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); +DECLARE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) { @@ -382,10 +382,17 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) * verifier checks that its value is correct. */ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); - struct bpf_cgroup_storage *storage; + struct bpf_cgroup_storage *storage = NULL; void *ptr; + int i; - storage = this_cpu_read(bpf_cgroup_storage[stype]); + for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { + if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + continue; + + storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); + break; + } if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 571bb351ed3b..b139247d2dd3 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -9,10 +9,11 @@ #include #include -DEFINE_PER_CPU(struct bpf_cgroup_storage*, bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]); - #ifdef CONFIG_CGROUP_BPF +DEFINE_PER_CPU(struct bpf_cgroup_storage_info, + bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); + #include "../cgroup/cgroup-internal.h" #define LOCAL_STORAGE_CREATE_FLAG_MASK \ diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index e7cbd1b4a5e5..72d424a5a142 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -42,13 +42,17 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, migrate_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - bpf_cgroup_storage_set(storage); + ret = bpf_cgroup_storage_set(storage); + if (ret) + break; if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = BPF_PROG_RUN(prog, ctx); + bpf_cgroup_storage_unset(); + if (signal_pending(current)) { ret = -EINTR; break; From 34cc80ec12d61acd999cf72973c23751be3e40ec Mon Sep 17 00:00:00 2001 From: Derek Fang Date: Wed, 21 Jul 2021 21:31:21 +0800 Subject: [PATCH 004/105] ASoC: rt5682: Adjust headset volume button threshold [ Upstream commit 6d20bf7c020f417fdef1810a22da17c126603472 ] Adjust the threshold of headset button volume+ to fix the wrong button detection issue with some brand headsets. Signed-off-by: Derek Fang Link: https://lore.kernel.org/r/20210721133121.12333-1-derek.fang@realtek.com Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/codecs/rt5682.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 2e41b8c169e5..0486b1469799 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -44,6 +44,7 @@ static const struct reg_sequence patch_list[] = { {RT5682_I2C_CTRL, 0x000f}, {RT5682_PLL2_INTERNAL, 0x8266}, {RT5682_SAR_IL_CMD_3, 0x8365}, + {RT5682_SAR_IL_CMD_6, 0x0180}, }; void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev) From 0af6a9f82ca36c795fd35248ce731f1284cc34af Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 26 Jul 2021 20:41:23 +0100 Subject: [PATCH 005/105] ASoC: component: Remove misplaced prefix handling in pin control functions [ Upstream commit 31428c78748cafdd9352e1f622eb89bf453d9700 ] When the component level pin control functions were added they for some no longer obvious reason handled adding prefixing of widget names. This meant that when the lack of prefix handling in the DAPM level pin operations was fixed by ae4fc532244b3bb4d (ASoC: dapm: use component prefix when checking widget names) the one device using the component level API ended up with the prefix being applied twice, causing all lookups to fail. Fix this by removing the redundant prefixing from the component code, which has the nice side effect of also making that code much simpler. Reported-by: Richard Fitzgerald Signed-off-by: Mark Brown Tested-by: Lucas Tanure Link: https://lore.kernel.org/r/20210726194123.54585-1-broonie@kernel.org Signed-off-by: Mark Brown Signed-off-by: Sasha Levin --- sound/soc/soc-component.c | 63 +++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 728e93f35ffb..4295c0592901 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -135,86 +135,75 @@ int snd_soc_component_set_bias_level(struct snd_soc_component *component, return soc_component_ret(component, ret); } -static int soc_component_pin(struct snd_soc_component *component, - const char *pin, - int (*pin_func)(struct snd_soc_dapm_context *dapm, - const char *pin)) -{ - struct snd_soc_dapm_context *dapm = - snd_soc_component_get_dapm(component); - char *full_name; - int ret; - - if (!component->name_prefix) { - ret = pin_func(dapm, pin); - goto end; - } - - full_name = kasprintf(GFP_KERNEL, "%s %s", component->name_prefix, pin); - if (!full_name) { - ret = -ENOMEM; - goto end; - } - - ret = pin_func(dapm, full_name); - kfree(full_name); -end: - return soc_component_ret(component, ret); -} - int snd_soc_component_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin); int snd_soc_component_enable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_enable_pin_unlocked); int snd_soc_component_disable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin); int snd_soc_component_disable_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_disable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_disable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_disable_pin_unlocked); int snd_soc_component_nc_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin); int snd_soc_component_nc_pin_unlocked(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_nc_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_nc_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_nc_pin_unlocked); int snd_soc_component_get_pin_status(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_get_pin_status); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_get_pin_status(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_get_pin_status); int snd_soc_component_force_enable_pin(struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin); @@ -222,7 +211,9 @@ int snd_soc_component_force_enable_pin_unlocked( struct snd_soc_component *component, const char *pin) { - return soc_component_pin(component, pin, snd_soc_dapm_force_enable_pin_unlocked); + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(component); + return snd_soc_dapm_force_enable_pin_unlocked(dapm, pin); } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); From a13a2df0b14910eea92229aadb209bdc86e23600 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 10 Jul 2021 07:50:33 -0700 Subject: [PATCH 006/105] ARC: Fix CONFIG_STACKDEPOT [ Upstream commit bf79167fd86f3b97390fe2e70231d383526bd9cc ] Enabling CONFIG_STACKDEPOT results in the following build error. arc-elf-ld: lib/stackdepot.o: in function `filter_irq_stacks': stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x456): undefined reference to `__irqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x476): undefined reference to `__irqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x484): undefined reference to `__softirqentry_text_start' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' arc-elf-ld: stackdepot.c:(.text+0x48c): undefined reference to `__softirqentry_text_end' Other architectures address this problem by adding IRQENTRY_TEXT and SOFTIRQENTRY_TEXT to the text segment, so do the same here. Signed-off-by: Guenter Roeck Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/kernel/vmlinux.lds.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 33ce59d91461..f67e4ad7b3ce 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -88,6 +88,8 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT *(.fixup) *(.gnu.warning) } From f68ad168e23565ce2a3891fec537cfaf8410d1e6 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 27 Jul 2021 00:29:19 +0200 Subject: [PATCH 007/105] netfilter: conntrack: collect all entries in one cycle [ Upstream commit 4608fdfc07e116f9fc0895beb40abad7cdb5ee3d ] Michal Kubecek reports that conntrack gc is responsible for frequent wakeups (every 125ms) on idle systems. On busy systems, timed out entries are evicted during lookup. The gc worker is only needed to remove entries after system becomes idle after a busy period. To resolve this, always scan the entire table. If the scan is taking too long, reschedule so other work_structs can run and resume from next bucket. After a completed scan, wait for 2 minutes before the next cycle. Heuristics for faster re-schedule are removed. GC_SCAN_INTERVAL could be exposed as a sysctl in the future to allow tuning this as-needed or even turn the gc worker off. Reported-by: Michal Kubecek Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin --- net/netfilter/nf_conntrack_core.c | 71 ++++++++++--------------------- 1 file changed, 22 insertions(+), 49 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f9f2af26ccb3..54430a34d2f6 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -66,22 +66,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; - u32 last_bucket; + u32 next_bucket; bool exiting; bool early_drop; - long next_gc_run; }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; -/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ -#define GC_MAX_BUCKETS_DIV 128u -/* upper bound of full table scan */ -#define GC_MAX_SCAN_JIFFIES (16u * HZ) -/* desired ratio of entries found to be expired */ -#define GC_EVICT_RATIO 50u +#define GC_SCAN_INTERVAL (120u * HZ) +#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) static struct conntrack_gc_work conntrack_gc_work; @@ -1352,17 +1347,13 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct) static void gc_worker(struct work_struct *work) { - unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u); - unsigned int i, goal, buckets = 0, expired_count = 0; - unsigned int nf_conntrack_max95 = 0; + unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION; + unsigned int i, hashsz, nf_conntrack_max95 = 0; + unsigned long next_run = GC_SCAN_INTERVAL; struct conntrack_gc_work *gc_work; - unsigned int ratio, scanned = 0; - unsigned long next_run; - gc_work = container_of(work, struct conntrack_gc_work, dwork.work); - goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV; - i = gc_work->last_bucket; + i = gc_work->next_bucket; if (gc_work->early_drop) nf_conntrack_max95 = nf_conntrack_max / 100u * 95u; @@ -1370,22 +1361,21 @@ static void gc_worker(struct work_struct *work) struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int hashsz; struct nf_conn *tmp; - i++; rcu_read_lock(); nf_conntrack_get_ht(&ct_hash, &hashsz); - if (i >= hashsz) - i = 0; + if (i >= hashsz) { + rcu_read_unlock(); + break; + } hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct net *net; tmp = nf_ct_tuplehash_to_ctrack(h); - scanned++; if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) { nf_ct_offload_timeout(tmp); continue; @@ -1393,7 +1383,6 @@ static void gc_worker(struct work_struct *work) if (nf_ct_is_expired(tmp)) { nf_ct_gc_expired(tmp); - expired_count++; continue; } @@ -1425,7 +1414,14 @@ static void gc_worker(struct work_struct *work) */ rcu_read_unlock(); cond_resched(); - } while (++buckets < goal); + i++; + + if (time_after(jiffies, end_time) && i < hashsz) { + gc_work->next_bucket = i; + next_run = 0; + break; + } + } while (i < hashsz); if (gc_work->exiting) return; @@ -1436,40 +1432,17 @@ static void gc_worker(struct work_struct *work) * * This worker is only here to reap expired entries when system went * idle after a busy period. - * - * The heuristics below are supposed to balance conflicting goals: - * - * 1. Minimize time until we notice a stale entry - * 2. Maximize scan intervals to not waste cycles - * - * Normally, expire ratio will be close to 0. - * - * As soon as a sizeable fraction of the entries have expired - * increase scan frequency. */ - ratio = scanned ? expired_count * 100 / scanned : 0; - if (ratio > GC_EVICT_RATIO) { - gc_work->next_gc_run = min_interval; - } else { - unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV; - - BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0); - - gc_work->next_gc_run += min_interval; - if (gc_work->next_gc_run > max) - gc_work->next_gc_run = max; + if (next_run) { + gc_work->early_drop = false; + gc_work->next_bucket = 0; } - - next_run = gc_work->next_gc_run; - gc_work->last_bucket = i; - gc_work->early_drop = false; queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); - gc_work->next_gc_run = HZ; gc_work->exiting = false; } From 6815e21fe28ddfe8f55b4ca53031957dcd65843a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Fri, 6 Aug 2021 16:21:24 +0800 Subject: [PATCH 008/105] once: Fix panic when module unload [ Upstream commit 1027b96ec9d34f9abab69bc1a4dc5b1ad8ab1349 ] DO_ONCE DEFINE_STATIC_KEY_TRUE(___once_key); __do_once_done once_disable_jump(once_key); INIT_WORK(&w->work, once_deferred); struct once_work *w; w->key = key; schedule_work(&w->work); module unload //*the key is destroy* process_one_work once_deferred BUG_ON(!static_key_enabled(work->key)); static_key_count((struct static_key *)x) //*access key, crash* When module uses DO_ONCE mechanism, it could crash due to the above concurrency problem, we could reproduce it with link[1]. Fix it by add/put module refcount in the once work process. [1] https://lore.kernel.org/netdev/eaa6c371-465e-57eb-6be9-f4b16b9d7cbf@huawei.com/ Cc: Hannes Frederic Sowa Cc: Daniel Borkmann Cc: David S. Miller Cc: Eric Dumazet Reported-by: Minmin chen Signed-off-by: Kefeng Wang Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/once.h | 4 ++-- lib/once.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/linux/once.h b/include/linux/once.h index 9225ee6d96c7..ae6f4eb41cbe 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -7,7 +7,7 @@ bool __do_once_start(bool *done, unsigned long *flags); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags); + unsigned long *flags, struct module *mod); /* Call a function exactly once. The idea of DO_ONCE() is to perform * a function call such as initialization of random seeds, etc, only @@ -46,7 +46,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key, if (unlikely(___ret)) { \ func(__VA_ARGS__); \ __do_once_done(&___done, &___once_key, \ - &___flags); \ + &___flags, THIS_MODULE); \ } \ } \ ___ret; \ diff --git a/lib/once.c b/lib/once.c index 8b7d6235217e..59149bf3bfb4 100644 --- a/lib/once.c +++ b/lib/once.c @@ -3,10 +3,12 @@ #include #include #include +#include struct once_work { struct work_struct work; struct static_key_true *key; + struct module *module; }; static void once_deferred(struct work_struct *w) @@ -16,10 +18,11 @@ static void once_deferred(struct work_struct *w) work = container_of(w, struct once_work, work); BUG_ON(!static_key_enabled(work->key)); static_branch_disable(work->key); + module_put(work->module); kfree(work); } -static void once_disable_jump(struct static_key_true *key) +static void once_disable_jump(struct static_key_true *key, struct module *mod) { struct once_work *w; @@ -29,6 +32,8 @@ static void once_disable_jump(struct static_key_true *key) INIT_WORK(&w->work, once_deferred); w->key = key; + w->module = mod; + __module_get(mod); schedule_work(&w->work); } @@ -53,11 +58,11 @@ bool __do_once_start(bool *done, unsigned long *flags) EXPORT_SYMBOL(__do_once_start); void __do_once_done(bool *done, struct static_key_true *once_key, - unsigned long *flags) + unsigned long *flags, struct module *mod) __releases(once_lock) { *done = true; spin_unlock_irqrestore(&once_lock, *flags); - once_disable_jump(once_key); + once_disable_jump(once_key, mod); } EXPORT_SYMBOL(__do_once_done); From c94d50979f20b0a62171d312af907c29f5ec4866 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 3 Aug 2021 15:06:08 +0800 Subject: [PATCH 009/105] blk-iocost: fix lockdep warning on blkcg->lock [ Upstream commit 11431e26c9c43fa26f6b33ee1a90989f57b86024 ] blkcg->lock depends on q->queue_lock which may depend on another driver lock required in irq context, one example is dm-thin: Chain exists of: &pool->lock#3 --> &q->queue_lock --> &blkcg->lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&blkcg->lock); local_irq_disable(); lock(&pool->lock#3); lock(&q->queue_lock); lock(&pool->lock#3); Fix the issue by using spin_lock_irq(&blkcg->lock) in ioc_weight_write(). Cc: Tejun Heo Reported-by: Bruno Goncalves Link: https://lore.kernel.org/linux-block/CA+QYu4rzz6079ighEanS3Qq_Dmnczcf45ZoJoHKVLVATTo1e4Q@mail.gmail.com/T/#u Signed-off-by: Ming Lei Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210803070608.1766400-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-iocost.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index b7d8a954d99c..e95b93f72bd5 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3039,19 +3039,19 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) return -EINVAL; - spin_lock(&blkcg->lock); + spin_lock_irq(&blkcg->lock); iocc->dfl_weight = v * WEIGHT_ONE; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct ioc_gq *iocg = blkg_to_iocg(blkg); if (iocg) { - spin_lock_irq(&iocg->ioc->lock); + spin_lock(&iocg->ioc->lock); ioc_now(iocg->ioc, &now); weight_updated(iocg, &now); - spin_unlock_irq(&iocg->ioc->lock); + spin_unlock(&iocg->ioc->lock); } } - spin_unlock(&blkcg->lock); + spin_unlock_irq(&blkcg->lock); return nbytes; } From ef2d68ef9a3bff68915e6fdf5b61822bd1f6af4c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 6 Aug 2021 10:03:12 +0200 Subject: [PATCH 010/105] ovl: fix uninitialized pointer read in ovl_lookup_real_one() [ Upstream commit 580c610429b3994e8db24418927747cf28443cde ] One error path can result in release_dentry_name_snapshot() being called before "name" was initialized by take_dentry_name_snapshot(). Fix by moving the release_dentry_name_snapshot() to immediately after the only use. Reported-by: Colin Ian King Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index ed35be3fafc6..f469982dcb36 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -390,6 +390,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name.name, connected, name.name.len); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -404,7 +405,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; From 45b7b209715319cca7dbadfa098939bee0a8dc1a Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 10 Aug 2021 13:37:48 +0100 Subject: [PATCH 011/105] net: mscc: Fix non-GPL export of regmap APIs [ Upstream commit 48c812e0327744b4965296f65c23fe2405692afc ] The ocelot driver makes use of regmap, wrapping it with driver specific operations that are thin wrappers around the core regmap APIs. These are exported with EXPORT_SYMBOL, dropping the _GPL from the core regmap exports which is frowned upon. Add _GPL suffixes to at least the APIs that are doing register I/O. Signed-off-by: Mark Brown Acked-by: Alexandre Belloni Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/mscc/ocelot_io.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mscc/ocelot_io.c b/drivers/net/ethernet/mscc/ocelot_io.c index ea4e83410fe4..7390fa3980ec 100644 --- a/drivers/net/ethernet/mscc/ocelot_io.c +++ b/drivers/net/ethernet/mscc/ocelot_io.c @@ -21,7 +21,7 @@ u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset) ocelot->map[target][reg & REG_MASK] + offset, &val); return val; } -EXPORT_SYMBOL(__ocelot_read_ix); +EXPORT_SYMBOL_GPL(__ocelot_read_ix); void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) { @@ -32,7 +32,7 @@ void __ocelot_write_ix(struct ocelot *ocelot, u32 val, u32 reg, u32 offset) regmap_write(ocelot->targets[target], ocelot->map[target][reg & REG_MASK] + offset, val); } -EXPORT_SYMBOL(__ocelot_write_ix); +EXPORT_SYMBOL_GPL(__ocelot_write_ix); void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, u32 offset) @@ -45,7 +45,7 @@ void __ocelot_rmw_ix(struct ocelot *ocelot, u32 val, u32 mask, u32 reg, ocelot->map[target][reg & REG_MASK] + offset, mask, val); } -EXPORT_SYMBOL(__ocelot_rmw_ix); +EXPORT_SYMBOL_GPL(__ocelot_rmw_ix); u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) { @@ -58,7 +58,7 @@ u32 ocelot_port_readl(struct ocelot_port *port, u32 reg) regmap_read(port->target, ocelot->map[target][reg & REG_MASK], &val); return val; } -EXPORT_SYMBOL(ocelot_port_readl); +EXPORT_SYMBOL_GPL(ocelot_port_readl); void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) { @@ -69,7 +69,7 @@ void ocelot_port_writel(struct ocelot_port *port, u32 val, u32 reg) regmap_write(port->target, ocelot->map[target][reg & REG_MASK], val); } -EXPORT_SYMBOL(ocelot_port_writel); +EXPORT_SYMBOL_GPL(ocelot_port_writel); void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) { @@ -77,7 +77,7 @@ void ocelot_port_rmwl(struct ocelot_port *port, u32 val, u32 mask, u32 reg) ocelot_port_writel(port, (cur & (~mask)) | val, reg); } -EXPORT_SYMBOL(ocelot_port_rmwl); +EXPORT_SYMBOL_GPL(ocelot_port_rmwl); u32 __ocelot_target_read_ix(struct ocelot *ocelot, enum ocelot_target target, u32 reg, u32 offset) @@ -128,7 +128,7 @@ int ocelot_regfields_init(struct ocelot *ocelot, return 0; } -EXPORT_SYMBOL(ocelot_regfields_init); +EXPORT_SYMBOL_GPL(ocelot_regfields_init); static struct regmap_config ocelot_regmap_config = { .reg_bits = 32, @@ -148,4 +148,4 @@ struct regmap *ocelot_regmap_init(struct ocelot *ocelot, struct resource *res) return devm_regmap_init_mmio(ocelot->dev, regs, &ocelot_regmap_config); } -EXPORT_SYMBOL(ocelot_regmap_init); +EXPORT_SYMBOL_GPL(ocelot_regmap_init); From 7008b9981b6ab6150075891b433b2b2d426618a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20M=C3=A4tje?= Date: Wed, 25 Aug 2021 23:52:27 +0200 Subject: [PATCH 012/105] can: usb: esd_usb2: esd_usb2_rx_event(): fix the interchange of the CAN RX and TX error counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 044012b52029204900af9e4230263418427f4ba4 upstream. This patch fixes the interchanged fetch of the CAN RX and TX error counters from the ESD_EV_CAN_ERROR_EXT message. The RX error counter is really in struct rx_msg::data[2] and the TX error counter is in struct rx_msg::data[3]. Fixes: 96d8e90382dc ("can: Add driver for esd CAN-USB/2 device") Link: https://lore.kernel.org/r/20210825215227.4947-2-stefan.maetje@esd.eu Cc: stable@vger.kernel.org Signed-off-by: Stefan Mätje Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/usb/esd_usb2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 485e20e0dec2..8847942a8d97 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -224,8 +224,8 @@ static void esd_usb2_rx_event(struct esd_usb2_net_priv *priv, if (id == ESD_EV_CAN_ERROR_EXT) { u8 state = msg->msg.rx.data[0]; u8 ecc = msg->msg.rx.data[1]; - u8 txerr = msg->msg.rx.data[2]; - u8 rxerr = msg->msg.rx.data[3]; + u8 rxerr = msg->msg.rx.data[2]; + u8 txerr = msg->msg.rx.data[3]; skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { From e55a8b461585a77ad3c42e6ae1fdad9efb0ff207 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Wed, 18 Aug 2021 21:38:42 +0800 Subject: [PATCH 013/105] ceph: correctly handle releasing an embedded cap flush commit b2f9fa1f3bd8846f50b355fc2168236975c4d264 upstream. The ceph_cap_flush structures are usually dynamically allocated, but the ceph_cap_snap has an embedded one. When force umounting, the client will try to remove all the session caps. During this, it will free them, but that should not be done with the ones embedded in a capsnap. Fix this by adding a new boolean that indicates that the cap flush is embedded in a capsnap, and skip freeing it if that's set. At the same time, switch to using list_del_init() when detaching the i_list and g_list heads. It's possible for a forced umount to remove these objects but then handle_cap_flushsnap_ack() races in and does the list_del_init() again, corrupting memory. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/52283 Signed-off-by: Xiubo Li Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov Signed-off-by: Greg Kroah-Hartman --- fs/ceph/caps.c | 21 +++++++++++++-------- fs/ceph/mds_client.c | 7 ++++--- fs/ceph/snap.c | 3 +++ fs/ceph/super.h | 3 ++- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 45093a765a9b..b864c9b9e8df 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1752,7 +1752,11 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask, struct ceph_cap_flush *ceph_alloc_cap_flush(void) { - return kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); + struct ceph_cap_flush *cf; + + cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL); + cf->is_capsnap = false; + return cf; } void ceph_free_cap_flush(struct ceph_cap_flush *cf) @@ -1787,7 +1791,7 @@ static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc, prev->wake = true; wake = false; } - list_del(&cf->g_list); + list_del_init(&cf->g_list); return wake; } @@ -1802,7 +1806,7 @@ static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci, prev->wake = true; wake = false; } - list_del(&cf->i_list); + list_del_init(&cf->i_list); return wake; } @@ -2422,7 +2426,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH; list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) { - if (!cf->caps) { + if (cf->is_capsnap) { last_snap_flush = cf->tid; break; } @@ -2441,7 +2445,7 @@ static void __kick_flushing_caps(struct ceph_mds_client *mdsc, first_tid = cf->tid + 1; - if (cf->caps) { + if (!cf->is_capsnap) { struct cap_msg_args arg; dout("kick_flushing_caps %p cap %p tid %llu %s\n", @@ -3564,7 +3568,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, cleaned = cf->caps; /* Is this a capsnap? */ - if (cf->caps == 0) + if (cf->is_capsnap) continue; if (cf->tid <= flush_tid) { @@ -3637,8 +3641,9 @@ out: while (!list_empty(&to_remove)) { cf = list_first_entry(&to_remove, struct ceph_cap_flush, i_list); - list_del(&cf->i_list); - ceph_free_cap_flush(cf); + list_del_init(&cf->i_list); + if (!cf->is_capsnap) + ceph_free_cap_flush(cf); } if (wake_ci) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 1701902415c4..816cea497537 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1618,7 +1618,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_lock(&mdsc->cap_dirty_lock); list_for_each_entry(cf, &to_remove, i_list) - list_del(&cf->g_list); + list_del_init(&cf->g_list); if (!list_empty(&ci->i_dirty_item)) { pr_warn_ratelimited( @@ -1670,8 +1670,9 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_cap_flush *cf; cf = list_first_entry(&to_remove, struct ceph_cap_flush, i_list); - list_del(&cf->i_list); - ceph_free_cap_flush(cf); + list_del_init(&cf->i_list); + if (!cf->is_capsnap) + ceph_free_cap_flush(cf); } wake_up_all(&ci->i_cap_wq); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 803b60a96702..0369f672a76f 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -487,6 +487,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) pr_err("ENOMEM allocating ceph_cap_snap on %p\n", inode); return; } + capsnap->cap_flush.is_capsnap = true; + INIT_LIST_HEAD(&capsnap->cap_flush.i_list); + INIT_LIST_HEAD(&capsnap->cap_flush.g_list); spin_lock(&ci->i_ceph_lock); used = __ceph_caps_used(ci); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 6712509ae1d6..a8c460393b01 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -181,8 +181,9 @@ struct ceph_cap { struct ceph_cap_flush { u64 tid; - int caps; /* 0 means capsnap */ + int caps; bool wake; /* wake up flush waiters when finish ? */ + bool is_capsnap; /* true means capsnap */ struct list_head g_list; // global struct list_head i_list; // per inode }; From 921c2533aa3a980daa9942f9476e008ddcff65be Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Tue, 3 Aug 2021 17:27:51 +0800 Subject: [PATCH 014/105] riscv: Ensure the value of FP registers in the core dump file is up to date commit 379eb01c21795edb4ca8d342503bd2183a19ec3a upstream. The value of FP registers in the core dump file comes from the thread.fstate. However, kernel saves the FP registers to the thread.fstate only before scheduling out the process. If no process switch happens during the exception handling process, kernel will not have a chance to save the latest value of FP registers to thread.fstate. It will cause the value of FP registers in the core dump file may be incorrect. To solve this problem, this patch force lets kernel save the FP register into the thread.fstate if the target task_struct equals the current. Signed-off-by: Vincent Chen Reviewed-by: Jisheng Zhang Fixes: b8c8a9590e4f ("RISC-V: Add FP register ptrace support for gdb.") Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2d6395f5ad54..69678ab6457d 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -56,6 +57,9 @@ static int riscv_fpr_get(struct task_struct *target, { struct __riscv_d_ext_state *fstate = &target->thread.fstate; + if (target == current) + fstate_save(current, task_pt_regs(current)); + membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr)); membuf_store(&to, fstate->fcsr); return membuf_zero(&to, 4); // explicitly pad From 3134292a8e79e089f0f19f28b8b20eb1f961575c Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 25 Aug 2021 13:41:42 +0800 Subject: [PATCH 015/105] Revert "btrfs: compression: don't try to compress if we don't have enough pages" commit 4e9655763b82a91e4c341835bb504a2b1590f984 upstream. This reverts commit f2165627319ffd33a6217275e5690b1ab5c45763. [BUG] It's no longer possible to create compressed inline extent after commit f2165627319f ("btrfs: compression: don't try to compress if we don't have enough pages"). [CAUSE] For compression code, there are several possible reasons we have a range that needs to be compressed while it's no more than one page. - Compressed inline write The data is always smaller than one sector and the test lacks the condition to properly recognize a non-inline extent. - Compressed subpage write For the incoming subpage compressed write support, we require page alignment of the delalloc range. And for 64K page size, we can compress just one page into smaller sectors. For those reasons, the requirement for the data to be more than one page is not correct, and is already causing regression for compressed inline data writeback. The idea of skipping one page to avoid wasting CPU time could be revisited in the future. [FIX] Fix it by reverting the offending commit. Reported-by: Zygo Blaxell Link: https://lore.kernel.org/linux-btrfs/afa2742.c084f5d6.17b6b08dffc@tnonline.net Fixes: f2165627319f ("btrfs: compression: don't try to compress if we don't have enough pages") CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fc4311415fc6..d24dbb1e8850 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -547,7 +547,7 @@ again: * inode has not been flagged as nocompress. This flag can * change at any time if we discover bad compression ratios. */ - if (nr_pages > 1 && inode_need_compress(BTRFS_I(inode), start, end)) { + if (inode_need_compress(BTRFS_I(inode), start, end)) { WARN_ON(pages); pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); if (!pages) { From da3067eadcc156b742657c0694beae0a7c49d157 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Tue, 17 Aug 2021 10:23:25 +0200 Subject: [PATCH 016/105] drm/amdgpu: Cancel delayed work when GFXOFF is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 32bc8f8373d2d6a681c96e4b25dca60d4d1c6016 upstream. schedule_delayed_work does not push back the work if it was already scheduled before, so amdgpu_device_delay_enable_gfx_off ran ~100 ms after the first time GFXOFF was disabled and re-enabled, even if GFXOFF was disabled and re-enabled again during those 100 ms. This resulted in frame drops / stutter with the upcoming mutter 41 release on Navi 14, due to constantly enabling GFXOFF in the HW and disabling it again (for getting the GPU clock counter). To fix this, call cancel_delayed_work_sync when the disable count transitions from 0 to 1, and only schedule the delayed work on the reverse transition, not if the disable count was already 0. This makes sure the delayed work doesn't run at unexpected times, and allows it to be lock-free. v2: * Use cancel_delayed_work_sync & mutex_trylock instead of mod_delayed_work. v3: * Make amdgpu_device_delay_enable_gfx_off lock-free (Christian König) v4: * Fix race condition between amdgpu_gfx_off_ctrl incrementing adev->gfx.gfx_off_req_count and amdgpu_device_delay_enable_gfx_off checking for it to be 0 (Evan Quan) Cc: stable@vger.kernel.org Reviewed-by: Evan Quan Reviewed-by: Lijo Lazar # v3 Acked-by: Christian König # v3 Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 11 +++---- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 36 +++++++++++++++------- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ffd310279a69..97723f2b5ece 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2619,12 +2619,11 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); - mutex_lock(&adev->gfx.gfx_off_mutex); - if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) - adev->gfx.gfx_off_state = true; - } - mutex_unlock(&adev->gfx.gfx_off_mutex); + WARN_ON_ONCE(adev->gfx.gfx_off_state); + WARN_ON_ONCE(adev->gfx.gfx_off_req_count); + + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index c485ec86804e..9f9f55a2b257 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -556,24 +556,38 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) mutex_lock(&adev->gfx.gfx_off_mutex); - if (!enable) - adev->gfx.gfx_off_req_count++; - else if (adev->gfx.gfx_off_req_count > 0) + if (enable) { + /* If the count is already 0, it means there's an imbalance bug somewhere. + * Note that the bug may be in a different caller than the one which triggers the + * WARN_ON_ONCE. + */ + if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0)) + goto unlock; + adev->gfx.gfx_off_req_count--; - if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { - schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); - } else if (!enable && adev->gfx.gfx_off_state) { - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { - adev->gfx.gfx_off_state = false; + if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); + } else { + if (adev->gfx.gfx_off_req_count == 0) { + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); - if (adev->gfx.funcs->init_spm_golden) { - dev_dbg(adev->dev, "GFXOFF is disabled, re-init SPM golden settings\n"); - amdgpu_gfx_init_spm_golden(adev); + if (adev->gfx.gfx_off_state && + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { + adev->gfx.gfx_off_state = false; + + if (adev->gfx.funcs->init_spm_golden) { + dev_dbg(adev->dev, + "GFXOFF is disabled, re-init SPM golden settings\n"); + amdgpu_gfx_init_spm_golden(adev); + } } } + + adev->gfx.gfx_off_req_count++; } +unlock: mutex_unlock(&adev->gfx.gfx_off_mutex); } From 8437e07c370fc4dfcaf639b4372e16fc5f32acf5 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Tue, 24 Aug 2021 14:19:26 +0200 Subject: [PATCH 017/105] Revert "USB: serial: ch341: fix character loss at high transfer rates" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit df7b16d1c00ecb3da3a30c999cdb39f273c99a2f upstream. This reverts commit 3c18e9baee0ef97510dcda78c82285f52626764b. These devices do not appear to send a zero-length packet when the transfer size is a multiple of the bulk-endpoint max-packet size. This means that incoming data may not be processed by the driver until a short packet is received or the receive buffer is full. Revert back to using endpoint-sized receive buffers to avoid stalled reads. Reported-by: Paul Größel Link: https://bugzilla.kernel.org/show_bug.cgi?id=214131 Fixes: 3c18e9baee0e ("USB: serial: ch341: fix character loss at high transfer rates") Cc: stable@vger.kernel.org Cc: Willy Tarreau Link: https://lore.kernel.org/r/20210824121926.19311-1-johan@kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ch341.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 119b7b6e1ada..f26861246f65 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -853,7 +853,6 @@ static struct usb_serial_driver ch341_device = { .owner = THIS_MODULE, .name = "ch341-uart", }, - .bulk_in_size = 512, .id_table = id_table, .num_ports = 1, .open = ch341_open, From b0bcc8038868e354d093f595be5509ffacbb37c6 Mon Sep 17 00:00:00 2001 From: Zhengjun Zhang Date: Mon, 9 Aug 2021 21:35:53 +0800 Subject: [PATCH 018/105] USB: serial: option: add new VID/PID to support Fibocom FG150 commit 2829a4e3cf3a6ac2fa3cdb681b37574630fb9c1a upstream. Fibocom FG150 is a 5G module based on Qualcomm SDX55 platform, support Sub-6G band. Here are the outputs of lsusb -v and usb-devices: > T: Bus=02 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=5000 MxCh= 0 > D: Ver= 3.20 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 9 #Cfgs= 1 > P: Vendor=2cb7 ProdID=010b Rev=04.14 > S: Manufacturer=Fibocom > S: Product=Fibocom Modem_SN:XXXXXXXX > S: SerialNumber=XXXXXXXX > C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=896mA > I: If#=0x0 Alt= 0 #EPs= 1 Cls=ef(misc ) Sub=04 Prot=01 Driver=rndis_host > I: If#=0x1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=rndis_host > I: If#=0x2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) > I: If#=0x3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=30 Driver=(none) > I: If#=0x4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=42 Prot=01 Driver=(none) > Bus 002 Device 002: ID 2cb7:010b Fibocom Fibocom Modem_SN:XXXXXXXX > Device Descriptor: > bLength 18 > bDescriptorType 1 > bcdUSB 3.20 > bDeviceClass 0 > bDeviceSubClass 0 > bDeviceProtocol 0 > bMaxPacketSize0 9 > idVendor 0x2cb7 Fibocom > idProduct 0x010b > bcdDevice 4.14 > iManufacturer 1 Fibocom > iProduct 2 Fibocom Modem_SN:XXXXXXXX > iSerial 3 XXXXXXXX > bNumConfigurations 1 > Configuration Descriptor: > bLength 9 > bDescriptorType 2 > wTotalLength 0x00e6 > bNumInterfaces 5 > bConfigurationValue 1 > iConfiguration 4 RNDIS_DUN_DIAG_ADB > bmAttributes 0xa0 > (Bus Powered) > Remote Wakeup > MaxPower 896mA > Interface Association: > bLength 8 > bDescriptorType 11 > bFirstInterface 0 > bInterfaceCount 2 > bFunctionClass 239 Miscellaneous Device > bFunctionSubClass 4 > bFunctionProtocol 1 > iFunction 7 RNDIS > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 0 > bAlternateSetting 0 > bNumEndpoints 1 > bInterfaceClass 239 Miscellaneous Device > bInterfaceSubClass 4 > bInterfaceProtocol 1 > iInterface 0 > ** UNRECOGNIZED: 05 24 00 10 01 > ** UNRECOGNIZED: 05 24 01 00 01 > ** UNRECOGNIZED: 04 24 02 00 > ** UNRECOGNIZED: 05 24 06 00 01 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x81 EP 1 IN > bmAttributes 3 > Transfer Type Interrupt > Synch Type None > Usage Type Data > wMaxPacketSize 0x0008 1x 8 bytes > bInterval 9 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 1 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 10 CDC Data > bInterfaceSubClass 0 > bInterfaceProtocol 0 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x8e EP 14 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 6 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x0f EP 15 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 6 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 2 > bAlternateSetting 0 > bNumEndpoints 3 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 0 > bInterfaceProtocol 0 > iInterface 0 > ** UNRECOGNIZED: 05 24 00 10 01 > ** UNRECOGNIZED: 05 24 01 00 00 > ** UNRECOGNIZED: 04 24 02 02 > ** UNRECOGNIZED: 05 24 06 00 00 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x83 EP 3 IN > bmAttributes 3 > Transfer Type Interrupt > Synch Type None > Usage Type Data > wMaxPacketSize 0x000a 1x 10 bytes > bInterval 9 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x82 EP 2 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x01 EP 1 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 3 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 255 Vendor Specific Subclass > bInterfaceProtocol 48 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x84 EP 4 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x02 EP 2 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Interface Descriptor: > bLength 9 > bDescriptorType 4 > bInterfaceNumber 4 > bAlternateSetting 0 > bNumEndpoints 2 > bInterfaceClass 255 Vendor Specific Class > bInterfaceSubClass 66 > bInterfaceProtocol 1 > iInterface 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x03 EP 3 OUT > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Endpoint Descriptor: > bLength 7 > bDescriptorType 5 > bEndpointAddress 0x85 EP 5 IN > bmAttributes 2 > Transfer Type Bulk > Synch Type None > Usage Type Data > wMaxPacketSize 0x0400 1x 1024 bytes > bInterval 0 > bMaxBurst 0 > Binary Object Store Descriptor: > bLength 5 > bDescriptorType 15 > wTotalLength 0x0016 > bNumDeviceCaps 2 > USB 2.0 Extension Device Capability: > bLength 7 > bDescriptorType 16 > bDevCapabilityType 2 > bmAttributes 0x00000006 > BESL Link Power Management (LPM) Supported > SuperSpeed USB Device Capability: > bLength 10 > bDescriptorType 16 > bDevCapabilityType 3 > bmAttributes 0x00 > wSpeedsSupported 0x000f > Device can operate at Low Speed (1Mbps) > Device can operate at Full Speed (12Mbps) > Device can operate at High Speed (480Mbps) > Device can operate at SuperSpeed (5Gbps) > bFunctionalitySupport 1 > Lowest fully-functional device speed is Full Speed (12Mbps) > bU1DevExitLat 1 micro seconds > bU2DevExitLat 500 micro seconds > Device Status: 0x0000 > (Bus Powered) Signed-off-by: Zhengjun Zhang Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2b85e0e9bffd..acb8eec14f68 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -2074,6 +2074,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) | RSVD(5) }, { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x0105, 0xff), /* Fibocom NL678 series */ .driver_info = RSVD(6) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ From 56c92b8ddc0cb26fe7d4e5beeace89aec0b73342 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 26 Aug 2021 14:41:27 +0200 Subject: [PATCH 019/105] usb: renesas-xhci: Prefer firmware loading on unknown ROM state commit c82cacd2f1e622a461a77d275a75d7e19e7635a3 upstream. The recent attempt to handle an unknown ROM state in the commit d143825baf15 ("usb: renesas-xhci: Fix handling of unknown ROM state") resulted in a regression and reverted later by the commit 44cf53602f5a ("Revert "usb: renesas-xhci: Fix handling of unknown ROM state""). The problem of the former fix was that it treated the failure of firmware loading as a fatal error. Since the firmware files aren't included in the standard linux-firmware tree, most users don't have them, hence they got the non-working system after that. The revert fixed the regression, but also it didn't make the firmware loading triggered even on the devices that do need it. So we need still a fix for them. This is another attempt to handle the unknown ROM state. Like the previous fix, this also tries to load the firmware when ROM shows unknown state. In this patch, however, the failure of a firmware loading (such as a missing firmware file) isn't handled as a fatal error any longer when ROM has been already detected, but it falls back to the ROM mode like before. The error is returned only when no ROM is detected and the firmware loading failed. Along with it, for simplifying the code flow, the detection and the check of ROM is factored out from renesas_fw_check_running() and done in the caller side, renesas_xhci_check_request_fw(). It avoids the redundant ROM checks. The patch was tested on Lenovo Thinkpad T14 gen (BIOS 1.34). Also it was confirmed that no regression is seen on another Thinkpad T14 machine that has worked without the patch, too. Fixes: 44cf53602f5a ("Revert "usb: renesas-xhci: Fix handling of unknown ROM state"") Cc: stable Signed-off-by: Takashi Iwai BugLink: https://bugzilla.opensuse.org/show_bug.cgi?id=1189207 Link: https://lore.kernel.org/r/20210826124127.14789-1-tiwai@suse.de Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-pci-renesas.c | 35 +++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index f97ac9f52bf4..96692dbbd4da 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -207,7 +207,8 @@ static int renesas_check_rom_state(struct pci_dev *pdev) return 0; case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */ - return 0; + dev_dbg(&pdev->dev, "Unknown ROM status ...\n"); + return -ENOENT; case RENESAS_ROM_STATUS_ERROR: /* Error State */ default: /* All other states are marked as "Reserved states" */ @@ -224,14 +225,6 @@ static int renesas_fw_check_running(struct pci_dev *pdev) u8 fw_state; int err; - /* Check if device has ROM and loaded, if so skip everything */ - err = renesas_check_rom(pdev); - if (err) { /* we have rom */ - err = renesas_check_rom_state(pdev); - if (!err) - return err; - } - /* * Test if the device is actually needing the firmware. As most * BIOSes will initialize the device for us. If the device is @@ -591,21 +584,39 @@ int renesas_xhci_check_request_fw(struct pci_dev *pdev, (struct xhci_driver_data *)id->driver_data; const char *fw_name = driver_data->firmware; const struct firmware *fw; + bool has_rom; int err; + /* Check if device has ROM and loaded, if so skip everything */ + has_rom = renesas_check_rom(pdev); + if (has_rom) { + err = renesas_check_rom_state(pdev); + if (!err) + return 0; + else if (err != -ENOENT) + has_rom = false; + } + err = renesas_fw_check_running(pdev); /* Continue ahead, if the firmware is already running. */ if (err == 0) return 0; + /* no firmware interface available */ if (err != 1) - return err; + return has_rom ? 0 : err; pci_dev_get(pdev); - err = request_firmware(&fw, fw_name, &pdev->dev); + err = firmware_request_nowarn(&fw, fw_name, &pdev->dev); pci_dev_put(pdev); if (err) { - dev_err(&pdev->dev, "request_firmware failed: %d\n", err); + if (has_rom) { + dev_info(&pdev->dev, "failed to load firmware %s, fallback to ROM\n", + fw_name); + return 0; + } + dev_err(&pdev->dev, "failed to load firmware %s: %d\n", + fw_name, err); return err; } From 87b2016493eb72659a154fe2cfe4cd14dd4a1661 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Thu, 19 Aug 2021 03:17:03 +0200 Subject: [PATCH 020/105] usb: dwc3: gadget: Fix dwc3_calc_trbs_left() commit 51f1954ad853d01ba4dc2b35dee14d8490ee05a1 upstream. We can't depend on the TRB's HWO bit to determine if the TRB ring is "full". A TRB is only available when the driver had processed it, not when the controller consumed and relinquished the TRB's ownership to the driver. Otherwise, the driver may overwrite unprocessed TRBs. This can happen when many transfer events accumulate and the system is slow to process them and/or when there are too many small requests. If a request is in the started_list, that means there is one or more unprocessed TRBs remained. Check this instead of the TRB's HWO bit whether the TRB ring is full. Fixes: c4233573f6ee ("usb: dwc3: gadget: prepare TRBs on update transfers too") Cc: Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/e91e975affb0d0d02770686afc3a5b9eb84409f6.1629335416.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 756839e0e91d..820b9b76af40 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -932,19 +932,19 @@ static struct dwc3_trb *dwc3_ep_prev_trb(struct dwc3_ep *dep, u8 index) static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) { - struct dwc3_trb *tmp; u8 trbs_left; /* - * If enqueue & dequeue are equal than it is either full or empty. - * - * One way to know for sure is if the TRB right before us has HWO bit - * set or not. If it has, then we're definitely full and can't fit any - * more transfers in our ring. + * If the enqueue & dequeue are equal then the TRB ring is either full + * or empty. It's considered full when there are DWC3_TRB_NUM-1 of TRBs + * pending to be processed by the driver. */ if (dep->trb_enqueue == dep->trb_dequeue) { - tmp = dwc3_ep_prev_trb(dep, dep->trb_enqueue); - if (tmp->ctrl & DWC3_TRB_CTRL_HWO) + /* + * If there is any request remained in the started_list at + * this point, that means there is no TRB available. + */ + if (!list_empty(&dep->started_list)) return 0; return DWC3_TRB_NUM - 1; From 01da7c1dc4cfc3332351037b12adcc8bb355095b Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Tue, 24 Aug 2021 21:28:55 -0700 Subject: [PATCH 021/105] usb: dwc3: gadget: Stop EP0 transfers during pullup disable commit 4a1e25c0a029b97ea4a3d423a6392bfacc3b2e39 upstream. During a USB cable disconnect, or soft disconnect scenario, a pending SETUP transaction may not be completed, leading to the following error: dwc3 a600000.dwc3: timed out waiting for SETUP phase If this occurs, then the entire pullup disable routine is skipped and proper cleanup and halting of the controller does not complete. Instead of returning an error (which is ignored from the UDC perspective), allow the pullup disable routine to continue, which will also handle disabling of EP0/1. This will end any active transfers as well. Ensure to clear any delayed_status also, as the timeout could happen within the STATUS stage. Fixes: bb0147364850 ("usb: dwc3: gadget: don't clear RUN/STOP when it's invalid to do so") Cc: Reviewed-by: Thinh Nguyen Acked-by: Felipe Balbi Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/20210825042855.7977-1-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 820b9b76af40..b75fe568096f 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2125,10 +2125,8 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) ret = wait_for_completion_timeout(&dwc->ep0_in_setup, msecs_to_jiffies(DWC3_PULL_UP_TIMEOUT)); - if (ret == 0) { - dev_err(dwc->dev, "timed out waiting for SETUP phase\n"); - return -ETIMEDOUT; - } + if (ret == 0) + dev_warn(dwc->dev, "timed out waiting for SETUP phase\n"); } /* @@ -2332,6 +2330,7 @@ static int __dwc3_gadget_start(struct dwc3 *dwc) /* begin to receive SETUP packets */ dwc->ep0state = EP0_SETUP_PHASE; dwc->link_state = DWC3_LINK_STATE_SS_DIS; + dwc->delayed_status = false; dwc3_ep0_out_start(dwc); dwc3_gadget_enable_irq(dwc); From 22c18102ec59adf71f220f66e9e6ac598f408c4f Mon Sep 17 00:00:00 2001 From: Li Jinlin Date: Tue, 24 Aug 2021 10:59:21 +0800 Subject: [PATCH 022/105] scsi: core: Fix hang of freezing queue between blocking and running device commit 02c6dcd543f8f051973ee18bfbc4dc3bd595c558 upstream. We found a hang, the steps to reproduce are as follows: 1. blocking device via scsi_device_set_state() 2. dd if=/dev/sda of=/mnt/t.log bs=1M count=10 3. echo none > /sys/block/sda/queue/scheduler 4. echo "running" >/sys/block/sda/device/state Step 3 and 4 should complete after step 4, but they hang. CPU#0 CPU#1 CPU#2 --------------- ---------------- ---------------- Step 1: blocking device Step 2: dd xxxx ^^^^^^ get request q_usage_counter++ Step 3: switching scheculer elv_iosched_store elevator_switch blk_mq_freeze_queue blk_freeze_queue > blk_freeze_queue_start ^^^^^^ mq_freeze_depth++ > blk_mq_run_hw_queues ^^^^^^ can't run queue when dev blocked > blk_mq_freeze_queue_wait ^^^^^^ Hang here!!! wait q_usage_counter==0 Step 4: running device store_state_field scsi_rescan_device scsi_attach_vpd scsi_vpd_inquiry __scsi_execute blk_get_request blk_mq_alloc_request blk_queue_enter ^^^^^^ Hang here!!! wait mq_freeze_depth==0 blk_mq_run_hw_queues ^^^^^^ dispatch IO, q_usage_counter will reduce to zero blk_mq_unfreeze_queue ^^^^^ mq_freeze_depth-- To fix this, we need to run queue before rescanning device when the device state changes to SDEV_RUNNING. Link: https://lore.kernel.org/r/20210824025921.3277629-1-lijinlin3@huawei.com Fixes: f0f82e2476f6 ("scsi: core: Fix capacity set to zero after offlinining device") Reviewed-by: Bart Van Assche Signed-off-by: Li Jinlin Signed-off-by: Qiu Laibin Signed-off-by: Martin K. Petersen Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/scsi_sysfs.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 4dcced95c8b4..8173b67ec7b0 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -808,12 +808,15 @@ store_state_field(struct device *dev, struct device_attribute *attr, ret = scsi_device_set_state(sdev, state); /* * If the device state changes to SDEV_RUNNING, we need to - * rescan the device to revalidate it, and run the queue to - * avoid I/O hang. + * run the queue to avoid I/O hang, and rescan the device + * to revalidate it. Running the queue first is necessary + * because another thread may be waiting inside + * blk_mq_freeze_queue_wait() and because that call may be + * waiting for pending I/O to finish. */ if (ret == 0 && state == SDEV_RUNNING) { - scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + scsi_rescan_device(dev); } mutex_unlock(&sdev->state_mutex); From 3e949aaa8bef4326aaf687c3b2c58f674ff4db91 Mon Sep 17 00:00:00 2001 From: Naresh Kumar PBS Date: Wed, 18 Aug 2021 20:25:52 -0700 Subject: [PATCH 023/105] RDMA/bnxt_re: Add missing spin lock initialization [ Upstream commit 17f2569dce1848080825b8336e6b7c6900193b44 ] Add the missing initialization of srq lock. Fixes: 37cb11acf1f7 ("RDMA/bnxt_re: Add SRQ support for Broadcom adapters") Link: https://lore.kernel.org/r/1629343553-5843-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Naresh Kumar PBS Signed-off-by: Selvin Xavier Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 266de55f5719..441952a5eca4 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1691,6 +1691,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, if (nq) nq->budget++; atomic_inc(&rdev->srq_count); + spin_lock_init(&srq->lock); return 0; From 56ac7463a1403996f263b08812f64179e54e3f0b Mon Sep 17 00:00:00 2001 From: Tuo Li Date: Fri, 6 Aug 2021 06:30:29 -0700 Subject: [PATCH 024/105] IB/hfi1: Fix possible null-pointer dereference in _extend_sdma_tx_descs() [ Upstream commit cbe71c61992c38f72c2b625b2ef25916b9f0d060 ] kmalloc_array() is called to allocate memory for tx->descp. If it fails, the function __sdma_txclean() is called: __sdma_txclean(dd, tx); However, in the function __sdma_txclean(), tx-descp is dereferenced if tx->num_desc is not zero: sdma_unmap_desc(dd, &tx->descp[0]); To fix this possible null-pointer dereference, assign the return value of kmalloc_array() to a local variable descp, and then assign it to tx->descp if it is not NULL. Otherwise, go to enomem. Fixes: 7724105686e7 ("IB/hfi1: add driver files") Link: https://lore.kernel.org/r/20210806133029.194964-1-islituo@gmail.com Reported-by: TOTE Robot Signed-off-by: Tuo Li Tested-by: Mike Marciniszyn Acked-by: Mike Marciniszyn Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/hfi1/sdma.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index a307d4c8b15a..ac6f87137b63 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -3055,6 +3055,7 @@ static void __sdma_process_event(struct sdma_engine *sde, static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) { int i; + struct sdma_desc *descp; /* Handle last descriptor */ if (unlikely((tx->num_desc == (MAX_DESC - 1)))) { @@ -3075,12 +3076,10 @@ static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx) if (unlikely(tx->num_desc == MAX_DESC)) goto enomem; - tx->descp = kmalloc_array( - MAX_DESC, - sizeof(struct sdma_desc), - GFP_ATOMIC); - if (!tx->descp) + descp = kmalloc_array(MAX_DESC, sizeof(struct sdma_desc), GFP_ATOMIC); + if (!descp) goto enomem; + tx->descp = descp; /* reserve last descriptor for coalescing */ tx->desc_limit = MAX_DESC - 1; From 3a2c5fbb1cc60b9ef6e3c142c40952fe711470bf Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 16 Aug 2021 16:55:31 +0800 Subject: [PATCH 025/105] RDMA/bnxt_re: Remove unpaired rtnl unlock in bnxt_re_dev_init() [ Upstream commit a036ad088306a88de87e973981f2b9224e466c3f ] The fixed commit removes all rtnl_lock() and rtnl_unlock() calls in function bnxt_re_dev_init(), but forgets to remove a rtnl_unlock() in the error handling path of bnxt_re_register_netdev(), which may cause a deadlock. This bug is suggested by a static analysis tool. Fixes: c2b777a95923 ("RDMA/bnxt_re: Refactor device add/remove functionalities") Link: https://lore.kernel.org/r/20210816085531.12167-1-dinghao.liu@zju.edu.cn Signed-off-by: Dinghao Liu Acked-by: Selvin Xavier Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/bnxt_re/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 1fadca8af71a..9ef6aea29ff1 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -1410,7 +1410,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) memset(&rattr, 0, sizeof(rattr)); rc = bnxt_re_register_netdev(rdev); if (rc) { - rtnl_unlock(); ibdev_err(&rdev->ibdev, "Failed to register with netedev: %#x\n", rc); return -EINVAL; From 3217c9d4602ff1d21f1cbadabbee559c6df5a318 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 19 Aug 2021 15:34:51 -0700 Subject: [PATCH 026/105] ice: do not abort devlink info if board identifier can't be found [ Upstream commit a8f89fa27773a8c96fd09fb4e2f4892d794f21f6 ] The devlink dev info command reports version information about the device and firmware running on the board. This includes the "board.id" field which is supposed to represent an identifier of the board design. The ice driver uses the Product Board Assembly identifier for this. In some cases, the PBA is not present in the NVM. If this happens, devlink dev info will fail with an error. Instead, modify the ice_info_pba function to just exit without filling in the context buffer. This will cause the board.id field to be skipped. Log a dev_dbg message in case someone wants to confirm why board.id is not showing up for them. Fixes: e961b679fb0b ("ice: add board identifier info to devlink .info_get") Signed-off-by: Jacob Keller Tested-by: Tony Brelinski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20210819223451.245613-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/ice/ice_devlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 511da59bd6f2..f18ce43b7e74 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -23,7 +23,9 @@ static int ice_info_pba(struct ice_pf *pf, char *buf, size_t len) status = ice_read_pba_string(hw, (u8 *)buf, len); if (status) - return -EIO; + /* We failed to locate the PBA, so just skip this entry */ + dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %s\n", + ice_stat_str(status)); return 0; } From 384dea502e9119d90a1f700ddf2d5a861f8bde34 Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Fri, 20 Aug 2021 09:57:53 +0300 Subject: [PATCH 027/105] net: usb: pegasus: fixes of set_register(s) return value evaluation; [ Upstream commit ffc9c3ebb4af870a121da99826e9ccb63dc8b3d7 ] - restore the behavior in enable_net_traffic() to avoid regressions - Jakub Kicinski; - hurried up and removed redundant assignment in pegasus_open() before yet another checker complains; Fixes: 8a160e2e9aeb ("net: usb: pegasus: Check the return value of get_geristers() and friends;") Reported-by: Jakub Kicinski Signed-off-by: Petko Manolov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index fb1a8c4486dd..2a748a924f83 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -471,7 +471,7 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) write_mii_word(pegasus, 0, 0x1b, &auxmode); } - return 0; + return ret; fail: netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; @@ -861,7 +861,7 @@ static int pegasus_open(struct net_device *net) if (!pegasus->rx_skb) goto exit; - res = set_registers(pegasus, EthID, 6, net->dev_addr); + set_registers(pegasus, EthID, 6, net->dev_addr); usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, usb_rcvbulkpipe(pegasus->usb, 1), From ae6480ba06654b87ed35eb96f4ac6d9ee7cc81b9 Mon Sep 17 00:00:00 2001 From: Aaron Ma Date: Tue, 13 Jul 2021 21:00:36 +0800 Subject: [PATCH 028/105] igc: fix page fault when thunderbolt is unplugged [ Upstream commit 4b79959510e6612d80f8d86022e0cb44eee6f4a2 ] After unplug thunderbolt dock with i225, pciehp interrupt is triggered, remove call will read/write mmio address which is already disconnected, then cause page fault and make system hang. Check PCI state to remove device safely. Trace: BUG: unable to handle page fault for address: 000000000000b604 Oops: 0000 [#1] SMP NOPTI RIP: 0010:igc_rd32+0x1c/0x90 [igc] Call Trace: igc_ptp_suspend+0x6c/0xa0 [igc] igc_ptp_stop+0x12/0x50 [igc] igc_remove+0x7f/0x1c0 [igc] pci_device_remove+0x3e/0xb0 __device_release_driver+0x181/0x240 Fixes: 13b5b7fd6a4a ("igc: Add support for Tx/Rx rings") Fixes: b03c49cde61f ("igc: Save PTP time before a reset") Signed-off-by: Aaron Ma Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 32 ++++++++++++++--------- drivers/net/ethernet/intel/igc/igc_ptp.c | 3 ++- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index b9fe2785f573..66f181d12578 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -138,6 +138,9 @@ static void igc_release_hw_control(struct igc_adapter *adapter) struct igc_hw *hw = &adapter->hw; u32 ctrl_ext; + if (!pci_device_is_present(adapter->pdev)) + return; + /* Let firmware take over control of h/w */ ctrl_ext = rd32(IGC_CTRL_EXT); wr32(IGC_CTRL_EXT, @@ -3782,26 +3785,29 @@ void igc_down(struct igc_adapter *adapter) igc_ptp_suspend(adapter); - /* disable receives in the hardware */ - rctl = rd32(IGC_RCTL); - wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); - /* flush and sleep below */ - + if (pci_device_is_present(adapter->pdev)) { + /* disable receives in the hardware */ + rctl = rd32(IGC_RCTL); + wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN); + /* flush and sleep below */ + } /* set trans_start so we don't get spurious watchdogs during reset */ netif_trans_update(netdev); netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); - /* disable transmits in the hardware */ - tctl = rd32(IGC_TCTL); - tctl &= ~IGC_TCTL_EN; - wr32(IGC_TCTL, tctl); - /* flush both disables and wait for them to finish */ - wrfl(); - usleep_range(10000, 20000); + if (pci_device_is_present(adapter->pdev)) { + /* disable transmits in the hardware */ + tctl = rd32(IGC_TCTL); + tctl &= ~IGC_TCTL_EN; + wr32(IGC_TCTL, tctl); + /* flush both disables and wait for them to finish */ + wrfl(); + usleep_range(10000, 20000); - igc_irq_disable(adapter); + igc_irq_disable(adapter); + } adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE; diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 545f4d0e67cf..4ab46eee3d93 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -557,7 +557,8 @@ void igc_ptp_suspend(struct igc_adapter *adapter) adapter->ptp_tx_skb = NULL; clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state); - igc_ptp_time_save(adapter); + if (pci_device_is_present(adapter->pdev)) + igc_ptp_time_save(adapter); } /** From 58b3dbf10c01ee076504a203ae93c43d46accf15 Mon Sep 17 00:00:00 2001 From: Toshiki Nishioka Date: Wed, 21 Jul 2021 17:34:03 -0700 Subject: [PATCH 029/105] igc: Use num_tx_queues when iterating over tx_ring queue [ Upstream commit 691bd4d7761992914a0e83c27a4ce57d01474cda ] Use num_tx_queues rather than the IGC_MAX_TX_QUEUES fixed number 4 when iterating over tx_ring queue since instantiated queue count could be less than 4 where on-line cpu count is less than 4. Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading") Signed-off-by: Toshiki Nishioka Signed-off-by: Muhammad Husaini Zulkifli Tested-by: Muhammad Husaini Zulkifli Acked-by: Sasha Neftin Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/igc/igc_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 66f181d12578..013dd2955381 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4761,7 +4761,7 @@ static bool validate_schedule(struct igc_adapter *adapter, if (e->command != TC_TAPRIO_CMD_SET_GATES) return false; - for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { + for (i = 0; i < adapter->num_tx_queues; i++) { if (e->gate_mask & BIT(i)) queue_uses[i]++; @@ -4818,7 +4818,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, end_time += e->interval; - for (i = 0; i < IGC_MAX_TX_QUEUES; i++) { + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; if (!(e->gate_mask & BIT(i))) From 87285ac51ecf4c3073d8a9d953422a4d776599fe Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 4 Jul 2021 10:11:41 +0300 Subject: [PATCH 030/105] e1000e: Fix the max snoop/no-snoop latency for 10M [ Upstream commit 44a13a5d99c71bf9e1676d9e51679daf4d7b3d73 ] We should decode the latency and the max_latency before directly compare. The latency should be presented as lat_enc = scale x value: lat_enc_d = (lat_enc & 0x0x3ff) x (1U << (5*((max_ltr_enc & 0x1c00) >> 10))) Fixes: cf8fb73c23aa ("e1000e: add support for LTR on I217/I218") Suggested-by: Yee Li Signed-off-by: Sasha Neftin Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 14 +++++++++++++- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 +++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 6fb46682b058..5f0f1bd522f0 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1006,6 +1006,8 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) { u32 reg = link << (E1000_LTRV_REQ_SHIFT + E1000_LTRV_NOSNOOP_SHIFT) | link << E1000_LTRV_REQ_SHIFT | E1000_LTRV_SEND; + u16 max_ltr_enc_d = 0; /* maximum LTR decoded by platform */ + u16 lat_enc_d = 0; /* latency decoded */ u16 lat_enc = 0; /* latency encoded */ if (link) { @@ -1059,7 +1061,17 @@ static s32 e1000_platform_pm_pch_lpt(struct e1000_hw *hw, bool link) E1000_PCI_LTR_CAP_LPT + 2, &max_nosnoop); max_ltr_enc = max_t(u16, max_snoop, max_nosnoop); - if (lat_enc > max_ltr_enc) + lat_enc_d = (lat_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((lat_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + max_ltr_enc_d = (max_ltr_enc & E1000_LTRV_VALUE_MASK) * + (1U << (E1000_LTRV_SCALE_FACTOR * + ((max_ltr_enc & E1000_LTRV_SCALE_MASK) + >> E1000_LTRV_SCALE_SHIFT))); + + if (lat_enc_d > max_ltr_enc_d) lat_enc = max_ltr_enc; } diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index 1502895eb45d..e757896287eb 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -274,8 +274,11 @@ /* Latency Tolerance Reporting */ #define E1000_LTRV 0x000F8 +#define E1000_LTRV_VALUE_MASK 0x000003FF #define E1000_LTRV_SCALE_MAX 5 #define E1000_LTRV_SCALE_FACTOR 5 +#define E1000_LTRV_SCALE_SHIFT 10 +#define E1000_LTRV_SCALE_MASK 0x00001C00 #define E1000_LTRV_REQ_SHIFT 15 #define E1000_LTRV_NOSNOOP_SHIFT 16 #define E1000_LTRV_SEND (1 << 30) From 8f1e3ad9456935f538c4ba06d2c04f2581ec385f Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Sun, 18 Jul 2021 07:10:31 +0300 Subject: [PATCH 031/105] e1000e: Do not take care about recovery NVM checksum [ Upstream commit 4051f68318ca9f3d3becef3b54e70ad2c146df97 ] On new platforms, the NVM is read-only. Attempting to update the NVM is causing a lockup to occur. Do not attempt to write to the NVM on platforms where it's not supported. Emit an error message when the NVM checksum is invalid. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=213667 Fixes: fb776f5d57ee ("e1000e: Add support for Tiger Lake") Suggested-by: Dima Ruinskiy Suggested-by: Vitaly Lifshits Signed-off-by: Sasha Neftin Tested-by: Dvora Fuxbrumer Signed-off-by: Tony Nguyen Signed-off-by: Sasha Levin --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 5f0f1bd522f0..854c585de2e1 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4134,13 +4134,17 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) return ret_val; if (!(data & valid_csum_mask)) { - data |= valid_csum_mask; - ret_val = e1000_write_nvm(hw, word, 1, &data); - if (ret_val) - return ret_val; - ret_val = e1000e_update_nvm_checksum(hw); - if (ret_val) - return ret_val; + e_dbg("NVM Checksum Invalid\n"); + + if (hw->mac.type < e1000_pch_cnp) { + data |= valid_csum_mask; + ret_val = e1000_write_nvm(hw, word, 1, &data); + if (ret_val) + return ret_val; + ret_val = e1000e_update_nvm_checksum(hw); + if (ret_val) + return ret_val; + } } return e1000e_validate_nvm_checksum_generic(hw); From e78006b59a30d9c17cb5476acfa084f2b3545f73 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 11 Aug 2021 18:11:28 +0300 Subject: [PATCH 032/105] RDMA/efa: Free IRQ vectors on error flow [ Upstream commit dbe986bdfd6dfe6ef24b833767fff4151e024357 ] Make sure to free the IRQ vectors in case the allocation doesn't return the expected number of IRQs. Fixes: b7f5e880f377 ("RDMA/efa: Add the efa module") Link: https://lore.kernel.org/r/20210811151131.39138-2-galpress@amazon.com Reviewed-by: Firas JahJah Reviewed-by: Yossi Leybovich Signed-off-by: Gal Pressman Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/efa/efa_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/efa/efa_main.c b/drivers/infiniband/hw/efa/efa_main.c index 6faed3a81e08..ffdd18f4217f 100644 --- a/drivers/infiniband/hw/efa/efa_main.c +++ b/drivers/infiniband/hw/efa/efa_main.c @@ -377,6 +377,7 @@ static int efa_enable_msix(struct efa_dev *dev) } if (irq_num != msix_vecs) { + efa_disable_msix(dev); dev_err(&dev->pdev->dev, "Allocated %d MSI-X (out of %d requested)\n", irq_num, msix_vecs); From fb45459d9ddb1edd4a8b087bafe875707753cb10 Mon Sep 17 00:00:00 2001 From: Shreyansh Chouhan Date: Sat, 21 Aug 2021 12:44:24 +0530 Subject: [PATCH 033/105] ip_gre: add validation for csum_start [ Upstream commit 1d011c4803c72f3907eccfc1ec63caefb852fcbf ] Validate csum_start in gre_handle_offloads before we call _gre_xmit so that we do not crash later when the csum_start value is used in the lco_csum function call. This patch deals with ipv4 code. Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") Reported-by: syzbot+ff8e1b9f2f36481e2efc@syzkaller.appspotmail.com Signed-off-by: Shreyansh Chouhan Reviewed-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/ip_gre.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index e70291748889..a0829495b211 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -468,6 +468,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static int gre_handle_offloads(struct sk_buff *skb, bool csum) { + if (csum && skb_checksum_start(skb) < skb->data) + return -EINVAL; return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } From 850401a23a854ae5e6d7881855c6b8880e8f1563 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 21 Aug 2021 09:35:23 +0200 Subject: [PATCH 034/105] xgene-v2: Fix a resource leak in the error handling path of 'xge_probe()' [ Upstream commit 5ed74b03eb4d08f5dd281dcb5f1c9bb92b363a8d ] A successful 'xge_mdio_config()' call should be balanced by a corresponding 'xge_mdio_remove()' call in the error handling path of the probe, as already done in the remove function. Update the error handling path accordingly. Fixes: ea8ab16ab225 ("drivers: net: xgene-v2: Add MDIO support") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/apm/xgene-v2/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c index 860c18fb7aae..80399c8980bd 100644 --- a/drivers/net/ethernet/apm/xgene-v2/main.c +++ b/drivers/net/ethernet/apm/xgene-v2/main.c @@ -677,11 +677,13 @@ static int xge_probe(struct platform_device *pdev) ret = register_netdev(ndev); if (ret) { netdev_err(ndev, "Failed to register netdev\n"); - goto err; + goto err_mdio_remove; } return 0; +err_mdio_remove: + xge_mdio_remove(ndev); err: free_netdev(ndev); From 8e0881f6f57eff04351ae92a296363ebf1b5dd5c Mon Sep 17 00:00:00 2001 From: Maxim Kiselev Date: Fri, 20 Aug 2021 18:39:51 +0300 Subject: [PATCH 035/105] net: marvell: fix MVNETA_TX_IN_PRGRS bit number [ Upstream commit 359f4cdd7d78fdf8c098713b05fee950a730f131 ] According to Armada XP datasheet bit at 0 position is corresponding for TxInProg indication. Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Maxim Kiselev Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c6b735b30515..74e266c0b8e1 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -103,7 +103,7 @@ #define MVNETA_DESC_SWAP BIT(6) #define MVNETA_TX_BRST_SZ_MASK(burst) ((burst) << 22) #define MVNETA_PORT_STATUS 0x2444 -#define MVNETA_TX_IN_PRGRS BIT(1) +#define MVNETA_TX_IN_PRGRS BIT(0) #define MVNETA_TX_FIFO_EMPTY BIT(8) #define MVNETA_RX_MIN_FRAME_SIZE 0x247c /* Only exists on Armada XP and Armada 370 */ From b493af3a66e067f93e5e03465507866ddeabff9e Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Mon, 23 Aug 2021 18:16:33 +0200 Subject: [PATCH 036/105] ucounts: Increase ucounts reference counter before the security hook [ Upstream commit bbb6d0f3e1feb43d663af089c7dedb23be6a04fb ] We need to increment the ucounts reference counter befor security_prepare_creds() because this function may fail and abort_creds() will try to decrement this reference. [ 96.465056][ T8641] FAULT_INJECTION: forcing a failure. [ 96.465056][ T8641] name fail_page_alloc, interval 1, probability 0, space 0, times 0 [ 96.478453][ T8641] CPU: 1 PID: 8641 Comm: syz-executor668 Not tainted 5.14.0-rc6-syzkaller #0 [ 96.487215][ T8641] Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 [ 96.497254][ T8641] Call Trace: [ 96.500517][ T8641] dump_stack_lvl+0x1d3/0x29f [ 96.505758][ T8641] ? show_regs_print_info+0x12/0x12 [ 96.510944][ T8641] ? log_buf_vmcoreinfo_setup+0x498/0x498 [ 96.516652][ T8641] should_fail+0x384/0x4b0 [ 96.521141][ T8641] prepare_alloc_pages+0x1d1/0x5a0 [ 96.526236][ T8641] __alloc_pages+0x14d/0x5f0 [ 96.530808][ T8641] ? __rmqueue_pcplist+0x2030/0x2030 [ 96.536073][ T8641] ? lockdep_hardirqs_on_prepare+0x3e2/0x750 [ 96.542056][ T8641] ? alloc_pages+0x3f3/0x500 [ 96.546635][ T8641] allocate_slab+0xf1/0x540 [ 96.551120][ T8641] ___slab_alloc+0x1cf/0x350 [ 96.555689][ T8641] ? kzalloc+0x1d/0x30 [ 96.559740][ T8641] __kmalloc+0x2e7/0x390 [ 96.563980][ T8641] ? kzalloc+0x1d/0x30 [ 96.568029][ T8641] kzalloc+0x1d/0x30 [ 96.571903][ T8641] security_prepare_creds+0x46/0x220 [ 96.577174][ T8641] prepare_creds+0x411/0x640 [ 96.581747][ T8641] __sys_setfsuid+0xe2/0x3a0 [ 96.586333][ T8641] do_syscall_64+0x3d/0xb0 [ 96.590739][ T8641] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 96.596611][ T8641] RIP: 0033:0x445a69 [ 96.600483][ T8641] Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 [ 96.620152][ T8641] RSP: 002b:00007f1054173318 EFLAGS: 00000246 ORIG_RAX: 000000000000007a [ 96.628543][ T8641] RAX: ffffffffffffffda RBX: 00000000004ca4c8 RCX: 0000000000445a69 [ 96.636600][ T8641] RDX: 0000000000000010 RSI: 00007f10541732f0 RDI: 0000000000000000 [ 96.644550][ T8641] RBP: 00000000004ca4c0 R08: 0000000000000001 R09: 0000000000000000 [ 96.652500][ T8641] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004ca4cc [ 96.660631][ T8641] R13: 00007fffffe0b62f R14: 00007f1054173400 R15: 0000000000022000 Fixes: 905ae01c4ae2 ("Add a reference to ucounts for each cred") Reported-by: syzbot+01985d7909f9468f013c@syzkaller.appspotmail.com Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/97433b1742c3331f02ad92de5a4f07d673c90613.1629735352.git.legion@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: Sasha Levin --- kernel/cred.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/cred.c b/kernel/cred.c index 098213d4a39c..8c0983fa794a 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -286,13 +286,13 @@ struct cred *prepare_creds(void) new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) - goto error; - new->ucounts = get_ucounts(new->ucounts); if (!new->ucounts) goto error; + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) + goto error; + validate_creds(new); return new; @@ -753,13 +753,13 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) #ifdef CONFIG_SECURITY new->security = NULL; #endif - if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) - goto error; - new->ucounts = get_ucounts(new->ucounts); if (!new->ucounts) goto error; + if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) + goto error; + put_cred(old); validate_creds(new); return new; From f517335a61ff8037b18ba1b0a002c1f82926a934 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 25 Aug 2021 00:33:48 +0200 Subject: [PATCH 037/105] net/sched: ets: fix crash when flipping from 'strict' to 'quantum' [ Upstream commit cd9b50adc6bb9ad3f7d244590a389522215865c4 ] While running kselftests, Hangbin observed that sch_ets.sh often crashes, and splats like the following one are seen in the output of 'dmesg': BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 159f12067 P4D 159f12067 PUD 159f13067 PMD 0 Oops: 0000 [#1] SMP NOPTI CPU: 2 PID: 921 Comm: tc Not tainted 5.14.0-rc6+ #458 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014 RIP: 0010:__list_del_entry_valid+0x2d/0x50 Code: 48 8b 57 08 48 b9 00 01 00 00 00 00 ad de 48 39 c8 0f 84 ac 6e 5b 00 48 b9 22 01 00 00 00 00 ad de 48 39 ca 0f 84 cf 6e 5b 00 <48> 8b 32 48 39 fe 0f 85 af 6e 5b 00 48 8b 50 08 48 39 f2 0f 85 94 RSP: 0018:ffffb2da005c3890 EFLAGS: 00010217 RAX: 0000000000000000 RBX: ffff9073ba23f800 RCX: dead000000000122 RDX: 0000000000000000 RSI: 0000000000000008 RDI: ffff9073ba23fbc8 RBP: ffff9073ba23f890 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000001 R12: dead000000000100 R13: ffff9073ba23fb00 R14: 0000000000000002 R15: 0000000000000002 FS: 00007f93e5564e40(0000) GS:ffff9073bba00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000014ad34000 CR4: 0000000000350ee0 Call Trace: ets_qdisc_reset+0x6e/0x100 [sch_ets] qdisc_reset+0x49/0x1d0 tbf_reset+0x15/0x60 [sch_tbf] qdisc_reset+0x49/0x1d0 dev_reset_queue.constprop.42+0x2f/0x90 dev_deactivate_many+0x1d3/0x3d0 dev_deactivate+0x56/0x90 qdisc_graft+0x47e/0x5a0 tc_get_qdisc+0x1db/0x3e0 rtnetlink_rcv_msg+0x164/0x4c0 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x1a5/0x280 netlink_sendmsg+0x242/0x480 sock_sendmsg+0x5b/0x60 ____sys_sendmsg+0x1f2/0x260 ___sys_sendmsg+0x7c/0xc0 __sys_sendmsg+0x57/0xa0 do_syscall_64+0x3a/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f93e44b8338 Code: 89 02 48 c7 c0 ff ff ff ff eb b5 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 25 43 2c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 41 54 41 89 d4 55 RSP: 002b:00007ffc0db737a8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 0000000061255c06 RCX: 00007f93e44b8338 RDX: 0000000000000000 RSI: 00007ffc0db73810 RDI: 0000000000000003 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 000000000000000b R11: 0000000000000246 R12: 0000000000000001 R13: 0000000000687880 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: sch_ets sch_tbf dummy rfkill iTCO_wdt iTCO_vendor_support intel_rapl_msr intel_rapl_common joydev i2c_i801 pcspkr i2c_smbus lpc_ich virtio_balloon ip_tables xfs libcrc32c crct10dif_pclmul crc32_pclmul crc32c_intel ahci libahci ghash_clmulni_intel libata serio_raw virtio_blk virtio_console virtio_net net_failover failover sunrpc dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 When the change() function decreases the value of 'nstrict', we must take into account that packets might be already enqueued on a class that flips from 'strict' to 'quantum': otherwise that class will not be added to the bandwidth-sharing list. Then, a call to ets_qdisc_reset() will attempt to do list_del(&alist) with 'alist' filled with zero, hence the NULL pointer dereference. For classes flipping from 'strict' to 'quantum', initialize an empty list and eventually add it to the bandwidth-sharing list, if there are packets already enqueued. In this way, the kernel will: a) prevent crashing as described above. b) avoid retaining the backlog packets (for an arbitrarily long time) in case no packet is enqueued after a change from 'strict' to 'quantum'. Reported-by: Hangbin Liu Fixes: dcc68b4d8084 ("net: sch_ets: Add a new Qdisc") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/sched/sch_ets.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c1e84d1eeaba..c76701ac35ab 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -660,6 +660,13 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, sch_tree_lock(sch); q->nbands = nbands; + for (i = nstrict; i < q->nstrict; i++) { + INIT_LIST_HEAD(&q->classes[i].alist); + if (q->classes[i].qdisc->q.qlen) { + list_add_tail(&q->classes[i].alist, &q->active); + q->classes[i].deficit = quanta[i]; + } + } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); From dced8347a727528b388f04820f48166f1e651af6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Aug 2021 16:17:28 -0700 Subject: [PATCH 038/105] ipv6: use siphash in rt6_exception_hash() [ Upstream commit 4785305c05b25a242e5314cc821f54ade4c18810 ] A group of security researchers brought to our attention the weakness of hash function used in rt6_exception_hash() Lets use siphash instead of Jenkins Hash, to considerably reduce security risks. Following patch deals with IPv4. Fixes: 35732d01fe31 ("ipv6: introduce a hash table to store dst cache") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Wei Wang Cc: Martin KaFai Lau Acked-by: Wei Wang Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/route.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 62db3c98424b..bcf4fae83a9b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -1482,17 +1483,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) static u32 rt6_exception_hash(const struct in6_addr *dst, const struct in6_addr *src) { - static u32 seed __read_mostly; - u32 val; + static siphash_key_t rt6_exception_key __read_mostly; + struct { + struct in6_addr dst; + struct in6_addr src; + } __aligned(SIPHASH_ALIGNMENT) combined = { + .dst = *dst, + }; + u64 val; - net_get_random_once(&seed, sizeof(seed)); - val = jhash2((const u32 *)dst, sizeof(*dst)/sizeof(u32), seed); + net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key)); #ifdef CONFIG_IPV6_SUBTREES if (src) - val = jhash2((const u32 *)src, sizeof(*src)/sizeof(u32), val); + combined.src = *src; #endif - return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); + val = siphash(&combined, sizeof(combined), &rt6_exception_key); + + return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); } /* Helper function to find the cached rt in the hash table From beefd5f0c63a31a83bc5a99e6888af884745684b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Aug 2021 16:17:29 -0700 Subject: [PATCH 039/105] ipv4: use siphash instead of Jenkins in fnhe_hashfun() [ Upstream commit 6457378fe796815c973f631a1904e147d6ee33b1 ] A group of security researchers brought to our attention the weakness of hash function used in fnhe_hashfun(). Lets use siphash instead of Jenkins Hash, to considerably reduce security risks. Also remove the inline keyword, this really is distracting. Fixes: d546c621542d ("ipv4: harden fnhe_hashfun()") Signed-off-by: Eric Dumazet Reported-by: Keyu Man Cc: Willy Tarreau Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/route.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e15c1d8b7c8d..3d9946fd41f3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -624,14 +624,14 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) return oldest; } -static inline u32 fnhe_hashfun(__be32 daddr) +static u32 fnhe_hashfun(__be32 daddr) { - static u32 fnhe_hashrnd __read_mostly; - u32 hval; + static siphash_key_t fnhe_hash_key __read_mostly; + u64 hval; - net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd)); - hval = jhash_1word((__force u32)daddr, fnhe_hashrnd); - return hash_32(hval, FNHE_HASH_SHIFT); + net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); + hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); + return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) From 51bc5c66606d5e155638a95266b674992e088e6e Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 26 Aug 2021 02:59:42 +0530 Subject: [PATCH 040/105] cxgb4: dont touch blocked freelist bitmap after free [ Upstream commit 43fed4d48d325e0a61dc2638a84da972fbb1087b ] When adapter init fails, the blocked freelist bitmap is already freed up and should not be touched. So, move the bitmap zeroing closer to where it was successfully allocated. Also handle adapter init failure unwind path immediately and avoid setting up RDMA memory windows. Fixes: 5b377d114f2b ("cxgb4: Add debugfs facility to inject FL starvation") Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6698afad4379..3c28a1c3c1ed 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -5072,6 +5072,7 @@ static int adap_init0(struct adapter *adap, int vpd_skip) ret = -ENOMEM; goto bye; } + bitmap_zero(adap->sge.blocked_fl, adap->sge.egr_sz); #endif params[0] = FW_PARAM_PFVF(CLIP_START); @@ -6792,13 +6793,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) setup_memwin(adapter); err = adap_init0(adapter, 0); -#ifdef CONFIG_DEBUG_FS - bitmap_zero(adapter->sge.blocked_fl, adapter->sge.egr_sz); -#endif - setup_memwin_rdma(adapter); if (err) goto out_unmap_bar; + setup_memwin_rdma(adapter); + /* configure SGE_STAT_CFG_A to read WC stats */ if (!is_t4(adapter->params.chip)) t4_write_reg(adapter, SGE_STAT_CFG_A, STATSOURCE_T5_V(7) | From ad0db838557587eac9a7c396b8ae9fe12a700c20 Mon Sep 17 00:00:00 2001 From: Andrey Ignatov Date: Wed, 25 Aug 2021 17:25:40 -0700 Subject: [PATCH 041/105] rtnetlink: Return correct error on changing device netns [ Upstream commit 96a6b93b69880b2c978e1b2be9cae6970b605008 ] Currently when device is moved between network namespaces using RTM_NEWLINK message type and one of netns attributes (FLA_NET_NS_PID, IFLA_NET_NS_FD, IFLA_TARGET_NETNSID) but w/o specifying IFLA_IFNAME, and target namespace already has device with same name, userspace will get EINVAL what is confusing and makes debugging harder. Fix it so that userspace gets more appropriate EEXIST instead what makes debugging much easier. Before: # ./ifname.sh + ip netns add ns0 + ip netns exec ns0 ip link add l0 type dummy + ip netns exec ns0 ip link show l0 8: l0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 66:90:b5:d5:78:69 brd ff:ff:ff:ff:ff:ff + ip link add l0 type dummy + ip link show l0 10: l0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 6e:c6:1f:15:20:8d brd ff:ff:ff:ff:ff:ff + ip link set l0 netns ns0 RTNETLINK answers: Invalid argument After: # ./ifname.sh + ip netns add ns0 + ip netns exec ns0 ip link add l0 type dummy + ip netns exec ns0 ip link show l0 8: l0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether 1e:4a:72:e3:e3:8f brd ff:ff:ff:ff:ff:ff + ip link add l0 type dummy + ip link show l0 10: l0: mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000 link/ether f2:fc:fe:2b:7d:a6 brd ff:ff:ff:ff:ff:ff + ip link set l0 netns ns0 RTNETLINK answers: File exists The problem is that do_setlink() passes its `char *ifname` argument, that it gets from a caller, to __dev_change_net_namespace() as is (as `const char *pat`), but semantics of ifname and pat can be different. For example, __rtnl_newlink() does this: net/core/rtnetlink.c 3270 char ifname[IFNAMSIZ]; ... 3286 if (tb[IFLA_IFNAME]) 3287 nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); 3288 else 3289 ifname[0] = '\0'; ... 3364 if (dev) { ... 3394 return do_setlink(skb, dev, ifm, extack, tb, ifname, status); 3395 } , i.e. do_setlink() gets ifname pointer that is always valid no matter if user specified IFLA_IFNAME or not and then do_setlink() passes this ifname pointer as is to __dev_change_net_namespace() as pat argument. But the pat (pattern) in __dev_change_net_namespace() is used as: net/core/dev.c 11198 err = -EEXIST; 11199 if (__dev_get_by_name(net, dev->name)) { 11200 /* We get here if we can't use the current device name */ 11201 if (!pat) 11202 goto out; 11203 err = dev_get_valid_name(net, dev, pat); 11204 if (err < 0) 11205 goto out; 11206 } As the result the `goto out` path on line 11202 is neven taken and instead of returning EEXIST defined on line 11198, __dev_change_net_namespace() returns an error from dev_get_valid_name() and this, in turn, will be EINVAL for ifname[0] = '\0' set earlier. Fixes: d8a5ec672768 ("[NET]: netlink support for moving devices between network namespaces.") Signed-off-by: Andrey Ignatov Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/core/rtnetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dd4659246405..7266571d5c7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2601,6 +2601,7 @@ static int do_setlink(const struct sk_buff *skb, return err; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) { + const char *pat = ifname && ifname[0] ? ifname : NULL; struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev), tb, CAP_NET_ADMIN); if (IS_ERR(net)) { @@ -2608,7 +2609,7 @@ static int do_setlink(const struct sk_buff *skb, goto errout; } - err = dev_change_net_namespace(dev, net, ifname); + err = dev_change_net_namespace(dev, net, pat); put_net(net); if (err) goto errout; From 9820af16a879f799d9e15ff556699fd0fc1ceabb Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Thu, 26 Aug 2021 19:21:55 +0800 Subject: [PATCH 042/105] net: hns3: clear hardware resource when loading driver [ Upstream commit 1a6d281946c330cee2855f6d0cd796616e54601f ] If a PF is bonded to a virtual machine and the virtual machine exits unexpectedly, some hardware resource cannot be cleared. In this case, loading driver may cause exceptions. Therefore, the hardware resource needs to be cleared when the driver is loaded. Fixes: 46a3df9f9718 ("net: hns3: Add HNS3 Acceleration Engine & Compatibility Layer Support") Signed-off-by: Yufeng Mo Signed-off-by: Salil Mehta Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../hisilicon/hns3/hns3pf/hclge_cmd.h | 3 +++ .../hisilicon/hns3/hns3pf/hclge_main.c | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index 36690fc5c1af..b38b48b9f0b1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -262,6 +262,9 @@ enum hclge_opcode_type { /* Led command */ HCLGE_OPC_LED_STATUS_CFG = 0xB000, + /* clear hardware resource command */ + HCLGE_OPC_CLEAR_HW_RESOURCE = 0x700B, + /* NCL config command */ HCLGE_OPC_QUERY_NCL_CONFIG = 0x7011, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 98190aa90781..c48c845472ca 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -10030,6 +10030,28 @@ static void hclge_clear_resetting_state(struct hclge_dev *hdev) } } +static int hclge_clear_hw_resource(struct hclge_dev *hdev) +{ + struct hclge_desc desc; + int ret; + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CLEAR_HW_RESOURCE, false); + + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + /* This new command is only supported by new firmware, it will + * fail with older firmware. Error value -EOPNOSUPP can only be + * returned by older firmware running this command, to keep code + * backward compatible we will override this value and return + * success. + */ + if (ret && ret != -EOPNOTSUPP) { + dev_err(&hdev->pdev->dev, + "failed to clear hw resource, ret = %d\n", ret); + return ret; + } + return 0; +} + static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) { struct pci_dev *pdev = ae_dev->pdev; @@ -10067,6 +10089,10 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) if (ret) goto err_cmd_uninit; + ret = hclge_clear_hw_resource(hdev); + if (ret) + goto err_cmd_uninit; + ret = hclge_get_cap(hdev); if (ret) goto err_cmd_uninit; From 5931ec35e992d8343ae6fdbce2206196289b0926 Mon Sep 17 00:00:00 2001 From: Yufeng Mo Date: Thu, 26 Aug 2021 19:21:56 +0800 Subject: [PATCH 043/105] net: hns3: add waiting time before cmdq memory is released [ Upstream commit a96d9330b02a3d051ae689bc2c5e7d3a2ba25594 ] After the cmdq registers are cleared, the firmware may take time to clear out possible left over commands in the cmdq. Driver must release cmdq memory only after firmware has completed processing of left over commands. Fixes: 232d0d55fca6 ("net: hns3: uninitialize command queue while unloading PF driver") Signed-off-by: Yufeng Mo Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 6 +++++- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 1 + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 7 ++++++- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h | 1 + 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c index e6321dda0f3f..6f9f759ce0c0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c @@ -521,9 +521,13 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) void hclge_cmd_uninit(struct hclge_dev *hdev) { + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + /* wait to ensure that the firmware completes the possible left + * over commands. + */ + msleep(HCLGE_CMDQ_CLEAR_WAIT_TIME); spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); hclge_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index b38b48b9f0b1..3d70c3a47d63 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -9,6 +9,7 @@ #include "hnae3.h" #define HCLGE_CMDQ_TX_TIMEOUT 30000 +#define HCLGE_CMDQ_CLEAR_WAIT_TIME 200 #define HCLGE_DESC_DATA_LEN 6 struct hclge_dev; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c index 66866c1cfb12..cae6db17cb19 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c @@ -472,12 +472,17 @@ static void hclgevf_cmd_uninit_regs(struct hclgevf_hw *hw) void hclgevf_cmd_uninit(struct hclgevf_dev *hdev) { + set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); + /* wait to ensure that the firmware completes the possible left + * over commands. + */ + msleep(HCLGEVF_CMDQ_CLEAR_WAIT_TIME); spin_lock_bh(&hdev->hw.cmq.csq.lock); spin_lock(&hdev->hw.cmq.crq.lock); - set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state); hclgevf_cmd_uninit_regs(&hdev->hw); spin_unlock(&hdev->hw.cmq.crq.lock); spin_unlock_bh(&hdev->hw.cmq.csq.lock); + hclgevf_free_cmd_desc(&hdev->hw.cmq.csq); hclgevf_free_cmd_desc(&hdev->hw.cmq.crq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h index 9460c128c095..f90ff8a84b7e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h @@ -8,6 +8,7 @@ #include "hnae3.h" #define HCLGEVF_CMDQ_TX_TIMEOUT 30000 +#define HCLGEVF_CMDQ_CLEAR_WAIT_TIME 200 #define HCLGEVF_CMDQ_RX_INVLD_B 0 #define HCLGEVF_CMDQ_RX_OUTVLD_B 1 From e834ca7c7924bf1ae19228035574dae29ad61e38 Mon Sep 17 00:00:00 2001 From: Guojia Liao Date: Thu, 26 Aug 2021 19:21:58 +0800 Subject: [PATCH 044/105] net: hns3: fix duplicate node in VLAN list [ Upstream commit 94391fae82f71c98ecc7716a32611fcca73c74eb ] VLAN list should not be added duplicate VLAN node, otherwise it would cause "add failed" when restore VLAN from VLAN list, so this patch adds VLAN ID check before adding node into VLAN list. Fixes: c6075b193462 ("net: hns3: Record VF vlan tables") Signed-off-by: Guojia Liao Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index c48c845472ca..2261de5caf86 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8792,7 +8792,11 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev) static void hclge_add_vport_vlan_table(struct hclge_vport *vport, u16 vlan_id, bool writen_to_tbl) { - struct hclge_vport_vlan_cfg *vlan; + struct hclge_vport_vlan_cfg *vlan, *tmp; + + list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) + if (vlan->vlan_id == vlan_id) + return; vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) From 411680a07cc6851f549cc43edecf4d317aa01304 Mon Sep 17 00:00:00 2001 From: Guangbin Huang Date: Thu, 26 Aug 2021 19:22:01 +0800 Subject: [PATCH 045/105] net: hns3: fix get wrong pfc_en when query PFC configuration [ Upstream commit 8c1671e0d13d4a0ba4fb3a0da932bf3736d7ff73 ] Currently, when query PFC configuration by dcbtool, driver will return PFC enable status based on TC. As all priorities are mapped to TC0 by default, if TC0 is enabled, then all priorities mapped to TC0 will be shown as enabled status when query PFC setting, even though some priorities have never been set. for example: $ dcb pfc show dev eth0 pfc-cap 4 macsec-bypass off delay 0 prio-pfc 0:off 1:off 2:off 3:off 4:off 5:off 6:off 7:off $ dcb pfc set dev eth0 prio-pfc 0:on 1:on 2:on 3:on $ dcb pfc show dev eth0 pfc-cap 4 macsec-bypass off delay 0 prio-pfc 0:on 1:on 2:on 3:on 4:on 5:on 6:on 7:on To fix this problem, just returns user's PFC config parameter saved in driver. Fixes: cacde272dd00 ("net: hns3: Add hclge_dcb module for the support of DCB feature") Signed-off-by: Guangbin Huang Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index 3606240025a8..a93c7eb4e7cb 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -283,21 +283,12 @@ static int hclge_ieee_getpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) u64 requests[HNAE3_MAX_TC], indications[HNAE3_MAX_TC]; struct hclge_vport *vport = hclge_get_vport(h); struct hclge_dev *hdev = vport->back; - u8 i, j, pfc_map, *prio_tc; int ret; + u8 i; memset(pfc, 0, sizeof(*pfc)); pfc->pfc_cap = hdev->pfc_max; - prio_tc = hdev->tm_info.prio_tc; - pfc_map = hdev->tm_info.hw_pfc_map; - - /* Pfc setting is based on TC */ - for (i = 0; i < hdev->tm_info.num_tc; i++) { - for (j = 0; j < HNAE3_MAX_USER_PRIO; j++) { - if ((prio_tc[j] == i) && (pfc_map & BIT(i))) - pfc->pfc_en |= BIT(j); - } - } + pfc->pfc_en = hdev->tm_info.pfc_en; ret = hclge_pfc_tx_stats_get(hdev, requests); if (ret) From ac874290e75cd5250bcb80632a32a6c64c6ac04a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 27 Aug 2021 16:30:36 +0200 Subject: [PATCH 046/105] Revert "mmc: sdhci-iproc: Set SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN on BCM2711" [ Upstream commit 885814a97f5a1a2daf66bde5f2076f0bf632c174 ] This reverts commit 419dd626e357e89fc9c4e3863592c8b38cfe1571. It turned out that the change from the reverted commit breaks the ACPI based rpi's because it causes the 100Mhz max clock to be overridden to the return from sdhci_iproc_get_max_clock(), which is 0 because there isn't a OF/DT based clock device. Reported-by: Jeremy Linton Fixes: 419dd626e357 ("mmc: sdhci-iproc: Set SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN on BCM2711") Acked-by: Stefan Wahren Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/sdhci-iproc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index 9f0eef97ebdd..b9eb2ec61a83 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -295,8 +295,7 @@ static const struct sdhci_ops sdhci_iproc_bcm2711_ops = { }; static const struct sdhci_pltfm_data sdhci_bcm2711_pltfm_data = { - .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 | - SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, + .quirks = SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12, .ops = &sdhci_iproc_bcm2711_ops, }; From b2091d47a14e8e6b3f03d792c1b25255d60b3219 Mon Sep 17 00:00:00 2001 From: Xiaoliang Yang Date: Mon, 5 Jul 2021 18:26:54 +0800 Subject: [PATCH 047/105] net: stmmac: add mutex lock to protect est parameters [ Upstream commit b2aae654a4794ef898ad33a179f341eb610f6b85 ] Add a mutex lock to protect est structure parameters so that the EST parameters can be updated by other threads. Signed-off-by: Xiaoliang Yang Signed-off-by: David S. Miller Signed-off-by: Pavel Machek (CIP) Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 14 +++++++++++++- include/linux/stmmac.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 40dc14d1415f..6bd9f0c50ffe 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -689,14 +689,18 @@ static int tc_setup_taprio(struct stmmac_priv *priv, GFP_KERNEL); if (!plat->est) return -ENOMEM; + + mutex_init(&priv->plat->est->lock); } else { memset(plat->est, 0, sizeof(*plat->est)); } size = qopt->num_entries; + mutex_lock(&priv->plat->est->lock); priv->plat->est->gcl_size = size; priv->plat->est->enable = qopt->enable; + mutex_unlock(&priv->plat->est->lock); for (i = 0; i < size; i++) { s64 delta_ns = qopt->entries[i].interval; @@ -727,6 +731,7 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->gcl[i] = delta_ns | (gates << wid); } + mutex_lock(&priv->plat->est->lock); /* Adjust for real system time */ priv->ptp_clock_ops.gettime64(&priv->ptp_clock_ops, ¤t_time); current_time_ns = timespec64_to_ktime(current_time); @@ -751,19 +756,23 @@ static int tc_setup_taprio(struct stmmac_priv *priv, priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC); priv->plat->est->ctr[1] = (u32)ctr; - if (fpe && !priv->dma_cap.fpesel) + if (fpe && !priv->dma_cap.fpesel) { + mutex_unlock(&priv->plat->est->lock); return -EOPNOTSUPP; + } ret = stmmac_fpe_configure(priv, priv->ioaddr, priv->plat->tx_queues_to_use, priv->plat->rx_queues_to_use, fpe); if (ret && fpe) { + mutex_unlock(&priv->plat->est->lock); netdev_err(priv->dev, "failed to enable Frame Preemption\n"); return ret; } ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); if (ret) { netdev_err(priv->dev, "failed to configure EST\n"); goto disable; @@ -773,9 +782,12 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return 0; disable: + mutex_lock(&priv->plat->est->lock); priv->plat->est->enable = false; stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + return ret; } diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 15ca6b4167cc..b56e1dedcf2f 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -112,6 +112,7 @@ struct stmmac_axi { #define EST_GCL 1024 struct stmmac_est { + struct mutex lock; int enable; u32 btr_offset[2]; u32 btr[2]; From e49b8d9c5e886ba50769d5eeb46dc1f76f01f5e1 Mon Sep 17 00:00:00 2001 From: Wong Vee Khee Date: Fri, 20 Aug 2021 21:26:22 +0800 Subject: [PATCH 048/105] net: stmmac: fix kernel panic due to NULL pointer dereference of plat->est [ Upstream commit 82a44ae113b7b35850f4542f0443fcab221e376a ] In the case of taprio offload is not enabled, the error handling path causes a kernel crash due to kernel NULL pointer deference. Fix this by adding check for NULL before attempt to access 'plat->est' on the mutex_lock() call. The following kernel panic is observed without this patch: RIP: 0010:mutex_lock+0x10/0x20 Call Trace: tc_setup_taprio+0x482/0x560 [stmmac] kmem_cache_alloc_trace+0x13f/0x490 taprio_disable_offload.isra.0+0x9d/0x180 [sch_taprio] taprio_destroy+0x6c/0x100 [sch_taprio] qdisc_create+0x2e5/0x4f0 tc_modify_qdisc+0x126/0x740 rtnetlink_rcv_msg+0x12b/0x380 _raw_spin_lock_irqsave+0x19/0x40 _raw_spin_unlock_irqrestore+0x18/0x30 create_object+0x212/0x340 rtnl_calcit.isra.0+0x110/0x110 netlink_rcv_skb+0x50/0x100 netlink_unicast+0x191/0x230 netlink_sendmsg+0x243/0x470 sock_sendmsg+0x5e/0x60 ____sys_sendmsg+0x20b/0x280 copy_msghdr_from_user+0x5c/0x90 __mod_memcg_state+0x87/0xf0 ___sys_sendmsg+0x7c/0xc0 lru_cache_add+0x7f/0xa0 _raw_spin_unlock+0x16/0x30 wp_page_copy+0x449/0x890 handle_mm_fault+0x921/0xfc0 __sys_sendmsg+0x59/0xa0 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 ---[ end trace b1f19b24368a96aa ]--- Fixes: b60189e0392f ("net: stmmac: Integrate EST with TAPRIO scheduler API") Cc: # 5.10.x Signed-off-by: Wong Vee Khee Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 6bd9f0c50ffe..639980306115 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -782,11 +782,13 @@ static int tc_setup_taprio(struct stmmac_priv *priv, return 0; disable: - mutex_lock(&priv->plat->est->lock); - priv->plat->est->enable = false; - stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, - priv->plat->clk_ptp_rate); - mutex_unlock(&priv->plat->est->lock); + if (priv->plat->est) { + mutex_lock(&priv->plat->est->lock); + priv->plat->est->enable = false; + stmmac_est_configure(priv, priv->ioaddr, priv->plat->est, + priv->plat->clk_ptp_rate); + mutex_unlock(&priv->plat->est->lock); + } return ret; } From 257ea8a5edc04d5199db83137fbaa24e1de98e9e Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Fri, 30 Jul 2021 12:53:42 -0700 Subject: [PATCH 049/105] drm/i915: Fix syncmap memory leak [ Upstream commit a63bcf08f0efb5348105bb8e0e1e8c6671077753 ] A small race exists between intel_gt_retire_requests_timeout and intel_timeline_exit which could result in the syncmap not getting free'd. Rather than work to hard to seal this race, simply cleanup the syncmap on fini. unreferenced object 0xffff88813bc53b18 (size 96): comm "gem_close_race", pid 5410, jiffies 4294917818 (age 1105.600s) hex dump (first 32 bytes): 01 00 00 00 00 00 00 00 00 00 00 00 0a 00 00 00 ................ 00 00 00 00 00 00 00 00 6b 6b 6b 6b 06 00 00 00 ........kkkk.... backtrace: [<00000000120b863a>] __sync_alloc_leaf+0x1e/0x40 [i915] [<00000000042f6959>] __sync_set+0x1bb/0x240 [i915] [<0000000090f0e90f>] i915_request_await_dma_fence+0x1c7/0x400 [i915] [<0000000056a48219>] i915_request_await_object+0x222/0x360 [i915] [<00000000aaac4ee3>] i915_gem_do_execbuffer+0x1bd0/0x2250 [i915] [<000000003c9d830f>] i915_gem_execbuffer2_ioctl+0x405/0xce0 [i915] [<00000000fd7a8e68>] drm_ioctl_kernel+0xb0/0xf0 [drm] [<00000000e721ee87>] drm_ioctl+0x305/0x3c0 [drm] [<000000008b0d8986>] __x64_sys_ioctl+0x71/0xb0 [<0000000076c362a4>] do_syscall_64+0x33/0x80 [<00000000eb7a4831>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Matthew Brost Fixes: 531958f6f357 ("drm/i915/gt: Track timeline activeness in enter/exit") Cc: Reviewed-by: John Harrison Signed-off-by: John Harrison Link: https://patchwork.freedesktop.org/patch/msgid/20210730195342.110234-1-matthew.brost@intel.com (cherry picked from commit faf890985e30d5e88cc3a7c50c1bcad32f89ab7c) Signed-off-by: Rodrigo Vivi Signed-off-by: Sasha Levin --- drivers/gpu/drm/i915/gt/intel_timeline.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 8015964043eb..e25385ad2c1e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -296,6 +296,14 @@ static void intel_timeline_fini(struct intel_timeline *timeline) i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); i915_vma_put(timeline->hwsp_ggtt); + + /* + * A small race exists between intel_gt_retire_requests_timeout and + * intel_timeline_exit which could result in the syncmap not getting + * free'd. Rather than work to hard to seal this race, simply cleanup + * the syncmap on fini. + */ + i915_syncmap_free(&timeline->sync); } struct intel_timeline * From ad5329a5332771b03acd469a12e702b1cb9a53dd Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Fri, 27 Aug 2021 11:29:27 +0200 Subject: [PATCH 050/105] usb: gadget: u_audio: fix race condition on endpoint stop [ Upstream commit 068fdad20454f815e61e6f6eb9f051a8b3120e88 ] If the endpoint completion callback is call right after the ep_enabled flag is cleared and before usb_ep_dequeue() is call, we could do a double free on the request and the associated buffer. Fix this by clearing ep_enabled after all the endpoint requests have been dequeued. Fixes: 7de8681be2cd ("usb: gadget: u_audio: Free requests only after callback") Cc: stable Reported-by: Thinh Nguyen Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20210827092927.366482-1-jbrunet@baylibre.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/usb/gadget/function/u_audio.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 908e49dafd62..95605b1ef4eb 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -334,8 +334,6 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) if (!prm->ep_enabled) return; - prm->ep_enabled = false; - audio_dev = uac->audio_dev; params = &audio_dev->params; @@ -353,11 +351,12 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) } } + prm->ep_enabled = false; + if (usb_ep_disable(ep)) dev_err(uac->card->dev, "%s:%d Error!\n", __func__, __LINE__); } - int u_audio_start_capture(struct g_audio *audio_dev) { struct snd_uac_chip *uac = audio_dev->uac; From c5600b914690d440bb9593d41cd57d98a85cac21 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 17 Aug 2021 12:47:55 -0500 Subject: [PATCH 051/105] dt-bindings: sifive-l2-cache: Fix 'select' matching [ Upstream commit 1c8094e394bceb4f1880f9d539bdd255c130826e ] When the schema fixups are applied to 'select' the result is a single entry is required for a match, but that will never match as there should be 2 entries. Also, a 'select' schema should have the widest possible match, so use 'contains' which matches the compatible string(s) in any position and not just the first position. Fixes: 993dcfac64eb ("dt-bindings: riscv: sifive-l2-cache: convert bindings to json-schema") Signed-off-by: Rob Herring Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- .../devicetree/bindings/riscv/sifive-l2-cache.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml index efc0198eeb74..5444be7667b6 100644 --- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml @@ -24,9 +24,9 @@ allOf: select: properties: compatible: - items: - - enum: - - sifive,fu540-c000-ccache + contains: + enum: + - sifive,fu540-c000-ccache required: - compatible From bdc5049c3698258ae9217532508374802e8a8847 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 6 Jul 2021 12:45:53 +0100 Subject: [PATCH 052/105] perf/x86/intel/uncore: Fix integer overflow on 23 bit left shift of a u32 [ Upstream commit 0b3a8738b76fe2087f7bc2bd59f4c78504c79180 ] The u32 variable pci_dword is being masked with 0x1fffffff and then left shifted 23 places. The shift is a u32 operation,so a value of 0x200 or more in pci_dword will overflow the u32 and only the bottow 32 bits are assigned to addr. I don't believe this was the original intent. Fix this by casting pci_dword to a resource_size_t to ensure no overflow occurs. Note that the mask and 12 bit left shift operation does not need this because the mask SNR_IMC_MMIO_MEM0_MASK and shift is always a 32 bit value. Fixes: ee49532b38dd ("perf/x86/intel/uncore: Add IMC uncore support for Snow Ridge") Addresses-Coverity: ("Unintentional integer overflow") Signed-off-by: Colin Ian King Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Kan Liang Link: https://lore.kernel.org/r/20210706114553.28249-1-colin.king@canonical.com Signed-off-by: Sasha Levin --- arch/x86/events/intel/uncore_snbep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 9c936d06fb61..2701f87a9a7c 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -4669,7 +4669,7 @@ static void __snr_uncore_mmio_init_box(struct intel_uncore_box *box, return; pci_read_config_dword(pdev, SNR_IMC_MMIO_BASE_OFFSET, &pci_dword); - addr = (pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; + addr = ((resource_size_t)pci_dword & SNR_IMC_MMIO_BASE_MASK) << 23; pci_read_config_dword(pdev, mem_offset, &pci_dword); addr |= (pci_dword & SNR_IMC_MMIO_MEM0_MASK) << 12; From 9a6a5602c2176b5ff68c9075f2c7b2aabe0d59b8 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Thu, 26 Aug 2021 09:17:21 -0500 Subject: [PATCH 053/105] clk: renesas: rcar-usb2-clock-sel: Fix kernel NULL pointer dereference [ Upstream commit 1669a941f7c4844ae808cf441db51dde9e94db07 ] The probe was manually passing NULL instead of dev to devm_clk_hw_register. This caused a Unable to handle kernel NULL pointer dereference error. Fix this by passing 'dev'. Signed-off-by: Adam Ford Fixes: a20a40a8bbc2 ("clk: renesas: rcar-usb2-clock-sel: Fix error handling in .probe()") Reviewed-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd Signed-off-by: Sasha Levin --- drivers/clk/renesas/rcar-usb2-clock-sel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/renesas/rcar-usb2-clock-sel.c b/drivers/clk/renesas/rcar-usb2-clock-sel.c index 0ccc6e709a38..7a64dcb7209e 100644 --- a/drivers/clk/renesas/rcar-usb2-clock-sel.c +++ b/drivers/clk/renesas/rcar-usb2-clock-sel.c @@ -190,7 +190,7 @@ static int rcar_usb2_clock_sel_probe(struct platform_device *pdev) init.num_parents = 0; priv->hw.init = &init; - ret = devm_clk_hw_register(NULL, &priv->hw); + ret = devm_clk_hw_register(dev, &priv->hw); if (ret) goto pm_put; From be37f7dbcd2c706c30b6ff10332fb7fd37211fbe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jul 2021 14:01:56 +0200 Subject: [PATCH 054/105] iwlwifi: pnvm: accept multiple HW-type TLVs [ Upstream commit 0f673c16c850250db386537a422c11d248fb123c ] Some products (So) may have two different types of products with different mac-type that are otherwise equivalent, and have the same PNVM data, so the PNVM file will contain two (or perhaps later more) HW-type TLVs. Accept the file and use the data section that contains any matching entry. Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20210719140154.a6a86e903035.Ic0b1b75c45d386698859f251518e8a5144431938@changeid Signed-off-by: Sasha Levin --- drivers/net/wireless/intel/iwlwifi/fw/pnvm.c | 25 +++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 37ce4fe136c5..cdea741be6f6 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -38,6 +38,7 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, u32 sha1 = 0; u16 mac_type = 0, rf_id = 0; u8 *pnvm_data = NULL, *tmp; + bool hw_match = false; u32 size = 0; int ret; @@ -84,6 +85,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, break; } + if (hw_match) + break; + mac_type = le16_to_cpup((__le16 *)data); rf_id = le16_to_cpup((__le16 *)(data + sizeof(__le16))); @@ -91,15 +95,9 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, "Got IWL_UCODE_TLV_HW_TYPE mac_type 0x%0x rf_id 0x%0x\n", mac_type, rf_id); - if (mac_type != CSR_HW_REV_TYPE(trans->hw_rev) || - rf_id != CSR_HW_RFID_TYPE(trans->hw_rf_id)) { - IWL_DEBUG_FW(trans, - "HW mismatch, skipping PNVM section, mac_type 0x%0x, rf_id 0x%0x.\n", - CSR_HW_REV_TYPE(trans->hw_rev), trans->hw_rf_id); - ret = -ENOENT; - goto out; - } - + if (mac_type == CSR_HW_REV_TYPE(trans->hw_rev) && + rf_id == CSR_HW_RFID_TYPE(trans->hw_rf_id)) + hw_match = true; break; case IWL_UCODE_TLV_SEC_RT: { struct iwl_pnvm_section *section = (void *)data; @@ -150,6 +148,15 @@ static int iwl_pnvm_handle_section(struct iwl_trans *trans, const u8 *data, } done: + if (!hw_match) { + IWL_DEBUG_FW(trans, + "HW mismatch, skipping PNVM section (need mac_type 0x%x rf_id 0x%x)\n", + CSR_HW_REV_TYPE(trans->hw_rev), + CSR_HW_RFID_TYPE(trans->hw_rf_id)); + ret = -ENOENT; + goto out; + } + if (!size) { IWL_DEBUG_FW(trans, "Empty PNVM, skipping.\n"); ret = -ENOENT; From 3dea931590389de002718d388117584f65035664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Mon, 26 Jul 2021 10:30:56 +0200 Subject: [PATCH 055/105] opp: remove WARN when no valid OPPs remain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 335ffab3ef864539e814b9a2903b0ae420c1c067 ] This WARN can be triggered per-core and the stack trace is not useful. Replace it with plain dev_err(). Fix a comment while at it. Signed-off-by: Michał Mirosław Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/opp/of.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 363277b31ecb..d92a1bfe1690 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -870,8 +870,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table) } } - /* There should be one of more OPP defined */ - if (WARN_ON(!count)) { + /* There should be one or more OPPs defined */ + if (!count) { + dev_err(dev, "%s: no supported OPPs", __func__); ret = -ENOENT; goto remove_static_opp; } From f41c7462d8ae8e0a8623b54280f397c0169a9008 Mon Sep 17 00:00:00 2001 From: Thara Gopinath Date: Wed, 4 Aug 2021 16:34:20 -0400 Subject: [PATCH 056/105] cpufreq: blocklist Qualcomm sm8150 in cpufreq-dt-platdev [ Upstream commit 5d79e5ce5489b489cbc4c327305be9dfca0fc9ce ] The Qualcomm sm8150 platform uses the qcom-cpufreq-hw driver, so add it to the cpufreq-dt-platdev driver's blocklist. Signed-off-by: Thara Gopinath Reviewed-by: Bjorn Andersson Signed-off-by: Viresh Kumar Signed-off-by: Sasha Levin --- drivers/cpufreq/cpufreq-dt-platdev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index 1c192a42f11e..a3734014db47 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -136,6 +136,7 @@ static const struct of_device_id blacklist[] __initconst = { { .compatible = "qcom,qcs404", }, { .compatible = "qcom,sc7180", }, { .compatible = "qcom,sdm845", }, + { .compatible = "qcom,sm8150", }, { .compatible = "st,stih407", }, { .compatible = "st,stih410", }, From 065a13c299b493ba63d526bbba4e44b2dbc2962e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:45 +0300 Subject: [PATCH 057/105] virtio: Improve vq->broken access to avoid any compiler optimization [ Upstream commit 60f0779862e4ab943810187752c462e85f5fa371 ] Currently vq->broken field is read by virtqueue_is_broken() in busy loop in one context by virtnet_send_command(). vq->broken is set to true in other process context by virtio_break_device(). Reader and writer are accessing it without any synchronization. This may lead to a compiler optimization which may result to optimize reading vq->broken only once. Hence, force reading vq->broken on each invocation of virtqueue_is_broken() and also force writing it so that such update is visible to the readers. It is a theoretical fix that isn't yet encountered in the field. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_ring.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6b7aa26c5384..6c730d6d50f7 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -2268,7 +2268,7 @@ bool virtqueue_is_broken(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - return vq->broken; + return READ_ONCE(vq->broken); } EXPORT_SYMBOL_GPL(virtqueue_is_broken); @@ -2283,7 +2283,9 @@ void virtio_break_device(struct virtio_device *dev) spin_lock(&dev->vqs_list_lock); list_for_each_entry(_vq, &dev->vqs, list) { struct vring_virtqueue *vq = to_vvq(_vq); - vq->broken = true; + + /* Pairs with READ_ONCE() in virtqueue_is_broken(). */ + WRITE_ONCE(vq->broken, true); } spin_unlock(&dev->vqs_list_lock); } From 0698278e8eefb22660b6fa27b002b4232131c146 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 21 Jul 2021 17:26:48 +0300 Subject: [PATCH 058/105] virtio_pci: Support surprise removal of virtio pci device [ Upstream commit 43bb40c5b92659966bdf4bfe584fde0a3575a049 ] When a virtio pci device undergo surprise removal (aka async removal in PCIe spec), mark the device as broken so that any upper layer drivers can abort any outstanding operation. When a virtio net pci device undergo surprise removal which is used by a NetworkManager, a below call trace was observed. kernel:watchdog: BUG: soft lockup - CPU#1 stuck for 26s! [kworker/1:1:27059] watchdog: BUG: soft lockup - CPU#1 stuck for 52s! [kworker/1:1:27059] CPU: 1 PID: 27059 Comm: kworker/1:1 Tainted: G S W I L 5.13.0-hotplug+ #8 Hardware name: Dell Inc. PowerEdge R640/0H28RR, BIOS 2.9.4 11/06/2020 Workqueue: events linkwatch_event RIP: 0010:virtnet_send_command+0xfc/0x150 [virtio_net] Call Trace: virtnet_set_rx_mode+0xcf/0x2a7 [virtio_net] ? __hw_addr_create_ex+0x85/0xc0 __dev_mc_add+0x72/0x80 igmp6_group_added+0xa7/0xd0 ipv6_mc_up+0x3c/0x60 ipv6_find_idev+0x36/0x80 addrconf_add_dev+0x1e/0xa0 addrconf_dev_config+0x71/0x130 addrconf_notify+0x1f5/0xb40 ? rtnl_is_locked+0x11/0x20 ? __switch_to_asm+0x42/0x70 ? finish_task_switch+0xaf/0x2c0 ? raw_notifier_call_chain+0x3e/0x50 raw_notifier_call_chain+0x3e/0x50 netdev_state_change+0x67/0x90 linkwatch_do_dev+0x3c/0x50 __linkwatch_run_queue+0xd2/0x220 linkwatch_event+0x21/0x30 process_one_work+0x1c8/0x370 worker_thread+0x30/0x380 ? process_one_work+0x370/0x370 kthread+0x118/0x140 ? set_kthread_struct+0x40/0x40 ret_from_fork+0x1f/0x30 Hence, add the ability to abort the command on surprise removal which prevents infinite loop and system lockup. Signed-off-by: Parav Pandit Link: https://lore.kernel.org/r/20210721142648.1525924-5-parav@nvidia.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/virtio/virtio_pci_common.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 222d630c41fc..b35bb2d57f62 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -576,6 +576,13 @@ static void virtio_pci_remove(struct pci_dev *pci_dev) struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev); struct device *dev = get_device(&vp_dev->vdev.dev); + /* + * Device is marked broken on surprise removal so that virtio upper + * layers can abort any ongoing operation. + */ + if (!pci_device_is_present(pci_dev)) + virtio_break_device(&vp_dev->vdev); + pci_disable_sriov(pci_dev); unregister_virtio_device(&vp_dev->vdev); From 6c074eaaf7855dfee8faa8a093940fff5e779ec3 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Thu, 1 Jul 2021 13:46:52 +0200 Subject: [PATCH 059/105] virtio_vdpa: reject invalid vq indices [ Upstream commit cb5d2c1f6cc0e5769099a7d44b9d08cf58cae206 ] Do not call vDPA drivers' callbacks with vq indicies larger than what the drivers indicate that they support. vDPA drivers do not bounds check the indices. Signed-off-by: Vincent Whitchurch Link: https://lore.kernel.org/r/20210701114652.21956-1-vincent.whitchurch@axis.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Sasha Levin --- drivers/virtio/virtio_vdpa.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c index 4a9ddb44b2a7..3f95dedccceb 100644 --- a/drivers/virtio/virtio_vdpa.c +++ b/drivers/virtio/virtio_vdpa.c @@ -149,6 +149,9 @@ virtio_vdpa_setup_vq(struct virtio_device *vdev, unsigned int index, if (!name) return NULL; + if (index >= vdpa->nvqs) + return ERR_PTR(-ENOENT); + /* Queue shouldn't already be set up. */ if (ops->get_vq_ready(vdpa, index)) return ERR_PTR(-ENOENT); From c7ee4d22614e43ae1d633864b70ac075469ad27f Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 25 Jun 2021 08:55:02 +0530 Subject: [PATCH 060/105] vringh: Use wiov->used to check for read/write desc order [ Upstream commit e74cfa91f42c50f7f649b0eca46aa049754ccdbd ] As __vringh_iov() traverses a descriptor chain, it populates each descriptor entry into either read or write vring iov and increments that iov's ->used member. So, as we iterate over a descriptor chain, at any point, (riov/wriov)->used value gives the number of descriptor enteries available, which are to be read or written by the device. As all read iovs must precede the write iovs, wiov->used should be zero when we are traversing a read descriptor. Current code checks for wiov->i, to figure out whether any previous entry in the current descriptor chain was a write descriptor. However, iov->i is only incremented, when these vring iovs are consumed, at a later point, and remain 0 in __vringh_iov(). So, correct the check for read and write descriptor order, to use wiov->used. Acked-by: Jason Wang Reviewed-by: Stefano Garzarella Signed-off-by: Neeraj Upadhyay Link: https://lore.kernel.org/r/1624591502-4827-1-git-send-email-neeraju@codeaurora.org Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- drivers/vhost/vringh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index b7403ba8e7f7..0bd7e64331f0 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -341,7 +341,7 @@ __vringh_iov(struct vringh *vrh, u16 i, iov = wiov; else { iov = riov; - if (unlikely(wiov && wiov->i)) { + if (unlikely(wiov && wiov->used)) { vringh_bad("Readable desc %p after writable", &descs[i]); err = -EINVAL; From 4ac9c81e8a541dd3fb53127cb9184a0d79341e38 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 10 Aug 2021 12:26:05 -0400 Subject: [PATCH 061/105] tools/virtio: fix build [ Upstream commit a24ce06c70fe7df795a846ad713ccaa9b56a7666 ] We use a spinlock now so add a stub. Ignore bogus uninitialized variable warnings. Signed-off-by: Michael S. Tsirkin Signed-off-by: Sasha Levin --- tools/virtio/Makefile | 3 +- tools/virtio/linux/spinlock.h | 56 +++++++++++++++++++++++++++++++++++ tools/virtio/linux/virtio.h | 2 ++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 tools/virtio/linux/spinlock.h diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index b587b9a7a124..0d7bbe49359d 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -4,7 +4,8 @@ test: virtio_test vringh_test virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o -CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h +LDFLAGS += -lpthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} diff --git a/tools/virtio/linux/spinlock.h b/tools/virtio/linux/spinlock.h new file mode 100644 index 000000000000..028e3cdcc5d3 --- /dev/null +++ b/tools/virtio/linux/spinlock.h @@ -0,0 +1,56 @@ +#ifndef SPINLOCK_H_STUB +#define SPINLOCK_H_STUB + +#include + +typedef pthread_spinlock_t spinlock_t; + +static inline void spin_lock_init(spinlock_t *lock) +{ + int r = pthread_spin_init(lock, 0); + assert(!r); +} + +static inline void spin_lock(spinlock_t *lock) +{ + int ret = pthread_spin_lock(lock); + assert(!ret); +} + +static inline void spin_unlock(spinlock_t *lock) +{ + int ret = pthread_spin_unlock(lock); + assert(!ret); +} + +static inline void spin_lock_bh(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_bh(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irq(spinlock_t *lock) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irq(spinlock_t *lock) +{ + spin_unlock(lock); +} + +static inline void spin_lock_irqsave(spinlock_t *lock, unsigned long f) +{ + spin_lock(lock); +} + +static inline void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f) +{ + spin_unlock(lock); +} + +#endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 5d90254ddae4..363b98228301 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -3,6 +3,7 @@ #define LINUX_VIRTIO_H #include #include +#include struct device { void *parent; @@ -12,6 +13,7 @@ struct virtio_device { struct device dev; u64 features; struct list_head vqs; + spinlock_t vqs_list_lock; }; struct virtqueue { From 18a65ba06903862a5c64ff66f464a57f488a7298 Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:05:08 +0300 Subject: [PATCH 062/105] qed: qed ll2 race condition fixes [ Upstream commit 37110237f31105d679fc0aa7b11cdec867750ea7 ] Avoiding qed ll2 race condition and NULL pointer dereference as part of the remove and recovery flows. Changes form V1: - Change (!p_rx->set_prod_addr). - qed_ll2.c checkpatch fixes. Change from V2: - Revert "qed_ll2.c checkpatch fixes". Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 49783f365079..f2c8273dce67 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -327,6 +327,9 @@ static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) unsigned long flags; int rc = -EINVAL; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_tx->lock, flags); if (p_tx->b_completing_packet) { rc = -EBUSY; @@ -500,7 +503,16 @@ static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) unsigned long flags = 0; int rc = 0; + if (!p_ll2_conn) + return rc; + spin_lock_irqsave(&p_rx->lock, flags); + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) { + spin_unlock_irqrestore(&p_rx->lock, flags); + return 0; + } + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); @@ -821,6 +833,9 @@ static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) return 0; @@ -844,6 +859,9 @@ static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) u16 new_idx = 0, num_bds = 0; int rc; + if (!p_ll2_conn) + return 0; + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) return 0; @@ -1725,6 +1743,8 @@ int qed_ll2_post_rx_buffer(void *cxt, if (!p_ll2_conn) return -EINVAL; p_rx = &p_ll2_conn->rx_queue; + if (!p_rx->set_prod_addr) + return -EIO; spin_lock_irqsave(&p_rx->lock, flags); if (!list_empty(&p_rx->free_descq)) From cc126b400b25f7fa0d86d02b9cdbaa45759566aa Mon Sep 17 00:00:00 2001 From: Shai Malin Date: Sun, 15 Aug 2021 14:06:39 +0300 Subject: [PATCH 063/105] qed: Fix null-pointer dereference in qed_rdma_create_qp() [ Upstream commit d33d19d313d3466abdf8b0428be7837aff767802 ] Fix a possible null-pointer dereference in qed_rdma_create_qp(). Changes from V2: - Revert checkpatch fixes. Reported-by: TOTE Robot Signed-off-by: Ariel Elior Signed-off-by: Shai Malin Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/ethernet/qlogic/qed/qed_rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c index da864d12916b..4f4b79250a2b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c +++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c @@ -1285,8 +1285,7 @@ qed_rdma_create_qp(void *rdma_cxt, if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info->active) { - DP_ERR(p_hwfn->cdev, - "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + pr_err("qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", rdma_cxt, in_params, out_params); return NULL; } From 3c37ec4350220a548ffc6753646913899e86b1c7 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 13 Aug 2021 14:20:33 +0800 Subject: [PATCH 064/105] Revert "drm/amd/pm: fix workload mismatch on vega10" [ Upstream commit 2fd31689f9e44af949f60ff4f8aca013e628ab81 ] This reverts commit 0979d43259e13846d86ba17e451e17fec185d240. Revert this because it does not apply to all the cards. Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 132c269c7c89..ed4eafc744d3 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5159,7 +5159,7 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui out: smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, - (!power_profile_mode) ? 0 : 1 << (power_profile_mode - 1), + 1 << power_profile_mode, NULL); hwmgr->power_profile_mode = power_profile_mode; From b00ca567579a4c2f9a4cd6f9a63946f793e8b506 Mon Sep 17 00:00:00 2001 From: Kenneth Feng Date: Fri, 13 Aug 2021 14:40:18 +0800 Subject: [PATCH 065/105] drm/amd/pm: change the workload type for some cards [ Upstream commit 93c5701b00d50d192ce2247cb10d6c0b3fe25cd8 ] change the workload type for some cards as it is needed. Signed-off-by: Kenneth Feng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index ed4eafc744d3..4dc27ec4d012 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -5122,6 +5122,13 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf) return size; } +static bool vega10_get_power_profile_mode_quirks(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + return (adev->pdev->device == 0x6860); +} + static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, uint32_t size) { struct vega10_hwmgr *data = hwmgr->backend; @@ -5158,9 +5165,15 @@ static int vega10_set_power_profile_mode(struct pp_hwmgr *hwmgr, long *input, ui } out: - smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + if (vega10_get_power_profile_mode_quirks(hwmgr)) + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, 1 << power_profile_mode, NULL); + else + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetWorkloadMask, + (!power_profile_mode) ? 0 : 1 << (power_profile_mode - 1), + NULL); + hwmgr->power_profile_mode = power_profile_mode; return 0; From 26ee94ba343c63d9d23112521c68fa72c82a8805 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 11 Aug 2021 23:52:02 +0800 Subject: [PATCH 066/105] blk-mq: don't grab rq's refcount in blk_mq_check_expired() [ Upstream commit c797b40ccc340b8a66f7a7842aecc90bf749f087 ] Inside blk_mq_queue_tag_busy_iter() we already grabbed request's refcount before calling ->fn(), so needn't to grab it one more time in blk_mq_check_expired(). Meantime remove extra request expire check in blk_mq_check_expired(). Cc: Keith Busch Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Reviewed-by: John Garry Link: https://lore.kernel.org/r/20210811155202.629575-1-ming.lei@redhat.com Signed-off-by: Jens Axboe Signed-off-by: Sasha Levin --- block/blk-mq.c | 30 +++++------------------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a368eb6dc647..044d0e3a15ad 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -941,34 +941,14 @@ static bool blk_mq_check_expired(struct blk_mq_hw_ctx *hctx, unsigned long *next = priv; /* - * Just do a quick check if it is expired before locking the request in - * so we're not unnecessarilly synchronizing across CPUs. - */ - if (!blk_mq_req_expired(rq, next)) - return true; - - /* - * We have reason to believe the request may be expired. Take a - * reference on the request to lock this request lifetime into its - * currently allocated context to prevent it from being reallocated in - * the event the completion by-passes this timeout handler. - * - * If the reference was already released, then the driver beat the - * timeout handler to posting a natural completion. - */ - if (!refcount_inc_not_zero(&rq->ref)) - return true; - - /* - * The request is now locked and cannot be reallocated underneath the - * timeout handler's processing. Re-verify this exact request is truly - * expired; if it is not expired, then the request was completed and - * reallocated as a new request. + * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot + * be reallocated underneath the timeout handler's processing, then + * the expire check is reliable. If the request is not expired, then + * it was completed and reallocated as a new request after returning + * from blk_mq_check_expired(). */ if (blk_mq_req_expired(rq, next)) blk_mq_rq_timed_out(rq, reserved); - - blk_mq_put_rq_ref(rq); return true; } From 6fd6e20520ccd05a1ac3321404dd498cc28576cb Mon Sep 17 00:00:00 2001 From: Mark Yacoub Date: Thu, 12 Aug 2021 15:49:17 -0400 Subject: [PATCH 067/105] drm: Copy drm_wait_vblank to user before returning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit fa0b1ef5f7a694f48e00804a391245f3471aa155 ] [Why] Userspace should get back a copy of drm_wait_vblank that's been modified even when drm_wait_vblank_ioctl returns a failure. Rationale: drm_wait_vblank_ioctl modifies the request and expects the user to read it back. When the type is RELATIVE, it modifies it to ABSOLUTE and updates the sequence to become current_vblank_count + sequence (which was RELATIVE), but now it became ABSOLUTE. drmWaitVBlank (in libdrm) expects this to be the case as it modifies the request to be Absolute so it expects the sequence to would have been updated. The change is in compat_drm_wait_vblank, which is called by drm_compat_ioctl. This change of copying the data back regardless of the return number makes it en par with drm_ioctl, which always copies the data before returning. [How] Return from the function after everything has been copied to user. Fixes IGT:kms_flip::modeset-vs-vblank-race-interruptible Tested on ChromeOS Trogdor(msm) Reviewed-by: Michel Dänzer Signed-off-by: Mark Yacoub Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20210812194917.1703356-1-markyacoub@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/drm_ioc32.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index dc734d4828a1..aaf8d625ce1a 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -865,8 +865,6 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; err = drm_ioctl_kernel(file, drm_wait_vblank_ioctl, &req, DRM_UNLOCKED); - if (err) - return err; req32.reply.type = req.reply.type; req32.reply.sequence = req.reply.sequence; @@ -875,7 +873,7 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_to_user(argp, &req32, sizeof(req32))) return -EFAULT; - return 0; + return err; } #if defined(CONFIG_X86) From 7f422cda03a62b7e9355ea8dded431c066dca3b1 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 9 Aug 2021 16:40:48 +1000 Subject: [PATCH 068/105] drm/nouveau/disp: power down unused DP links during init [ Upstream commit 6eaa1f3c59a707332e921e32782ffcad49915c5e ] When booted with multiple displays attached, the EFI GOP driver on (at least) Ampere, can leave DP links powered up that aren't being used to display anything. This confuses our tracking of SOR routing, with the likely result being a failed modeset and display engine hang. Fix this by (ab?)using the DisableLT IED script to power-down the link, restoring HW to a state the driver expects. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c | 2 +- drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h | 1 + drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c index 3800aeb507d0..2a7b8bc3ec4d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c @@ -419,7 +419,7 @@ nvkm_dp_train(struct nvkm_dp *dp, u32 dataKBps) return ret; } -static void +void nvkm_dp_disable(struct nvkm_outp *outp, struct nvkm_ior *ior) { struct nvkm_dp *dp = nvkm_dp(outp); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h index 428b3f488f03..e484d0c3b0d4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h @@ -32,6 +32,7 @@ struct nvkm_dp { int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *, struct nvkm_outp **); +void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *); /* DPCD Receiver Capabilities */ #define DPCD_RC00_DPCD_REV 0x00000 diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c index dffcac249211..129982fef7ef 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.c @@ -22,6 +22,7 @@ * Authors: Ben Skeggs */ #include "outp.h" +#include "dp.h" #include "ior.h" #include @@ -257,6 +258,14 @@ nvkm_outp_init_route(struct nvkm_outp *outp) if (!ior->arm.head || ior->arm.proto != proto) { OUTP_DBG(outp, "no heads (%x %d %d)", ior->arm.head, ior->arm.proto, proto); + + /* The EFI GOP driver on Ampere can leave unused DP links routed, + * which we don't expect. The DisableLT IED script *should* get + * us back to where we need to be. + */ + if (ior->func->route.get && !ior->arm.head && outp->info.type == DCB_OUTPUT_DP) + nvkm_dp_disable(outp, ior); + return; } From b882dda2bf7a7b4eef9568c00f03365f77e2e17f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 10 Aug 2021 19:29:57 +1000 Subject: [PATCH 069/105] drm/nouveau/kms/nv50: workaround EFI GOP window channel format differences [ Upstream commit e78b1b545c6cfe9f87fc577128e00026fff230ba ] Should fix some initial modeset failures on (at least) Ampere boards. Signed-off-by: Ben Skeggs Reviewed-by: Lyude Paul Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/dispnv50/disp.c | 27 +++++++++++++++++++++++++ drivers/gpu/drm/nouveau/dispnv50/head.c | 13 ++++++++---- drivers/gpu/drm/nouveau/dispnv50/head.h | 1 + 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 5b8cabb099eb..c2d34c91e840 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2202,6 +2202,33 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state) interlock[NV50_DISP_INTERLOCK_CORE] = 0; } + /* Finish updating head(s)... + * + * NVD is rather picky about both where window assignments can change, + * *and* about certain core and window channel states matching. + * + * The EFI GOP driver on newer GPUs configures window channels with a + * different output format to what we do, and the core channel update + * in the assign_windows case above would result in a state mismatch. + * + * Delay some of the head update until after that point to workaround + * the issue. This only affects the initial modeset. + * + * TODO: handle this better when adding flexible window mapping + */ + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + struct nv50_head_atom *asyh = nv50_head_atom(new_crtc_state); + struct nv50_head *head = nv50_head(crtc); + + NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name, + asyh->set.mask, asyh->clr.mask); + + if (asyh->set.mask) { + nv50_head_flush_set_wndw(head, asyh); + interlock[NV50_DISP_INTERLOCK_CORE] = 1; + } + } + /* Update plane(s). */ for_each_new_plane_in_state(state, plane, new_plane_state, i) { struct nv50_wndw_atom *asyw = nv50_wndw_atom(new_plane_state); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index 841edfaf5b9d..61826cac3061 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -49,11 +49,8 @@ nv50_head_flush_clr(struct nv50_head *head, } void -nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) +nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh) { - if (asyh->set.view ) head->func->view (head, asyh); - if (asyh->set.mode ) head->func->mode (head, asyh); - if (asyh->set.core ) head->func->core_set(head, asyh); if (asyh->set.olut ) { asyh->olut.offset = nv50_lut_load(&head->olut, asyh->olut.buffer, @@ -61,6 +58,14 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) asyh->olut.load); head->func->olut_set(head, asyh); } +} + +void +nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) +{ + if (asyh->set.view ) head->func->view (head, asyh); + if (asyh->set.mode ) head->func->mode (head, asyh); + if (asyh->set.core ) head->func->core_set(head, asyh); if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.base ) head->func->base (head, asyh); if (asyh->set.ovly ) head->func->ovly (head, asyh); diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h index dae841dc05fd..0bac6be9ba34 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.h +++ b/drivers/gpu/drm/nouveau/dispnv50/head.h @@ -21,6 +21,7 @@ struct nv50_head { struct nv50_head *nv50_head_create(struct drm_device *, int index); void nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh); +void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh); void nv50_head_flush_clr(struct nv50_head *head, struct nv50_head_atom *asyh, bool flush); From 6f38d95f33be52993033a2a893fc415ff9133196 Mon Sep 17 00:00:00 2001 From: Gerd Rausch Date: Tue, 17 Aug 2021 10:04:37 -0700 Subject: [PATCH 070/105] net/rds: dma_map_sg is entitled to merge entries [ Upstream commit fb4b1373dcab086d0619c29310f0466a0b2ceb8a ] Function "dma_map_sg" is entitled to merge adjacent entries and return a value smaller than what was passed as "nents". Subsequently "ib_map_mr_sg" needs to work with this value ("sg_dma_len") rather than the original "nents" parameter ("sg_len"). This old RDS bug was exposed and reliably causes kernel panics (using RDMA operations "rds-stress -D") on x86_64 starting with: commit c588072bba6b ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Simply put: Linux 5.11 and later. Signed-off-by: Gerd Rausch Acked-by: Santosh Shilimkar Link: https://lore.kernel.org/r/60efc69f-1f35-529d-a7ef-da0549cad143@oracle.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/rds/ib_frmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c index 9b6ffff72f2d..28c1b0022178 100644 --- a/net/rds/ib_frmr.c +++ b/net/rds/ib_frmr.c @@ -131,9 +131,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) cpu_relax(); } - ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len, + ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, &off, PAGE_SIZE); - if (unlikely(ret != ibmr->sg_len)) + if (unlikely(ret != ibmr->sg_dma_len)) return ret < 0 ? ret : -EINVAL; if (cmpxchg(&frmr->fr_state, From d845f89d59fc3f17ea4e86321b82d8edf6c1719f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 23 Feb 2021 12:08:48 +0000 Subject: [PATCH 071/105] btrfs: fix race between marking inode needs to be logged and log syncing commit bc0939fcfab0d7efb2ed12896b1af3d819954a14 upstream. We have a race between marking that an inode needs to be logged, either at btrfs_set_inode_last_trans() or at btrfs_page_mkwrite(), and between btrfs_sync_log(). The following steps describe how the race happens. 1) We are at transaction N; 2) Inode I was previously fsynced in the current transaction so it has: inode->logged_trans set to N; 3) The inode's root currently has: root->log_transid set to 1 root->last_log_commit set to 0 Which means only one log transaction was committed to far, log transaction 0. When a log tree is created we set ->log_transid and ->last_log_commit of its parent root to 0 (at btrfs_add_log_tree()); 4) One more range of pages is dirtied in inode I; 5) Some task A starts an fsync against some other inode J (same root), and so it joins log transaction 1. Before task A calls btrfs_sync_log()... 6) Task B starts an fsync against inode I, which currently has the full sync flag set, so it starts delalloc and waits for the ordered extent to complete before calling btrfs_inode_in_log() at btrfs_sync_file(); 7) During ordered extent completion we have btrfs_update_inode() called against inode I, which in turn calls btrfs_set_inode_last_trans(), which does the following: spin_lock(&inode->lock); inode->last_trans = trans->transaction->transid; inode->last_sub_trans = inode->root->log_transid; inode->last_log_commit = inode->root->last_log_commit; spin_unlock(&inode->lock); So ->last_trans is set to N and ->last_sub_trans set to 1. But before setting ->last_log_commit... 8) Task A is at btrfs_sync_log(): - it increments root->log_transid to 2 - starts writeback for all log tree extent buffers - waits for the writeback to complete - writes the super blocks - updates root->last_log_commit to 1 It's a lot of slow steps between updating root->log_transid and root->last_log_commit; 9) The task doing the ordered extent completion, currently at btrfs_set_inode_last_trans(), then finally runs: inode->last_log_commit = inode->root->last_log_commit; spin_unlock(&inode->lock); Which results in inode->last_log_commit being set to 1. The ordered extent completes; 10) Task B is resumed, and it calls btrfs_inode_in_log() which returns true because we have all the following conditions met: inode->logged_trans == N which matches fs_info->generation && inode->last_subtrans (1) <= inode->last_log_commit (1) && inode->last_subtrans (1) <= root->last_log_commit (1) && list inode->extent_tree.modified_extents is empty And as a consequence we return without logging the inode, so the existing logged version of the inode does not point to the extent that was written after the previous fsync. It should be impossible in practice for one task be able to do so much progress in btrfs_sync_log() while another task is at btrfs_set_inode_last_trans() right after it reads root->log_transid and before it reads root->last_log_commit. Even if kernel preemption is enabled we know the task at btrfs_set_inode_last_trans() can not be preempted because it is holding the inode's spinlock. However there is another place where we do the same without holding the spinlock, which is in the memory mapped write path at: vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf) { (...) BTRFS_I(inode)->last_trans = fs_info->generation; BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; (...) So with preemption happening after setting ->last_sub_trans and before setting ->last_log_commit, it is less of a stretch to have another task do enough progress at btrfs_sync_log() such that the task doing the memory mapped write ends up with ->last_sub_trans and ->last_log_commit set to the same value. It is still a big stretch to get there, as the task doing btrfs_sync_log() has to start writeback, wait for its completion and write the super blocks. So fix this in two different ways: 1) For btrfs_set_inode_last_trans(), simply set ->last_log_commit to the value of ->last_sub_trans minus 1; 2) For btrfs_page_mkwrite() only set the inode's ->last_sub_trans, just like we do for buffered and direct writes at btrfs_file_write_iter(), which is all we need to make sure multiple writes and fsyncs to an inode in the same transaction never result in an fsync missing that the inode changed and needs to be logged. Turn this into a helper function and use it both at btrfs_page_mkwrite() and at btrfs_file_write_iter() - this also fixes the problem that at btrfs_page_mkwrite() we were setting those fields without the protection of the inode's spinlock. This is an extremely unlikely race to happen in practice. Signed-off-by: Filipe Manana Signed-off-by: David Sterba Signed-off-by: Anand Jain Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/btrfs_inode.h | 15 +++++++++++++++ fs/btrfs/file.c | 11 ++--------- fs/btrfs/inode.c | 4 +--- fs/btrfs/transaction.h | 2 +- 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8de4bf8edb9c..5a43f8e07122 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -308,6 +308,21 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode, mod); } +/* + * Called every time after doing a buffered, direct IO or memory mapped write. + * + * This is to ensure that if we write to a file that was previously fsynced in + * the current transaction, then try to fsync it again in the same transaction, + * we will know that there were changes in the file and that it needs to be + * logged. + */ +static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode) +{ + spin_lock(&inode->lock); + inode->last_sub_trans = inode->root->log_transid; + spin_unlock(&inode->lock); +} + static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation) { int ret = 0; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ffa48ac98d1e..6ab91661cd26 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1862,7 +1862,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); - struct btrfs_root *root = BTRFS_I(inode)->root; u64 start_pos; u64 end_pos; ssize_t num_written = 0; @@ -2006,14 +2005,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, inode_unlock(inode); - /* - * We also have to set last_sub_trans to the current log transid, - * otherwise subsequent syncs to a file that's been synced in this - * transaction will appear to have already occurred. - */ - spin_lock(&BTRFS_I(inode)->lock); - BTRFS_I(inode)->last_sub_trans = root->log_transid; - spin_unlock(&BTRFS_I(inode)->lock); + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); + if (num_written > 0) num_written = generic_write_sync(iocb, num_written); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d24dbb1e8850..69c6786a9fdf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8449,9 +8449,7 @@ again: set_page_dirty(page); SetPageUptodate(page); - BTRFS_I(inode)->last_trans = fs_info->generation; - BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; - BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->root->last_log_commit; + btrfs_set_inode_last_sub_trans(BTRFS_I(inode)); unlock_extent_cached(io_tree, page_start, page_end, &cached_state); diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 858d9153a1cd..f73654d93fa0 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -171,7 +171,7 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, spin_lock(&inode->lock); inode->last_trans = trans->transaction->transid; inode->last_sub_trans = inode->root->log_transid; - inode->last_log_commit = inode->root->last_log_commit; + inode->last_log_commit = inode->last_sub_trans - 1; spin_unlock(&inode->lock); } From e91da23c1be16ebcfca0991976ed9377a8233935 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 5 Aug 2021 10:04:43 -0700 Subject: [PATCH 072/105] pipe: avoid unnecessary EPOLLET wakeups under normal loads commit 3b844826b6c6affa80755254da322b017358a2f4 upstream. I had forgotten just how sensitive hackbench is to extra pipe wakeups, and commit 3a34b13a88ca ("pipe: make pipe writes always wake up readers") ended up causing a quite noticeable regression on larger machines. Now, hackbench isn't necessarily a hugely meaningful benchmark, and it's not clear that this matters in real life all that much, but as Mel points out, it's used often enough when comparing kernels and so the performance regression shows up like a sore thumb. It's easy enough to fix at least for the common cases where pipes are used purely for data transfer, and you never have any exciting poll usage at all. So set a special 'poll_usage' flag when there is polling activity, and make the ugly "EPOLLET has crazy legacy expectations" semantics explicit to only that case. I would love to limit it to just the broken EPOLLET case, but the pipe code can't see the difference between epoll and regular select/poll, so any non-read/write waiting will trigger the extra wakeup behavior. That is sufficient for at least the hackbench case. Apart from making the odd extra wakeup cases more explicitly about EPOLLET, this also makes the extra wakeup be at the _end_ of the pipe write, not at the first write chunk. That is actually much saner semantics (as much as you can call any of the legacy edge-triggered expectations for EPOLLET "sane") since it means that you know the wakeup will happen once the write is done, rather than possibly in the middle of one. [ For stable people: I'm putting a "Fixes" tag on this, but I leave it up to you to decide whether you actually want to backport it or not. It likely has no impact outside of synthetic benchmarks - Linus ] Link: https://lore.kernel.org/lkml/20210802024945.GA8372@xsang-OptiPlex-9020/ Fixes: 3a34b13a88ca ("pipe: make pipe writes always wake up readers") Reported-by: kernel test robot Tested-by: Sandeep Patil Tested-by: Mel Gorman Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 15 +++++++++------ include/linux/pipe_fs_i.h | 2 ++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 28b2e973f10e..48abe65333c4 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -444,9 +444,6 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Epoll nonsensically wants a wakeup whether the pipe - * was already empty or not. - * * If it wasn't empty we try to merge new data into * the last buffer. * @@ -455,9 +452,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * spanning multiple pages. */ head = pipe->head; - was_empty = true; + was_empty = pipe_empty(head, pipe->tail); chars = total_len & (PAGE_SIZE-1); - if (chars && !pipe_empty(head, pipe->tail)) { + if (chars && !was_empty) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -590,8 +587,11 @@ out: * This is particularly important for small writes, because of * how (for example) the GNU make jobserver uses small writes to * wake up pending jobs + * + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. */ - if (was_empty) { + if (was_empty || pipe->poll_usage) { wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } @@ -654,6 +654,9 @@ pipe_poll(struct file *filp, poll_table *wait) struct pipe_inode_info *pipe = filp->private_data; unsigned int head, tail; + /* Epoll has some historical nasty semantics, this enables them */ + pipe->poll_usage = 1; + /* * Reading pipe state only -- no need for acquiring the semaphore. * diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 5d2705f1d01c..fc5642431b92 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -48,6 +48,7 @@ struct pipe_buffer { * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter + * @poll_usage: is this pipe used for epoll, which has crazy wakeups? * @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers @@ -70,6 +71,7 @@ struct pipe_inode_info { unsigned int files; unsigned int r_counter; unsigned int w_counter; + unsigned int poll_usage; struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; From 3b2018f9c9c088741d7d33a2baf9aa39e93d58c5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 24 Aug 2021 10:39:25 -0700 Subject: [PATCH 073/105] pipe: do FASYNC notifications for every pipe IO, not just state changes commit fe67f4dd8daa252eb9aa7acb61555f3cc3c1ce4c upstream. It turns out that the SIGIO/FASYNC situation is almost exactly the same as the EPOLLET case was: user space really wants to be notified after every operation. Now, in a perfect world it should be sufficient to only notify user space on "state transitions" when the IO state changes (ie when a pipe goes from unreadable to readable, or from unwritable to writable). User space should then do as much as possible - fully emptying the buffer or what not - and we'll notify it again the next time the state changes. But as with EPOLLET, we have at least one case (stress-ng) where the kernel sent SIGIO due to the pipe being marked for asynchronous notification, but the user space signal handler then didn't actually necessarily read it all before returning (it read more than what was written, but since there could be multiple writes, it could leave data pending). The user space code then expected to get another SIGIO for subsequent writes - even though the pipe had been readable the whole time - and would only then read more. This is arguably a user space bug - and Colin King already fixed the stress-ng code in question - but the kernel regression rules are clear: it doesn't matter if kernel people think that user space did something silly and wrong. What matters is that it used to work. So if user space depends on specific historical kernel behavior, it's a regression when that behavior changes. It's on us: we were silly to have that non-optimal historical behavior, and our old kernel behavior was what user space was tested against. Because of how the FASYNC notification was tied to wakeup behavior, this was first broken by commits f467a6a66419 and 1b6b26ae7053 ("pipe: fix and clarify pipe read/write wakeup logic"), but at the time it seems nobody noticed. Probably because the stress-ng problem case ends up being timing-dependent too. It was then unwittingly fixed by commit 3a34b13a88ca ("pipe: make pipe writes always wake up readers") only to be broken again when by commit 3b844826b6c6 ("pipe: avoid unnecessary EPOLLET wakeups under normal loads"). And at that point the kernel test robot noticed the performance refression in the stress-ng.sigio.ops_per_sec case. So the "Fixes" tag below is somewhat ad hoc, but it matches when the issue was noticed. Fix it for good (knock wood) by simply making the kill_fasync() case separate from the wakeup case. FASYNC is quite rare, and we clearly shouldn't even try to use the "avoid unnecessary wakeups" logic for it. Link: https://lore.kernel.org/lkml/20210824151337.GC27667@xsang-OptiPlex-9020/ Fixes: 3b844826b6c6 ("pipe: avoid unnecessary EPOLLET wakeups under normal loads") Reported-by: kernel test robot Tested-by: Oliver Sang Cc: Eric Biederman Cc: Colin Ian King Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 48abe65333c4..d6d4019ba32f 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -363,10 +363,9 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) * _very_ unlikely case that the pipe was full, but we got * no data. */ - if (unlikely(was_full)) { + if (unlikely(was_full)) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); /* * But because we didn't read anything, at this point we can @@ -385,12 +384,11 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) wake_next_reader = false; __pipe_unlock(pipe); - if (was_full) { + if (was_full) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); - kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); - } if (wake_next_reader) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); if (ret > 0) file_accessed(filp); return ret; @@ -565,10 +563,9 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) * become empty while we dropped the lock. */ __pipe_unlock(pipe); - if (was_empty) { + if (was_empty) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); __pipe_lock(pipe); was_empty = pipe_empty(pipe->head, pipe->tail); @@ -591,10 +588,9 @@ out: * Epoll nonsensically wants a wakeup whether the pipe * was already empty or not. */ - if (was_empty || pipe->poll_usage) { + if (was_empty || pipe->poll_usage) wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); - kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); - } + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); if (wake_next_writer) wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { From ded6da217ced636f757051baa98a2564d7ec8100 Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 30 Aug 2021 15:02:10 +0200 Subject: [PATCH 074/105] mtd: spinand: Fix incorrect parameters for on-die ECC The new generic NAND ECC framework stores the configuration and requirements in separate places since commit 93ef92f6f422 ("mtd: nand: Use the new generic ECC object"). In 5.10.x The SPI NAND layer still uses only the requirements to track the ECC properties. This mismatch leads to values of zero being used for ECC strength and step_size in the SPI NAND layer wherever nanddev_get_ecc_conf() is used and therefore breaks the SPI NAND on-die ECC support in 5.10.x. By using nanddev_get_ecc_requirements() instead of nanddev_get_ecc_conf() for SPI NAND, we make sure that the correct parameters for the detected chip are used. In later versions (5.11.x) this is fixed anyway with the implementation of the SPI NAND on-die ECC engine. Cc: stable@vger.kernel.org # 5.10.x Reported-by: voice INTER connect GmbH Signed-off-by: Frieder Schrempf Acked-by: Miquel Raynal Signed-off-by: Greg Kroah-Hartman --- drivers/mtd/nand/spi/core.c | 6 +++--- drivers/mtd/nand/spi/macronix.c | 6 +++--- drivers/mtd/nand/spi/toshiba.c | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 558d8a14810b..8794a1f6eacd 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -419,7 +419,7 @@ static int spinand_check_ecc_status(struct spinand_device *spinand, u8 status) * fixed, so let's return the maximum possible value so that * wear-leveling layers move the data immediately. */ - return nanddev_get_ecc_conf(nand)->strength; + return nanddev_get_ecc_requirements(nand)->strength; case STATUS_ECC_UNCOR_ERROR: return -EBADMSG; @@ -1090,8 +1090,8 @@ static int spinand_init(struct spinand_device *spinand) mtd->oobavail = ret; /* Propagate ECC information to mtd_info */ - mtd->ecc_strength = nanddev_get_ecc_conf(nand)->strength; - mtd->ecc_step_size = nanddev_get_ecc_conf(nand)->step_size; + mtd->ecc_strength = nanddev_get_ecc_requirements(nand)->strength; + mtd->ecc_step_size = nanddev_get_ecc_requirements(nand)->step_size; return 0; diff --git a/drivers/mtd/nand/spi/macronix.c b/drivers/mtd/nand/spi/macronix.c index 8e801e4c3a00..cd7a9cacc3fb 100644 --- a/drivers/mtd/nand/spi/macronix.c +++ b/drivers/mtd/nand/spi/macronix.c @@ -84,11 +84,11 @@ static int mx35lf1ge4ab_ecc_get_status(struct spinand_device *spinand, * data around if it's not necessary. */ if (mx35lf1ge4ab_get_eccsr(spinand, &eccsr)) - return nanddev_get_ecc_conf(nand)->strength; + return nanddev_get_ecc_requirements(nand)->strength; - if (WARN_ON(eccsr > nanddev_get_ecc_conf(nand)->strength || + if (WARN_ON(eccsr > nanddev_get_ecc_requirements(nand)->strength || !eccsr)) - return nanddev_get_ecc_conf(nand)->strength; + return nanddev_get_ecc_requirements(nand)->strength; return eccsr; diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c index 21fde2875674..6fe7bd2a94d2 100644 --- a/drivers/mtd/nand/spi/toshiba.c +++ b/drivers/mtd/nand/spi/toshiba.c @@ -90,12 +90,12 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand, * data around if it's not necessary. */ if (spi_mem_exec_op(spinand->spimem, &op)) - return nanddev_get_ecc_conf(nand)->strength; + return nanddev_get_ecc_requirements(nand)->strength; mbf >>= 4; - if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf)) - return nanddev_get_ecc_conf(nand)->strength; + if (WARN_ON(mbf > nanddev_get_ecc_requirements(nand)->strength || !mbf)) + return nanddev_get_ecc_requirements(nand)->strength; return mbf; From 0a178a01516158caeb3aa26c1b54d50ad12333f6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 15 Aug 2021 03:13:36 -0400 Subject: [PATCH 075/105] tipc: call tipc_wait_for_connect only when dlen is not 0 commit 7387a72c5f84f0dfb57618f9e4770672c0d2e4c9 upstream. __tipc_sendmsg() is called to send SYN packet by either tipc_sendmsg() or tipc_connect(). The difference is in tipc_connect(), it will call tipc_wait_for_connect() after __tipc_sendmsg() to wait until connecting is done. So there's no need to wait in __tipc_sendmsg() for this case. This patch is to fix it by calling tipc_wait_for_connect() only when dlen is not 0 in __tipc_sendmsg(), which means it's called by tipc_connect(). Note this also fixes the failure in tipcutils/test/ptts/: # ./tipcTS & # ./tipcTC 9 (hang) Fixes: 36239dab6da7 ("tipc: fix implicit-connect for SYN+") Reported-by: Shuang Li Signed-off-by: Xin Long Acked-by: Jon Maloy Signed-off-by: David S. Miller Cc: Paul Gortmaker Signed-off-by: Greg Kroah-Hartman --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 4f9bd95b4eee..9bd72468bc68 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1511,7 +1511,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); - if (timeout) { + if (dlen && timeout) { timeout = msecs_to_jiffies(timeout); tipc_wait_for_connect(sock, &timeout); } From 60d69cb4e60de0067e5d8aecacd86dfe92a5384a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 30 Aug 2021 08:55:18 -0700 Subject: [PATCH 076/105] vt_kdsetmode: extend console locking commit 2287a51ba822384834dafc1c798453375d1107c7 upstream. As per the long-suffering comment. Reported-by: Minh Yuan Cc: Greg Kroah-Hartman Cc: Jiri Slaby Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- drivers/tty/vt/vt_ioctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 09b8d02acd99..90e4fcd3dc39 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -246,6 +246,8 @@ int vt_waitactive(int n) * * XXX It should at least call into the driver, fbdev's definitely need to * restore their engine state. --BenH + * + * Called with the console lock held. */ static int vt_kdsetmode(struct vc_data *vc, unsigned long mode) { @@ -262,7 +264,6 @@ static int vt_kdsetmode(struct vc_data *vc, unsigned long mode) return -EINVAL; } - /* FIXME: this needs the console lock extending */ if (vc->vc_mode == mode) return 0; @@ -271,12 +272,10 @@ static int vt_kdsetmode(struct vc_data *vc, unsigned long mode) return 0; /* explicitly blank/unblank the screen if switching modes */ - console_lock(); if (mode == KD_TEXT) do_unblank_screen(1); else do_blank_screen(1); - console_unlock(); return 0; } @@ -378,7 +377,10 @@ static int vt_k_ioctl(struct tty_struct *tty, unsigned int cmd, if (!perm) return -EPERM; - return vt_kdsetmode(vc, arg); + console_lock(); + ret = vt_kdsetmode(vc, arg); + console_unlock(); + return ret; case KDGETMODE: return put_user(vc->vc_mode, (int __user *)arg); From b42fde92cddeb0c7326877f1832d61816c9a2aa9 Mon Sep 17 00:00:00 2001 From: Pauli Virtanen Date: Mon, 26 Jul 2021 21:02:06 +0300 Subject: [PATCH 077/105] Bluetooth: btusb: check conditions before enabling USB ALT 3 for WBS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 55981d3541812234e687062926ff199c83f79a39 upstream. Some USB BT adapters don't satisfy the MTU requirement mentioned in commit e848dbd364ac ("Bluetooth: btusb: Add support USB ALT 3 for WBS") and have ALT 3 setting that produces no/garbled audio. Some adapters with larger MTU were also reported to have problems with ALT 3. Add a flag and check it and MTU before selecting ALT 3, falling back to ALT 1. Enable the flag for Realtek, restoring the previous behavior for non-Realtek devices. Tested with USB adapters (mtu<72, no/garbled sound with ALT3, ALT1 works) BCM20702A1 0b05:17cb, CSR8510A10 0a12:0001, and (mtu>=72, ALT3 works) RTL8761BU 0bda:8771, Intel AX200 8087:0029 (after disabling ALT6). Also got reports for (mtu>=72, ALT 3 reported to produce bad audio) Intel 8087:0a2b. Signed-off-by: Pauli Virtanen Fixes: e848dbd364ac ("Bluetooth: btusb: Add support USB ALT 3 for WBS") Tested-by: Michał Kępień Tested-by: Jonathan Lampérth Signed-off-by: Luiz Augusto von Dentz Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/btusb.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index afd2b1f12d49..e0859f4e2807 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -486,6 +486,7 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { #define BTUSB_HW_RESET_ACTIVE 12 #define BTUSB_TX_WAIT_VND_EVT 13 #define BTUSB_WAKEUP_DISABLE 14 +#define BTUSB_USE_ALT3_FOR_WBS 15 struct btusb_data { struct hci_dev *hdev; @@ -1718,16 +1719,20 @@ static void btusb_work(struct work_struct *work) /* Bluetooth USB spec recommends alt 6 (63 bytes), but * many adapters do not support it. Alt 1 appears to * work for all adapters that do not have alt 6, and - * which work with WBS at all. + * which work with WBS at all. Some devices prefer + * alt 3 (HCI payload >= 60 Bytes let air packet + * data satisfy 60 bytes), requiring + * MTU >= 3 (packets) * 25 (size) - 3 (headers) = 72 + * see also Core spec 5, vol 4, B 2.1.1 & Table 2.1. */ - new_alts = btusb_find_altsetting(data, 6) ? 6 : 1; - /* Because mSBC frames do not need to be aligned to the - * SCO packet boundary. If support the Alt 3, use the - * Alt 3 for HCI payload >= 60 Bytes let air packet - * data satisfy 60 bytes. - */ - if (new_alts == 1 && btusb_find_altsetting(data, 3)) + if (btusb_find_altsetting(data, 6)) + new_alts = 6; + else if (btusb_find_altsetting(data, 3) && + hdev->sco_mtu >= 72 && + test_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags)) new_alts = 3; + else + new_alts = 1; } if (btusb_switch_alt_setting(hdev, new_alts) < 0) @@ -4170,6 +4175,7 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); + set_bit(BTUSB_USE_ALT3_FOR_WBS, &data->flags); } if (!reset) From 7e2087249e87b0fceb53f428cbf872b9d2db44ad Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 17 Dec 2020 16:01:39 +0000 Subject: [PATCH 078/105] riscv: Fixup wrong ftrace remove cflag commit 67d945778099b14324811fe67c5aff2cda7a7ad5 upstream. We must use $(CC_FLAGS_FTRACE) instead of directly using -pg. It will cause -fpatchable-function-entry error. Signed-off-by: Guo Ren Signed-off-by: Palmer Dabbelt Signed-off-by: Dimitri John Ledkov Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/Makefile | 4 ++-- arch/riscv/mm/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index fa896c5f7ccb..27f10eb28bd3 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,8 +4,8 @@ # ifdef CONFIG_FTRACE -CFLAGS_REMOVE_ftrace.o = -pg -CFLAGS_REMOVE_patch.o = -pg +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) endif extra-y += head.o diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index c0185e556ca5..6b4b7ec1bda2 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,7 +2,7 @@ CFLAGS_init.o := -mcmodel=medany ifdef CONFIG_FTRACE -CFLAGS_REMOVE_init.o = -pg +CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) endif KCOV_INSTRUMENT_init.o := n From 133d7f93eecd23b003fc2dc58302ec7de723cf72 Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Thu, 17 Dec 2020 16:01:40 +0000 Subject: [PATCH 079/105] riscv: Fixup patch_text panic in ftrace commit 5ad84adf5456313e285734102367c861c436c5ed upstream. Just like arm64, we can't trace the function in the patch_text path. Here is the bug log: [ 45.234334] Unable to handle kernel paging request at virtual address ffffffd38ae80900 [ 45.242313] Oops [#1] [ 45.244600] Modules linked in: [ 45.247678] CPU: 0 PID: 11 Comm: migration/0 Not tainted 5.9.0-00025-g9b7db83-dirty #215 [ 45.255797] epc: ffffffe00021689a ra : ffffffe00021718e sp : ffffffe01afabb58 [ 45.262955] gp : ffffffe00136afa0 tp : ffffffe01af94d00 t0 : 0000000000000002 [ 45.270200] t1 : 0000000000000000 t2 : 0000000000000001 s0 : ffffffe01afabc08 [ 45.277443] s1 : ffffffe0013718a8 a0 : 0000000000000000 a1 : ffffffe01afabba8 [ 45.284686] a2 : 0000000000000000 a3 : 0000000000000000 a4 : c4c16ad38ae80900 [ 45.291929] a5 : 0000000000000000 a6 : 0000000000000000 a7 : 0000000052464e43 [ 45.299173] s2 : 0000000000000001 s3 : ffffffe000206a60 s4 : ffffffe000206a60 [ 45.306415] s5 : 00000000000009ec s6 : ffffffe0013718a8 s7 : c4c16ad38ae80900 [ 45.313658] s8 : 0000000000000004 s9 : 0000000000000001 s10: 0000000000000001 [ 45.320902] s11: 0000000000000003 t3 : 0000000000000001 t4 : ffffffffd192fe79 [ 45.328144] t5 : ffffffffb8f80000 t6 : 0000000000040000 [ 45.333472] status: 0000000200000100 badaddr: ffffffd38ae80900 cause: 000000000000000f [ 45.341514] ---[ end trace d95102172248fdcf ]--- [ 45.346176] note: migration/0[11] exited with preempt_count 1 (gdb) x /2i $pc => 0xffffffe00021689a <__do_proc_dointvec+196>: sd zero,0(s7) 0xffffffe00021689e <__do_proc_dointvec+200>: li s11,0 (gdb) bt 0 __do_proc_dointvec (tbl_data=0x0, table=0xffffffe01afabba8, write=0, buffer=0x0, lenp=0x7bf897061f9a0800, ppos=0x4, conv=0x0, data=0x52464e43) at kernel/sysctl.c:581 1 0xffffffe00021718e in do_proc_dointvec (data=, conv=, ppos=, lenp=, buffer=, write=, table=) at kernel/sysctl.c:964 2 proc_dointvec_minmax (ppos=, lenp=, buffer=, write=, table=) at kernel/sysctl.c:964 3 proc_do_static_key (table=, write=1, buffer=0x0, lenp=0x0, ppos=0x7bf897061f9a0800) at kernel/sysctl.c:1643 4 0xffffffe000206792 in ftrace_make_call (rec=, addr=) at arch/riscv/kernel/ftrace.c:109 5 0xffffffe0002c9c04 in __ftrace_replace_code (rec=0xffffffe01ae40c30, enable=3) at kernel/trace/ftrace.c:2503 6 0xffffffe0002ca0b2 in ftrace_replace_code (mod_flags=) at kernel/trace/ftrace.c:2530 7 0xffffffe0002ca26a in ftrace_modify_all_code (command=5) at kernel/trace/ftrace.c:2677 8 0xffffffe0002ca30e in __ftrace_modify_code (data=) at kernel/trace/ftrace.c:2703 9 0xffffffe0002c13b0 in multi_cpu_stop (data=0x0) at kernel/stop_machine.c:224 10 0xffffffe0002c0fde in cpu_stopper_thread (cpu=) at kernel/stop_machine.c:491 11 0xffffffe0002343de in smpboot_thread_fn (data=0x0) at kernel/smpboot.c:165 12 0xffffffe00022f8b4 in kthread (_create=0xffffffe01af0c040) at kernel/kthread.c:292 13 0xffffffe000201fac in handle_exception () at arch/riscv/kernel/entry.S:236 0xffffffe00020678a <+114>: auipc ra,0xffffe 0xffffffe00020678e <+118>: jalr -118(ra) # 0xffffffe000204714 0xffffffe000206792 <+122>: snez a0,a0 (gdb) disassemble patch_text_nosync Dump of assembler code for function patch_text_nosync: 0xffffffe000204714 <+0>: addi sp,sp,-32 0xffffffe000204716 <+2>: sd s0,16(sp) 0xffffffe000204718 <+4>: sd ra,24(sp) 0xffffffe00020471a <+6>: addi s0,sp,32 0xffffffe00020471c <+8>: auipc ra,0x0 0xffffffe000204720 <+12>: jalr -384(ra) # 0xffffffe00020459c 0xffffffe000204724 <+16>: beqz a0,0xffffffe00020472e 0xffffffe000204726 <+18>: ld ra,24(sp) 0xffffffe000204728 <+20>: ld s0,16(sp) 0xffffffe00020472a <+22>: addi sp,sp,32 0xffffffe00020472c <+24>: ret 0xffffffe00020472e <+26>: sd a0,-24(s0) 0xffffffe000204732 <+30>: auipc ra,0x4 0xffffffe000204736 <+34>: jalr -1464(ra) # 0xffffffe00020817a 0xffffffe00020473a <+38>: ld a0,-24(s0) 0xffffffe00020473e <+42>: ld ra,24(sp) 0xffffffe000204740 <+44>: ld s0,16(sp) 0xffffffe000204742 <+46>: addi sp,sp,32 0xffffffe000204744 <+48>: ret (gdb) disassemble flush_icache_all-4 Dump of assembler code for function flush_icache_all: 0xffffffe00020817a <+0>: addi sp,sp,-8 0xffffffe00020817c <+2>: sd ra,0(sp) 0xffffffe00020817e <+4>: auipc ra,0xfffff 0xffffffe000208182 <+8>: jalr -1822(ra) # 0xffffffe000206a60 0xffffffe000208186 <+12>: ld ra,0(sp) 0xffffffe000208188 <+14>: addi sp,sp,8 0xffffffe00020818a <+0>: addi sp,sp,-16 0xffffffe00020818c <+2>: sd s0,0(sp) 0xffffffe00020818e <+4>: sd ra,8(sp) 0xffffffe000208190 <+6>: addi s0,sp,16 0xffffffe000208192 <+8>: li a0,0 0xffffffe000208194 <+10>: auipc ra,0xfffff 0xffffffe000208198 <+14>: jalr -410(ra) # 0xffffffe000206ffa 0xffffffe00020819c <+18>: ld s0,0(sp) 0xffffffe00020819e <+20>: ld ra,8(sp) 0xffffffe0002081a0 <+22>: addi sp,sp,16 0xffffffe0002081a2 <+24>: ret (gdb) frame 5 (rec=0xffffffe01ae40c30, enable=3) at kernel/trace/ftrace.c:2503 2503 return ftrace_make_call(rec, ftrace_addr); (gdb) p /x rec->ip $2 = 0xffffffe00020817a -> flush_icache_all ! When we modified flush_icache_all's patchable-entry with ftrace_caller: - Insert ftrace_caller at flush_icache_all prologue. - Call flush_icache_all to sync I/Dcache, but flush_icache_all is just we modified by half. Link: https://lore.kernel.org/linux-riscv/CAJF2gTT=oDWesWe0JVWvTpGi60-gpbNhYLdFWN_5EbyeqoEDdw@mail.gmail.com/T/#t Signed-off-by: Guo Ren Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt Signed-off-by: Dimitri John Ledkov Signed-off-by: Greg Kroah-Hartman --- arch/riscv/kernel/Makefile | 1 + arch/riscv/mm/Makefile | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 27f10eb28bd3..62de075fc60c 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -6,6 +6,7 @@ ifdef CONFIG_FTRACE CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) endif extra-y += head.o diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 6b4b7ec1bda2..7ebaef10ea1b 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -3,6 +3,7 @@ CFLAGS_init.o := -mcmodel=medany ifdef CONFIG_FTRACE CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE) endif KCOV_INSTRUMENT_init.o := n From 0d8e39bb9416ffb34cc5693a9d9a62e289159b80 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 3 Jun 2021 00:40:23 +0200 Subject: [PATCH 080/105] perf env: Fix memory leak of bpf_prog_info_linear member commit 67069a1f0fe5f9eeca86d954fff2087f5542a008 upstream. ASan reported a memory leak caused by info_linear not being deallocated. The info_linear was allocated during in perf_event__synthesize_one_bpf_prog(). This patch adds the corresponding free() when bpf_prog_info_node is freed in perf_env__purge_bpf(). $ sudo ./perf record -- sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.025 MB perf.data (8 samples) ] ================================================================= ==297735==ERROR: LeakSanitizer: detected memory leaks Direct leak of 7688 byte(s) in 19 object(s) allocated from: #0 0x4f420f in malloc (/home/user/linux/tools/perf/perf+0x4f420f) #1 0xc06a74 in bpf_program__get_prog_info_linear /home/user/linux/tools/lib/bpf/libbpf.c:11113:16 #2 0xb426fe in perf_event__synthesize_one_bpf_prog /home/user/linux/tools/perf/util/bpf-event.c:191:16 #3 0xb42008 in perf_event__synthesize_bpf_events /home/user/linux/tools/perf/util/bpf-event.c:410:9 #4 0x594596 in record__synthesize /home/user/linux/tools/perf/builtin-record.c:1490:8 #5 0x58c9ac in __cmd_record /home/user/linux/tools/perf/builtin-record.c:1798:8 #6 0x58990b in cmd_record /home/user/linux/tools/perf/builtin-record.c:2901:8 #7 0x7b2a20 in run_builtin /home/user/linux/tools/perf/perf.c:313:11 #8 0x7b12ff in handle_internal_command /home/user/linux/tools/perf/perf.c:365:8 #9 0x7b2583 in run_argv /home/user/linux/tools/perf/perf.c:409:2 #10 0x7b0d79 in main /home/user/linux/tools/perf/perf.c:539:3 #11 0x7fa357ef6b74 in __libc_start_main /usr/src/debug/glibc-2.33-8.fc34.x86_64/csu/../csu/libc-start.c:332:16 Signed-off-by: Riccardo Mancini Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Andrii Nakryiko Cc: Daniel Borkmann Cc: Jiri Olsa Cc: John Fastabend Cc: KP Singh Cc: Mark Rutland Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Song Liu Cc: Yonghong Song Link: http://lore.kernel.org/lkml/20210602224024.300485-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/env.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 03bc843b1cf8..f0dceb527ca3 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -142,6 +142,7 @@ static void perf_env__purge_bpf(struct perf_env *env) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); rb_erase(&node->rb_node, root); + free(node->info_linear); free(node); } From e0ca67030fdae4bf4342085184ff0df17fa5c245 Mon Sep 17 00:00:00 2001 From: Riccardo Mancini Date: Thu, 3 Jun 2021 00:08:33 +0200 Subject: [PATCH 081/105] perf symbol-elf: Fix memory leak by freeing sdt_note.args commit 69c9ffed6cede9c11697861f654946e3ae95a930 upstream. Reported by ASan. Signed-off-by: Riccardo Mancini Acked-by: Ian Rogers Cc: Alexander Shishkin Cc: Fabian Hemmer Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Remi Bernon Cc: Jiri Slaby Link: http://lore.kernel.org/lkml/20210602220833.285226-1-rickyman7@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/symbol-elf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 44dd86a4f25f..7356eb398b32 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -2360,6 +2360,7 @@ int cleanup_sdt_note_list(struct list_head *sdt_notes) list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) { list_del_init(&pos->note_list); + zfree(&pos->args); zfree(&pos->name); zfree(&pos->provider); free(pos); From 94687c49b65bc08c85fde79d917c8d64de1104bc Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 15 Mar 2021 13:56:41 +0900 Subject: [PATCH 082/105] perf record: Fix memory leak in vDSO found using ASAN commit 41d585411311abf187e5f09042978fe7073a9375 upstream. I got several memory leak reports from Asan with a simple command. It was because VDSO is not released due to the refcount. Like in __dsos_addnew_id(), it should put the refcount after adding to the list. $ perf record true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.030 MB perf.data (10 samples) ] ================================================================= ==692599==ERROR: LeakSanitizer: detected memory leaks Direct leak of 439 byte(s) in 1 object(s) allocated from: #0 0x7fea52341037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x559bce4aa8ee in dso__new_id util/dso.c:1256 #2 0x559bce59245a in __machine__addnew_vdso util/vdso.c:132 #3 0x559bce59245a in machine__findnew_vdso util/vdso.c:347 #4 0x559bce50826c in map__new util/map.c:175 #5 0x559bce503c92 in machine__process_mmap2_event util/machine.c:1787 #6 0x559bce512f6b in machines__deliver_event util/session.c:1481 #7 0x559bce515107 in perf_session__deliver_event util/session.c:1551 #8 0x559bce51d4d2 in do_flush util/ordered-events.c:244 #9 0x559bce51d4d2 in __ordered_events__flush util/ordered-events.c:323 #10 0x559bce519bea in __perf_session__process_events util/session.c:2268 #11 0x559bce519bea in perf_session__process_events util/session.c:2297 #12 0x559bce2e7a52 in process_buildids /home/namhyung/project/linux/tools/perf/builtin-record.c:1017 #13 0x559bce2e7a52 in record__finish_output /home/namhyung/project/linux/tools/perf/builtin-record.c:1234 #14 0x559bce2ed4f6 in __cmd_record /home/namhyung/project/linux/tools/perf/builtin-record.c:2026 #15 0x559bce2ed4f6 in cmd_record /home/namhyung/project/linux/tools/perf/builtin-record.c:2858 #16 0x559bce422db4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #17 0x559bce2acac8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #18 0x559bce2acac8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #19 0x559bce2acac8 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #20 0x7fea51e76d09 in __libc_start_main ../csu/libc-start.c:308 Indirect leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fea52341037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x559bce520907 in nsinfo__copy util/namespaces.c:169 #2 0x559bce50821b in map__new util/map.c:168 #3 0x559bce503c92 in machine__process_mmap2_event util/machine.c:1787 #4 0x559bce512f6b in machines__deliver_event util/session.c:1481 #5 0x559bce515107 in perf_session__deliver_event util/session.c:1551 #6 0x559bce51d4d2 in do_flush util/ordered-events.c:244 #7 0x559bce51d4d2 in __ordered_events__flush util/ordered-events.c:323 #8 0x559bce519bea in __perf_session__process_events util/session.c:2268 #9 0x559bce519bea in perf_session__process_events util/session.c:2297 #10 0x559bce2e7a52 in process_buildids /home/namhyung/project/linux/tools/perf/builtin-record.c:1017 #11 0x559bce2e7a52 in record__finish_output /home/namhyung/project/linux/tools/perf/builtin-record.c:1234 #12 0x559bce2ed4f6 in __cmd_record /home/namhyung/project/linux/tools/perf/builtin-record.c:2026 #13 0x559bce2ed4f6 in cmd_record /home/namhyung/project/linux/tools/perf/builtin-record.c:2858 #14 0x559bce422db4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #15 0x559bce2acac8 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #16 0x559bce2acac8 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #17 0x559bce2acac8 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #18 0x7fea51e76d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 471 byte(s) leaked in 2 allocation(s). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210315045641.700430-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/vdso.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 3cc91ad048ea..43beb169631d 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -133,6 +133,8 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s if (dso != NULL) { __dsos__add(&machine->dsos, dso); dso__set_long_name(dso, long_name, false); + /* Put dso here because __dsos_add already got it */ + dso__put(dso); } return dso; From 9f9e40ddfca32a36f770b5cd2bcdc32f76dd9a78 Mon Sep 17 00:00:00 2001 From: Jianlin Lv Date: Thu, 18 Feb 2021 11:12:45 +0800 Subject: [PATCH 083/105] perf tools: Fix arm64 build error with gcc-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 067012974c8ae31a8886046df082aeba93592972 upstream. gcc version: 11.0.0 20210208 (experimental) (GCC) Following build error on arm64: ....... In function ‘printf’, inlined from ‘regs_dump__printf’ at util/session.c:1141:3, inlined from ‘regs__printf’ at util/session.c:1169:2: /usr/include/aarch64-linux-gnu/bits/stdio2.h:107:10: \ error: ‘%-5s’ directive argument is null [-Werror=format-overflow=] 107 | return __printf_chk (__USE_FORTIFY_LEVEL - 1, __fmt, \ __va_arg_pack ()); ...... In function ‘fprintf’, inlined from ‘perf_sample__fprintf_regs.isra’ at \ builtin-script.c:622:14: /usr/include/aarch64-linux-gnu/bits/stdio2.h:100:10: \ error: ‘%5s’ directive argument is null [-Werror=format-overflow=] 100 | return __fprintf_chk (__stream, __USE_FORTIFY_LEVEL - 1, __fmt, 101 | __va_arg_pack ()); cc1: all warnings being treated as errors ....... This patch fixes Wformat-overflow warnings. Add helper function to convert NULL to "unknown". Signed-off-by: Jianlin Lv Reviewed-by: John Garry Acked-by: Jiri Olsa Cc: Albert Ou Cc: Alexander Shishkin Cc: Anju T Sudhakar Cc: Athira Jajeev Cc: Guo Ren Cc: Kajol Jain Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Will Deacon Cc: Palmer Dabbelt Cc: iecedge@gmail.com Cc: linux-csky@vger.kernel.org Cc: linux-riscv@lists.infradead.org Link: http://lore.kernel.org/lkml/20210218031245.2078492-1-Jianlin.Lv@arm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- tools/perf/arch/arm/include/perf_regs.h | 2 +- tools/perf/arch/arm64/include/perf_regs.h | 2 +- tools/perf/arch/csky/include/perf_regs.h | 2 +- tools/perf/arch/powerpc/include/perf_regs.h | 2 +- tools/perf/arch/riscv/include/perf_regs.h | 2 +- tools/perf/arch/s390/include/perf_regs.h | 2 +- tools/perf/arch/x86/include/perf_regs.h | 2 +- tools/perf/util/perf_regs.h | 7 +++++++ 8 files changed, 14 insertions(+), 7 deletions(-) diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h index ed20e0253e25..4085419283d0 100644 --- a/tools/perf/arch/arm/include/perf_regs.h +++ b/tools/perf/arch/arm/include/perf_regs.h @@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM_PC #define PERF_REG_SP PERF_REG_ARM_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_ARM_R0: diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index baaa5e64a3fb..fa3e07459f76 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -15,7 +15,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_ARM64_X0: diff --git a/tools/perf/arch/csky/include/perf_regs.h b/tools/perf/arch/csky/include/perf_regs.h index 8f336ea1161a..25ac3bdcb9d1 100644 --- a/tools/perf/arch/csky/include/perf_regs.h +++ b/tools/perf/arch/csky/include/perf_regs.h @@ -15,7 +15,7 @@ #define PERF_REG_IP PERF_REG_CSKY_PC #define PERF_REG_SP PERF_REG_CSKY_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_CSKY_A0: diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h index 63f3ac91049f..004bed228693 100644 --- a/tools/perf/arch/powerpc/include/perf_regs.h +++ b/tools/perf/arch/powerpc/include/perf_regs.h @@ -73,7 +73,7 @@ static const char *reg_names[] = { [PERF_REG_POWERPC_SIER3] = "sier3", }; -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { return reg_names[id]; } diff --git a/tools/perf/arch/riscv/include/perf_regs.h b/tools/perf/arch/riscv/include/perf_regs.h index 7a8bcde7a2b1..6b02a767c918 100644 --- a/tools/perf/arch/riscv/include/perf_regs.h +++ b/tools/perf/arch/riscv/include/perf_regs.h @@ -19,7 +19,7 @@ #define PERF_REG_IP PERF_REG_RISCV_PC #define PERF_REG_SP PERF_REG_RISCV_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_RISCV_PC: diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h index bcfbaed78cc2..ce3031526623 100644 --- a/tools/perf/arch/s390/include/perf_regs.h +++ b/tools/perf/arch/s390/include/perf_regs.h @@ -14,7 +14,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_S390_PC #define PERF_REG_SP PERF_REG_S390_R15 -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_S390_R0: diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index b7321337d100..cddc4cdc0d9b 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -23,7 +23,7 @@ void perf_regs_load(u64 *regs); #define PERF_REG_IP PERF_REG_X86_IP #define PERF_REG_SP PERF_REG_X86_SP -static inline const char *perf_reg_name(int id) +static inline const char *__perf_reg_name(int id) { switch (id) { case PERF_REG_X86_AX: diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index a45499126184..eeac181ebccf 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -33,6 +33,13 @@ extern const struct sample_reg sample_reg_masks[]; int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); +static inline const char *perf_reg_name(int id) +{ + const char *reg_name = __perf_reg_name(id); + + return reg_name ?: "unknown"; +} + #else #define PERF_REGS_MASK 0 #define PERF_REGS_MAX 0 From 77b77d45a4b125f7fbe38fb4bb8d679963f91724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Li=C5=A1ka?= Date: Thu, 11 Feb 2021 13:37:55 +0100 Subject: [PATCH 084/105] perf annotate: Fix jump parsing for C++ code. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 1f0e6edcd968ff19211245f7da6039e983aa51e5 upstream. Considering the following testcase: int foo(int a, int b) { for (unsigned i = 0; i < 1000000000; i++) a += b; return a; } int main() { foo (3, 4); return 0; } 'perf annotate' displays: 86.52 │40055e: → ja 40056c 13.37 │400560: mov -0x18(%rbp),%eax │400563: add %eax,-0x14(%rbp) │400566: addl $0x1,-0x4(%rbp) 0.11 │40056a: → jmp 400557 │40056c: mov -0x14(%rbp),%eax │40056f: pop %rbp and the 'ja 40056c' does not link to the location in the function. It's caused by fact that comma is wrongly parsed, it's part of function signature. With my patch I see: 86.52 │ ┌──ja 26 13.37 │ │ mov -0x18(%rbp),%eax │ │ add %eax,-0x14(%rbp) │ │ addl $0x1,-0x4(%rbp) 0.11 │ │↑ jmp 11 │26:└─→mov -0x14(%rbp),%eax and 'o' output prints: 86.52 │4005┌── ↓ ja 40056c 13.37 │4005│0: mov -0x18(%rbp),%eax │4005│3: add %eax,-0x14(%rbp) │4005│6: addl $0x1,-0x4(%rbp) 0.11 │4005│a: ↑ jmp 400557 │4005└─→ mov -0x14(%rbp),%eax On the contrary, compiling the very same file with gcc -x c, the parsing is fine because function arguments are not displayed: jmp 400543 Committer testing: Before: $ cat cpp_args_annotate.c int foo(int a, int b) { for (unsigned i = 0; i < 1000000000; i++) a += b; return a; } int main() { foo (3, 4); return 0; } $ gcc --version |& head -1 gcc (GCC) 10.2.1 20201125 (Red Hat 10.2.1-9) $ gcc -g cpp_args_annotate.c -o cpp_args_annotate $ perf record ./cpp_args_annotate [ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.275 MB perf.data (7188 samples) ] $ perf annotate --stdio2 foo Samples: 7K of event 'cycles:u', 4000 Hz, Event count (approx.): 7468429289, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(): int foo(int a, int b) { push %rbp mov %rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) ↓ jmp 1d a += b; 13.45 13: mov -0x18(%rbp),%eax add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.09 1d: cmpl $0x3b9ac9ff,-0x4(%rbp) 86.46 ↑ jbe 13 return a; mov -0x14(%rbp),%eax } pop %rbp ← retq $ I.e. works for C, now lets switch to C++: $ g++ -g cpp_args_annotate.c -o cpp_args_annotate $ perf record ./cpp_args_annotate [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.268 MB perf.data (6976 samples) ] $ perf annotate --stdio2 foo Samples: 6K of event 'cycles:u', 4000 Hz, Event count (approx.): 7380681761, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(int, int): int foo(int a, int b) { push %rbp mov %rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) cmpl $0x3b9ac9ff,-0x4(%rbp) 86.53 → ja 40112c a += b; 13.32 mov -0x18(%rbp),%eax 0.00 add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.15 → jmp 401117 return a; mov -0x14(%rbp),%eax } pop %rbp ← retq $ Reproduced. Now with this patch: Reusing the C++ built binary, as we can see here: $ readelf -wi cpp_args_annotate | grep producer DW_AT_producer : (indirect string, offset: 0x2e): GNU C++14 10.2.1 20201125 (Red Hat 10.2.1-9) -mtune=generic -march=x86-64 -g $ And furthermore: $ file cpp_args_annotate cpp_args_annotate: ELF 64-bit LSB executable, x86-64, version 1 (SYSV), dynamically linked, interpreter /lib64/ld-linux-x86-64.so.2, BuildID[sha1]=4fe3cab260204765605ec630d0dc7a7e93c361a9, for GNU/Linux 3.2.0, with debug_info, not stripped $ perf buildid-list -i cpp_args_annotate 4fe3cab260204765605ec630d0dc7a7e93c361a9 $ perf buildid-list | grep cpp_args_annotate 4fe3cab260204765605ec630d0dc7a7e93c361a9 /home/acme/c/cpp_args_annotate $ It now works: $ perf annotate --stdio2 foo Samples: 6K of event 'cycles:u', 4000 Hz, Event count (approx.): 7380681761, [percent: local period] foo() /home/acme/c/cpp_args_annotate Percent 0000000000401106 : foo(int, int): int foo(int a, int b) { push %rbp mov %rsp,%rbp mov %edi,-0x14(%rbp) mov %esi,-0x18(%rbp) for (unsigned i = 0; i < 1000000000; i++) movl $0x0,-0x4(%rbp) 11: cmpl $0x3b9ac9ff,-0x4(%rbp) 86.53 ↓ ja 26 a += b; 13.32 mov -0x18(%rbp),%eax 0.00 add %eax,-0x14(%rbp) for (unsigned i = 0; i < 1000000000; i++) addl $0x1,-0x4(%rbp) 0.15 ↑ jmp 11 return a; 26: mov -0x14(%rbp),%eax } pop %rbp ← retq $ Signed-off-by: Martin Liška Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Slaby Link: http://lore.kernel.org/lkml/13e1a405-edf9-e4c2-4327-a9b454353730@suse.cz Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- tools/perf/util/annotate.c | 8 ++++++++ tools/perf/util/annotate.h | 1 + 2 files changed, 9 insertions(+) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 6c8575e182ed..308189454788 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -317,12 +317,18 @@ bool ins__is_call(const struct ins *ins) /* * Prevents from matching commas in the comment section, e.g.: * ffff200008446e70: b.cs ffff2000084470f4 // b.hs, b.nlast + * + * and skip comma as part of function arguments, e.g.: + * 1d8b4ac */ static inline const char *validate_comma(const char *c, struct ins_operands *ops) { if (ops->raw_comment && c > ops->raw_comment) return NULL; + if (ops->raw_func_start && c > ops->raw_func_start) + return NULL; + return c; } @@ -337,6 +343,8 @@ static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_s u64 start, end; ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char); + ops->raw_func_start = strchr(ops->raw, '<'); + c = validate_comma(c, ops); /* diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0a0cd4f32175..096cdaf21b01 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -32,6 +32,7 @@ struct ins { struct ins_operands { char *raw; char *raw_comment; + char *raw_func_start; struct { char *raw; char *name; From d3c38d8549c0844e48d1cca2885568833bc94300 Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 1 Dec 2020 04:28:00 -0500 Subject: [PATCH 085/105] powerpc/perf: Invoke per-CPU variable access with disabled interrupts commit f66de7ac4849eb42a7b18e26b8ee49e08130fd27 upstream. The power_pmu_event_init() callback access per-cpu variable (cpu_hw_events) to check for event constraints and Branch Stack (BHRB). Current usage is to disable preemption when accessing the per-cpu variable, but this does not prevent timer callback from interrupting event_init. Fix this by using 'local_irq_save/restore' to make sure the code path is invoked with disabled interrupts. This change is tested in mambo simulator to ensure that, if a timer interrupt comes in during the per-cpu access in event_init, it will be soft masked and replayed later. For testing purpose, introduced a udelay() in power_pmu_event_init() to make sure a timer interrupt arrives while in per-cpu variable access code between local_irq_save/resore. As expected the timer interrupt was replayed later during local_irq_restore called from power_pmu_event_init. This was confirmed by adding breakpoint in mambo and checking the backtrace when timer_interrupt was hit. Reported-by: Sebastian Andrzej Siewior Signed-off-by: Athira Rajeev Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1606814880-1720-1-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Hanjun Guo Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/perf/core-book3s.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index ded4a3efd3f0..91452313489f 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1884,7 +1884,7 @@ static bool is_event_blacklisted(u64 ev) static int power_pmu_event_init(struct perf_event *event) { u64 ev; - unsigned long flags; + unsigned long flags, irq_flags; struct perf_event *ctrs[MAX_HWEVENTS]; u64 events[MAX_HWEVENTS]; unsigned int cflags[MAX_HWEVENTS]; @@ -1992,7 +1992,9 @@ static int power_pmu_event_init(struct perf_event *event) if (check_excludes(ctrs, cflags, n, 1)) return -EINVAL; - cpuhw = &get_cpu_var(cpu_hw_events); + local_irq_save(irq_flags); + cpuhw = this_cpu_ptr(&cpu_hw_events); + err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { @@ -2003,13 +2005,13 @@ static int power_pmu_event_init(struct perf_event *event) event->attr.branch_sample_type); if (bhrb_filter == -1) { - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); return -EOPNOTSUPP; } cpuhw->bhrb_filter = bhrb_filter; } - put_cpu_var(cpu_hw_events); + local_irq_restore(irq_flags); if (err) return -EINVAL; From fdf66e5a7fc87d3213c21eef4472b71d54fdf736 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2020 10:08:09 -0800 Subject: [PATCH 086/105] srcu: Provide internal interface to start a Tree SRCU grace period commit 29d2bb94a8a126ce80ffbb433b648b32fdea524e upstream. There is a need for a polling interface for SRCU grace periods. This polling needs to initiate an SRCU grace period without having to queue (and manage) a callback. This commit therefore splits the Tree SRCU __call_srcu() function into callback-initialization and queuing/start-grace-period portions, with the latter in a new function named srcu_gp_start_if_needed(). This function may be passed a NULL callback pointer, in which case it will refrain from queuing anything. Why have the new function mess with queuing? Locking considerations, of course! Link: https://lore.kernel.org/rcu/20201112201547.GF3365678@moria.home.lan/ Reported-by: Kent Overstreet Reviewed-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/srcutree.c | 66 ++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 29 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 68ceac387844..7b0b9338c704 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -808,6 +808,42 @@ static void srcu_leak_callback(struct rcu_head *rhp) { } +/* + * Start an SRCU grace period, and also queue the callback if non-NULL. + */ +static void srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rhp, bool do_norm) +{ + unsigned long flags; + int idx; + bool needexp = false; + bool needgp = false; + unsigned long s; + struct srcu_data *sdp; + + idx = srcu_read_lock(ssp); + sdp = raw_cpu_ptr(ssp->sda); + spin_lock_irqsave_rcu_node(sdp, flags); + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); + rcu_segcblist_advance(&sdp->srcu_cblist, + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); + (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); + if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { + sdp->srcu_gp_seq_needed = s; + needgp = true; + } + if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { + sdp->srcu_gp_seq_needed_exp = s; + needexp = true; + } + spin_unlock_irqrestore_rcu_node(sdp, flags); + if (needgp) + srcu_funnel_gp_start(ssp, sdp, s, do_norm); + else if (needexp) + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, idx); +} + /* * Enqueue an SRCU callback on the srcu_data structure associated with * the current CPU and the specified srcu_struct structure, initiating @@ -839,13 +875,6 @@ static void srcu_leak_callback(struct rcu_head *rhp) static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { - unsigned long flags; - int idx; - bool needexp = false; - bool needgp = false; - unsigned long s; - struct srcu_data *sdp; - check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. */ @@ -854,28 +883,7 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, return; } rhp->func = func; - idx = srcu_read_lock(ssp); - sdp = raw_cpu_ptr(ssp->sda); - spin_lock_irqsave_rcu_node(sdp, flags); - rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); - rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&ssp->srcu_gp_seq)); - s = rcu_seq_snap(&ssp->srcu_gp_seq); - (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); - if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { - sdp->srcu_gp_seq_needed = s; - needgp = true; - } - if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) { - sdp->srcu_gp_seq_needed_exp = s; - needexp = true; - } - spin_unlock_irqrestore_rcu_node(sdp, flags); - if (needgp) - srcu_funnel_gp_start(ssp, sdp, s, do_norm); - else if (needexp) - srcu_funnel_exp_start(ssp, sdp->mynode, s); - srcu_read_unlock(ssp, idx); + srcu_gp_start_if_needed(ssp, rhp, do_norm); } /** From f789de3be808a0492a09cd7ad44cb017203572c6 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2020 17:31:55 -0800 Subject: [PATCH 087/105] srcu: Provide polling interfaces for Tree SRCU grace periods commit 5358c9fa54b09b5d3d7811b033aa0838c1bbaaf2 upstream. There is a need for a polling interface for SRCU grace periods, so this commit supplies get_state_synchronize_srcu(), start_poll_synchronize_srcu(), and poll_state_synchronize_srcu() for this purpose. The first can be used if future grace periods are inevitable (perhaps due to a later call_srcu() invocation), the second if future grace periods might not otherwise happen, and the third to check if a grace period has elapsed since the corresponding call to either of the first two. As with get_state_synchronize_rcu() and cond_synchronize_rcu(), the return value from either get_state_synchronize_srcu() or start_poll_synchronize_srcu() must be passed in to a later call to poll_state_synchronize_srcu(). Link: https://lore.kernel.org/rcu/20201112201547.GF3365678@moria.home.lan/ Reported-by: Kent Overstreet [ paulmck: Add EXPORT_SYMBOL_GPL() per kernel test robot feedback. ] [ paulmck: Apply feedback from Neeraj Upadhyay. ] Link: https://lore.kernel.org/lkml/20201117004017.GA7444@paulmck-ThinkPad-P72/ Reviewed-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/srcutree.c | 67 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 7b0b9338c704..b8821665c435 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -811,7 +811,8 @@ static void srcu_leak_callback(struct rcu_head *rhp) /* * Start an SRCU grace period, and also queue the callback if non-NULL. */ -static void srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rhp, bool do_norm) +static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, + struct rcu_head *rhp, bool do_norm) { unsigned long flags; int idx; @@ -820,10 +821,12 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rh unsigned long s; struct srcu_data *sdp; + check_init_srcu_struct(ssp); idx = srcu_read_lock(ssp); sdp = raw_cpu_ptr(ssp->sda); spin_lock_irqsave_rcu_node(sdp, flags); - rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); + if (rhp) + rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); rcu_segcblist_advance(&sdp->srcu_cblist, rcu_seq_current(&ssp->srcu_gp_seq)); s = rcu_seq_snap(&ssp->srcu_gp_seq); @@ -842,6 +845,7 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rh else if (needexp) srcu_funnel_exp_start(ssp, sdp->mynode, s); srcu_read_unlock(ssp, idx); + return s; } /* @@ -875,7 +879,6 @@ static void srcu_gp_start_if_needed(struct srcu_struct *ssp, struct rcu_head *rh static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { - check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. */ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -883,7 +886,7 @@ static void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, return; } rhp->func = func; - srcu_gp_start_if_needed(ssp, rhp, do_norm); + (void)srcu_gp_start_if_needed(ssp, rhp, do_norm); } /** @@ -1012,6 +1015,62 @@ void synchronize_srcu(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(synchronize_srcu); +/** + * get_state_synchronize_srcu - Provide an end-of-grace-period cookie + * @ssp: srcu_struct to provide cookie for. + * + * This function returns a cookie that can be passed to + * poll_state_synchronize_srcu(), which will return true if a full grace + * period has elapsed in the meantime. It is the caller's responsibility + * to make sure that grace period happens, for example, by invoking + * call_srcu() after return from get_state_synchronize_srcu(). + */ +unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) +{ + // Any prior manipulation of SRCU-protected data must happen + // before the load from ->srcu_gp_seq. + smp_mb(); + return rcu_seq_snap(&ssp->srcu_gp_seq); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); + +/** + * start_poll_synchronize_srcu - Provide cookie and start grace period + * @ssp: srcu_struct to provide cookie for. + * + * This function returns a cookie that can be passed to + * poll_state_synchronize_srcu(), which will return true if a full grace + * period has elapsed in the meantime. Unlike get_state_synchronize_srcu(), + * this function also ensures that any needed SRCU grace period will be + * started. This convenience does come at a cost in terms of CPU overhead. + */ +unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) +{ + return srcu_gp_start_if_needed(ssp, NULL, true); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); + +/** + * poll_state_synchronize_srcu - Has cookie's grace period ended? + * @ssp: srcu_struct to provide cookie for. + * @cookie: Return value from get_state_synchronize_srcu() or start_poll_synchronize_srcu(). + * + * This function takes the cookie that was returned from either + * get_state_synchronize_srcu() or start_poll_synchronize_srcu(), and + * returns @true if an SRCU grace period elapsed since the time that the + * cookie was created. + */ +bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) +{ + if (!rcu_seq_done(&ssp->srcu_gp_seq, cookie)) + return false; + // Ensure that the end of the SRCU grace period happens before + // any subsequent code that the caller might execute. + smp_mb(); // ^^^ + return true; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); + /* * Callback function for srcu_barrier() use. */ From 641e1d88404a57983c75cc39b9feb076f5cdeac8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2020 09:37:39 -0800 Subject: [PATCH 088/105] srcu: Provide internal interface to start a Tiny SRCU grace period commit 1a893c711a600ab57526619b56e6f6b7be00956e upstream. There is a need for a polling interface for SRCU grace periods. This polling needs to initiate an SRCU grace period without having to queue (and manage) a callback. This commit therefore splits the Tiny SRCU call_srcu() function into callback-queuing and start-grace-period portions, with the latter in a new function named srcu_gp_start_if_needed(). Link: https://lore.kernel.org/rcu/20201112201547.GF3365678@moria.home.lan/ Reported-by: Kent Overstreet Reviewed-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- kernel/rcu/srcutiny.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 6208c1dae5c9..ee25b365c3ea 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -151,6 +151,16 @@ void srcu_drive_gp(struct work_struct *wp) } EXPORT_SYMBOL_GPL(srcu_drive_gp); +static void srcu_gp_start_if_needed(struct srcu_struct *ssp) +{ + if (!READ_ONCE(ssp->srcu_gp_running)) { + if (likely(srcu_init_done)) + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); + } +} + /* * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. @@ -166,12 +176,7 @@ void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, *ssp->srcu_cb_tail = rhp; ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(ssp->srcu_gp_running)) { - if (likely(srcu_init_done)) - schedule_work(&ssp->srcu_work); - else if (list_empty(&ssp->srcu_work.entry)) - list_add(&ssp->srcu_work.entry, &srcu_boot_list); - } + srcu_gp_start_if_needed(ssp); } EXPORT_SYMBOL_GPL(call_srcu); From 450948b06ce8ba3e22df094ef324dbf5fa52f050 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 12 Nov 2020 16:34:09 -0800 Subject: [PATCH 089/105] srcu: Make Tiny SRCU use multi-bit grace-period counter commit 74612a07b83fc46c2b2e6f71a541d55b024ebefc upstream. There is a need for a polling interface for SRCU grace periods. This polling needs to distinguish between an SRCU instance being idle on the one hand or in the middle of a grace period on the other. This commit therefore converts the Tiny SRCU srcu_struct structure's srcu_idx from a defacto boolean to a free-running counter, using the bottom bit to indicate that a grace period is in progress. The second-from-bottom bit is thus used as the index returned by srcu_read_lock(). Link: https://lore.kernel.org/rcu/20201112201547.GF3365678@moria.home.lan/ Reported-by: Kent Overstreet [ paulmck: Fix ->srcu_lock_nesting[] indexing per Neeraj Upadhyay. ] Reviewed-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- include/linux/srcutiny.h | 6 +++--- kernel/rcu/srcutiny.c | 5 +++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 5a5a1941ca15..b8b42d0a24f1 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -15,7 +15,7 @@ struct srcu_struct { short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ - short srcu_idx; /* Current reader array element. */ + unsigned short srcu_idx; /* Current reader array element in bit 0x2. */ u8 srcu_gp_running; /* GP workqueue running? */ u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; @@ -59,7 +59,7 @@ static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(ssp->srcu_idx); + idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); return idx; } @@ -80,7 +80,7 @@ static inline void srcu_torture_stats_print(struct srcu_struct *ssp, { int idx; - idx = READ_ONCE(ssp->srcu_idx) & 0x1; + idx = ((READ_ONCE(ssp->srcu_idx) + 1) & 0x2) >> 1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, READ_ONCE(ssp->srcu_lock_nesting[!idx]), diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index ee25b365c3ea..3bac1db85a85 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -124,11 +124,12 @@ void srcu_drive_gp(struct work_struct *wp) ssp->srcu_cb_head = NULL; ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = ssp->srcu_idx; - WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + idx = (ssp->srcu_idx & 0x2) / 2; + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* Invoke the callbacks we removed above. */ while (lh) { From b6ae3854075e67a2764e30447f8603ef964aecc5 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 13 Nov 2020 12:54:48 -0800 Subject: [PATCH 090/105] srcu: Provide polling interfaces for Tiny SRCU grace periods commit 8b5bd67cf6422b63ee100d76d8de8960ca2df7f0 upstream. There is a need for a polling interface for SRCU grace periods, so this commit supplies get_state_synchronize_srcu(), start_poll_synchronize_srcu(), and poll_state_synchronize_srcu() for this purpose. The first can be used if future grace periods are inevitable (perhaps due to a later call_srcu() invocation), the second if future grace periods might not otherwise happen, and the third to check if a grace period has elapsed since the corresponding call to either of the first two. As with get_state_synchronize_rcu() and cond_synchronize_rcu(), the return value from either get_state_synchronize_srcu() or start_poll_synchronize_srcu() must be passed in to a later call to poll_state_synchronize_srcu(). Link: https://lore.kernel.org/rcu/20201112201547.GF3365678@moria.home.lan/ Reported-by: Kent Overstreet [ paulmck: Add EXPORT_SYMBOL_GPL() per kernel test robot feedback. ] [ paulmck: Apply feedback from Neeraj Upadhyay. ] Link: https://lore.kernel.org/lkml/20201117004017.GA7444@paulmck-ThinkPad-P72/ Reviewed-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Greg Kroah-Hartman --- include/linux/rcupdate.h | 2 ++ include/linux/srcu.h | 3 +++ include/linux/srcutiny.h | 1 + kernel/rcu/srcutiny.c | 55 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7d12c76e8fa4..0d7013da818c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -33,6 +33,8 @@ #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) #define ulong2long(a) (*(long *)(&(a))) +#define USHORT_CMP_GE(a, b) (USHRT_MAX / 2 >= (unsigned short)((a) - (b))) +#define USHORT_CMP_LT(a, b) (USHRT_MAX / 2 < (unsigned short)((a) - (b))) /* Exported common interfaces */ void call_rcu(struct rcu_head *head, rcu_callback_t func); diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e432cc92c73d..a0895bbf71ce 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -60,6 +60,9 @@ void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); +unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); +unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); +bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index b8b42d0a24f1..0e0cf4d6a72a 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -16,6 +16,7 @@ struct srcu_struct { short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ unsigned short srcu_idx; /* Current reader array element in bit 0x2. */ + unsigned short srcu_idx_max; /* Furthest future srcu_idx request. */ u8 srcu_gp_running; /* GP workqueue running? */ u8 srcu_gp_waiting; /* GP waiting for readers? */ struct swait_queue_head srcu_wq; diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 3bac1db85a85..26344dc6483b 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -34,6 +34,7 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp) ssp->srcu_gp_running = false; ssp->srcu_gp_waiting = false; ssp->srcu_idx = 0; + ssp->srcu_idx_max = 0; INIT_WORK(&ssp->srcu_work, srcu_drive_gp); INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; @@ -84,6 +85,8 @@ void cleanup_srcu_struct(struct srcu_struct *ssp) WARN_ON(ssp->srcu_gp_waiting); WARN_ON(ssp->srcu_cb_head); WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); + WARN_ON(ssp->srcu_idx != ssp->srcu_idx_max); + WARN_ON(ssp->srcu_idx & 0x1); } EXPORT_SYMBOL_GPL(cleanup_srcu_struct); @@ -114,7 +117,7 @@ void srcu_drive_gp(struct work_struct *wp) struct srcu_struct *ssp; ssp = container_of(wp, struct srcu_struct, srcu_work); - if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) + if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ @@ -147,13 +150,19 @@ void srcu_drive_gp(struct work_struct *wp) * straighten that out. */ WRITE_ONCE(ssp->srcu_gp_running, false); - if (READ_ONCE(ssp->srcu_cb_head)) + if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { + unsigned short cookie; + + cookie = get_state_synchronize_srcu(ssp); + if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) + return; + WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) schedule_work(&ssp->srcu_work); @@ -196,6 +205,48 @@ void synchronize_srcu(struct srcu_struct *ssp) } EXPORT_SYMBOL_GPL(synchronize_srcu); +/* + * get_state_synchronize_srcu - Provide an end-of-grace-period cookie + */ +unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) +{ + unsigned long ret; + + barrier(); + ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1; + barrier(); + return ret & USHRT_MAX; +} +EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); + +/* + * start_poll_synchronize_srcu - Provide cookie and start grace period + * + * The difference between this and get_state_synchronize_srcu() is that + * this function ensures that the poll_state_synchronize_srcu() will + * eventually return the value true. + */ +unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp) +{ + unsigned long ret = get_state_synchronize_srcu(ssp); + + srcu_gp_start_if_needed(ssp); + return ret; +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); + +/* + * poll_state_synchronize_srcu - Has cookie's grace period ended? + */ +bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) +{ + bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie); + + barrier(); + return ret; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); + /* Lockdep diagnostics. */ void __init rcu_scheduler_starting(void) { From 9a4f1dc8a17c8606ab0506cce63e554253677a53 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 5 Aug 2021 15:29:54 -0400 Subject: [PATCH 091/105] tracepoint: Use rcu get state and cond sync for static call updates commit 7b40066c97ec66a44e388f82fcf694987451768f upstream. State transitions from 1->0->1 and N->2->1 callbacks require RCU synchronization. Rather than performing the RCU synchronization every time the state change occurs, which is quite slow when many tracepoints are registered in batch, instead keep a snapshot of the RCU state on the most recent transitions which belong to a chain, and conditionally wait for a grace period on the last transition of the chain if one g.p. has not elapsed since the last snapshot. This applies to both RCU and SRCU. This brings the performance regression caused by commit 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") back to what it was originally. Before this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m10.593s user 0m0.017s sys 0m0.259s After this commit: # trace-cmd start -e all # time trace-cmd start -p nop real 0m0.878s user 0m0.000s sys 0m0.103s Link: https://lkml.kernel.org/r/20210805192954.30688-1-mathieu.desnoyers@efficios.com Link: https://lore.kernel.org/io-uring/4ebea8f0-58c9-e571-fd30-0ce4f6f09c70@samba.org/ Cc: stable@vger.kernel.org Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrew Morton Cc: "Paul E. McKenney" Cc: Stefan Metzmacher Fixes: 231264d6927f ("Fix: tracepoint: static call function vs data state mismatch") Signed-off-by: Mathieu Desnoyers Reviewed-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Greg Kroah-Hartman --- kernel/tracepoint.c | 81 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 14 deletions(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index d7260f6614a6..2dff7f1a27ec 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -28,6 +28,44 @@ extern tracepoint_ptr_t __stop___tracepoints_ptrs[]; DEFINE_SRCU(tracepoint_srcu); EXPORT_SYMBOL_GPL(tracepoint_srcu); +enum tp_transition_sync { + TP_TRANSITION_SYNC_1_0_1, + TP_TRANSITION_SYNC_N_2_1, + + _NR_TP_TRANSITION_SYNC, +}; + +struct tp_transition_snapshot { + unsigned long rcu; + unsigned long srcu; + bool ongoing; +}; + +/* Protected by tracepoints_mutex */ +static struct tp_transition_snapshot tp_transition_snapshot[_NR_TP_TRANSITION_SYNC]; + +static void tp_rcu_get_state(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + /* Keep the latest get_state snapshot. */ + snapshot->rcu = get_state_synchronize_rcu(); + snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = true; +} + +static void tp_rcu_cond_sync(enum tp_transition_sync sync) +{ + struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync]; + + if (!snapshot->ongoing) + return; + cond_synchronize_rcu(snapshot->rcu); + if (!poll_state_synchronize_srcu(&tracepoint_srcu, snapshot->srcu)) + synchronize_srcu(&tracepoint_srcu); + snapshot->ongoing = false; +} + /* Set to 1 to enable tracepoint debug output */ static const int tracepoint_debug; @@ -332,6 +370,11 @@ static int tracepoint_add_func(struct tracepoint *tp, */ switch (nr_func_state(tp_funcs)) { case TP_FUNC_1: /* 0->1 */ + /* + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. + */ + tp_rcu_cond_sync(TP_TRANSITION_SYNC_1_0_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); /* Both iterator and static call handle NULL tp->funcs */ @@ -346,10 +389,15 @@ static int tracepoint_add_func(struct tracepoint *tp, * Requires ordering between RCU assign/dereference and * static call update/call. */ - rcu_assign_pointer(tp->funcs, tp_funcs); - break; + fallthrough; case TP_FUNC_N: /* N->N+1 (N>1) */ rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>1) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); @@ -393,24 +441,23 @@ static int tracepoint_remove_func(struct tracepoint *tp, /* Both iterator and static call handle NULL tp->funcs */ rcu_assign_pointer(tp->funcs, NULL); /* - * Make sure new func never uses old data after a 1->0->1 - * transition sequence. - * Considering that transition 0->1 is the common case - * and don't have rcu-sync, issue rcu-sync after - * transition 1->0 to break that sequence by waiting for - * readers to be quiescent. + * Make sure new static func never uses old data after a + * 1->0->1 transition sequence. */ - tracepoint_synchronize_unregister(); + tp_rcu_get_state(TP_TRANSITION_SYNC_1_0_1); break; case TP_FUNC_1: /* 2->1 */ rcu_assign_pointer(tp->funcs, tp_funcs); /* - * On 2->1 transition, RCU sync is needed before setting - * static call to first callback, because the observer - * may have loaded any prior tp->funcs after the last one - * associated with an rcu-sync. + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. If the first + * element's data has changed, then force the synchronization + * to prevent current readers that have loaded the old data + * from calling the new function. */ - tracepoint_synchronize_unregister(); + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); + tp_rcu_cond_sync(TP_TRANSITION_SYNC_N_2_1); /* Set static call to first function */ tracepoint_update_call(tp, tp_funcs); break; @@ -418,6 +465,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, fallthrough; case TP_FUNC_N: rcu_assign_pointer(tp->funcs, tp_funcs); + /* + * Make sure static func never uses incorrect data after a + * N->...->2->1 (N>2) transition sequence. + */ + if (tp_funcs[0].data != old[0].data) + tp_rcu_get_state(TP_TRANSITION_SYNC_N_2_1); break; default: WARN_ON_ONCE(1); From 08953884aad457dca2670b94e10077a506f3be3d Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 9 Oct 2020 16:40:46 +0200 Subject: [PATCH 092/105] usb: typec: ucsi: acpi: Always decode connector change information commit 47ea2929d58c35598e681212311d35b240c373ce upstream. Normal commands may be reporting that a connector has changed. Always call the usci_connector_change handler and let it take care of scheduling the work when needed. Doing this makes the ACPI code path identical to the CCG one. Cc: Hans de Goede Cc: Heikki Krogerus Acked-by: Heikki Krogerus Signed-off-by: Benjamin Berg Link: https://lore.kernel.org/r/20201009144047.505957-2-benjamin@sipsolutions.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi_acpi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c index fbfe8f5933af..04976435ad73 100644 --- a/drivers/usb/typec/ucsi/ucsi_acpi.c +++ b/drivers/usb/typec/ucsi/ucsi_acpi.c @@ -103,11 +103,12 @@ static void ucsi_acpi_notify(acpi_handle handle, u32 event, void *data) if (ret) return; + if (UCSI_CCI_CONNECTOR(cci)) + ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); + if (test_bit(COMMAND_PENDING, &ua->flags) && cci & (UCSI_CCI_ACK_COMPLETE | UCSI_CCI_COMMAND_COMPLETE)) complete(&ua->complete); - else if (UCSI_CCI_CONNECTOR(cci)) - ucsi_connector_change(ua->ucsi, UCSI_CCI_CONNECTOR(cci)); } static int ucsi_acpi_probe(struct platform_device *pdev) From e15e32d519fa9e0fb157fafeedb603283fef26e8 Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Fri, 9 Oct 2020 16:40:47 +0200 Subject: [PATCH 093/105] usb: typec: ucsi: Work around PPM losing change information commit 217504a055325fe76ec1142aa15f14d3db77f94f upstream. Some/many PPMs are simply clearing the change bitfield when a notification on a port is acknowledge. Unfortunately, doing so means that any changes between the GET_CONNECTOR_STATUS and ACK_CC_CI commands is simply lost. Work around this by re-fetching the connector status afterwards. We can then infer any changes that we see have happened but that may not be respresented in the change bitfield. We end up with the following actions: 1. UCSI_GET_CONNECTOR_STATUS, store result, update unprocessed_changes 2. UCSI_GET_CAM_SUPPORTED, discard result 3. ACK connector change 4. UCSI_GET_CONNECTOR_STATUS, store result 5. Infere lost changes by comparing UCSI_GET_CONNECTOR_STATUS results 6. If PPM reported a new change, then restart in order to ACK 7. Process everything as usual. The worker is also changed to re-schedule itself if a new change notification happened while it was running. Doing this fixes quite commonly occurring issues where e.g. the UCSI power supply would remain online even thought the ThunderBolt cable was unplugged. Cc: Hans de Goede Cc: Heikki Krogerus Acked-by: Heikki Krogerus Signed-off-by: Benjamin Berg Link: https://lore.kernel.org/r/20201009144047.505957-3-benjamin@sipsolutions.net Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 125 ++++++++++++++++++++++++++++------ drivers/usb/typec/ucsi/ucsi.h | 2 + 2 files changed, 107 insertions(+), 20 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 310b5caeb05a..dff93fbd2dd6 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -53,7 +53,7 @@ static int ucsi_acknowledge_connector_change(struct ucsi *ucsi) ctrl = UCSI_ACK_CC_CI; ctrl |= UCSI_ACK_CONNECTOR_CHANGE; - return ucsi->ops->async_write(ucsi, UCSI_CONTROL, &ctrl, sizeof(ctrl)); + return ucsi->ops->sync_write(ucsi, UCSI_CONTROL, &ctrl, sizeof(ctrl)); } static int ucsi_exec_command(struct ucsi *ucsi, u64 command); @@ -648,20 +648,112 @@ static void ucsi_handle_connector_change(struct work_struct *work) struct ucsi_connector *con = container_of(work, struct ucsi_connector, work); struct ucsi *ucsi = con->ucsi; + struct ucsi_connector_status pre_ack_status; + struct ucsi_connector_status post_ack_status; enum typec_role role; + u16 inferred_changes; + u16 changed_flags; u64 command; int ret; mutex_lock(&con->lock); + /* + * Some/many PPMs have an issue where all fields in the change bitfield + * are cleared when an ACK is send. This will causes any change + * between GET_CONNECTOR_STATUS and ACK to be lost. + * + * We work around this by re-fetching the connector status afterwards. + * We then infer any changes that we see have happened but that may not + * be represented in the change bitfield. + * + * Also, even though we don't need to know the currently supported alt + * modes, we run the GET_CAM_SUPPORTED command to ensure the PPM does + * not get stuck in case it assumes we do. + * Always do this, rather than relying on UCSI_CONSTAT_CAM_CHANGE to be + * set in the change bitfield. + * + * We end up with the following actions: + * 1. UCSI_GET_CONNECTOR_STATUS, store result, update unprocessed_changes + * 2. UCSI_GET_CAM_SUPPORTED, discard result + * 3. ACK connector change + * 4. UCSI_GET_CONNECTOR_STATUS, store result + * 5. Infere lost changes by comparing UCSI_GET_CONNECTOR_STATUS results + * 6. If PPM reported a new change, then restart in order to ACK + * 7. Process everything as usual. + * + * We may end up seeing a change twice, but we can only miss extremely + * short transitional changes. + */ + + /* 1. First UCSI_GET_CONNECTOR_STATUS */ command = UCSI_GET_CONNECTOR_STATUS | UCSI_CONNECTOR_NUMBER(con->num); - ret = ucsi_send_command(ucsi, command, &con->status, - sizeof(con->status)); + ret = ucsi_send_command(ucsi, command, &pre_ack_status, + sizeof(pre_ack_status)); if (ret < 0) { dev_err(ucsi->dev, "%s: GET_CONNECTOR_STATUS failed (%d)\n", __func__, ret); goto out_unlock; } + con->unprocessed_changes |= pre_ack_status.change; + + /* 2. Run UCSI_GET_CAM_SUPPORTED and discard the result. */ + command = UCSI_GET_CAM_SUPPORTED; + command |= UCSI_CONNECTOR_NUMBER(con->num); + ucsi_send_command(con->ucsi, command, NULL, 0); + + /* 3. ACK connector change */ + clear_bit(EVENT_PENDING, &ucsi->flags); + ret = ucsi_acknowledge_connector_change(ucsi); + if (ret) { + dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); + goto out_unlock; + } + + /* 4. Second UCSI_GET_CONNECTOR_STATUS */ + command = UCSI_GET_CONNECTOR_STATUS | UCSI_CONNECTOR_NUMBER(con->num); + ret = ucsi_send_command(ucsi, command, &post_ack_status, + sizeof(post_ack_status)); + if (ret < 0) { + dev_err(ucsi->dev, "%s: GET_CONNECTOR_STATUS failed (%d)\n", + __func__, ret); + goto out_unlock; + } + + /* 5. Inferre any missing changes */ + changed_flags = pre_ack_status.flags ^ post_ack_status.flags; + inferred_changes = 0; + if (UCSI_CONSTAT_PWR_OPMODE(changed_flags) != 0) + inferred_changes |= UCSI_CONSTAT_POWER_OPMODE_CHANGE; + + if (changed_flags & UCSI_CONSTAT_CONNECTED) + inferred_changes |= UCSI_CONSTAT_CONNECT_CHANGE; + + if (changed_flags & UCSI_CONSTAT_PWR_DIR) + inferred_changes |= UCSI_CONSTAT_POWER_DIR_CHANGE; + + if (UCSI_CONSTAT_PARTNER_FLAGS(changed_flags) != 0) + inferred_changes |= UCSI_CONSTAT_PARTNER_CHANGE; + + if (UCSI_CONSTAT_PARTNER_TYPE(changed_flags) != 0) + inferred_changes |= UCSI_CONSTAT_PARTNER_CHANGE; + + /* Mask out anything that was correctly notified in the later call. */ + inferred_changes &= ~post_ack_status.change; + if (inferred_changes) + dev_dbg(ucsi->dev, "%s: Inferred changes that would have been lost: 0x%04x\n", + __func__, inferred_changes); + + con->unprocessed_changes |= inferred_changes; + + /* 6. If PPM reported a new change, then restart in order to ACK */ + if (post_ack_status.change) + goto out_unlock; + + /* 7. Continue as if nothing happened */ + con->status = post_ack_status; + con->status.change = con->unprocessed_changes; + con->unprocessed_changes = 0; role = !!(con->status.flags & UCSI_CONSTAT_PWR_DIR); @@ -703,28 +795,19 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_port_psy_changed(con); } - if (con->status.change & UCSI_CONSTAT_CAM_CHANGE) { - /* - * We don't need to know the currently supported alt modes here. - * Running GET_CAM_SUPPORTED command just to make sure the PPM - * does not get stuck in case it assumes we do so. - */ - command = UCSI_GET_CAM_SUPPORTED; - command |= UCSI_CONNECTOR_NUMBER(con->num); - ucsi_send_command(con->ucsi, command, NULL, 0); - } - if (con->status.change & UCSI_CONSTAT_PARTNER_CHANGE) ucsi_partner_change(con); - ret = ucsi_acknowledge_connector_change(ucsi); - if (ret) - dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); - trace_ucsi_connector_change(con->num, &con->status); out_unlock: - clear_bit(EVENT_PENDING, &ucsi->flags); + if (test_and_clear_bit(EVENT_PENDING, &ucsi->flags)) { + schedule_work(&con->work); + mutex_unlock(&con->lock); + return; + } + + clear_bit(EVENT_PROCESSING, &ucsi->flags); mutex_unlock(&con->lock); } @@ -742,7 +825,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num) return; } - if (!test_and_set_bit(EVENT_PENDING, &ucsi->flags)) + set_bit(EVENT_PENDING, &ucsi->flags); + + if (!test_and_set_bit(EVENT_PROCESSING, &ucsi->flags)) schedule_work(&con->work); } EXPORT_SYMBOL_GPL(ucsi_connector_change); diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 047e17c4b492..fce23ad16c6d 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -299,6 +299,7 @@ struct ucsi { #define EVENT_PENDING 0 #define COMMAND_PENDING 1 #define ACK_PENDING 2 +#define EVENT_PROCESSING 3 }; #define UCSI_MAX_SVID 5 @@ -324,6 +325,7 @@ struct ucsi_connector { struct typec_capability typec_cap; + u16 unprocessed_changes; struct ucsi_connector_status status; struct ucsi_connector_capability cap; struct power_supply *psy; From f8242f554c82ad7f91b73d3834e81335af9ad600 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Sat, 15 May 2021 21:09:53 -0700 Subject: [PATCH 094/105] usb: typec: ucsi: Clear pending after acking connector change commit 8c9b3caab3ac26db1da00b8117901640c55a69dd upstream. It's possible that the interrupt handler for the UCSI driver signals a connector changes after the handler clears the PENDING bit, but before it has sent the acknowledge request. The result is that the handler is invoked yet again, to ack the same connector change. At least some versions of the Qualcomm UCSI firmware will not handle the second - "spurious" - acknowledgment gracefully. So make sure to not clear the pending flag until the change is acknowledged. Any connector changes coming in after the acknowledgment, that would have the pending flag incorrectly cleared, would afaict be covered by the subsequent connector status check. Fixes: 217504a05532 ("usb: typec: ucsi: Work around PPM losing change information") Cc: stable Reviewed-by: Heikki Krogerus Acked-By: Benjamin Berg Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20210516040953.622409-1-bjorn.andersson@linaro.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dff93fbd2dd6..3bfa8005ae65 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -703,8 +703,8 @@ static void ucsi_handle_connector_change(struct work_struct *work) ucsi_send_command(con->ucsi, command, NULL, 0); /* 3. ACK connector change */ - clear_bit(EVENT_PENDING, &ucsi->flags); ret = ucsi_acknowledge_connector_change(ucsi); + clear_bit(EVENT_PENDING, &ucsi->flags); if (ret) { dev_err(ucsi->dev, "%s: ACK failed (%d)", __func__, ret); goto out_unlock; From b6c657abb893ef456fad6b229089f37127202df0 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Wed, 11 Aug 2021 17:50:43 +0800 Subject: [PATCH 095/105] net: dsa: mt7530: fix VLAN traffic leaks again commit 7428022b50d0fbb4846dd0f00639ea09d36dff02 upstream. When a port leaves a VLAN-aware bridge, the current code does not clear other ports' matrix field bit. If the bridge is later set to VLAN-unaware mode, traffic in the bridge may leak to that port. Remove the VLAN filtering check in mt7530_port_bridge_leave. Fixes: 474a2ddaa192 ("net: dsa: mt7530: fix VLAN traffic leaks") Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530") Signed-off-by: DENG Qingfang Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/dsa/mt7530.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 3fa2f81c8b47..73de09093c35 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -1161,11 +1161,8 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, /* Remove this port from the port matrix of the other ports * in the same bridge. If the port is disabled, port matrix * is kept and not being setup until the port becomes enabled. - * And the other port's port matrix cannot be broken when the - * other port is still a VLAN-aware port. */ - if (dsa_is_user_port(ds, i) && i != port && - !dsa_port_is_vlan_filtering(dsa_to_port(ds, i))) { + if (dsa_is_user_port(ds, i) && i != port) { if (dsa_to_port(ds, i)->bridge_dev != bridge) continue; if (priv->ports[i].enable) From f760c1101f5284acdf3a8132dff257838b9bf75c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 23 Jun 2021 13:39:33 -0700 Subject: [PATCH 096/105] lkdtm: Enable DOUBLE_FAULT on all architectures commit f123c42bbeff26bfe8bdb08a01307e92d51eec39 upstream Where feasible, I prefer to have all tests visible on all architectures, but to have them wired to XFAIL. DOUBLE_FAIL was set up to XFAIL, but wasn't actually being added to the test list. Fixes: cea23efb4de2 ("lkdtm/bugs: Make double-fault test always available") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20210623203936.3151093-7-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman [sudip: adjust context] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman --- drivers/misc/lkdtm/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c index 97803f213d9d..c802db9aaeb0 100644 --- a/drivers/misc/lkdtm/core.c +++ b/drivers/misc/lkdtm/core.c @@ -173,9 +173,7 @@ static const struct crashtype crashtypes[] = { CRASHTYPE(USERCOPY_KERNEL), CRASHTYPE(STACKLEAK_ERASING), CRASHTYPE(CFI_FORWARD_PROTO), -#ifdef CONFIG_X86_32 CRASHTYPE(DOUBLE_FAULT), -#endif }; From 1604c42a1ca93469dc5c726d7768fdc97105e87f Mon Sep 17 00:00:00 2001 From: Petr Vorel Date: Thu, 15 Apr 2021 21:39:13 +0200 Subject: [PATCH 097/105] arm64: dts: qcom: msm8994-angler: Fix gpio-reserved-ranges 85-88 commit f890f89d9a80fffbfa7ca791b78927e5b8aba869 upstream. Reserve GPIO pins 85-88 as these aren't meant to be accessible from the application CPUs (causes reboot). Yet another fix similar to 9134586715e3, 5f8d3ab136d0, which is needed to allow angler to boot after 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning"). Fixes: feeaf56ac78d ("arm64: dts: msm8994 SoC and Huawei Angler (Nexus 6P) support") Signed-off-by: Petr Vorel Reviewed-by: Konrad Dybcio Link: https://lore.kernel.org/r/20210415193913.1836153-1-petr.vorel@gmail.com Signed-off-by: Bjorn Andersson Signed-off-by: Greg Kroah-Hartman --- arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts index baa55643b40f..ffe1a9bd8f70 100644 --- a/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts +++ b/arch/arm64/boot/dts/qcom/msm8994-angler-rev-101.dts @@ -32,3 +32,7 @@ }; }; }; + +&tlmm { + gpio-reserved-ranges = <85 4>; +}; From c43add24dffdbac269d5610465ced70cfc1bad9e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 6 Aug 2021 18:24:15 +0800 Subject: [PATCH 098/105] btrfs: fix NULL pointer dereference when deleting device by invalid id commit e4571b8c5e9ffa1e85c0c671995bd4dcc5c75091 upstream. [BUG] It's easy to trigger NULL pointer dereference, just by removing a non-existing device id: # mkfs.btrfs -f -m single -d single /dev/test/scratch1 \ /dev/test/scratch2 # mount /dev/test/scratch1 /mnt/btrfs # btrfs device remove 3 /mnt/btrfs Then we have the following kernel NULL pointer dereference: BUG: kernel NULL pointer dereference, address: 0000000000000000 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 9 PID: 649 Comm: btrfs Not tainted 5.14.0-rc3-custom+ #35 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:btrfs_rm_device+0x4de/0x6b0 [btrfs] btrfs_ioctl+0x18bb/0x3190 [btrfs] ? lock_is_held_type+0xa5/0x120 ? find_held_lock.constprop.0+0x2b/0x80 ? do_user_addr_fault+0x201/0x6a0 ? lock_release+0xd2/0x2d0 ? __x64_sys_ioctl+0x83/0xb0 __x64_sys_ioctl+0x83/0xb0 do_syscall_64+0x3b/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae [CAUSE] Commit a27a94c2b0c7 ("btrfs: Make btrfs_find_device_by_devspec return btrfs_device directly") moves the "missing" device path check into btrfs_rm_device(). But btrfs_rm_device() itself can have case where it only receives @devid, with NULL as @device_path. In that case, calling strcmp() on NULL will trigger the NULL pointer dereference. Before that commit, we handle the "missing" case inside btrfs_find_device_by_devspec(), which will not check @device_path at all if @devid is provided, thus no way to trigger the bug. [FIX] Before calling strcmp(), also make sure @device_path is not NULL. Fixes: a27a94c2b0c7 ("btrfs: Make btrfs_find_device_by_devspec return btrfs_device directly") CC: stable@vger.kernel.org # 5.4+ Reported-by: butt3rflyh4ck Reviewed-by: Anand Jain Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 920c84fae710..d1fccddcf403 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2059,7 +2059,7 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, if (IS_ERR(device)) { if (PTR_ERR(device) == -ENOENT && - strcmp(device_path, "missing") == 0) + device_path && strcmp(device_path, "missing") == 0) ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; else ret = PTR_ERR(device); From 709c162ddc835613112bdd2db15b7d04e10c9bbf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Apr 2021 10:18:17 +0200 Subject: [PATCH 099/105] kthread: Fix PF_KTHREAD vs to_kthread() race commit 3a7956e25e1d7b3c148569e78895e1f3178122a9 upstream. The kthread_is_per_cpu() construct relies on only being called on PF_KTHREAD tasks (per the WARN in to_kthread). This gives rise to the following usage pattern: if ((p->flags & PF_KTHREAD) && kthread_is_per_cpu(p)) However, as reported by syzcaller, this is broken. The scenario is: CPU0 CPU1 (running p) (p->flags & PF_KTHREAD) // true begin_new_exec() me->flags &= ~(PF_KTHREAD|...); kthread_is_per_cpu(p) to_kthread(p) WARN(!(p->flags & PF_KTHREAD) <-- *SPLAT* Introduce __to_kthread() that omits the WARN and is sure to check both values. Use this to remove the problematic pattern for kthread_is_per_cpu() and fix a number of other kthread_*() functions that have similar issues but are currently not used in ways that would expose the problem. Notably kthread_func() is only ever called on 'current', while kthread_probe_data() is only used for PF_WQ_WORKER, which implies the task is from kthread_create*(). Fixes: ac687e6e8c26 ("kthread: Extract KTHREAD_IS_PER_CPU") Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lkml.kernel.org/r/YH6WJc825C4P0FCK@hirez.programming.kicks-ass.net [ Drop the balance_push() hunk as it is not needed. ] Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman --- kernel/kthread.c | 33 +++++++++++++++++++++++++++------ kernel/sched/fair.c | 2 +- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/kernel/kthread.c b/kernel/kthread.c index 9825cf89c614..508fe5278285 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -84,6 +84,25 @@ static inline struct kthread *to_kthread(struct task_struct *k) return (__force void *)k->set_child_tid; } +/* + * Variant of to_kthread() that doesn't assume @p is a kthread. + * + * Per construction; when: + * + * (p->flags & PF_KTHREAD) && p->set_child_tid + * + * the task is both a kthread and struct kthread is persistent. However + * PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and + * begin_new_exec()). + */ +static inline struct kthread *__to_kthread(struct task_struct *p) +{ + void *kthread = (__force void *)p->set_child_tid; + if (kthread && !(p->flags & PF_KTHREAD)) + kthread = NULL; + return kthread; +} + void free_kthread_struct(struct task_struct *k) { struct kthread *kthread; @@ -168,8 +187,9 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); */ void *kthread_func(struct task_struct *task) { - if (task->flags & PF_KTHREAD) - return to_kthread(task)->threadfn; + struct kthread *kthread = __to_kthread(task); + if (kthread) + return kthread->threadfn; return NULL; } EXPORT_SYMBOL_GPL(kthread_func); @@ -199,10 +219,11 @@ EXPORT_SYMBOL_GPL(kthread_data); */ void *kthread_probe_data(struct task_struct *task) { - struct kthread *kthread = to_kthread(task); + struct kthread *kthread = __to_kthread(task); void *data = NULL; - copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); + if (kthread) + copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); return data; } @@ -514,9 +535,9 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) set_bit(KTHREAD_IS_PER_CPU, &kthread->flags); } -bool kthread_is_per_cpu(struct task_struct *k) +bool kthread_is_per_cpu(struct task_struct *p) { - struct kthread *kthread = to_kthread(k); + struct kthread *kthread = __to_kthread(p); if (!kthread) return false; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 262b02d75007..bad97d35684d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7569,7 +7569,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) return 0; /* Disregard pcpu kthreads; they are where they need to be. */ - if ((p->flags & PF_KTHREAD) && kthread_is_per_cpu(p)) + if (kthread_is_per_cpu(p)) return 0; if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) { From 17982c664f8b2c9543241552331b008ad34d2648 Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Sat, 7 Aug 2021 10:37:02 +0300 Subject: [PATCH 100/105] Revert "floppy: reintroduce O_NDELAY fix" commit c7e9d0020361f4308a70cdfd6d5335e273eb8717 upstream. The patch breaks userspace implementations (e.g. fdutils) and introduces regressions in behaviour. Previously, it was possible to O_NDELAY open a floppy device with no media inserted or with write protected media without an error. Some userspace tools use this particular behavior for probing. It's not the first time when we revert this patch. Previous revert is in commit f2791e7eadf4 (Revert "floppy: refactor open() flags handling"). This reverts commit 8a0c014cd20516ade9654fc13b51345ec58e7be8. Link: https://lore.kernel.org/linux-block/de10cb47-34d1-5a88-7751-225ca380f735@compro.net/ Reported-by: Mark Hounschell Cc: Jiri Kosina Cc: Wim Osterholt Cc: Kurt Garloff Cc: Signed-off-by: Denis Efremov Signed-off-by: Greg Kroah-Hartman --- drivers/block/floppy.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 295da442329f..7df79ae6b0a1 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4120,23 +4120,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (fdc_state[FDC(drive)].rawcmd == 1) fdc_state[FDC(drive)].rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - drive_state[drive].last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); - if (bdev_check_media_change(bdev)) - floppy_revalidate(bdev->bd_disk); - if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + drive_state[drive].last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, + &drive_state[drive].flags); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) goto out; } - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) - goto out; - mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; From 0085646e02b2423e20d0a9ea3fe64d8270255b23 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 27 Aug 2021 20:42:57 +0200 Subject: [PATCH 101/105] Revert "parisc: Add assembly implementations for memset, strlen, strcpy, strncpy and strcat" commit f6a3308d6feb351d9854eb8b3f6289a1ac163125 upstream. This reverts commit 83af58f8068ea3f7b3c537c37a30887bfa585069. It turns out that at least the assembly implementation for strncpy() was buggy. Revert the whole commit and return back to the default coding. Signed-off-by: Helge Deller Cc: # v5.4+ Cc: Rasmus Villemoes Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/parisc/include/asm/string.h | 15 ---- arch/parisc/kernel/parisc_ksyms.c | 4 - arch/parisc/lib/Makefile | 4 +- arch/parisc/lib/memset.c | 72 ++++++++++++++++ arch/parisc/lib/string.S | 136 ------------------------------ 5 files changed, 74 insertions(+), 157 deletions(-) create mode 100644 arch/parisc/lib/memset.c delete mode 100644 arch/parisc/lib/string.S diff --git a/arch/parisc/include/asm/string.h b/arch/parisc/include/asm/string.h index 4a0c9dbd62fd..f6e1132f4e35 100644 --- a/arch/parisc/include/asm/string.h +++ b/arch/parisc/include/asm/string.h @@ -8,19 +8,4 @@ extern void * memset(void *, int, size_t); #define __HAVE_ARCH_MEMCPY void * memcpy(void * dest,const void *src,size_t count); -#define __HAVE_ARCH_STRLEN -extern size_t strlen(const char *s); - -#define __HAVE_ARCH_STRCPY -extern char *strcpy(char *dest, const char *src); - -#define __HAVE_ARCH_STRNCPY -extern char *strncpy(char *dest, const char *src, size_t count); - -#define __HAVE_ARCH_STRCAT -extern char *strcat(char *dest, const char *src); - -#define __HAVE_ARCH_MEMSET -extern void *memset(void *, int, size_t); - #endif diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 8ed409ecec93..e8a6a751dfd8 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -17,10 +17,6 @@ #include EXPORT_SYMBOL(memset); -EXPORT_SYMBOL(strlen); -EXPORT_SYMBOL(strcpy); -EXPORT_SYMBOL(strncpy); -EXPORT_SYMBOL(strcat); #include EXPORT_SYMBOL(__xchg8); diff --git a/arch/parisc/lib/Makefile b/arch/parisc/lib/Makefile index 2d7a9974dbae..7b197667faf6 100644 --- a/arch/parisc/lib/Makefile +++ b/arch/parisc/lib/Makefile @@ -3,7 +3,7 @@ # Makefile for parisc-specific library files # -lib-y := lusercopy.o bitops.o checksum.o io.o memcpy.o \ - ucmpdi2.o delay.o string.o +lib-y := lusercopy.o bitops.o checksum.o io.o memset.o memcpy.o \ + ucmpdi2.o delay.o obj-y := iomap.o diff --git a/arch/parisc/lib/memset.c b/arch/parisc/lib/memset.c new file mode 100644 index 000000000000..133e4809859a --- /dev/null +++ b/arch/parisc/lib/memset.c @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#include +#include + +#define OPSIZ (BITS_PER_LONG/8) +typedef unsigned long op_t; + +void * +memset (void *dstpp, int sc, size_t len) +{ + unsigned int c = sc; + long int dstp = (long int) dstpp; + + if (len >= 8) + { + size_t xlen; + op_t cccc; + + cccc = (unsigned char) c; + cccc |= cccc << 8; + cccc |= cccc << 16; + if (OPSIZ > 4) + /* Do the shift in two steps to avoid warning if long has 32 bits. */ + cccc |= (cccc << 16) << 16; + + /* There are at least some bytes to set. + No need to test for LEN == 0 in this alignment loop. */ + while (dstp % OPSIZ != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + /* Write 8 `op_t' per iteration until less than 8 `op_t' remain. */ + xlen = len / (OPSIZ * 8); + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + ((op_t *) dstp)[1] = cccc; + ((op_t *) dstp)[2] = cccc; + ((op_t *) dstp)[3] = cccc; + ((op_t *) dstp)[4] = cccc; + ((op_t *) dstp)[5] = cccc; + ((op_t *) dstp)[6] = cccc; + ((op_t *) dstp)[7] = cccc; + dstp += 8 * OPSIZ; + xlen -= 1; + } + len %= OPSIZ * 8; + + /* Write 1 `op_t' per iteration until less than OPSIZ bytes remain. */ + xlen = len / OPSIZ; + while (xlen > 0) + { + ((op_t *) dstp)[0] = cccc; + dstp += OPSIZ; + xlen -= 1; + } + len %= OPSIZ; + } + + /* Write the last few bytes. */ + while (len > 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + + return dstpp; +} diff --git a/arch/parisc/lib/string.S b/arch/parisc/lib/string.S deleted file mode 100644 index 4a64264427a6..000000000000 --- a/arch/parisc/lib/string.S +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PA-RISC assembly string functions - * - * Copyright (C) 2019 Helge Deller - */ - -#include -#include - - .section .text.hot - .level PA_ASM_LEVEL - - t0 = r20 - t1 = r21 - t2 = r22 - -ENTRY_CFI(strlen, frame=0,no_calls) - or,COND(<>) arg0,r0,ret0 - b,l,n .Lstrlen_null_ptr,r0 - depwi 0,31,2,ret0 - cmpb,COND(<>) arg0,ret0,.Lstrlen_not_aligned - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_not_aligned: - uaddcm arg0,ret0,t1 - shladd t1,3,r0,t1 - mtsar t1 - depwi -1,%sar,32,t0 - uxor,nbz r0,t0,r0 -.Lstrlen_loop: - b,l,n .Lstrlen_end_loop,r0 - ldw,ma 4(ret0),t0 - cmpib,tr 0,r0,.Lstrlen_loop - uxor,nbz r0,t0,r0 -.Lstrlen_end_loop: - extrw,u,<> t0,7,8,r0 - addib,tr,n -3,ret0,.Lstrlen_out - extrw,u,<> t0,15,8,r0 - addib,tr,n -2,ret0,.Lstrlen_out - extrw,u,<> t0,23,8,r0 - addi -1,ret0,ret0 -.Lstrlen_out: - bv r0(rp) - uaddcm ret0,arg0,ret0 -.Lstrlen_null_ptr: - bv,n r0(rp) -ENDPROC_CFI(strlen) - - -ENTRY_CFI(strcpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 0(arg0),ret0 - ldo 1(arg1),t1 - cmpb,= r0,t0,2f - ldo 1(arg0),t2 -1: ldb 0(t1),arg1 - stb arg1,0(t2) - ldo 1(t1),t1 - cmpb,<> r0,arg1,1b - ldo 1(t2),t2 -2: bv,n r0(rp) -ENDPROC_CFI(strcpy) - - -ENTRY_CFI(strncpy, frame=0,no_calls) - ldb 0(arg1),t0 - stb t0,0(arg0) - ldo 1(arg1),t1 - ldo 0(arg0),ret0 - cmpb,= r0,t0,2f - ldo 1(arg0),arg1 -1: ldo -1(arg2),arg2 - cmpb,COND(=),n r0,arg2,2f - ldb 0(t1),arg0 - stb arg0,0(arg1) - ldo 1(t1),t1 - cmpb,<> r0,arg0,1b - ldo 1(arg1),arg1 -2: bv,n r0(rp) -ENDPROC_CFI(strncpy) - - -ENTRY_CFI(strcat, frame=0,no_calls) - ldb 0(arg0),t0 - cmpb,= t0,r0,2f - ldo 0(arg0),ret0 - ldo 1(arg0),arg0 -1: ldb 0(arg0),t1 - cmpb,<>,n r0,t1,1b - ldo 1(arg0),arg0 -2: ldb 0(arg1),t2 - stb t2,0(arg0) - ldo 1(arg0),arg0 - ldb 0(arg1),t0 - cmpb,<> r0,t0,2b - ldo 1(arg1),arg1 - bv,n r0(rp) -ENDPROC_CFI(strcat) - - -ENTRY_CFI(memset, frame=0,no_calls) - copy arg0,ret0 - cmpb,COND(=) r0,arg0,4f - copy arg0,t2 - cmpb,COND(=) r0,arg2,4f - ldo -1(arg2),arg3 - subi -1,arg3,t0 - subi 0,t0,t1 - cmpiclr,COND(>=) 0,t1,arg2 - ldo -1(t1),arg2 - extru arg2,31,2,arg0 -2: stb arg1,0(t2) - ldo 1(t2),t2 - addib,>= -1,arg0,2b - ldo -1(arg3),arg3 - cmpiclr,COND(<=) 4,arg2,r0 - b,l,n 4f,r0 -#ifdef CONFIG_64BIT - depd,* r0,63,2,arg2 -#else - depw r0,31,2,arg2 -#endif - ldo 1(t2),t2 -3: stb arg1,-1(t2) - stb arg1,0(t2) - stb arg1,1(t2) - stb arg1,2(t2) - addib,COND(>) -4,arg2,3b - ldo 4(t2),t2 -4: bv,n r0(rp) -ENDPROC_CFI(memset) - - .end From 1890ee7ff87fc48806c37f3543e025e2252ac2e3 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 26 Aug 2021 12:46:01 -0700 Subject: [PATCH 102/105] net: don't unconditionally copy_from_user a struct ifreq for socket ioctls commit d0efb16294d145d157432feda83877ae9d7cdf37 upstream. A common implementation of isatty(3) involves calling a ioctl passing a dummy struct argument and checking whether the syscall failed -- bionic and glibc use TCGETS (passing a struct termios), and musl uses TIOCGWINSZ (passing a struct winsize). If the FD is a socket, we will copy sizeof(struct ifreq) bytes of data from the argument and return -EFAULT if that fails. The result is that the isatty implementations may return a non-POSIX-compliant value in errno in the case where part of the dummy struct argument is inaccessible, as both struct termios and struct winsize are smaller than struct ifreq (at least on arm64). Although there is usually enough stack space following the argument on the stack that this did not present a practical problem up to now, with MTE stack instrumentation it's more likely for the copy to fail, as the memory following the struct may have a different tag. Fix the problem by adding an early check for whether the ioctl is a valid socket ioctl, and return -ENOTTY if it isn't. Fixes: 44c02a2c3dc5 ("dev_ioctl(): move copyin/copyout to callers") Link: https://linux-review.googlesource.com/id/I869da6cf6daabc3e4b7b82ac979683ba05e27d4d Signed-off-by: Peter Collingbourne Cc: # 4.19 Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/netdevice.h | 4 ++++ net/socket.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e37480b5f4c0..4fdeccf22378 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3884,6 +3884,10 @@ int netdev_rx_handler_register(struct net_device *dev, void netdev_rx_handler_unregister(struct net_device *dev); bool dev_valid_name(const char *name); +static inline bool is_socket_ioctl_cmd(unsigned int cmd) +{ + return _IOC_TYPE(cmd) == SOCK_IOC_TYPE; +} int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_copyout); int dev_ifconf(struct net *net, struct ifconf *, int); diff --git a/net/socket.c b/net/socket.c index 002d5952ae5d..dd5da07bc1ff 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1062,7 +1062,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, rtnl_unlock(); if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf))) err = -EFAULT; - } else { + } else if (is_socket_ioctl_cmd(cmd)) { struct ifreq ifr; bool need_copyout; if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) @@ -1071,6 +1071,8 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, if (!err && need_copyout) if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; + } else { + err = -ENOTTY; } return err; } @@ -3264,6 +3266,8 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct ifreq ifreq; u32 data32; + if (!is_socket_ioctl_cmd(cmd)) + return -ENOTTY; if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ)) return -EFAULT; if (get_user(data32, &u_ifreq32->ifr_data)) From 38c1915d3e9f457006c4b2ef5eaab038c34a95a2 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Mon, 23 Aug 2021 22:04:09 -0400 Subject: [PATCH 103/105] audit: move put_tree() to avoid trim_trees refcount underflow and UAF commit 67d69e9d1a6c889d98951c1d74b19332ce0565af upstream. AUDIT_TRIM is expected to be idempotent, but multiple executions resulted in a refcount underflow and use-after-free. git bisect fingered commit fb041bb7c0a9 ("locking/refcount: Consolidate implementations of refcount_t") but this patch with its more thorough checking that wasn't in the x86 assembly code merely exposed a previously existing tree refcount imbalance in the case of tree trimming code that was refactored with prune_one() to remove a tree introduced in commit 8432c7006297 ("audit: Simplify locking around untag_chunk()") Move the put_tree() to cover only the prune_one() case. Passes audit-testsuite and 3 passes of "auditctl -t" with at least one directory watch. Cc: Jan Kara Cc: Will Deacon Cc: Alexander Viro Cc: Seiji Nishikawa Cc: stable@vger.kernel.org Fixes: 8432c7006297 ("audit: Simplify locking around untag_chunk()") Signed-off-by: Richard Guy Briggs Reviewed-by: Jan Kara [PM: reformatted/cleaned-up the commit description] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman --- kernel/audit_tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 6c91902f4f45..39241207ec04 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -593,7 +593,6 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged) spin_lock(&hash_lock); } spin_unlock(&hash_lock); - put_tree(victim); } /* @@ -602,6 +601,7 @@ static void prune_tree_chunks(struct audit_tree *victim, bool tagged) static void prune_one(struct audit_tree *victim) { prune_tree_chunks(victim, false); + put_tree(victim); } /* trim the uncommitted chunks from tree */ From 0c9a876f2897c64d21a5a414a9007a10cd015a9e Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 9 Aug 2021 18:04:13 -0700 Subject: [PATCH 104/105] bpf: Fix potentially incorrect results with bpf_get_local_storage() commit a2baf4e8bb0f306fbed7b5e6197c02896a638ab5 upstream. Commit b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") fixed a bug for bpf_get_local_storage() helper so different tasks won't mess up with each other's percpu local storage. The percpu data contains 8 slots so it can hold up to 8 contexts (same or different tasks), for 8 different program runs, at the same time. This in general is sufficient. But our internal testing showed the following warning multiple times: [...] warning: WARNING: CPU: 13 PID: 41661 at include/linux/bpf-cgroup.h:193 __cgroup_bpf_run_filter_sock_ops+0x13e/0x180 RIP: 0010:__cgroup_bpf_run_filter_sock_ops+0x13e/0x180 tcp_call_bpf.constprop.99+0x93/0xc0 tcp_conn_request+0x41e/0xa50 ? tcp_rcv_state_process+0x203/0xe00 tcp_rcv_state_process+0x203/0xe00 ? sk_filter_trim_cap+0xbc/0x210 ? tcp_v6_inbound_md5_hash.constprop.41+0x44/0x160 tcp_v6_do_rcv+0x181/0x3e0 tcp_v6_rcv+0xc65/0xcb0 ip6_protocol_deliver_rcu+0xbd/0x450 ip6_input_finish+0x11/0x20 ip6_input+0xb5/0xc0 ip6_sublist_rcv_finish+0x37/0x50 ip6_sublist_rcv+0x1dc/0x270 ipv6_list_rcv+0x113/0x140 __netif_receive_skb_list_core+0x1a0/0x210 netif_receive_skb_list_internal+0x186/0x2a0 gro_normal_list.part.170+0x19/0x40 napi_complete_done+0x65/0x150 mlx5e_napi_poll+0x1ae/0x680 __napi_poll+0x25/0x120 net_rx_action+0x11e/0x280 __do_softirq+0xbb/0x271 irq_exit_rcu+0x97/0xa0 common_interrupt+0x7f/0xa0 asm_common_interrupt+0x1e/0x40 RIP: 0010:bpf_prog_1835a9241238291a_tw_egress+0x5/0xbac ? __cgroup_bpf_run_filter_skb+0x378/0x4e0 ? do_softirq+0x34/0x70 ? ip6_finish_output2+0x266/0x590 ? ip6_finish_output+0x66/0xa0 ? ip6_output+0x6c/0x130 ? ip6_xmit+0x279/0x550 ? ip6_dst_check+0x61/0xd0 [...] Using drgn [0] to dump the percpu buffer contents showed that on this CPU slot 0 is still available, but slots 1-7 are occupied and those tasks in slots 1-7 mostly don't exist any more. So we might have issues in bpf_cgroup_storage_unset(). Further debugging confirmed that there is a bug in bpf_cgroup_storage_unset(). Currently, it tries to unset "current" slot with searching from the start. So the following sequence is possible: 1. A task is running and claims slot 0 2. Running BPF program is done, and it checked slot 0 has the "task" and ready to reset it to NULL (not yet). 3. An interrupt happens, another BPF program runs and it claims slot 1 with the *same* task. 4. The unset() in interrupt context releases slot 0 since it matches "task". 5. Interrupt is done, the task in process context reset slot 0. At the end, slot 1 is not reset and the same process can continue to occupy slots 2-7 and finally, when the above step 1-5 is repeated again, step 3 BPF program won't be able to claim an empty slot and a warning will be issued. To fix the issue, for unset() function, we should traverse from the last slot to the first. This way, the above issue can be avoided. The same reverse traversal should also be done in bpf_get_local_storage() helper itself. Otherwise, incorrect local storage may be returned to BPF program. [0] https://github.com/osandov/drgn Fixes: b910eaaaa4b8 ("bpf: Fix NULL pointer dereference in bpf_get_local_storage() helper") Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210810010413.1976277-1-yhs@fb.com Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf-cgroup.h | 4 ++-- kernel/bpf/helpers.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 53f14e8827cc..91b966978541 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -196,8 +196,8 @@ static inline void bpf_cgroup_storage_unset(void) { int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 3bd7fbd8c543..0efe7c7bfe5e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -386,8 +386,8 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) void *ptr; int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) + for (i = BPF_CGROUP_STORAGE_NEST_MAX - 1; i >= 0; i--) { + if (likely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) continue; storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); From f6dd002450bf7b9143aff3af42ad1e12efe9a4f8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Sep 2021 10:09:31 +0200 Subject: [PATCH 105/105] Linux 5.10.62 Link: https://lore.kernel.org/r/20210901122300.503008474@linuxfoundation.org Tested-by: Fox Chen Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Florian Fainelli Tested-by: Hulk Robot Tested-by: Linux Kernel Functional Testing Tested-by: Sudip Mukherjee Tested-by: Salvatore Bonaccorso Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index a6ab3263f81d..90c0cb3e4d3c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 61 +SUBLEVEL = 62 EXTRAVERSION = NAME = Dare mighty things