From 76eba54f0ddfb580240f150b90b87d57fb3924e3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 20 Oct 2022 17:18:58 +0200 Subject: [PATCH 01/98] fuse: fix readdir cache race [ Upstream commit 9fa248c65bdbf5af0a2f74dd38575acfc8dfd2bf ] There's a race in fuse's readdir cache that can result in an uninitilized page being read. The page lock is supposed to prevent this from happening but in the following case it doesn't: Two fuse_add_dirent_to_cache() start out and get the same parameters (size=0,offset=0). One of them wins the race to create and lock the page, after which it fills in data, sets rdc.size and unlocks the page. In the meantime the page gets evicted from the cache before the other instance gets to run. That one also creates the page, but finds the size to be mismatched, bails out and leaves the uninitialized page in the cache. Fix by marking a filled page uptodate and ignoring non-uptodate pages. Reported-by: Frank Sorenson Fixes: 5d7bc7e8680c ("fuse: allow using readdir cache") Cc: # v4.20 Signed-off-by: Miklos Szeredi Signed-off-by: Sasha Levin --- fs/fuse/readdir.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index bc267832310c..d5294e663df5 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -77,8 +77,10 @@ static void fuse_add_dirent_to_cache(struct file *file, goto unlock; addr = kmap_atomic(page); - if (!offset) + if (!offset) { clear_page(addr); + SetPageUptodate(page); + } memcpy(addr + offset, dirent, reclen); kunmap_atomic(addr); fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen; @@ -516,6 +518,12 @@ retry_locked: page = find_get_page_flags(file->f_mapping, index, FGP_ACCESSED | FGP_LOCK); + /* Page gone missing, then re-added to cache, but not initialized? */ + if (page && !PageUptodate(page)) { + unlock_page(page); + put_page(page); + page = NULL; + } spin_lock(&fi->rdc.lock); if (!page) { /* From 925bf1ba760482591218ace2c8fe135155da2710 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 9 Sep 2022 11:20:23 +0200 Subject: [PATCH 02/98] hwspinlock: qcom: correct MMIO max register for newer SoCs [ Upstream commit 90cb380f9ceb811059340d06ff5fd0c0e93ecbe1 ] Newer ARMv8 Qualcomm SoCs using 0x1000 register stride have maximum register 0x20000 (32 mutexes * 0x1000). Fixes: 7a1e6fb1c606 ("hwspinlock: qcom: Allow mmio usage in addition to syscon") Signed-off-by: Krzysztof Kozlowski Reviewed-by: Konrad Dybcio Signed-off-by: Bjorn Andersson Link: https://lore.kernel.org/r/20220909092035.223915-4-krzysztof.kozlowski@linaro.org Signed-off-by: Sasha Levin --- drivers/hwspinlock/qcom_hwspinlock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwspinlock/qcom_hwspinlock.c b/drivers/hwspinlock/qcom_hwspinlock.c index 364710966665..e49914664863 100644 --- a/drivers/hwspinlock/qcom_hwspinlock.c +++ b/drivers/hwspinlock/qcom_hwspinlock.c @@ -105,7 +105,7 @@ static const struct regmap_config tcsr_mutex_config = { .reg_bits = 32, .reg_stride = 4, .val_bits = 32, - .max_register = 0x40000, + .max_register = 0x20000, .fast_io = true, }; From 72ea2fc29962f3bb16eba80a2f0c88ffa6bb9e08 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 14 Oct 2022 12:25:06 +0300 Subject: [PATCH 03/98] phy: stm32: fix an error code in probe [ Upstream commit ca1c73628f5bd0c1ef6e46073cc3be2450605b06 ] If "index > usbphyc->nphys" is true then this returns success but it should return -EINVAL. Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)") Signed-off-by: Dan Carpenter Reviewed-by: Amelie Delaunay Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/phy/st/phy-stm32-usbphyc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 2b3639cba51a..03fc567e9f18 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -393,6 +393,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev) ret = of_property_read_u32(child, "reg", &index); if (ret || index > usbphyc->nphys) { dev_err(&phy->dev, "invalid reg property: %d\n", ret); + if (!ret) + ret = -EINVAL; goto put_child; } From 914cb94e738ba6ebb482d61567fb50bf700e4f19 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 13 Oct 2022 19:41:51 +0200 Subject: [PATCH 04/98] wifi: cfg80211: silence a sparse RCU warning [ Upstream commit 03c0ad4b06c3566de624b4f4b78ac1a5d1e4c8e7 ] All we're going to do with this pointer is assign it to another __rcu pointer, but sparse can't see that, so use rcu_access_pointer() to silence the warning here. Fixes: c90b93b5b782 ("wifi: cfg80211: update hidden BSSes to avoid WARN_ON") Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/scan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 22d169923261..15119c49c093 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1669,7 +1669,9 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, if (old == rcu_access_pointer(known->pub.ies)) rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); - cfg80211_update_hidden_bsses(known, new->pub.beacon_ies, old); + cfg80211_update_hidden_bsses(known, + rcu_access_pointer(new->pub.beacon_ies), + old); if (old) kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); From e1e12180321f416d83444f2cdc9259e0f5093d35 Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Thu, 20 Oct 2022 13:40:40 +0200 Subject: [PATCH 05/98] wifi: cfg80211: fix memory leak in query_regdb_file() [ Upstream commit 57b962e627ec0ae53d4d16d7bd1033e27e67677a ] In the function query_regdb_file() the alpha2 parameter is duplicated using kmemdup() and subsequently freed in regdb_fw_cb(). However, request_firmware_nowait() can fail without calling regdb_fw_cb() and thus leak memory. Fixes: 007f6c5e6eb4 ("cfg80211: support loading regulatory database as firmware file") Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg Signed-off-by: Sasha Levin --- net/wireless/reg.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index fd848609e656..a1e64d967bd3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1064,6 +1064,8 @@ MODULE_FIRMWARE("regulatory.db"); static int query_regdb_file(const char *alpha2) { + int err; + ASSERT_RTNL(); if (regdb) @@ -1073,9 +1075,13 @@ static int query_regdb_file(const char *alpha2) if (!alpha2) return -ENOMEM; - return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", - ®_pdev->dev, GFP_KERNEL, - (void *)alpha2, regdb_fw_cb); + err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db", + ®_pdev->dev, GFP_KERNEL, + (void *)alpha2, regdb_fw_cb); + if (err) + kfree(alpha2); + + return err; } int reg_reload_regdb(void) From cc21dc48a78cc9e5af9a4d039cd456446a6e73ff Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Tue, 1 Nov 2022 09:31:36 +0800 Subject: [PATCH 06/98] bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues [ Upstream commit 8ec95b94716a1e4d126edc3fb2bc426a717e2dba ] When running `test_sockmap` selftests, the following warning appears: WARNING: CPU: 2 PID: 197 at net/core/stream.c:205 sk_stream_kill_queues+0xd3/0xf0 Call Trace: inet_csk_destroy_sock+0x55/0x110 tcp_rcv_state_process+0xd28/0x1380 ? tcp_v4_do_rcv+0x77/0x2c0 tcp_v4_do_rcv+0x77/0x2c0 __release_sock+0x106/0x130 __tcp_close+0x1a7/0x4e0 tcp_close+0x20/0x70 inet_release+0x3c/0x80 __sock_release+0x3a/0xb0 sock_close+0x14/0x20 __fput+0xa3/0x260 task_work_run+0x59/0xb0 exit_to_user_mode_prepare+0x1b3/0x1c0 syscall_exit_to_user_mode+0x19/0x50 do_syscall_64+0x48/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae The root case is in commit 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data"), where I used msg->sg.size to replace the tosend, causing breakage: if (msg->apply_bytes && msg->apply_bytes < tosend) tosend = psock->apply_bytes; Fixes: 84472b436e76 ("bpf, sockmap: Fix more uncharged while msg has more_data") Reported-by: Jakub Sitnicki Signed-off-by: Wang Yufen Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/1667266296-8794-1-git-send-email-wangyufen@huawei.com Signed-off-by: Sasha Levin --- net/ipv4/tcp_bpf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index eaf2308c355a..809ee0f32d59 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -315,7 +315,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, { bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; - u32 tosend, delta = 0; + u32 tosend, origsize, sent, delta = 0; u32 eval = __SK_NONE; int ret; @@ -370,10 +370,12 @@ more_data: cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, msg->sg.size); + sk_msg_return(sk, msg, tosend); release_sock(sk); + origsize = msg->sg.size; ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + sent = origsize - msg->sg.size; if (eval == __SK_REDIRECT) sock_put(sk_redir); @@ -412,7 +414,7 @@ more_data: msg->sg.data[msg->sg.start].page_link && msg->sg.data[msg->sg.start].length) { if (eval == __SK_REDIRECT) - sk_mem_charge(sk, msg->sg.size); + sk_mem_charge(sk, tosend - sent); goto more_data; } } From 8c80b2fca4112d724dde477aed13f7b0510a2792 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Wed, 2 Nov 2022 16:40:34 +0800 Subject: [PATCH 07/98] bpftool: Fix NULL pointer dereference when pin {PROG, MAP, LINK} without FILE [ Upstream commit 34de8e6e0e1f66e431abf4123934a2581cb5f133 ] When using bpftool to pin {PROG, MAP, LINK} without FILE, segmentation fault will occur. The reson is that the lack of FILE will cause strlen to trigger NULL pointer dereference. The corresponding stacktrace is shown below: do_pin do_pin_any do_pin_fd mount_bpffs_for_pin strlen(name) <- NULL pointer dereference Fix it by adding validation to the common process. Fixes: 75a1e792c335 ("tools: bpftool: Allow all prog/map handles for pinning objects") Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Reviewed-by: Quentin Monnet Link: https://lore.kernel.org/bpf/20221102084034.3342995-1-pulehui@huaweicloud.com Signed-off-by: Sasha Levin --- tools/bpf/bpftool/common.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index 6ebf2b215ef4..eefa2b34e641 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -271,6 +271,9 @@ int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***)) int err; int fd; + if (!REQ_ARGS(3)) + return -EINVAL; + fd = get_fd(&argc, &argv); if (fd < 0) return fd; From 8597b59e3d22b27849bd3e4f92a3d466774bfb04 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Fri, 28 Oct 2022 21:40:43 +0800 Subject: [PATCH 08/98] HID: hyperv: fix possible memory leak in mousevsc_probe() [ Upstream commit b5bcb94b0954a026bbd671741fdb00e7141f9c91 ] If hid_add_device() returns error, it should call hid_destroy_device() to free hid_dev which is allocated in hid_allocate_device(). Fixes: 74c4fb058083 ("HID: hv_mouse: Properly add the hid device") Signed-off-by: Yang Yingliang Reviewed-by: Wei Liu Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/hid-hyperv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c index 978ee2aab2d4..b7704dd6809d 100644 --- a/drivers/hid/hid-hyperv.c +++ b/drivers/hid/hid-hyperv.c @@ -498,7 +498,7 @@ static int mousevsc_probe(struct hv_device *device, ret = hid_add_device(hid_dev); if (ret) - goto probe_err1; + goto probe_err2; ret = hid_parse(hid_dev); From 95b6ec733752b31bfd166c4609d2c1b5cdde9b47 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 10 Nov 2020 19:12:11 -0800 Subject: [PATCH 09/98] bpf: Support for pointers beyond pkt_end. [ Upstream commit 6d94e741a8ff818e5518da8257f5ca0aaed1f269 ] This patch adds the verifier support to recognize inlined branch conditions. The LLVM knows that the branch evaluates to the same value, but the verifier couldn't track it. Hence causing valid programs to be rejected. The potential LLVM workaround: https://reviews.llvm.org/D87428 can have undesired side effects, since LLVM doesn't know that skb->data/data_end are being compared. LLVM has to introduce extra boolean variable and use inline_asm trick to force easier for the verifier assembly. Instead teach the verifier to recognize that r1 = skb->data; r1 += 10; r2 = skb->data_end; if (r1 > r2) { here r1 points beyond packet_end and subsequent if (r1 > r2) // always evaluates to "true". } Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20201111031213.25109-2-alexei.starovoitov@gmail.com Stable-dep-of: f1db20814af5 ("bpf: Fix wrong reg type conversion in release_reference()") Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 2 +- kernel/bpf/verifier.c | 129 +++++++++++++++++++++++++++++------ 2 files changed, 108 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 391bc1480dfb..f49165f9229c 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -45,7 +45,7 @@ struct bpf_reg_state { enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ - u16 range; + int range; /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | * PTR_TO_MAP_VALUE_OR_NULL diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e4dcc23b52c0..510a54471f13 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2978,7 +2978,9 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, regno); return -EACCES; } - err = __check_mem_access(env, regno, off, size, reg->range, + + err = reg->range < 0 ? -EINVAL : + __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed); if (err) { verbose(env, "R%d offset is outside of the packet\n", regno); @@ -5018,6 +5020,32 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) __clear_all_pkt_pointers(env, vstate->frame[i]); } +enum { + AT_PKT_END = -1, + BEYOND_PKT_END = -2, +}; + +static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open) +{ + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *reg = &state->regs[regn]; + + if (reg->type != PTR_TO_PACKET) + /* PTR_TO_PACKET_META is not supported yet */ + return; + + /* The 'reg' is pkt > pkt_end or pkt >= pkt_end. + * How far beyond pkt_end it goes is unknown. + * if (!range_open) it's the case of pkt >= pkt_end + * if (range_open) it's the case of pkt > pkt_end + * hence this pointer is at least 1 byte bigger than pkt_end + */ + if (range_open) + reg->range = BEYOND_PKT_END; + else + reg->range = AT_PKT_END; +} + static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id) @@ -7193,7 +7221,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, u16 new_range) + enum bpf_reg_type type, int new_range) { struct bpf_reg_state *reg; int i; @@ -7218,8 +7246,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, enum bpf_reg_type type, bool range_right_open) { - u16 new_range; - int i; + int new_range, i; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -7470,6 +7497,67 @@ static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, return is_branch64_taken(reg, val, opcode); } +static int flip_opcode(u32 opcode) +{ + /* How can we transform "a b" into "b a"? */ + static const u8 opcode_flip[16] = { + /* these stay the same */ + [BPF_JEQ >> 4] = BPF_JEQ, + [BPF_JNE >> 4] = BPF_JNE, + [BPF_JSET >> 4] = BPF_JSET, + /* these swap "lesser" and "greater" (L and G in the opcodes) */ + [BPF_JGE >> 4] = BPF_JLE, + [BPF_JGT >> 4] = BPF_JLT, + [BPF_JLE >> 4] = BPF_JGE, + [BPF_JLT >> 4] = BPF_JGT, + [BPF_JSGE >> 4] = BPF_JSLE, + [BPF_JSGT >> 4] = BPF_JSLT, + [BPF_JSLE >> 4] = BPF_JSGE, + [BPF_JSLT >> 4] = BPF_JSGT + }; + return opcode_flip[opcode >> 4]; +} + +static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg, + u8 opcode) +{ + struct bpf_reg_state *pkt; + + if (src_reg->type == PTR_TO_PACKET_END) { + pkt = dst_reg; + } else if (dst_reg->type == PTR_TO_PACKET_END) { + pkt = src_reg; + opcode = flip_opcode(opcode); + } else { + return -1; + } + + if (pkt->range >= 0) + return -1; + + switch (opcode) { + case BPF_JLE: + /* pkt <= pkt_end */ + fallthrough; + case BPF_JGT: + /* pkt > pkt_end */ + if (pkt->range == BEYOND_PKT_END) + /* pkt has at last one extra byte beyond pkt_end */ + return opcode == BPF_JGT; + break; + case BPF_JLT: + /* pkt < pkt_end */ + fallthrough; + case BPF_JGE: + /* pkt >= pkt_end */ + if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END) + return opcode == BPF_JGE; + break; + } + return -1; +} + /* Adjusts the register min/max values in the case that the dst_reg is the * variable register that we are working on, and src_reg is a constant or we're * simply doing a BPF_K check. @@ -7640,23 +7728,7 @@ static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, u64 val, u32 val32, u8 opcode, bool is_jmp32) { - /* How can we transform "a b" into "b a"? */ - static const u8 opcode_flip[16] = { - /* these stay the same */ - [BPF_JEQ >> 4] = BPF_JEQ, - [BPF_JNE >> 4] = BPF_JNE, - [BPF_JSET >> 4] = BPF_JSET, - /* these swap "lesser" and "greater" (L and G in the opcodes) */ - [BPF_JGE >> 4] = BPF_JLE, - [BPF_JGT >> 4] = BPF_JLT, - [BPF_JLE >> 4] = BPF_JGE, - [BPF_JLT >> 4] = BPF_JGT, - [BPF_JSGE >> 4] = BPF_JSLE, - [BPF_JSGT >> 4] = BPF_JSLT, - [BPF_JSLE >> 4] = BPF_JSGE, - [BPF_JSLT >> 4] = BPF_JSGT - }; - opcode = opcode_flip[opcode >> 4]; + opcode = flip_opcode(opcode); /* This uses zero as "not present in table"; luckily the zero opcode, * BPF_JA, can't get here. */ @@ -7825,6 +7897,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' > pkt_end, pkt_meta' > pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(other_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7832,6 +7905,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end > pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true); + mark_pkt_end(this_branch, insn->src_reg, false); } else { return false; } @@ -7844,6 +7918,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' < pkt_end, pkt_meta' < pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(this_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7851,6 +7926,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end < pkt_data', pkt_data > pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false); + mark_pkt_end(other_branch, insn->src_reg, true); } else { return false; } @@ -7863,6 +7939,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */ find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true); + mark_pkt_end(other_branch, insn->dst_reg, false); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7870,6 +7947,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */ find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false); + mark_pkt_end(this_branch, insn->src_reg, true); } else { return false; } @@ -7882,6 +7960,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */ find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false); + mark_pkt_end(this_branch, insn->dst_reg, true); } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) || (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && @@ -7889,6 +7968,7 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */ find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true); + mark_pkt_end(other_branch, insn->src_reg, false); } else { return false; } @@ -7988,6 +8068,10 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, src_reg->var_off.value, opcode, is_jmp32); + } else if (reg_is_pkt_pointer_any(dst_reg) && + reg_is_pkt_pointer_any(src_reg) && + !is_jmp32) { + pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode); } if (pred >= 0) { @@ -7996,7 +8080,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, */ if (!__is_pointer_value(false, dst_reg)) err = mark_chain_precision(env, insn->dst_reg); - if (BPF_SRC(insn->code) == BPF_X && !err) + if (BPF_SRC(insn->code) == BPF_X && !err && + !__is_pointer_value(false, src_reg)) err = mark_chain_precision(env, insn->src_reg); if (err) return err; From 9069db2579e92a1ba604dd85e5de5206ea4beb6d Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Sun, 4 Sep 2022 22:41:28 +0200 Subject: [PATCH 10/98] bpf: Add helper macro bpf_for_each_reg_in_vstate [ Upstream commit b239da34203f49c40b5d656220c39647c3ff0b3c ] For a lot of use cases in future patches, we will want to modify the state of registers part of some same 'group' (e.g. same ref_obj_id). It won't just be limited to releasing reference state, but setting a type flag dynamically based on certain actions, etc. Hence, we need a way to easily pass a callback to the function that iterates over all registers in current bpf_verifier_state in all frames upto (and including) the curframe. While in C++ we would be able to easily use a lambda to pass state and the callback together, sadly we aren't using C++ in the kernel. The next best thing to avoid defining a function for each case seems like statement expressions in GNU C. The kernel already uses them heavily, hence they can passed to the macro in the style of a lambda. The statement expression will then be substituted in the for loop bodies. Variables __state and __reg are set to current bpf_func_state and reg for each invocation of the expression inside the passed in verifier state. Then, convert mark_ptr_or_null_regs, clear_all_pkt_pointers, release_reference, find_good_pkt_pointers, find_equal_scalars to use bpf_for_each_reg_in_vstate. Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20220904204145.3089-16-memxor@gmail.com Signed-off-by: Alexei Starovoitov Stable-dep-of: f1db20814af5 ("bpf: Fix wrong reg type conversion in release_reference()") Signed-off-by: Sasha Levin --- include/linux/bpf_verifier.h | 21 ++++++ kernel/bpf/verifier.c | 137 ++++++++--------------------------- 2 files changed, 50 insertions(+), 108 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f49165f9229c..4d37c69e76b1 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -290,6 +290,27 @@ struct bpf_verifier_state { iter < frame->allocated_stack / BPF_REG_SIZE; \ iter++, reg = bpf_get_spilled_reg(iter, frame)) +/* Invoke __expr over regsiters in __vst, setting __state and __reg */ +#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \ + ({ \ + struct bpf_verifier_state *___vstate = __vst; \ + int ___i, ___j; \ + for (___i = 0; ___i <= ___vstate->curframe; ___i++) { \ + struct bpf_reg_state *___regs; \ + __state = ___vstate->frame[___i]; \ + ___regs = __state->regs; \ + for (___j = 0; ___j < MAX_BPF_REG; ___j++) { \ + __reg = &___regs[___j]; \ + (void)(__expr); \ + } \ + bpf_for_each_spilled_reg(___j, __state, __reg) { \ + if (!__reg) \ + continue; \ + (void)(__expr); \ + } \ + } \ + }) + /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 510a54471f13..3a0f288f538c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4993,31 +4993,15 @@ static int check_func_proto(const struct bpf_func_proto *fn, int func_id) /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. */ -static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, - struct bpf_func_state *state) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (reg_is_pkt_pointer_any(®s[i])) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg_is_pkt_pointer_any(reg)) - __mark_reg_unknown(env, reg); - } -} - static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_verifier_state *vstate = env->cur_state; - int i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; - for (i = 0; i <= vstate->curframe; i++) - __clear_all_pkt_pointers(env, vstate->frame[i]); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg_is_pkt_pointer_any(reg)) + __mark_reg_unknown(env, reg); + })); } enum { @@ -5046,41 +5030,24 @@ static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range reg->range = AT_PKT_END; } -static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, - int ref_obj_id) -{ - struct bpf_reg_state *regs = state->regs, *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].ref_obj_id == ref_obj_id) - mark_reg_unknown(env, regs, i); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); - } -} - /* The pointer with the specified id has released its reference to kernel * resources. Identify all copies of the same pointer and clear the reference. */ static int release_reference(struct bpf_verifier_env *env, int ref_obj_id) { - struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state; + struct bpf_reg_state *reg; int err; - int i; err = release_reference_state(cur_func(env), ref_obj_id); if (err) return err; - for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], ref_obj_id); + bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ + if (reg->ref_obj_id == ref_obj_id) + __mark_reg_unknown(env, reg); + })); return 0; } @@ -7219,34 +7186,14 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) return 0; } -static void __find_good_pkt_pointers(struct bpf_func_state *state, - struct bpf_reg_state *dst_reg, - enum bpf_reg_type type, int new_range) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) { - reg = &state->regs[i]; - if (reg->type == type && reg->id == dst_reg->id) - /* keep the maximum range already checked */ - reg->range = max(reg->range, new_range); - } - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } -} - static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - int new_range, i; + struct bpf_func_state *state; + struct bpf_reg_state *reg; + int new_range; if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@ -7311,9 +7258,11 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i <= vstate->curframe; i++) - __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, - new_range); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + })); } static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode) @@ -7826,7 +7775,7 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->ref_obj_id = 0; } else if (!reg_may_point_to_spin_lock(reg)) { /* For not-NULL ptr, reg->ref_obj_id will be reset - * in release_reg_references(). + * in release_reference(). * * reg->id is still used by spin_lock ptr. Other * than spin_lock ptr type, reg->id can be reset. @@ -7836,22 +7785,6 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } } -static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, - bool is_null) -{ - struct bpf_reg_state *reg; - int i; - - for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); - - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } -} - /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@ -7859,10 +7792,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs, *reg; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i; if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@ -7871,8 +7803,9 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, */ WARN_ON_ONCE(release_reference_state(state, id)); - for (i = 0; i <= vstate->curframe; i++) - __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + mark_ptr_or_null_reg(state, reg, id, is_null); + })); } static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@ -7985,23 +7918,11 @@ static void find_equal_scalars(struct bpf_verifier_state *vstate, { struct bpf_func_state *state; struct bpf_reg_state *reg; - int i, j; - for (i = 0; i <= vstate->curframe; i++) { - state = vstate->frame[i]; - for (j = 0; j < MAX_BPF_REG; j++) { - reg = &state->regs[j]; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - - bpf_for_each_spilled_reg(j, state, reg) { - if (!reg) - continue; - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) - *reg = *known_reg; - } - } + bpf_for_each_reg_in_vstate(vstate, state, reg, ({ + if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + *reg = *known_reg; + })); } static int check_cond_jmp_op(struct bpf_verifier_env *env, From cedd4f01f67be94735f15123158f485028571037 Mon Sep 17 00:00:00 2001 From: Youlin Li Date: Thu, 3 Nov 2022 17:34:39 +0800 Subject: [PATCH 11/98] bpf: Fix wrong reg type conversion in release_reference() [ Upstream commit f1db20814af532f85e091231223e5e4818e8464b ] Some helper functions will allocate memory. To avoid memory leaks, the verifier requires the eBPF program to release these memories by calling the corresponding helper functions. When a resource is released, all pointer registers corresponding to the resource should be invalidated. The verifier use release_references() to do this job, by apply __mark_reg_unknown() to each relevant register. It will give these registers the type of SCALAR_VALUE. A register that will contain a pointer value at runtime, but of type SCALAR_VALUE, which may allow the unprivileged user to get a kernel pointer by storing this register into a map. Using __mark_reg_not_init() while NOT allow_ptr_leaks can mitigate this problem. Fixes: fd978bf7fd31 ("bpf: Add reference tracking to verifier") Signed-off-by: Youlin Li Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20221103093440.3161-1-liulin063@gmail.com Signed-off-by: Sasha Levin --- kernel/bpf/verifier.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 3a0f288f538c..50364031eb4d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5045,8 +5045,12 @@ static int release_reference(struct bpf_verifier_env *env, return err; bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ - if (reg->ref_obj_id == ref_obj_id) - __mark_reg_unknown(env, reg); + if (reg->ref_obj_id == ref_obj_id) { + if (!env->allow_ptr_leaks) + __mark_reg_not_init(env, reg); + else + __mark_reg_unknown(env, reg); + } })); return 0; From 50868de7dc4e7f0fcadd6029f32bf4387c102ee6 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 2 Nov 2022 17:53:25 +0100 Subject: [PATCH 12/98] net: gso: fix panic on frag_list with mixed head alloc types [ Upstream commit 9e4b7a99a03aefd37ba7bb1f022c8efab5019165 ] Since commit 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list"), it is allowed to change gso_size of a GRO packet. However, that commit assumes that "checking the first list_skb member suffices; i.e if either of the list_skb members have non head_frag head, then the first one has too". It turns out this assumption does not hold. We've seen BUG_ON being hit in skb_segment when skbs on the frag_list had differing head_frag with the vmxnet3 driver. This happens because __netdev_alloc_skb and __napi_alloc_skb can return a skb that is page backed or kmalloced depending on the requested size. As the result, the last small skb in the GRO packet can be kmalloced. There are three different locations where this can be fixed: (1) We could check head_frag in GRO and not allow GROing skbs with different head_frag. However, that would lead to performance regression on normal forward paths with unmodified gso_size, where !head_frag in the last packet is not a problem. (2) Set a flag in bpf_skb_net_grow and bpf_skb_net_shrink indicating that NETIF_F_SG is undesirable. That would need to eat a bit in sk_buff. Furthermore, that flag can be unset when all skbs on the frag_list are page backed. To retain good performance, bpf_skb_net_grow/shrink would have to walk the frag_list. (3) Walk the frag_list in skb_segment when determining whether NETIF_F_SG should be cleared. This of course slows things down. This patch implements (3). To limit the performance impact in skb_segment, the list is walked only for skbs with SKB_GSO_DODGY set that have gso_size changed. Normal paths thus will not hit it. We could check only the last skb but since we need to walk the whole list anyway, let's stay on the safe side. Fixes: 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list") Signed-off-by: Jiri Benc Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/e04426a6a91baf4d1081e1b478c82b5de25fdf21.1667407944.git.jbenc@redhat.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/core/skbuff.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7bdcdad58dc8..06169889b0ca 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3809,23 +3809,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int i = 0; int pos; - if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && - (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { - /* gso_size is untrusted, and we have a frag_list with a linear - * non head_frag head. - * - * (we assume checking the first list_skb member suffices; - * i.e if either of the list_skb members have non head_frag - * head, then the first one has too). - * - * If head_skb's headlen does not fit requested gso_size, it - * means that the frag_list members do NOT terminate on exact - * gso_size boundaries. Hence we cannot perform skb_frag_t page - * sharing. Therefore we must fallback to copying the frag_list - * skbs; we do so by disabling SG. - */ - if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) - features &= ~NETIF_F_SG; + if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) && + mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) { + struct sk_buff *check_skb; + + for (check_skb = list_skb; check_skb; check_skb = check_skb->next) { + if (skb_headlen(check_skb) && !check_skb->head_frag) { + /* gso_size is untrusted, and we have a frag_list with + * a linear non head_frag item. + * + * If head_skb's headlen does not fit requested gso_size, + * it means that the frag_list members do NOT terminate + * on exact gso_size boundaries. Hence we cannot perform + * skb_frag_t page sharing. Therefore we must fallback to + * copying the frag_list skbs; we do so by disabling SG. + */ + features &= ~NETIF_F_SG; + break; + } + } } __skb_push(head_skb, doffset); From 3b05d9073ae26c686d9d26d138164d1766cf470e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:13 +0100 Subject: [PATCH 13/98] macsec: delete new rxsc when offload fails [ Upstream commit 93a30947821c203d08865c4e17ea181c9668ce52 ] Currently we get an inconsistent state: - netlink returns the error to userspace - the RXSC is installed but not offloaded Then the device could get confused when we try to add an RXSA, because the RXSC isn't supposed to exist. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 70c5905a916b..65e0af28c950 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1867,7 +1867,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; - bool was_active; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1895,7 +1894,6 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return PTR_ERR(rx_sc); } - was_active = rx_sc->active; if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); @@ -1922,7 +1920,8 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) return 0; cleanup: - rx_sc->active = was_active; + del_rx_sc(secy, sci); + free_rx_sc(rx_sc); rtnl_unlock(); return ret; } From 7f4456f0119b8014ce61c38912d74fa4fd24092c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:14 +0100 Subject: [PATCH 14/98] macsec: fix secy->n_rx_sc accounting [ Upstream commit 73a4b31c9d11f98ae3bc5286d5382930adb0e9c7 ] secy->n_rx_sc is supposed to be the number of _active_ rxsc's within a secy. This is then used by macsec_send_sci to help decide if we should add the SCI to the header or not. This logic is currently broken when we create a new RXSC and turn it off at creation, as create_rx_sc always sets ->active to true (and immediately uses that to increment n_rx_sc), and only later macsec_add_rxsc sets rx_sc->active. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 65e0af28c950..a816fbbe23a7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1390,7 +1390,8 @@ static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci) return NULL; } -static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) +static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci, + bool active) { struct macsec_rx_sc *rx_sc; struct macsec_dev *macsec; @@ -1414,7 +1415,7 @@ static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci) } rx_sc->sci = sci; - rx_sc->active = true; + rx_sc->active = active; refcount_set(&rx_sc->refcnt, 1); secy = &macsec_priv(dev)->secy; @@ -1867,6 +1868,7 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) struct macsec_rx_sc *rx_sc; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct macsec_secy *secy; + bool active = true; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) @@ -1888,15 +1890,15 @@ static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info) secy = &macsec_priv(dev)->secy; sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); - rx_sc = create_rx_sc(dev, sci); + if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) + active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); + + rx_sc = create_rx_sc(dev, sci, active); if (IS_ERR(rx_sc)) { rtnl_unlock(); return PTR_ERR(rx_sc); } - if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) - rx_sc->active = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]); - if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; From 9dc7503bae33046c6301562f7e03d5352e6f8c50 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:15 +0100 Subject: [PATCH 15/98] macsec: fix detection of RXSCs when toggling offloading [ Upstream commit 80df4706357a5a06bbbc70273bf2611df1ceee04 ] macsec_is_configured incorrectly uses secy->n_rx_sc to check if some RXSCs exist. secy->n_rx_sc only counts the number of active RXSCs, but there can also be inactive SCs as well, which may be stored in the driver (in case we're disabling offloading), or would have to be pushed to the device (in case we're trying to enable offloading). As long as RXSCs active on creation and never turned off, the issue is not visible. Fixes: dcb780fb2795 ("net: macsec: add nla support for changing the offloading selection") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index a816fbbe23a7..69108c1db130 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2562,7 +2562,7 @@ static bool macsec_is_configured(struct macsec_dev *macsec) struct macsec_tx_sc *tx_sc = &secy->tx_sc; int i; - if (secy->n_rx_sc > 0) + if (secy->rx_sc) return true; for (i = 0; i < MACSEC_NUM_AN; i++) From adaa0f180de5236e086ddab6476c4364d922f1fd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 2 Nov 2022 22:33:16 +0100 Subject: [PATCH 16/98] macsec: clear encryption keys from the stack after setting up offload [ Upstream commit aaab73f8fba4fd38f4d2617440d541a1c334e819 ] macsec_add_rxsa and macsec_add_txsa copy the key to an on-stack offloading context to pass it to the drivers, but leaves it there when it's done. Clear it with memzero_explicit as soon as it's not needed anymore. Fixes: 3cf3227a21d1 ("net: macsec: hardware offloading infrastructure") Signed-off-by: Sabrina Dubroca Reviewed-by: Antoine Tenart Reviewed-by: Leon Romanovsky Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/macsec.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 69108c1db130..f84e3cc0d3ec 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1824,6 +1824,7 @@ static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_rxsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } @@ -2066,6 +2067,7 @@ static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info) secy->key_len); err = macsec_offload(ops->mdo_add_txsa, &ctx); + memzero_explicit(ctx.sa.key, secy->key_len); if (err) goto cleanup; } From 3401f964028ac941425b9b2c8ff8a022539ef44a Mon Sep 17 00:00:00 2001 From: Wang Yufen Date: Wed, 2 Nov 2022 17:41:19 +0800 Subject: [PATCH 17/98] net: tun: Fix memory leaks of napi_get_frags [ Upstream commit 1118b2049d77ca0b505775fc1a8d1909cf19a7ec ] kmemleak reports after running test_progs: unreferenced object 0xffff8881b1672dc0 (size 232): comm "test_progs", pid 394388, jiffies 4354712116 (age 841.975s) hex dump (first 32 bytes): e0 84 d7 a8 81 88 ff ff 80 2c 67 b1 81 88 ff ff .........,g..... 00 40 c5 9b 81 88 ff ff 00 00 00 00 00 00 00 00 .@.............. backtrace: [<00000000c8f01748>] napi_skb_cache_get+0xd4/0x150 [<0000000041c7fc09>] __napi_build_skb+0x15/0x50 [<00000000431c7079>] __napi_alloc_skb+0x26e/0x540 [<000000003ecfa30e>] napi_get_frags+0x59/0x140 [<0000000099b2199e>] tun_get_user+0x183d/0x3bb0 [tun] [<000000008a5adef0>] tun_chr_write_iter+0xc0/0x1b1 [tun] [<0000000049993ff4>] do_iter_readv_writev+0x19f/0x320 [<000000008f338ea2>] do_iter_write+0x135/0x630 [<000000008a3377a4>] vfs_writev+0x12e/0x440 [<00000000a6b5639a>] do_writev+0x104/0x280 [<00000000ccf065d8>] do_syscall_64+0x3b/0x90 [<00000000d776e329>] entry_SYSCALL_64_after_hwframe+0x63/0xcd The issue occurs in the following scenarios: tun_get_user() napi_gro_frags() napi_frags_finish() case GRO_NORMAL: gro_normal_one() list_add_tail(&skb->list, &napi->rx_list); <-- While napi->rx_count < READ_ONCE(gro_normal_batch), <-- gro_normal_list() is not called, napi->rx_list is not empty <-- not ask to complete the gro work, will cause memory leaks in <-- following tun_napi_del() ... tun_napi_del() netif_napi_del() __netif_napi_del() <-- &napi->rx_list is not empty, which caused memory leaks To fix, add napi_complete() after napi_gro_frags(). Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") Signed-off-by: Wang Yufen Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0c09f8e9d383..83662f616b67 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1996,6 +1996,7 @@ drop: local_bh_disable(); napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); local_bh_enable(); mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { From 38147073c96dce8c7e142ce0e5f305a420a729ba Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Thu, 3 Nov 2022 19:33:26 -0400 Subject: [PATCH 18/98] bnxt_en: Fix possible crash in bnxt_hwrm_set_coal() [ Upstream commit 6d81ea3765dfa6c8a20822613c81edad1c4a16a0 ] During the error recovery sequence, the rtnl_lock is not held for the entire duration and some datastructures may be freed during the sequence. Check for the BNXT_STATE_OPEN flag instead of netif_running() to ensure that the device is fully operational before proceeding to reconfigure the coalescing settings. This will fix a possible crash like this: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 0 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 10 PID: 181276 Comm: ethtool Kdump: loaded Tainted: G IOE --------- - - 4.18.0-348.el8.x86_64 #1 Hardware name: Dell Inc. PowerEdge R740/0F9N89, BIOS 2.3.10 08/15/2019 RIP: 0010:bnxt_hwrm_set_coal+0x1fb/0x2a0 [bnxt_en] Code: c2 66 83 4e 22 08 66 89 46 1c e8 10 cb 00 00 41 83 c6 01 44 39 b3 68 01 00 00 0f 8e a3 00 00 00 48 8b 93 c8 00 00 00 49 63 c6 <48> 8b 2c c2 48 8b 85 b8 02 00 00 48 85 c0 74 2e 48 8b 74 24 08 f6 RSP: 0018:ffffb11c8dcaba50 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8d168a8b0ac0 RCX: 00000000000000c5 RDX: 0000000000000000 RSI: ffff8d162f72c000 RDI: ffff8d168a8b0b28 RBP: 0000000000000000 R08: b6e1f68a12e9a7eb R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000037 R12: ffff8d168a8b109c R13: ffff8d168a8b10aa R14: 0000000000000000 R15: ffffffffc01ac4e0 FS: 00007f3852e4c740(0000) GS:ffff8d24c0080000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000041b3ee003 CR4: 00000000007706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: ethnl_set_coalesce+0x3ce/0x4c0 genl_family_rcv_msg_doit.isra.15+0x10f/0x150 genl_family_rcv_msg+0xb3/0x160 ? coalesce_fill_reply+0x480/0x480 genl_rcv_msg+0x47/0x90 ? genl_family_rcv_msg+0x160/0x160 netlink_rcv_skb+0x4c/0x120 genl_rcv+0x24/0x40 netlink_unicast+0x196/0x230 netlink_sendmsg+0x204/0x3d0 sock_sendmsg+0x4c/0x50 __sys_sendto+0xee/0x160 ? syscall_trace_enter+0x1d3/0x2c0 ? __audit_syscall_exit+0x249/0x2a0 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x1a0 entry_SYSCALL_64_after_hwframe+0x65/0xca RIP: 0033:0x7f38524163bb Fixes: 2151fe0830fd ("bnxt_en: Handle RESET_NOTIFY async event from firmware.") Reviewed-by: Somnath Kotur Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index f8f775619520..81b63d1c2391 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -125,7 +125,7 @@ static int bnxt_set_coalesce(struct net_device *dev, } reset_coalesce: - if (netif_running(dev)) { + if (test_bit(BNXT_STATE_OPEN, &bp->state)) { if (update_stats) { rc = bnxt_close_nic(bp, true, false); if (!rc) From e2c5ee3b628f72729c369611290524d662424e16 Mon Sep 17 00:00:00 2001 From: Alex Barba Date: Thu, 3 Nov 2022 19:33:27 -0400 Subject: [PATCH 19/98] bnxt_en: fix potentially incorrect return value for ndo_rx_flow_steer [ Upstream commit 02597d39145bb0aa81d04bf39b6a913ce9a9d465 ] In the bnxt_en driver ndo_rx_flow_steer returns '0' whenever an entry that we are attempting to steer is already found. This is not the correct behavior. The return code should be the value/index that corresponds to the entry. Returning zero all the time causes the RFS records to be incorrect unless entry '0' is the correct one. As flows migrate to different cores this can create entries that are not correct. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Akshay Navgire Signed-off-by: Alex Barba Signed-off-by: Andy Gospodarek Signed-off-by: Michael Chan Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b818d5f342d5..8311473d537b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12008,8 +12008,8 @@ static int bnxt_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(fltr, head, hash) { if (bnxt_fltr_match(fltr, new_fltr)) { + rc = fltr->sw_id; rcu_read_unlock(); - rc = 0; goto err_free; } } From 8aae24b0ed767542e71159ba0756a1faea70a6b2 Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 3 Nov 2022 14:28:30 -0400 Subject: [PATCH 20/98] net: fman: Unregister ethernet device on removal [ Upstream commit b7cbc6740bd6ad5d43345a2504f7e4beff0d709f ] When the mac device gets removed, it leaves behind the ethernet device. This will result in a segfault next time the ethernet device accesses mac_dev. Remove the ethernet device when we get removed to prevent this. This is not completely reversible, since some resources aren't cleaned up properly, but that can be addressed later. Fixes: 3933961682a3 ("fsl/fman: Add FMan MAC driver") Signed-off-by: Sean Anderson Link: https://lore.kernel.org/r/20221103182831.2248833-1-sean.anderson@seco.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/freescale/fman/mac.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 6eeccc11b76e..3312dc4083a0 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -884,12 +884,21 @@ _return: return err; } +static int mac_remove(struct platform_device *pdev) +{ + struct mac_device *mac_dev = platform_get_drvdata(pdev); + + platform_device_unregister(mac_dev->priv->eth_dev); + return 0; +} + static struct platform_driver mac_driver = { .driver = { .name = KBUILD_MODNAME, .of_match_table = mac_match, }, .probe = mac_probe, + .remove = mac_remove, }; builtin_platform_driver(mac_driver); From fcbd2b336834bd24e1d9454ad5737856470c10d7 Mon Sep 17 00:00:00 2001 From: Gaosheng Cui Date: Mon, 31 Oct 2022 19:25:36 +0800 Subject: [PATCH 21/98] capabilities: fix undefined behavior in bit shift for CAP_TO_MASK [ Upstream commit 46653972e3ea64f79e7f8ae3aa41a4d3fdb70a13 ] Shifting signed 32-bit value by 31 bits is undefined, so changing significant bit to unsigned. The UBSAN warning calltrace like below: UBSAN: shift-out-of-bounds in security/commoncap.c:1252:2 left shift of 1 by 31 places cannot be represented in type 'int' Call Trace: dump_stack_lvl+0x7d/0xa5 dump_stack+0x15/0x1b ubsan_epilogue+0xe/0x4e __ubsan_handle_shift_out_of_bounds+0x1e7/0x20c cap_task_prctl+0x561/0x6f0 security_task_prctl+0x5a/0xb0 __x64_sys_prctl+0x61/0x8f0 do_syscall_64+0x58/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: e338d263a76a ("Add 64-bit capability support to the kernel") Signed-off-by: Gaosheng Cui Acked-by: Andrew G. Morgan Reviewed-by: Serge Hallyn Signed-off-by: Paul Moore Signed-off-by: Sasha Levin --- include/uapi/linux/capability.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index 2ddb4226cd23..43a44538ec8d 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -427,7 +427,7 @@ struct vfs_ns_cap_data { */ #define CAP_TO_INDEX(x) ((x) >> 5) /* 1 << 5 == bits in __u32 */ -#define CAP_TO_MASK(x) (1 << ((x) & 31)) /* mask for indexed __u32 */ +#define CAP_TO_MASK(x) (1U << ((x) & 31)) /* mask for indexed __u32 */ #endif /* _UAPI_LINUX_CAPABILITY_H */ From a60cc64db72f9fa87b82fbda12fb11270599f8c4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Tue, 1 Mar 2022 15:33:40 +0100 Subject: [PATCH 22/98] KVM: s390x: fix SCK locking [ Upstream commit c0573ba5c5a2244dc02060b1f374d4593c1d20b7 ] When handling the SCK instruction, the kvm lock is taken, even though the vcpu lock is already being held. The normal locking order is kvm lock first and then vcpu lock. This is can (and in some circumstances does) lead to deadlocks. The function kvm_s390_set_tod_clock is called both by the SCK handler and by some IOCTLs to set the clock. The IOCTLs will not hold the vcpu lock, so they can safely take the kvm lock. The SCK handler holds the vcpu lock, but will also somehow need to acquire the kvm lock without relinquishing the vcpu lock. The solution is to factor out the code to set the clock, and provide two wrappers. One is called like the original function and does the locking, the other is called kvm_s390_try_set_tod_clock and uses trylock to try to acquire the kvm lock. This new wrapper is then used in the SCK handler. If locking fails, -EAGAIN is returned, which is eventually propagated to userspace, thus also freeing the vcpu lock and allowing for forward progress. This is not the most efficient or elegant way to solve this issue, but the SCK instruction is deprecated and its performance is not critical. The goal of this patch is just to provide a simple but correct way to fix the bug. Fixes: 6a3f95a6b04c ("KVM: s390: Intercept SCK instruction") Signed-off-by: Claudio Imbrenda Reviewed-by: Christian Borntraeger Reviewed-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20220301143340.111129-1-imbrenda@linux.ibm.com Cc: stable@vger.kernel.org Signed-off-by: Christian Borntraeger Stable-dep-of: 6973091d1b50 ("KVM: s390: pv: don't allow userspace to set the clock under PV") Signed-off-by: Sasha Levin --- arch/s390/kvm/kvm-s390.c | 19 ++++++++++++++++--- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/priv.c | 15 ++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d8e9239c24ff..d12042ab5d54 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3862,14 +3862,12 @@ retry: return 0; } -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod) +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { struct kvm_vcpu *vcpu; struct kvm_s390_tod_clock_ext htod; int i; - mutex_lock(&kvm->lock); preempt_disable(); get_tod_clock_ext((char *)&htod); @@ -3890,9 +3888,24 @@ void kvm_s390_set_tod_clock(struct kvm *kvm, kvm_s390_vcpu_unblock_all(kvm); preempt_enable(); +} + +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + mutex_lock(&kvm->lock); + __kvm_s390_set_tod_clock(kvm, gtod); mutex_unlock(&kvm->lock); } +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) +{ + if (!mutex_trylock(&kvm->lock)) + return 0; + __kvm_s390_set_tod_clock(kvm, gtod); + mutex_unlock(&kvm->lock); + return 1; +} + /** * kvm_arch_fault_in_page - fault-in guest page if necessary * @vcpu: The corresponding virtual cpu diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a3e9b71d426f..80bfe9c3364a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -326,8 +326,8 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, - const struct kvm_s390_vm_tod_clock *gtod); +void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); +int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3b1a498e58d2..e34d518dd3d3 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -102,7 +102,20 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) return kvm_s390_inject_prog_cond(vcpu, rc); VCPU_EVENT(vcpu, 3, "SCK: setting guest TOD to 0x%llx", gtod.tod); - kvm_s390_set_tod_clock(vcpu->kvm, >od); + /* + * To set the TOD clock the kvm lock must be taken, but the vcpu lock + * is already held in handle_set_clock. The usual lock order is the + * opposite. As SCK is deprecated and should not be used in several + * cases, for example when the multiple epoch facility or TOD clock + * steering facility is installed (see Principles of Operation), a + * slow path can be used. If the lock can not be taken via try_lock, + * the instruction will be retried via -EAGAIN at a later point in + * time. + */ + if (!kvm_s390_try_set_tod_clock(vcpu->kvm, >od)) { + kvm_s390_retry_instr(vcpu); + return -EAGAIN; + } kvm_s390_set_psw_cc(vcpu, 0); return 0; From 2bf8b1c111fff86317a61cdb92c3a5a10525e68a Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 11 Oct 2022 18:07:12 +0200 Subject: [PATCH 23/98] KVM: s390: pv: don't allow userspace to set the clock under PV [ Upstream commit 6973091d1b50ab4042f6a2d495f59e9db3662ab8 ] When running under PV, the guest's TOD clock is under control of the ultravisor and the hypervisor isn't allowed to change it. Hence, don't allow userspace to change the guest's TOD clock by returning -EOPNOTSUPP. When userspace changes the guest's TOD clock, KVM updates its kvm.arch.epoch field and, in addition, the epoch field in all state descriptions of all VCPUs. But, under PV, the ultravisor will ignore the epoch field in the state description and simply overwrite it on next SIE exit with the actual guest epoch. This leads to KVM having an incorrect view of the guest's TOD clock: it has updated its internal kvm.arch.epoch field, but the ultravisor ignores the field in the state description. Whenever a guest is now waiting for a clock comparator, KVM will incorrectly calculate the time when the guest should wake up, possibly causing the guest to sleep for much longer than expected. With this change, kvm_s390_set_tod() will now take the kvm->lock to be able to call kvm_s390_pv_is_protected(). Since kvm_s390_set_tod_clock() also takes kvm->lock, use __kvm_s390_set_tod_clock() instead. The function kvm_s390_set_tod_clock is now unused, hence remove it. Update the documentation to indicate the TOD clock attr calls can now return -EOPNOTSUPP. Fixes: 0f3035047140 ("KVM: s390: protvirt: Do only reset registers that are accessible") Reported-by: Marc Hartmayer Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221011160712.928239-2-nrb@linux.ibm.com Message-Id: <20221011160712.928239-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank Signed-off-by: Sasha Levin --- Documentation/virt/kvm/devices/vm.rst | 3 +++ arch/s390/kvm/kvm-s390.c | 26 +++++++++++++++++--------- arch/s390/kvm/kvm-s390.h | 1 - 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 0aa5b1cfd700..60acc39e0e93 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT). :Parameters: address of a buffer in user space to store the data (u8) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW ----------------------------------- @@ -224,6 +225,7 @@ the POP (u64). :Parameters: address of a buffer in user space to store the data (u64) to :Returns: -EFAULT if the given address is not accessible from kernel space + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT ----------------------------------- @@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0. (kvm_s390_vm_tod_clock) to :Returns: -EFAULT if the given address is not accessible from kernel space; -EINVAL if setting the TOD clock extension to != 0 is not supported + -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor) 4. GROUP: KVM_S390_VM_CRYPTO ============================ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d12042ab5d54..59db85fb63e1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1092,6 +1092,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm, return 0; } +static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); + static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) { struct kvm_s390_vm_tod_clock gtod; @@ -1101,7 +1103,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr) if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx) return -EINVAL; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx", gtod.epoch_idx, gtod.tod); @@ -1132,7 +1134,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr) sizeof(gtod.tod))) return -EFAULT; - kvm_s390_set_tod_clock(kvm, >od); + __kvm_s390_set_tod_clock(kvm, >od); VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod); return 0; } @@ -1144,6 +1146,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) if (attr->flags) return -EINVAL; + mutex_lock(&kvm->lock); + /* + * For protected guests, the TOD is managed by the ultravisor, so trying + * to change it will never bring the expected results. + */ + if (kvm_s390_pv_is_protected(kvm)) { + ret = -EOPNOTSUPP; + goto out_unlock; + } + switch (attr->attr) { case KVM_S390_VM_TOD_EXT: ret = kvm_s390_set_tod_ext(kvm, attr); @@ -1158,6 +1170,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr) ret = -ENXIO; break; } + +out_unlock: + mutex_unlock(&kvm->lock); return ret; } @@ -3890,13 +3905,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_t preempt_enable(); } -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) -{ - mutex_lock(&kvm->lock); - __kvm_s390_set_tod_clock(kvm, gtod); - mutex_unlock(&kvm->lock); -} - int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod) { if (!mutex_trylock(&kvm->lock)) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 80bfe9c3364a..b6ff64796af9 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -326,7 +326,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); From bc4591a86b8fe322944776bb1c92a3f2daf31ebf Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:05:37 +0800 Subject: [PATCH 24/98] net: lapbether: fix issue of dev reference count leakage in lapbeth_device_event() [ Upstream commit 531705a765493655472c993627106e19f7e5a6d2 ] When following tests are performed, it will cause dev reference counting leakage. a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, lapbeth_device_event() receives the message and creates a new lapbeth device. In this case, the reference count value of dev is hold once. But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, lapbeth_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/wan/lapbether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index f6562a343cb4..b965eb6a4bb1 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -403,7 +403,7 @@ static int lapbeth_device_event(struct notifier_block *this, if (dev_net(dev) != &init_net) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !lapbeth_get_x25_dev(dev)) return NOTIFY_DONE; switch (event) { From bcb3bb10695f555c66f18b195e4b41cdd14be450 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Thu, 3 Nov 2022 17:09:05 +0800 Subject: [PATCH 25/98] hamradio: fix issue of dev reference count leakage in bpq_device_event() [ Upstream commit 85cbaf032d3cd9f595152625eda5d4ecb1d6d78d ] When following tests are performed, it will cause dev reference counting leakage. a)ip link add bond2 type bond mode balance-rr b)ip link set bond2 up c)ifenslave -f bond2 rose1 d)ip link del bond2 When new bond device is created, the default type of the bond device is ether. And the bond device is up, bpq_device_event() receives the message and creates a new bpq device. In this case, the reference count value of dev is hold once. But after "ifenslave -f bond2 rose1" command is executed, the type of the bond device is changed to rose. When the bond device is unregistered, bpq_device_event() will not put the dev reference count. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Zhengchao Shao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- drivers/net/hamradio/bpqether.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index 1ad6085994b1..5c17c92add8a 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -533,7 +533,7 @@ static int bpq_device_event(struct notifier_block *this, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (!dev_is_ethdev(dev)) + if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) return NOTIFY_DONE; switch (event) { From f3aa8a7d9550a5f9ccbd4e0ed914a065d0741cc8 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Thu, 3 Nov 2022 01:47:05 +0000 Subject: [PATCH 26/98] drm/vc4: Fix missing platform_unregister_drivers() call in vc4_drm_register() [ Upstream commit cf53db768a8790fdaae2fa3a81322b080285f7e5 ] A problem about modprobe vc4 failed is triggered with the following log given: [ 420.327987] Error: Driver 'vc4_hvs' is already registered, aborting... [ 420.333904] failed to register platform driver vc4_hvs_driver [vc4]: -16 modprobe: ERROR: could not insert 'vc4': Device or resource busy The reason is that vc4_drm_register() returns platform_driver_register() directly without checking its return value, if platform_driver_register() fails, it returns without unregistering all the vc4 drivers, resulting the vc4 can never be installed later. A simple call graph is shown as below: vc4_drm_register() platform_register_drivers() # all vc4 drivers are registered platform_driver_register() driver_register() bus_add_driver() priv = kzalloc(...) # OOM happened # return without unregister drivers Fixing this problem by checking the return value of platform_driver_register() and do platform_unregister_drivers() if error happened. Fixes: c8b75bca92cb ("drm/vc4: Add KMS support for Raspberry Pi.") Signed-off-by: Yuan Can Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20221103014705.109322-1-yuancan@huawei.com Signed-off-by: Sasha Levin --- drivers/gpu/drm/vc4/vc4_drv.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 52426bc8edb8..888aec1bbeee 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -404,7 +404,12 @@ static int __init vc4_drm_register(void) if (ret) return ret; - return platform_driver_register(&vc4_platform_driver); + ret = platform_driver_register(&vc4_platform_driver); + if (ret) + platform_unregister_drivers(component_drivers, + ARRAY_SIZE(component_drivers)); + + return ret; } static void __exit vc4_drm_unregister(void) From 02f8dfee7580b65449a67baa65cc2da4e5ffc473 Mon Sep 17 00:00:00 2001 From: Lu Wei Date: Fri, 4 Nov 2022 10:27:23 +0800 Subject: [PATCH 27/98] tcp: prohibit TCP_REPAIR_OPTIONS if data was already sent [ Upstream commit 0c175da7b0378445f5ef53904247cfbfb87e0b78 ] If setsockopt with option name of TCP_REPAIR_OPTIONS and opt_code of TCPOPT_SACK_PERM is called to enable sack after data is sent and dupacks are received , it will trigger a warning in function tcp_verify_left_out() as follows: ============================================ WARNING: CPU: 8 PID: 0 at net/ipv4/tcp_input.c:2132 tcp_timeout_mark_lost+0x154/0x160 tcp_enter_loss+0x2b/0x290 tcp_retransmit_timer+0x50b/0x640 tcp_write_timer_handler+0x1c8/0x340 tcp_write_timer+0xe5/0x140 call_timer_fn+0x3a/0x1b0 __run_timers.part.0+0x1bf/0x2d0 run_timer_softirq+0x43/0xb0 __do_softirq+0xfd/0x373 __irq_exit_rcu+0xf6/0x140 The warning is caused in the following steps: 1. a socket named socketA is created 2. socketA enters repair mode without build a connection 3. socketA calls connect() and its state is changed to TCP_ESTABLISHED directly 4. socketA leaves repair mode 5. socketA calls sendmsg() to send data, packets_out and sack_outs(dup ack receives) increase 6. socketA enters repair mode again 7. socketA calls setsockopt with TCPOPT_SACK_PERM to enable sack 8. retransmit timer expires, it calls tcp_timeout_mark_lost(), lost_out increases 9. sack_outs + lost_out > packets_out triggers since lost_out and sack_outs increase repeatly In function tcp_timeout_mark_lost(), tp->sacked_out will be cleared if Step7 not happen and the warning will not be triggered. As suggested by Denis and Eric, TCP_REPAIR_OPTIONS should be prohibited if data was already sent. socket-tcp tests in CRIU has been tested as follows: $ sudo ./test/zdtm.py run -t zdtm/static/socket-tcp* --keep-going \ --ignore-taint socket-tcp* represent all socket-tcp tests in test/zdtm/static/. Fixes: b139ba4e90dc ("tcp: Repair connection-time negotiated parameters") Signed-off-by: Lu Wei Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a7127364253c..cc588bc2b11d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3291,7 +3291,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, case TCP_REPAIR_OPTIONS: if (!tp->repair) err = -EINVAL; - else if (sk->sk_state == TCP_ESTABLISHED) + else if (sk->sk_state == TCP_ESTABLISHED && !tp->bytes_sent) err = tcp_repair_options_est(sk, optval, optlen); else err = -EPERM; From a033b86c7f7621fde31f0364af8986f43b44914f Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Fri, 4 Nov 2022 11:32:16 +0100 Subject: [PATCH 28/98] ipv6: addrlabel: fix infoleak when sending struct ifaddrlblmsg to network [ Upstream commit c23fb2c82267638f9d206cb96bb93e1f93ad7828 ] When copying a `struct ifaddrlblmsg` to the network, __ifal_reserved remained uninitialized, resulting in a 1-byte infoleak: BUG: KMSAN: kernel-network-infoleak in __netdev_start_xmit ./include/linux/netdevice.h:4841 __netdev_start_xmit ./include/linux/netdevice.h:4841 netdev_start_xmit ./include/linux/netdevice.h:4857 xmit_one net/core/dev.c:3590 dev_hard_start_xmit+0x1dc/0x800 net/core/dev.c:3606 __dev_queue_xmit+0x17e8/0x4350 net/core/dev.c:4256 dev_queue_xmit ./include/linux/netdevice.h:3009 __netlink_deliver_tap_skb net/netlink/af_netlink.c:307 __netlink_deliver_tap+0x728/0xad0 net/netlink/af_netlink.c:325 netlink_deliver_tap net/netlink/af_netlink.c:338 __netlink_sendskb net/netlink/af_netlink.c:1263 netlink_sendskb+0x1d9/0x200 net/netlink/af_netlink.c:1272 netlink_unicast+0x56d/0xf50 net/netlink/af_netlink.c:1360 nlmsg_unicast ./include/net/netlink.h:1061 rtnl_unicast+0x5a/0x80 net/core/rtnetlink.c:758 ip6addrlbl_get+0xfad/0x10f0 net/ipv6/addrlabel.c:628 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 ... Uninit was created at: slab_post_alloc_hook+0x118/0xb00 mm/slab.h:742 slab_alloc_node mm/slub.c:3398 __kmem_cache_alloc_node+0x4f2/0x930 mm/slub.c:3437 __do_kmalloc_node mm/slab_common.c:954 __kmalloc_node_track_caller+0x117/0x3d0 mm/slab_common.c:975 kmalloc_reserve net/core/skbuff.c:437 __alloc_skb+0x27a/0xab0 net/core/skbuff.c:509 alloc_skb ./include/linux/skbuff.h:1267 nlmsg_new ./include/net/netlink.h:964 ip6addrlbl_get+0x490/0x10f0 net/ipv6/addrlabel.c:608 rtnetlink_rcv_msg+0xb33/0x1570 net/core/rtnetlink.c:6082 netlink_rcv_skb+0x299/0x550 net/netlink/af_netlink.c:2540 rtnetlink_rcv+0x26/0x30 net/core/rtnetlink.c:6109 netlink_unicast_kernel net/netlink/af_netlink.c:1319 netlink_unicast+0x9ab/0xf50 net/netlink/af_netlink.c:1345 netlink_sendmsg+0xebc/0x10f0 net/netlink/af_netlink.c:1921 ... This patch ensures that the reserved field is always initialized. Reported-by: syzbot+3553517af6020c4f2813f1003fe76ef3cbffe98d@syzkaller.appspotmail.com Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Signed-off-by: Alexander Potapenko Reviewed-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- net/ipv6/addrlabel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 8a22486cf270..17ac45aa7194 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -437,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, { struct ifaddrlblmsg *ifal = nlmsg_data(nlh); ifal->ifal_family = AF_INET6; + ifal->__ifal_reserved = 0; ifal->ifal_prefixlen = prefixlen; ifal->ifal_flags = 0; ifal->ifal_index = ifindex; From d68fa77ee3d03bad6fe84e89759ddf7005f9e9c6 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Fri, 28 Oct 2022 16:56:50 +0800 Subject: [PATCH 29/98] can: af_can: fix NULL pointer dereference in can_rx_register() [ Upstream commit 8aa59e355949442c408408c2d836e561794c40a1 ] It causes NULL pointer dereference when testing as following: (a) use syscall(__NR_socket, 0x10ul, 3ul, 0) to create netlink socket. (b) use syscall(__NR_sendmsg, ...) to create bond link device and vxcan link device, and bind vxcan device to bond device (can also use ifenslave command to bind vxcan device to bond device). (c) use syscall(__NR_socket, 0x1dul, 3ul, 1) to create CAN socket. (d) use syscall(__NR_bind, ...) to bind the bond device to CAN socket. The bond device invokes the can-raw protocol registration interface to receive CAN packets. However, ml_priv is not allocated to the dev, dev_rcv_lists is assigned to NULL in can_rx_register(). In this case, it will occur the NULL pointer dereference issue. The following is the stack information: BUG: kernel NULL pointer dereference, address: 0000000000000008 PGD 122a4067 P4D 122a4067 PUD 1223c067 PMD 0 Oops: 0000 [#1] PREEMPT SMP RIP: 0010:can_rx_register+0x12d/0x1e0 Call Trace: raw_enable_filters+0x8d/0x120 raw_enable_allfilters+0x3b/0x130 raw_bind+0x118/0x4f0 __sys_bind+0x163/0x1a0 __x64_sys_bind+0x1e/0x30 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x63/0xcd Fixes: 4e096a18867a ("net: introduce CAN specific pointer in the struct net_device") Signed-off-by: Zhengchao Shao Reviewed-by: Marc Kleine-Budde Link: https://lore.kernel.org/all/20221028085650.170470-1-shaozhengchao@huawei.com Signed-off-by: Marc Kleine-Budde Signed-off-by: Sasha Levin --- net/can/af_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 1c95ede2c9a6..cf554e855521 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -451,7 +451,7 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, /* insert new receiver (dev,canid,mask) -> (func,data) */ - if (dev && dev->type != ARPHRD_CAN) + if (dev && (dev->type != ARPHRD_CAN || !can_get_ml_priv(dev))) return -ENODEV; if (dev && !net_eq(net, dev_net(dev))) From cb6d639bb1efaa4d6ec28a49a98cb23bc19559a7 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 4 Nov 2022 09:30:04 +0100 Subject: [PATCH 30/98] net: stmmac: dwmac-meson8b: fix meson8b_devm_clk_prepare_enable() [ Upstream commit ed4314f7729714d788698ade4f9905ee5378ebc0 ] There are two problems with meson8b_devm_clk_prepare_enable(), introduced in commit a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable"): - It doesn't pass the clk argument, but instead always the rgmii_tx_clk of the device. - It silently ignores the return value of devm_add_action_or_reset(). The former didn't become an actual bug until another user showed up in the next commit 9308c47640d5 ("net: stmmac: dwmac-meson8b: add support for the RX delay configuration"). The latter means the callers could end up with the clock not actually prepared/enabled. Fixes: a54dc4a49045 ("net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable") Signed-off-by: Rasmus Villemoes Reviewed-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20221104083004.2212520-1-linux@rasmusvillemoes.dk Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 752658ec7bee..50ef68497bce 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -261,11 +261,9 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac, if (ret) return ret; - devm_add_action_or_reset(dwmac->dev, - (void(*)(void *))clk_disable_unprepare, - dwmac->rgmii_tx_clk); - - return 0; + return devm_add_action_or_reset(dwmac->dev, + (void(*)(void *))clk_disable_unprepare, + clk); } static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) From fbb4e8e6dc7b38b3007354700f03c8ad2d9a2118 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Sat, 5 Nov 2022 17:02:45 +0800 Subject: [PATCH 31/98] net: broadcom: Fix BCMGENET Kconfig [ Upstream commit 8d820bc9d12b8beebca836cceaf2bbe68216c2f8 ] While BCMGENET select BROADCOM_PHY as y, but PTP_1588_CLOCK_OPTIONAL is m, kconfig warning and build errors: WARNING: unmet direct dependencies detected for BROADCOM_PHY Depends on [m]: NETDEVICES [=y] && PHYLIB [=y] && PTP_1588_CLOCK_OPTIONAL [=m] Selected by [y]: - BCMGENET [=y] && NETDEVICES [=y] && ETHERNET [=y] && NET_VENDOR_BROADCOM [=y] && HAS_IOMEM [=y] && ARCH_BCM2835 [=y] drivers/net/phy/broadcom.o: In function `bcm54xx_suspend': broadcom.c:(.text+0x6ac): undefined reference to `bcm_ptp_stop' drivers/net/phy/broadcom.o: In function `bcm54xx_phy_probe': broadcom.c:(.text+0x784): undefined reference to `bcm_ptp_probe' drivers/net/phy/broadcom.o: In function `bcm54xx_config_init': broadcom.c:(.text+0xd4c): undefined reference to `bcm_ptp_config_init' Fixes: 99addbe31f55 ("net: broadcom: Select BROADCOM_PHY for BCMGENET") Signed-off-by: YueHaibing Acked-by: Florian Fainelli Link: https://lore.kernel.org/r/20221105090245.8508-1-yuehaibing@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/broadcom/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 7b79528d6eed..2b6d929d462f 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -69,7 +69,7 @@ config BCMGENET select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if ARCH_BCM2835 + select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. From f31dd158580940938f77514b87337a777520185a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Fri, 4 Nov 2022 16:48:53 -0400 Subject: [PATCH 32/98] tipc: fix the msg->req tlv len check in tipc_nl_compat_name_table_dump_header [ Upstream commit 1c075b192fe41030457cd4a5f7dea730412bca40 ] This is a follow-up for commit 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") where it should have type casted sizeof(..) to int to work when TLV_GET_DATA_LEN() returns a negative value. syzbot reported a call trace because of it: BUG: KMSAN: uninit-value in ... tipc_nl_compat_name_table_dump+0x841/0xea0 net/tipc/netlink_compat.c:934 __tipc_nl_compat_dumpit+0xab2/0x1320 net/tipc/netlink_compat.c:238 tipc_nl_compat_dumpit+0x991/0xb50 net/tipc/netlink_compat.c:321 tipc_nl_compat_recv+0xb6e/0x1640 net/tipc/netlink_compat.c:1324 genl_family_rcv_msg_doit net/netlink/genetlink.c:731 [inline] genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] genl_rcv_msg+0x103f/0x1260 net/netlink/genetlink.c:792 netlink_rcv_skb+0x3a5/0x6c0 net/netlink/af_netlink.c:2501 genl_rcv+0x3c/0x50 net/netlink/genetlink.c:803 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] netlink_unicast+0xf3b/0x1270 net/netlink/af_netlink.c:1345 netlink_sendmsg+0x1288/0x1440 net/netlink/af_netlink.c:1921 sock_sendmsg_nosec net/socket.c:714 [inline] sock_sendmsg net/socket.c:734 [inline] Reported-by: syzbot+e5dbaaa238680ce206ea@syzkaller.appspotmail.com Fixes: 974cb0e3e7c9 ("tipc: fix uninit-value in tipc_nl_compat_name_table_dump") Signed-off-by: Xin Long Link: https://lore.kernel.org/r/ccd6a7ea801b15aec092c3b532a883b4c5708695.1667594933.git.lucien.xin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- net/tipc/netlink_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 49e893313652..2d62932b5987 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -877,7 +877,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg) }; ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req); - if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query)) + if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query)) return -EINVAL; depth = ntohl(ntq->depth); From 6e2ffae69d17da317a892cfd6a3c24c8a8bbaed3 Mon Sep 17 00:00:00 2001 From: Doug Brown Date: Mon, 5 Sep 2022 17:07:09 -0700 Subject: [PATCH 33/98] dmaengine: pxa_dma: use platform_get_irq_optional [ Upstream commit b3d726cb8497c6b12106fd617d46eef11763ea86 ] The first IRQ is required, but IRQs 1 through (nb_phy_chans - 1) are optional, because on some platforms (e.g. PXA168) there is a single IRQ shared between all channels. This change inhibits a flood of "IRQ index # not found" messages at startup. Tested on a PXA168-based device. Fixes: 7723f4c5ecdb ("driver core: platform: Add an error message to platform_get_irq*()") Signed-off-by: Doug Brown Link: https://lore.kernel.org/r/20220906000709.52705-1-doug@schmorgal.com Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/pxa_dma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index b4ef4f19f7de..68d9d60c051d 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -1249,14 +1249,14 @@ static int pxad_init_phys(struct platform_device *op, return -ENOMEM; for (i = 0; i < nb_phy_chans; i++) - if (platform_get_irq(op, i) > 0) + if (platform_get_irq_optional(op, i) > 0) nr_irq++; for (i = 0; i < nb_phy_chans; i++) { phy = &pdev->phys[i]; phy->base = pdev->base; phy->idx = i; - irq = platform_get_irq(op, i); + irq = platform_get_irq_optional(op, i); if ((nr_irq > 1) && (irq > 0)) ret = devm_request_irq(&op->dev, irq, pxad_chan_handler, From 0b7ee3d50f32d277bf024b4ddb4de54da43a3025 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 24 Oct 2022 21:50:09 +0200 Subject: [PATCH 34/98] dmaengine: mv_xor_v2: Fix a resource leak in mv_xor_v2_remove() [ Upstream commit 081195d17a0c4c636da2b869bd5809d42e8cbb13 ] A clk_prepare_enable() call in the probe is not balanced by a corresponding clk_disable_unprepare() in the remove function. Add the missing call. Fixes: 3cd2c313f1d6 ("dmaengine: mv_xor_v2: Fix clock resource by adding a register clock") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/e9e3837a680c9bd2438e4db2b83270c6c052d005.1666640987.git.christophe.jaillet@wanadoo.fr Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- drivers/dma/mv_xor_v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c index 9b0d463f89bb..4800c596433a 100644 --- a/drivers/dma/mv_xor_v2.c +++ b/drivers/dma/mv_xor_v2.c @@ -899,6 +899,7 @@ static int mv_xor_v2_remove(struct platform_device *pdev) tasklet_kill(&xor_dev->irq_tasklet); clk_disable_unprepare(xor_dev->clk); + clk_disable_unprepare(xor_dev->reg_clk); return 0; } From 3a4a3c3b1fe6f9e2580abf1f5cf57f6c47c98111 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 12:30:32 +0800 Subject: [PATCH 35/98] drivers: net: xgene: disable napi when register irq failed in xgene_enet_open() [ Upstream commit ce9e57feeed81d17d5e80ed86f516ff0d39c3867 ] When failed to register irq in xgene_enet_open() for opening device, napi isn't disabled. When open xgene device next time, it will reports a invalid opcode issue. Fix it. Only be compiled, not be tested. Fixes: aeb20b6b3f4e ("drivers: net: xgene: fix: ifconfig up/down crash") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107043032.357673-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/apm/xgene/xgene_enet_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index 78c7cbc372b0..71151f675a49 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -1004,8 +1004,10 @@ static int xgene_enet_open(struct net_device *ndev) xgene_enet_napi_enable(pdata); ret = xgene_enet_register_irq(ndev); - if (ret) + if (ret) { + xgene_enet_napi_disable(pdata); return ret; + } if (ndev->phydev) { phy_start(ndev->phydev); From cf4853880e2433909880145aad650f52554662bd Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Tue, 18 Oct 2022 14:26:04 +0530 Subject: [PATCH 36/98] perf stat: Fix printing os->prefix in CSV metrics output [ Upstream commit ad353b710c7493df3d4fc2d3a51819126bed2e81 ] 'perf stat' with CSV output option prints an extra empty string as first field in metrics output line. Sample output below: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,1.78,msec,cpu-clock,1785146,100.00,0.973,CPUs utilized S0,1,26,,context-switches,1781750,100.00,0.015,M/sec S0,1,1,,cpu-migrations,1780526,100.00,0.561,K/sec S0,1,1,,page-faults,1779060,100.00,0.561,K/sec S0,1,875807,,cycles,1769826,100.00,0.491,GHz S0,1,85281,,stalled-cycles-frontend,1767512,100.00,9.74,frontend cycles idle S0,1,576839,,stalled-cycles-backend,1766260,100.00,65.86,backend cycles idle S0,1,288430,,instructions,1762246,100.00,0.33,insn per cycle ====> ,S0,1,,,,,,,2.00,stalled cycles per insn The above command line uses field separator as "," via "-x," option and per-socket option displays socket value as first field. But here the last line for "stalled cycles per insn" has "," in the beginning. Sample output using interval mode: # ./perf stat -I 1000 -x, --per-socket -a -C 1 ls 0.001813453,S0,1,1.87,msec,cpu-clock,1872052,100.00,0.002,CPUs utilized 0.001813453,S0,1,2,,context-switches,1868028,100.00,1.070,K/sec ------ 0.001813453,S0,1,85379,,instructions,1856754,100.00,0.32,insn per cycle ====> 0.001813453,,S0,1,,,,,,,1.34,stalled cycles per insn Above result also has an extra CSV separator after the timestamp. Patch addresses extra field separator in the beginning of the metric output line. The counter stats are displayed by function "perf_stat__print_shadow_stats" in code "util/stat-shadow.c". While printing the stats info for "stalled cycles per insn", function "new_line_csv" is used as new_line callback. The new_line_csv function has check for "os->prefix" and if prefix is not null, it will be printed along with cvs separator. Snippet from "new_line_csv": if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); Here os->prefix gets printed followed by "," which is the cvs separator. The os->prefix is used in interval mode option ( -I ), to print time stamp on every new line. But prefix is already set to contain CSV separator when used in interval mode for CSV option. Reference: Function "static void print_interval" Snippet: sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); Also if prefix is not assigned (if not used with -I option), it gets set to empty string. Reference: function printout() in util/stat-display.c Snippet: .prefix = prefix ? prefix : "", Since prefix already set to contain cvs_sep in interval option, patch removes printing config->csv_sep in new_line_csv function to avoid printing extra field. After the patch: # ./perf stat -x, --per-socket -a -C 1 ls S0,1,2.04,msec,cpu-clock,2045202,100.00,1.013,CPUs utilized S0,1,2,,context-switches,2041444,100.00,979.289,/sec S0,1,0,,cpu-migrations,2040820,100.00,0.000,/sec S0,1,2,,page-faults,2040288,100.00,979.289,/sec S0,1,254589,,cycles,2036066,100.00,0.125,GHz S0,1,82481,,stalled-cycles-frontend,2032420,100.00,32.40,frontend cycles idle S0,1,113170,,stalled-cycles-backend,2031722,100.00,44.45,backend cycles idle S0,1,88766,,instructions,2030942,100.00,0.35,insn per cycle S0,1,,,,,,,1.27,stalled cycles per insn Fixes: 92a61f6412d3a09d ("perf stat: Implement CSV metrics output") Reported-by: Disha Goel Reviewed-By: Kajol Jain Signed-off-by: Athira Jajeev Tested-by: Disha Goel Cc: Andi Kleen Cc: Ian Rogers Cc: James Clark Cc: Jiri Olsa Cc: linuxppc-dev@lists.ozlabs.org Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Nageswara R Sastry Cc: Namhyung Kim Link: https://lore.kernel.org/r/20221018085605.63834-1-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/stat-display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 96fe9c1af336..4688e39de52a 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -203,7 +203,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) fputc('\n', os->fh); if (os->prefix) - fprintf(os->fh, "%s%s", os->prefix, config->csv_sep); + fprintf(os->fh, "%s", os->prefix); aggr_printout(config, os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(config->csv_sep, os->fh); From 5333cf1b7f6861912aff6263978d4781f9858e47 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Tue, 8 Nov 2022 10:56:07 +0800 Subject: [PATCH 37/98] net: marvell: prestera: fix memory leak in prestera_rxtx_switch_init() [ Upstream commit 519b58bbfa825f042fcf80261cc18e1e35f85ffd ] When prestera_sdma_switch_init() failed, the memory pointed to by sw->rxtx isn't released. Fix it. Only be compiled, not be tested. Fixes: 501ef3066c89 ("net: marvell: prestera: Add driver for Prestera family ASIC devices") Signed-off-by: Zhengchao Shao Reviewed-by: Vadym Kochan Link: https://lore.kernel.org/r/20221108025607.338450-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/prestera/prestera_rxtx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c index 2a13c318048c..59a3ea02b8ad 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_rxtx.c @@ -771,6 +771,7 @@ tx_done: int prestera_rxtx_switch_init(struct prestera_switch *sw) { struct prestera_rxtx *rxtx; + int err; rxtx = kzalloc(sizeof(*rxtx), GFP_KERNEL); if (!rxtx) @@ -778,7 +779,11 @@ int prestera_rxtx_switch_init(struct prestera_switch *sw) sw->rxtx = rxtx; - return prestera_sdma_switch_init(sw); + err = prestera_sdma_switch_init(sw); + if (err) + kfree(rxtx); + + return err; } void prestera_rxtx_switch_fini(struct prestera_switch *sw) From bdd282bba72de096145d75ef9db40792c9c710ea Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Mon, 7 Nov 2022 18:14:43 +0800 Subject: [PATCH 38/98] net: nixge: disable napi when enable interrupts failed in nixge_open() [ Upstream commit b06334919c7a068d54ba5b219c05e919d89943f7 ] When failed to enable interrupts in nixge_open() for opening device, napi isn't disabled. When open nixge device next time, it will reports a invalid opcode issue. Fix it. Only be compiled, not be tested. Fixes: 492caffa8a1a ("net: ethernet: nixge: Add support for National Instruments XGE netdev") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221107101443.120205-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ni/nixge.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c index a6861df9904f..9c48fd85c418 100644 --- a/drivers/net/ethernet/ni/nixge.c +++ b/drivers/net/ethernet/ni/nixge.c @@ -899,6 +899,7 @@ static int nixge_open(struct net_device *ndev) err_rx_irq: free_irq(priv->tx_irq, ndev); err_tx_irq: + napi_disable(&priv->napi); phy_stop(phy); phy_disconnect(phy); tasklet_kill(&priv->dma_err_tasklet); From eb6fa0ac2a9c14b700ee7a2bb55736a0bd14a099 Mon Sep 17 00:00:00 2001 From: Roy Novich Date: Wed, 2 Nov 2022 23:55:38 -0700 Subject: [PATCH 39/98] net/mlx5: Allow async trigger completion execution on single CPU systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 2808b37b59288ad8f1897e3546c2296df3384b65 ] For a single CPU system, the kernel thread executing mlx5_cmd_flush() never releases the CPU but calls down_trylock(&cmd→sem) in a busy loop. On a single processor system, this leads to a deadlock as the kernel thread which executes mlx5_cmd_invoke() never gets scheduled. Fix this, by adding the cond_resched() call to the loop, allow the command completion kernel thread to execute. Fixes: 8e715cd613a1 ("net/mlx5: Set command entry semaphore up once got index free") Signed-off-by: Alexander Schmidt Signed-off-by: Roy Novich Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 6612b2c0be48..cf07318048df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1687,12 +1687,17 @@ void mlx5_cmd_flush(struct mlx5_core_dev *dev) struct mlx5_cmd *cmd = &dev->cmd; int i; - for (i = 0; i < cmd->max_reg_cmds; i++) - while (down_trylock(&cmd->sem)) + for (i = 0; i < cmd->max_reg_cmds; i++) { + while (down_trylock(&cmd->sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } + } - while (down_trylock(&cmd->pages_sem)) + while (down_trylock(&cmd->pages_sem)) { mlx5_cmd_trigger_completions(dev); + cond_resched(); + } /* Unlock cmdif */ up(&cmd->pages_sem); From 3b27e20601abe48ae4baf9cb0cb0e3dcfdff4525 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Wed, 2 Nov 2022 23:55:46 -0700 Subject: [PATCH 40/98] net/mlx5e: E-Switch, Fix comparing termination table instance [ Upstream commit f4f4096b410e8d31c3f07f39de3b17d144edd53d ] The pkt_reformat pointer being saved under flow_act and not dest attribute in the termination table instance. Fix the comparison pointers. Also fix returning success if one pkt_reformat pointer is null and the other is not. Fixes: 249ccc3c95bd ("net/mlx5e: Add support for offloading traffic from uplink to uplink") Signed-off-by: Roi Dayan Reviewed-by: Chris Mi Signed-off-by: Saeed Mahameed Signed-off-by: Sasha Levin --- .../mellanox/mlx5/core/eswitch_offloads_termtbl.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c index 1cbb330b9f42..6c865cb7f445 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads_termtbl.c @@ -30,9 +30,9 @@ mlx5_eswitch_termtbl_hash(struct mlx5_flow_act *flow_act, sizeof(dest->vport.num), hash); hash = jhash((const void *)&dest->vport.vhca_id, sizeof(dest->vport.num), hash); - if (dest->vport.pkt_reformat) - hash = jhash(dest->vport.pkt_reformat, - sizeof(*dest->vport.pkt_reformat), + if (flow_act->pkt_reformat) + hash = jhash(flow_act->pkt_reformat, + sizeof(*flow_act->pkt_reformat), hash); return hash; } @@ -53,9 +53,11 @@ mlx5_eswitch_termtbl_cmp(struct mlx5_flow_act *flow_act1, if (ret) return ret; - return dest1->vport.pkt_reformat && dest2->vport.pkt_reformat ? - memcmp(dest1->vport.pkt_reformat, dest2->vport.pkt_reformat, - sizeof(*dest1->vport.pkt_reformat)) : 0; + if (flow_act1->pkt_reformat && flow_act2->pkt_reformat) + return memcmp(flow_act1->pkt_reformat, flow_act2->pkt_reformat, + sizeof(*flow_act1->pkt_reformat)); + + return !(flow_act1->pkt_reformat == flow_act2->pkt_reformat); } static int From fd52dd2d6e2f5e34bed00e654d06e02c6bef39d4 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:15:37 +0800 Subject: [PATCH 41/98] net: cpsw: disable napi in cpsw_ndo_open() [ Upstream commit 6d47b53fb3f363a74538a1dbd09954af3d8d4131 ] When failed to create xdp rxqs or fill rx channels in cpsw_ndo_open() for opening device, napi isn't disabled. When open cpsw device next time, it will report a invalid opcode issue. Compiled tested only. Fixes: d354eb85d618 ("drivers: net: cpsw: dual_emac: simplify napi usage") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109011537.96975-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b0f00b4edd94..5af0f9f8c097 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -864,6 +864,8 @@ static int cpsw_ndo_open(struct net_device *ndev) err_cleanup: if (!cpsw->usage_count) { + napi_disable(&cpsw->napi_rx); + napi_disable(&cpsw->napi_tx); cpdma_ctlr_stop(cpsw->dma); cpsw_destroy_xdp_rxqs(cpsw); } From 38aa7ed8c2c3cace4b542f2a3369271878b45e74 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:14:51 +0800 Subject: [PATCH 42/98] net: cxgb3_main: disable napi when bind qsets failed in cxgb_up() [ Upstream commit d75aed1428da787cbe42bc073d76f1354f364d92 ] When failed to bind qsets in cxgb_up() for opening device, napi isn't disabled. When open cxgb3 device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only. Fixes: 48c4b6dbb7e2 ("cxgb3 - fix port up/down error path") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109021451.121490-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 84ad7261e243..8a167eea288c 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -1302,6 +1302,7 @@ static int cxgb_up(struct adapter *adap) if (ret < 0) { CH_ERR(adap, "failed to bind qsets, err %d\n", ret); t3_intr_disable(adap); + quiesce_rx(adap); free_irq_resources(adap); err = ret; goto out; From 4ad684ba028cfb5c4bf924e46855ef11c15aa5ae Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 09:21:00 +0800 Subject: [PATCH 43/98] cxgb4vf: shut down the adapter when t4vf_update_port_info() failed in cxgb4vf_open() [ Upstream commit c6092ea1e6d7bd12acd881f6aa2b5054cd70e096 ] When t4vf_update_port_info() failed in cxgb4vf_open(), resources applied during adapter goes up are not cleared. Fix it. Only be compiled, not be tested. Fixes: 18d79f721e0a ("cxgb4vf: Update port information in cxgb4vf_open()") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109012100.99132-1-shaozhengchao@huawei.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin --- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index 2820a0bb971b..5e1e46425014 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -858,7 +858,7 @@ static int cxgb4vf_open(struct net_device *dev) */ err = t4vf_update_port_info(pi); if (err < 0) - return err; + goto err_unwind; /* * Note that this interface is up and start everything up ... From 1a4e495edfe25b42c0b59f809a8df97ad5a6eed3 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:58 +0100 Subject: [PATCH 44/98] net: phy: mscc: macsec: clear encryption keys when freeing a flow [ Upstream commit 1b16b3fdf675cca15a537572bac50cc5354368fc ] Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the MSCC PHY driver made a copy, kept it in the flow data and did not clear it when freeing a flow. Fix this. Fixes: 28c5107aa904 ("net: phy: mscc: macsec support") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/phy/mscc/mscc_macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mscc/mscc_macsec.c b/drivers/net/phy/mscc/mscc_macsec.c index b7b2521c73fb..c00eef457b85 100644 --- a/drivers/net/phy/mscc/mscc_macsec.c +++ b/drivers/net/phy/mscc/mscc_macsec.c @@ -632,6 +632,7 @@ static void vsc8584_macsec_free_flow(struct vsc8531_private *priv, list_del(&flow->list); clear_bit(flow->index, bitmap); + memzero_explicit(flow->key, sizeof(flow->key)); kfree(flow); } From 05b222843457391683889d319afb8998c10f9562 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 8 Nov 2022 16:34:59 +0100 Subject: [PATCH 45/98] net: atlantic: macsec: clear encryption keys from the stack [ Upstream commit 879785def0f5e71d54399de0f8a5cb399db14171 ] Commit aaab73f8fba4 ("macsec: clear encryption keys from the stack after setting up offload") made sure to clean encryption keys from the stack after setting up offloading, but the atlantic driver made a copy and did not clear it. Fix this. [4 Fixes tags below, all part of the same series, no need to split this] Fixes: 9ff40a751a6f ("net: atlantic: MACSec ingress offload implementation") Fixes: b8f8a0b7b5cb ("net: atlantic: MACSec ingress offload HW bindings") Fixes: 27736563ce32 ("net: atlantic: MACSec egress offload implementation") Fixes: 9d106c6dd81b ("net: atlantic: MACSec egress offload HW bindings") Signed-off-by: Antoine Tenart Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- .../net/ethernet/aquantia/atlantic/aq_macsec.c | 2 ++ .../aquantia/atlantic/macsec/macsec_api.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c index 7c6e0811f2e6..ee823a18294c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c @@ -585,6 +585,7 @@ static int aq_update_txsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_egress_sakey_record(hw, &key_rec, sa_idx); + memzero_explicit(&key_rec, sizeof(key_rec)); return ret; } @@ -932,6 +933,7 @@ static int aq_update_rxsa(struct aq_nic_s *nic, const unsigned int sc_idx, ret = aq_mss_set_ingress_sakey_record(hw, &sa_key_record, sa_idx); + memzero_explicit(&sa_key_record, sizeof(sa_key_record)); return ret; } diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c index 36c7cf05630a..431924959520 100644 --- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c +++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c @@ -757,6 +757,7 @@ set_ingress_sakey_record(struct aq_hw_s *hw, u16 table_index) { u16 packed_record[18]; + int ret; if (table_index >= NUMROWS_INGRESSSAKEYRECORD) return -EINVAL; @@ -789,9 +790,12 @@ set_ingress_sakey_record(struct aq_hw_s *hw, packed_record[16] = rec->key_len & 0x3; - return set_raw_ingress_record(hw, packed_record, 18, 2, - ROWOFFSET_INGRESSSAKEYRECORD + - table_index); + ret = set_raw_ingress_record(hw, packed_record, 18, 2, + ROWOFFSET_INGRESSSAKEYRECORD + + table_index); + + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_ingress_sakey_record(struct aq_hw_s *hw, @@ -1739,14 +1743,14 @@ static int set_egress_sakey_record(struct aq_hw_s *hw, ret = set_raw_egress_record(hw, packed_record, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index); if (unlikely(ret)) - return ret; + goto clear_key; ret = set_raw_egress_record(hw, packed_record + 8, 8, 2, ROWOFFSET_EGRESSSAKEYRECORD + table_index - 32); - if (unlikely(ret)) - return ret; - return 0; +clear_key: + memzero_explicit(packed_record, sizeof(packed_record)); + return ret; } int aq_mss_set_egress_sakey_record(struct aq_hw_s *hw, From 56d3b5531bf64009adb616f06ad30695b7e70018 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:37:41 +0800 Subject: [PATCH 46/98] ethernet: s2io: disable napi when start nic failed in s2io_card_up() [ Upstream commit 0348c1ab980c1d43fb37b758d4b760990c066cb5 ] When failed to start nic or add interrupt service routine in s2io_card_up() for opening device, napi isn't disabled. When open s2io device next time, it will trigger a BUG_ON()in napi_enable(). Compile tested only. Fixes: 5f490c968056 ("S2io: Fixed synchronization between scheduling of napi with card reset and close") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109023741.131552-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/neterion/s2io.c | 29 +++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index 3cae8449fadb..8a30be698f99 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -7114,9 +7114,8 @@ static int s2io_card_up(struct s2io_nic *sp) if (ret) { DBG_PRINT(ERR_DBG, "%s: Out of memory in Open\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENOMEM; + ret = -ENOMEM; + goto err_fill_buff; } DBG_PRINT(INFO_DBG, "Buf in ring:%d is %d:\n", i, ring->rx_bufs_left); @@ -7154,18 +7153,16 @@ static int s2io_card_up(struct s2io_nic *sp) /* Enable Rx Traffic and interrupts on the NIC */ if (start_nic(sp)) { DBG_PRINT(ERR_DBG, "%s: Starting NIC failed\n", dev->name); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } /* Add interrupt service routine */ if (s2io_add_isr(sp) != 0) { if (sp->config.intr_type == MSI_X) s2io_rem_isr(sp); - s2io_reset(sp); - free_rx_buffers(sp); - return -ENODEV; + ret = -ENODEV; + goto err_out; } timer_setup(&sp->alarm_timer, s2io_alarm_handle, 0); @@ -7185,6 +7182,20 @@ static int s2io_card_up(struct s2io_nic *sp) } return 0; + +err_out: + if (config->napi) { + if (config->intr_type == MSI_X) { + for (i = 0; i < sp->config.rx_ring_num; i++) + napi_disable(&sp->mac_control.rings[i].napi); + } else { + napi_disable(&sp->napi); + } + } +err_fill_buff: + s2io_reset(sp); + free_rx_buffers(sp); + return ret; } /** From dd7beaec8b48916acd98d01dd24e75e6fdba3d9e Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 10:54:32 +0800 Subject: [PATCH 47/98] net: mv643xx_eth: disable napi when init rxq or txq failed in mv643xx_eth_open() [ Upstream commit f111606b63ff2282428ffbac0447c871eb957b6c ] When failed to init rxq or txq in mv643xx_eth_open() for opening device, napi isn't disabled. When open mv643xx_eth device next time, it will trigger a BUG_ON() in napi_enable(). Compile tested only. Fixes: 2257e05c1705 ("mv643xx_eth: get rid of receive-side locking") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109025432.80900-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/marvell/mv643xx_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 90e6111ce534..735b76effc49 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2471,6 +2471,7 @@ out_free: for (i = 0; i < mp->rxq_count; i++) rxq_deinit(mp->rxq + i); out: + napi_disable(&mp->napi); free_irq(dev->irq, dev); return err; From 59ec132386a07ac09165f96226fe6fcb44b54b37 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Wed, 9 Nov 2022 12:40:16 +0800 Subject: [PATCH 48/98] ethernet: tundra: free irq when alloc ring failed in tsi108_open() [ Upstream commit acce40037041f97baad18142bb253064491ebde3 ] When alloc tx/rx ring failed in tsi108_open(), it doesn't free irq. Fix it. Fixes: 5e123b844a1c ("[PATCH] Add tsi108/9 On Chip Ethernet device driver support") Signed-off-by: Zhengchao Shao Link: https://lore.kernel.org/r/20221109044016.126866-1-shaozhengchao@huawei.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/ethernet/tundra/tsi108_eth.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index c62f474b6d08..fcebd2418dbd 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1302,12 +1302,15 @@ static int tsi108_open(struct net_device *dev) data->rxring = dma_alloc_coherent(&data->pdev->dev, rxring_size, &data->rxdma, GFP_KERNEL); - if (!data->rxring) + if (!data->rxring) { + free_irq(data->irq_num, dev); return -ENOMEM; + } data->txring = dma_alloc_coherent(&data->pdev->dev, txring_size, &data->txdma, GFP_KERNEL); if (!data->txring) { + free_irq(data->irq_num, dev); dma_free_coherent(&data->pdev->dev, rxring_size, data->rxring, data->rxdma); return -ENOMEM; From 956e0216a19994443c90ba2ea6b0b284c9c4f9cb Mon Sep 17 00:00:00 2001 From: Chuang Wang Date: Wed, 9 Nov 2022 17:07:34 +0800 Subject: [PATCH 49/98] net: macvlan: fix memory leaks of macvlan_common_newlink [ Upstream commit 23569b5652ee8e8e55a12f7835f59af6f3cefc30 ] kmemleak reports memory leaks in macvlan_common_newlink, as follows: ip link add link eth0 name .. type macvlan mode source macaddr add kmemleak reports: unreferenced object 0xffff8880109bb140 (size 64): comm "ip", pid 284, jiffies 4294986150 (age 430.108s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 b8 aa 5a 12 80 88 ff ff ..........Z..... 80 1b fa 0d 80 88 ff ff 1e ff ac af c7 c1 6b 6b ..............kk backtrace: [] kmem_cache_alloc_trace+0x1c7/0x300 [] macvlan_hash_add_source+0x45/0xc0 [] macvlan_changelink_sources+0xd7/0x170 [] macvlan_common_newlink+0x38c/0x5a0 [] macvlan_newlink+0xe/0x20 [] __rtnl_newlink+0x7af/0xa50 [] rtnl_newlink+0x48/0x70 ... In the scenario where the macvlan mode is configured as 'source', macvlan_changelink_sources() will be execured to reconfigure list of remote source mac addresses, at the same time, if register_netdevice() return an error, the resource generated by macvlan_changelink_sources() is not cleaned up. Using this patch, in the case of an error, it will execute macvlan_flush_sources() to ensure that the resource is cleaned up. Fixes: aa5fd0fb7748 ("driver: macvlan: Destroy new macvlan port if macvlan_common_newlink failed.") Signed-off-by: Chuang Wang Link: https://lore.kernel.org/r/20221109090735.690500-1-nashuiliang@gmail.com Signed-off-by: Paolo Abeni Signed-off-by: Sasha Levin --- drivers/net/macvlan.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index c8d803d3616c..6b269a72388b 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1509,8 +1509,10 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(lowerdev)) + if (create && macvlan_port_get_rtnl(lowerdev)) { + macvlan_flush_sources(port, vlan); macvlan_port_destroy(port->dev); + } return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); From e56d18a976dda653194218df6d40d8122c775712 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Sat, 29 Oct 2022 19:34:50 +0800 Subject: [PATCH 50/98] riscv: process: fix kernel info leakage [ Upstream commit 6510c78490c490a6636e48b61eeaa6fb65981f4b ] thread_struct's s[12] may contain random kernel memory content, which may be finally leaked to userspace. This is a security hole. Fix it by clearing the s[12] array in thread_struct when fork. As for kthread case, it's better to clear the s[12] array as well. Fixes: 7db91e57a0ac ("RISC-V: Task implementation") Signed-off-by: Jisheng Zhang Tested-by: Guo Ren Link: https://lore.kernel.org/r/20221029113450.4027-1-jszhang@kernel.org Reviewed-by: Guo Ren Link: https://lore.kernel.org/r/CAJF2gTSdVyAaM12T%2B7kXAdRPGS4VyuO08X1c7paE-n4Fr8OtRA@mail.gmail.com/ Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/process.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index dd5f985b1f40..9a8b2e60adcf 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -111,6 +111,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg, { struct pt_regs *childregs = task_pt_regs(p); + memset(&p->thread.s, 0, sizeof(p->thread.s)); + /* p->thread holds context to be restored by __switch_to() */ if (unlikely(p->flags & PF_KTHREAD)) { /* Kernel thread */ From ecf78af5141f079fffddddf708ffd311699cc131 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 1 Nov 2022 02:29:43 +0800 Subject: [PATCH 51/98] riscv: vdso: fix build with llvm [ Upstream commit 50f4dd657a0fcf90aa8da8dc2794a8100ff4c37c ] Even after commit 89fd4a1df829 ("riscv: jump_label: mark arguments as const to satisfy asm constraints"), building with CC_OPTIMIZE_FOR_SIZE + LLVM=1 can reproduce below build error: CC arch/riscv/kernel/vdso/vgettimeofday.o In file included from :4: In file included from lib/vdso/gettimeofday.c:5: In file included from include/vdso/datapage.h:17: In file included from include/vdso/processor.h:10: In file included from arch/riscv/include/asm/vdso/processor.h:7: In file included from include/linux/jump_label.h:112: arch/riscv/include/asm/jump_label.h:42:3: error: invalid operand for inline asm constraint 'i' " .option push \n\t" ^ 1 error generated. I think the problem is when "-Os" is passed as CFLAGS, it's removed by "CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os" which is introduced in commit e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace"), thus no optimization at all for vgettimeofday.c arm64 does remove "-Os" as well, but it forces "-O2" after removing "-Os". I compared the generated vgettimeofday.o with "-O2" and "-Os", I think no big performance difference. So let's tell the kbuild not to remove "-Os" rather than follow arm64 style. vdso related performance can be improved a lot when building kernel with CC_OPTIMIZE_FOR_SIZE after this commit, ("-Os" VS no optimization) Fixes: e05d57dcb8c7 ("riscv: Fixup __vdso_gettimeofday broke dynamic ftrace") Signed-off-by: Jisheng Zhang Tested-by: Conor Dooley Link: https://lore.kernel.org/r/20221031182943.2453-1-jszhang@kernel.org Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 24d936c147cd..926ab3960f9e 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -30,7 +30,7 @@ obj-y += vdso.o vdso-syms.o CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n From d7716240bca508fb9e74d00d96304bc30acc3d6c Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Sat, 31 Oct 2020 14:01:12 +0800 Subject: [PATCH 52/98] riscv: Enable CMA support [ Upstream commit da815582cf4594e96defa1cddb72cd00b1e7aac5 ] riscv has selected HAVE_DMA_CONTIGUOUS, but doesn't call dma_contiguous_reserve(). This calls dma_contiguous_reserve(), which enables CMA. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt Stable-dep-of: 50e63dd8ed92 ("riscv: fix reserved memory setup") Signed-off-by: Sasha Levin --- arch/riscv/mm/init.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index e8921e78a292..56314e82f051 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -41,13 +42,14 @@ struct pt_alloc_ops { #endif }; +static phys_addr_t dma32_phys_limit __ro_after_init; + static void __init zone_sizes_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, }; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(min(4UL * SZ_1G, - (unsigned long) PFN_PHYS(max_low_pfn))); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; @@ -193,6 +195,7 @@ void __init setup_bootmem(void) max_pfn = PFN_DOWN(dram_end); max_low_pfn = max_pfn; + dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn); #ifdef CONFIG_BLK_DEV_INITRD @@ -206,6 +209,7 @@ void __init setup_bootmem(void) memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); early_init_fdt_scan_reserved_mem(); + dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); memblock_dump_all(); } From 0cf9cb06149398daab565586b56825418567a7e1 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 18 Nov 2020 16:38:27 -0800 Subject: [PATCH 53/98] riscv: Separate memory init from paging init [ Upstream commit cbd34f4bb37d62d8a027f54205bff07e73340da4 ] Currently, we perform some memory init functions in paging init. But, that will be an issue for NUMA support where DT needs to be flattened before numa initialization and memblock_present can only be called after numa initialization. Move memory initialization related functions to a separate function. Signed-off-by: Atish Patra Reviewed-by: Greentime Hu Reviewed-by: Anup Patel Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt Stable-dep-of: 50e63dd8ed92 ("riscv: fix reserved memory setup") Signed-off-by: Sasha Levin --- arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/init.c | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 73e8b5e5bb65..b16304fdf448 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -470,6 +470,7 @@ extern void *dtb_early_va; extern uintptr_t dtb_early_pa; void setup_bootmem(void); void paging_init(void); +void misc_mem_init(void); #define FIRST_USER_ADDRESS 0 diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index cc85858f7fe8..57e1ab036edf 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -96,6 +96,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + misc_mem_init(); #ifdef CONFIG_SWIOTLB swiotlb_init(1); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 56314e82f051..b6ab6a18dc1a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -669,8 +669,12 @@ static void __init resource_init(void) void __init paging_init(void) { setup_vm_final(); - sparse_init(); setup_zero_page(); +} + +void __init misc_mem_init(void) +{ + sparse_init(); zone_sizes_init(); resource_init(); } From 94ab8f88feb75e3b1486102c0c9c550f37d9d137 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Mon, 7 Nov 2022 15:15:25 +0000 Subject: [PATCH 54/98] riscv: fix reserved memory setup [ Upstream commit 50e63dd8ed92045eb70a72d7ec725488320fb68b ] Currently, RISC-V sets up reserved memory using the "early" copy of the device tree. As a result, when trying to get a reserved memory region using of_reserved_mem_lookup(), the pointer to reserved memory regions is using the early, pre-virtual-memory address which causes a kernel panic when trying to use the buffer's name: Unable to handle kernel paging request at virtual address 00000000401c31ac Oops [#1] Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 6.0.0-rc1-00001-g0d9d6953d834 #1 Hardware name: Microchip PolarFire-SoC Icicle Kit (DT) epc : string+0x4a/0xea ra : vsnprintf+0x1e4/0x336 epc : ffffffff80335ea0 ra : ffffffff80338936 sp : ffffffff81203be0 gp : ffffffff812e0a98 tp : ffffffff8120de40 t0 : 0000000000000000 t1 : ffffffff81203e28 t2 : 7265736572203a46 s0 : ffffffff81203c20 s1 : ffffffff81203e28 a0 : ffffffff81203d22 a1 : 0000000000000000 a2 : ffffffff81203d08 a3 : 0000000081203d21 a4 : ffffffffffffffff a5 : 00000000401c31ac a6 : ffff0a00ffffff04 a7 : ffffffffffffffff s2 : ffffffff81203d08 s3 : ffffffff81203d00 s4 : 0000000000000008 s5 : ffffffff000000ff s6 : 0000000000ffffff s7 : 00000000ffffff00 s8 : ffffffff80d9821a s9 : ffffffff81203d22 s10: 0000000000000002 s11: ffffffff80d9821c t3 : ffffffff812f3617 t4 : ffffffff812f3617 t5 : ffffffff812f3618 t6 : ffffffff81203d08 status: 0000000200000100 badaddr: 00000000401c31ac cause: 000000000000000d [] vsnprintf+0x1e4/0x336 [] vprintk_store+0xf6/0x344 [] vprintk_emit+0x56/0x192 [] vprintk_default+0x16/0x1e [] vprintk+0x72/0x80 [] _printk+0x36/0x50 [] print_reserved_mem+0x1c/0x24 [] paging_init+0x528/0x5bc [] setup_arch+0xd0/0x592 [] start_kernel+0x82/0x73c early_init_fdt_scan_reserved_mem() takes no arguments as it operates on initial_boot_params, which is populated by early_init_dt_verify(). On RISC-V, early_init_dt_verify() is called twice. Once, directly, in setup_arch() if CONFIG_BUILTIN_DTB is not enabled and once indirectly, very early in the boot process, by parse_dtb() when it calls early_init_dt_scan_nodes(). This first call uses dtb_early_va to set initial_boot_params, which is not usable later in the boot process when early_init_fdt_scan_reserved_mem() is called. On arm64 for example, the corresponding call to early_init_dt_scan_nodes() uses fixmap addresses and doesn't suffer the same fate. Move early_init_fdt_scan_reserved_mem() further along the boot sequence, after the direct call to early_init_dt_verify() in setup_arch() so that the names use the correct virtual memory addresses. The above supposed that CONFIG_BUILTIN_DTB was not set, but should work equally in the case where it is - unflatted_and_copy_device_tree() also updates initial_boot_params. Reported-by: Valentina Fernandez Reported-by: Evgenii Shatokhin Link: https://lore.kernel.org/linux-riscv/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/ Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob") Signed-off-by: Conor Dooley Tested-by: Evgenii Shatokhin Link: https://lore.kernel.org/r/20221107151524.3941467-1-conor.dooley@microchip.com Signed-off-by: Palmer Dabbelt Signed-off-by: Sasha Levin --- arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/init.c | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 57e1ab036edf..8e78a8ab6a34 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -96,6 +96,7 @@ void __init setup_arch(char **cmdline_p) else pr_err("No DTB found in kernel mappings\n"); #endif + early_init_fdt_scan_reserved_mem(); misc_mem_init(); #ifdef CONFIG_SWIOTLB diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index b6ab6a18dc1a..6c2f38aac544 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -208,7 +208,6 @@ void __init setup_bootmem(void) */ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va)); - early_init_fdt_scan_reserved_mem(); dma_contiguous_reserve(dma32_phys_limit); memblock_allow_resize(); memblock_dump_all(); From 475fd3991a0d98e7b43b51e64b1eb92d0a885430 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 6 Nov 2022 15:53:54 +0100 Subject: [PATCH 55/98] arm64: efi: Fix handling of misaligned runtime regions and drop warning commit 9b9eaee9828fe98b030cf43ac50065a54a2f5d52 upstream. Currently, when mapping the EFI runtime regions in the EFI page tables, we complain about misaligned regions in a rather noisy way, using WARN(). Not only does this produce a lot of irrelevant clutter in the log, it is factually incorrect, as misaligned runtime regions are actually allowed by the EFI spec as long as they don't require conflicting memory types within the same 64k page. So let's drop the warning, and tweak the code so that we - take both the start and end of the region into account when checking for misalignment - only revert to RWX mappings for non-code regions if misaligned code regions are also known to exist. Cc: Acked-by: Linus Torvalds Signed-off-by: Ard Biesheuvel Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/efi.c | 52 +++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index fa02efb28e88..c5685179db5a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -12,6 +12,14 @@ #include +static bool region_is_misaligned(const efi_memory_desc_t *md) +{ + if (PAGE_SIZE == EFI_PAGE_SIZE) + return false; + return !PAGE_ALIGNED(md->phys_addr) || + !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT); +} + /* * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be * executable, everything else can be mapped with the XN bits @@ -25,14 +33,22 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) if (type == EFI_MEMORY_MAPPED_IO) return PROT_DEVICE_nGnRE; - if (WARN_ONCE(!PAGE_ALIGNED(md->phys_addr), - "UEFI Runtime regions are not aligned to 64 KB -- buggy firmware?")) + if (region_is_misaligned(md)) { + static bool __initdata code_is_misaligned; + /* - * If the region is not aligned to the page size of the OS, we - * can not use strict permissions, since that would also affect - * the mapping attributes of the adjacent regions. + * Regions that are not aligned to the OS page size cannot be + * mapped with strict permissions, as those might interfere + * with the permissions that are needed by the adjacent + * region's mapping. However, if we haven't encountered any + * misaligned runtime code regions so far, we can safely use + * non-executable permissions for non-code regions. */ - return pgprot_val(PAGE_KERNEL_EXEC); + code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE); + + return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC) + : pgprot_val(PAGE_KERNEL); + } /* R-- */ if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == @@ -62,19 +78,16 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE || md->type == EFI_RUNTIME_SERVICES_DATA); - if (!PAGE_ALIGNED(md->phys_addr) || - !PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT)) { - /* - * If the end address of this region is not aligned to page - * size, the mapping is rounded up, and may end up sharing a - * page frame with the next UEFI memory region. If we create - * a block entry now, we may need to split it again when mapping - * the next region, and support for that is going to be removed - * from the MMU routines. So avoid block mappings altogether in - * that case. - */ + /* + * If this region is not aligned to the page size used by the OS, the + * mapping will be rounded outwards, and may end up sharing a page + * frame with an adjacent runtime memory region. Given that the page + * table descriptor covering the shared page will be rewritten when the + * adjacent region gets mapped, we must avoid block mappings here so we + * don't have to worry about splitting them when that happens. + */ + if (region_is_misaligned(md)) page_mappings_only = true; - } create_pgd_mapping(mm, md->phys_addr, md->virt_addr, md->num_pages << EFI_PAGE_SHIFT, @@ -101,6 +114,9 @@ int __init efi_set_mapping_permissions(struct mm_struct *mm, BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && md->type != EFI_RUNTIME_SERVICES_DATA); + if (region_is_misaligned(md)) + return 0; + /* * Calling apply_to_page_range() is only safe on regions that are * guaranteed to be mapped down to pages. Since we are only called From 4631cb040645eff4626d02a2a318863a3e53e2af Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Thu, 3 Nov 2022 15:10:53 +0000 Subject: [PATCH 56/98] MIPS: jump_label: Fix compat branch range check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 64ac0befe75bdfaffc396c2b4a0ed5ae6920eeee upstream. Cast upper bound of branch range to long to do signed compare, avoid negative offset trigger this warning. Fixes: 9b6584e35f40 ("MIPS: jump_label: Use compact branches for >= r6") Signed-off-by: Jiaxun Yang Cc: stable@vger.kernel.org Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Thomas Bogendoerfer Signed-off-by: Greg Kroah-Hartman --- arch/mips/kernel/jump_label.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 662c8db9f45b..9f5b1247b4ba 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -56,7 +56,7 @@ void arch_jump_label_transform(struct jump_entry *e, * The branch offset must fit in the instruction's 26 * bit field. */ - WARN_ON((offset >= BIT(25)) || + WARN_ON((offset >= (long)BIT(25)) || (offset < -(long)BIT(25))); insn.j_format.opcode = bc6_op; From b55e64d0a3a3341d0a416478fa367b74a98cf821 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:03 -0700 Subject: [PATCH 57/98] mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI commit ebb5fd38f41132e6924cb33b647337f4a5d5360c upstream. Several SDHCI drivers need to deactivate command queueing in their reset hook (see sdhci_cqhci_reset() / sdhci-pci-core.c, for example), and several more are coming. Those reset implementations have some small subtleties (e.g., ordering of initialization of SDHCI vs. CQHCI might leave us resetting with a NULL ->cqe_private), and are often identical across different host drivers. We also don't want to force a dependency between SDHCI and CQHCI, or vice versa; non-SDHCI drivers use CQHCI, and SDHCI drivers might support command queueing through some other means. So, implement a small helper, to avoid repeating the same mistakes in different drivers. Simply stick it in a header, because it's so small it doesn't deserve its own module right now, and inlining to each driver is pretty reasonable. This is marked for -stable, as it is an important prerequisite patch for several SDHCI controller bugfixes that follow. Cc: Signed-off-by: Brian Norris Acked-by: Adrian Hunter Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20221026124150.v4.1.Ie85faa09432bfe1b0890d8c24ff95e17f3097317@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-cqhci.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 drivers/mmc/host/sdhci-cqhci.h diff --git a/drivers/mmc/host/sdhci-cqhci.h b/drivers/mmc/host/sdhci-cqhci.h new file mode 100644 index 000000000000..cf8e7ba71bbd --- /dev/null +++ b/drivers/mmc/host/sdhci-cqhci.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022 The Chromium OS Authors + * + * Support that applies to the combination of SDHCI and CQHCI, while not + * expressing a dependency between the two modules. + */ + +#ifndef __MMC_HOST_SDHCI_CQHCI_H__ +#define __MMC_HOST_SDHCI_CQHCI_H__ + +#include "cqhci.h" +#include "sdhci.h" + +static inline void sdhci_and_cqhci_reset(struct sdhci_host *host, u8 mask) +{ + if ((host->mmc->caps2 & MMC_CAP2_CQE) && (mask & SDHCI_RESET_ALL) && + host->mmc->cqe_private) + cqhci_deactivate(host->mmc); + + sdhci_reset(host, mask); +} + +#endif /* __MMC_HOST_SDHCI_CQHCI_H__ */ From 3f558930add76be2a0e23991256c6a7eb46b4131 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:04 -0700 Subject: [PATCH 58/98] mmc: sdhci-of-arasan: Fix SDHCI_RESET_ALL for CQHCI commit 5d249ac37fc2396e8acc1adb0650cdacae5a990d upstream. SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but one particular case I hit commonly enough: mmc_suspend() -> mmc_power_off(). Typically we will eventually deactivate CQE (cqhci_suspend() -> cqhci_deactivate()), but that's not guaranteed -- in particular, if we perform a partial (e.g., interrupted) system suspend. The same bug was already found and fixed for two other drivers, in v5.7 and v5.9: 5cf583f1fb9c ("mmc: sdhci-msm: Deactivate CQE during SDHC reset") df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers") The latter is especially prescient, saying "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." So like these other patches, deactivate CQHCI when resetting the controller. Do this via the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: 84362d79f436 ("mmc: sdhci-of-arasan: Add CQHCI support for arasan,sdhci-5.1") Cc: Signed-off-by: Brian Norris Reviewed-by: Guenter Roeck Acked-by: Adrian Hunter Link: https://lore.kernel.org/r/20221026124150.v4.2.I29f6a2189e84e35ad89c1833793dca9e36c64297@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-of-arasan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c index fc38db64a6b4..9da49dc15248 100644 --- a/drivers/mmc/host/sdhci-of-arasan.c +++ b/drivers/mmc/host/sdhci-of-arasan.c @@ -25,6 +25,7 @@ #include #include "cqhci.h" +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #define SDHCI_ARASAN_VENDOR_REGISTER 0x78 @@ -359,7 +360,7 @@ static void sdhci_arasan_reset(struct sdhci_host *host, u8 mask) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_arasan_data *sdhci_arasan = sdhci_pltfm_priv(pltfm_host); - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (sdhci_arasan->quirks & SDHCI_ARASAN_QUIRK_FORCE_CDTEST) { ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); From 0a8d4531a0d58fa4b118f657ee38024154d4385a Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:08 -0700 Subject: [PATCH 59/98] mmc: sdhci_am654: Fix SDHCI_RESET_ALL for CQHCI commit 162503fd1c3a1d4e14dbe7f399c1d1bec1c8abbc upstream. [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: f545702b74f9 ("mmc: sdhci_am654: Add Support for Command Queuing Engine to J721E") Signed-off-by: Brian Norris Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.6.I35ca9d6220ba48304438b992a76647ca8e5b126f@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci_am654.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c index 7cab9d831afb..24cd6d3dc647 100644 --- a/drivers/mmc/host/sdhci_am654.c +++ b/drivers/mmc/host/sdhci_am654.c @@ -15,6 +15,7 @@ #include #include "cqhci.h" +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" /* CTL_CFG Registers */ @@ -378,7 +379,7 @@ static void sdhci_am654_reset(struct sdhci_host *host, u8 mask) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_am654_data *sdhci_am654 = sdhci_pltfm_priv(pltfm_host); - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (sdhci_am654->quirks & SDHCI_AM654_QUIRK_FORCE_CDTEST) { ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); @@ -464,7 +465,7 @@ static struct sdhci_ops sdhci_am654_ops = { .set_clock = sdhci_am654_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_and_cqhci_reset, }; static const struct sdhci_pltfm_data sdhci_am654_pdata = { @@ -494,7 +495,7 @@ static struct sdhci_ops sdhci_j721e_8bit_ops = { .set_clock = sdhci_am654_set_clock, .write_b = sdhci_am654_write_b, .irq = sdhci_am654_cqhci_irq, - .reset = sdhci_reset, + .reset = sdhci_and_cqhci_reset, }; static const struct sdhci_pltfm_data sdhci_j721e_8bit_pdata = { From ea6787e482add0093b6049f9e29e6b05ea88e4ff Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:07 -0700 Subject: [PATCH 60/98] mmc: sdhci-tegra: Fix SDHCI_RESET_ALL for CQHCI commit 836078449464e6af3b66ae6652dae79af176f21e upstream. [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: 3c4019f97978 ("mmc: tegra: HW Command Queue Support for Tegra SDMMC") Signed-off-by: Brian Norris Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.5.I418c9eaaf754880fcd2698113e8c3ef821a944d7@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-tegra.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 67211fc42d24..d8fd2b5efd38 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -24,6 +24,7 @@ #include #include +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #include "cqhci.h" @@ -361,7 +362,7 @@ static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data; u32 misc_ctrl, clk_ctrl, pad_ctrl; - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); if (!(mask & SDHCI_RESET_ALL)) return; From 181cfff57bdca50850eb8f841c5b0338b5b6d30d Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Tue, 8 Nov 2022 16:47:46 +0800 Subject: [PATCH 61/98] ALSA: hda/hdmi - enable runtime pm for more AMD display audio commit fdcc4c22b7ab20e90b97f8bc6225d876b72b8f16 upstream. We are able to power down the GPU and audio via the GPU driver so flag these asics as supporting runtime pm. Signed-off-by: Evan Quan Cc: Link: https://lore.kernel.org/r/20221108084746.583058-1-evan.quan@amd.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/hda_intel.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 26dfa8558792..494bfd2135a9 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2749,6 +2749,9 @@ static const struct pci_device_id azx_ids[] = { { PCI_DEVICE(0x1002, 0xab28), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, + { PCI_DEVICE(0x1002, 0xab30), + .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | + AZX_DCAPS_PM_RUNTIME }, { PCI_DEVICE(0x1002, 0xab38), .driver_data = AZX_DRIVER_ATIHDMI_NS | AZX_DCAPS_PRESET_ATI_HDMI_NS | AZX_DCAPS_PM_RUNTIME }, From 380d64168da41b17dc4faaf5c5e780793566ca09 Mon Sep 17 00:00:00 2001 From: Xian Wang Date: Fri, 4 Nov 2022 13:29:13 -0700 Subject: [PATCH 62/98] ALSA: hda/ca0132: add quirk for EVGA Z390 DARK commit 0c423e2ffa7edd3f8f9bcf17ce73fa9c7509b99e upstream. The Z390 DARK mainboard uses a CA0132 audio controller. The quirk is needed to enable surround sound and 3.5mm headphone jack handling in the front audio connector as well as in the rear of the board when in stereo mode. Page 97 of the linked manual contains instructions to setup the controller. Signed-off-by: Xian Wang Cc: stable@vger.kernel.org Link: https://www.evga.com/support/manuals/files/131-CS-E399.pdf Link: https://lore.kernel.org/r/20221104202913.13904-1-dev@xianwang.io Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_ca0132.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index f774b2ac9720..82f14c3f642b 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1272,6 +1272,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1458, 0xA026, "Gigabyte G1.Sniper Z97", QUIRK_R3DI), SND_PCI_QUIRK(0x1458, 0xA036, "Gigabyte GA-Z170X-Gaming 7", QUIRK_R3DI), SND_PCI_QUIRK(0x3842, 0x1038, "EVGA X99 Classified", QUIRK_R3DI), + SND_PCI_QUIRK(0x3842, 0x1055, "EVGA Z390 DARK", QUIRK_R3DI), SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5), From 3a79f9568de08657fcdbc41d6fc4c0ca145a7a2b Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 10 Nov 2022 22:45:39 +0800 Subject: [PATCH 63/98] ALSA: hda: fix potential memleak in 'add_widget_node' commit 9a5523f72bd2b0d66eef3d58810c6eb7b5ffc143 upstream. As 'kobject_add' may allocated memory for 'kobject->name' when return error. And in this function, if call 'kobject_add' failed didn't free kobject. So call 'kobject_put' to recycling resources. Signed-off-by: Ye Bin Cc: Link: https://lore.kernel.org/r/20221110144539.2989354-1-yebin@huaweicloud.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/hda/hdac_sysfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/hda/hdac_sysfs.c b/sound/hda/hdac_sysfs.c index e56e83325903..bcf302f5115a 100644 --- a/sound/hda/hdac_sysfs.c +++ b/sound/hda/hdac_sysfs.c @@ -346,8 +346,10 @@ static int add_widget_node(struct kobject *parent, hda_nid_t nid, return -ENOMEM; kobject_init(kobj, &widget_ktype); err = kobject_add(kobj, parent, "%02x", nid); - if (err < 0) + if (err < 0) { + kobject_put(kobj); return err; + } err = sysfs_create_group(kobj, group); if (err < 0) { kobject_put(kobj); From a414a6d6ef3ca3ba68a75d7d03b1e0e731e00aa4 Mon Sep 17 00:00:00 2001 From: Edson Juliano Drosdeck Date: Wed, 9 Nov 2022 13:17:32 -0400 Subject: [PATCH 64/98] ALSA: hda/realtek: Add Positivo C6300 model quirk commit 79e28f2ab3440e08f5fbf65648b008341c37b496 upstream. Positivo Master C6300 (1849:a233) require quirk for anabling headset-mic Signed-off-by: Edson Juliano Drosdeck Cc: Link: https://lore.kernel.org/r/20221109171732.5417-1-edson.drosdeck@gmail.com Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 60e3bc124836..e3f6b930ad4a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -9161,6 +9161,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), + SND_PCI_QUIRK(0x1849, 0xa233, "Positivo Master C6300", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), SND_PCI_QUIRK(0x19e5, 0x320f, "Huawei WRT-WX9 ", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20), From 2032c2d32b2afd030343cde491d23224421d5f4b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 8 Nov 2022 15:07:21 +0100 Subject: [PATCH 65/98] ALSA: usb-audio: Add quirk entry for M-Audio Micro commit 2f01a612d4758b45f775dbb88a49cf534ba47275 upstream. M-Audio Micro (0762:201a) defines the descriptor as vendor-specific, while the content seems class-compliant. Just overriding the probe makes the device working. Reported-by: Ash Logan Cc: Link: https://lore.kernel.org/r/7ecd4417-d860-4773-c1c1-b07433342390@heyquark.com Link: https://lore.kernel.org/r/20221108140721.24248-1-tiwai@suse.de Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks-table.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index a51591f68ae6..6a78813b63f5 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2028,6 +2028,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + /* M-Audio Micro */ + USB_DEVICE_VENDOR_SPEC(0x0763, 0x201a), +}, { USB_DEVICE_VENDOR_SPEC(0x0763, 0x2030), .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { From f6896fb69d50909170a3812f71808b682c66d82b Mon Sep 17 00:00:00 2001 From: Jussi Laako Date: Wed, 9 Nov 2022 00:12:41 +0200 Subject: [PATCH 66/98] ALSA: usb-audio: Add DSD support for Accuphase DAC-60 commit 8cbd4725ffff3eface1f5f3397af02acad5b2831 upstream. Accuphase DAC-60 option card supports native DSD up to DSD256, but doesn't have support for auto-detection. Explicitly enable DSD support for the correct altsetting. Signed-off-by: Jussi Laako Cc: Link: https://lore.kernel.org/r/20221108221241.1220878-1-jussi@sonarnerd.net Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 04a691bc560c..752422147fb3 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1744,6 +1744,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, /* XMOS based USB DACs */ switch (chip->usb_id) { case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */ + case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */ case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */ case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */ if (fp->altsetting == 2) From 516f9f23008b1e3033df7944f5d616b95076acd3 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Nov 2022 10:49:34 -0700 Subject: [PATCH 67/98] vmlinux.lds.h: Fix placement of '.data..decrypted' section commit 000f8870a47bdc36730357883b6aef42bced91ee upstream. Commit d4c639990036 ("vmlinux.lds.h: Avoid orphan section with !SMP") fixed an orphan section warning by adding the '.data..decrypted' section to the linker script under the PERCPU_DECRYPTED_SECTION define but that placement introduced a panic with !SMP, as the percpu sections are not instantiated with that configuration so attempting to access variables defined with DEFINE_PER_CPU_DECRYPTED() will result in a page fault. Move the '.data..decrypted' section to the DATA_MAIN define so that the variables in it are properly instantiated at boot time with CONFIG_SMP=n. Cc: stable@vger.kernel.org Fixes: d4c639990036 ("vmlinux.lds.h: Avoid orphan section with !SMP") Link: https://lore.kernel.org/cbbd3548-880c-d2ca-1b67-5bb93b291d5f@huawei.com/ Debugged-by: Ard Biesheuvel Reported-by: Zhao Wenhui Tested-by: xiafukun Signed-off-by: Nathan Chancellor Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20221108174934.3384275-1-nathan@kernel.org Signed-off-by: Greg Kroah-Hartman --- include/asm-generic/vmlinux.lds.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index a774361f28d4..d233f9e4b9c6 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -328,6 +328,7 @@ #define DATA_DATA \ *(.xiptext) \ *(DATA_MAIN) \ + *(.data..decrypted) \ *(.ref.data) \ *(.data..shared_aligned) /* percpu related */ \ MEM_KEEP(init.data*) \ @@ -972,7 +973,6 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT #define PERCPU_DECRYPTED_SECTION \ . = ALIGN(PAGE_SIZE); \ - *(.data..decrypted) \ *(.data..percpu..decrypted) \ . = ALIGN(PAGE_SIZE); #else From 344ddbd688d869aff2831e1345755d1b163000f4 Mon Sep 17 00:00:00 2001 From: Shin'ichiro Kawasaki Date: Mon, 7 Nov 2022 13:02:29 +0900 Subject: [PATCH 68/98] ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure commit ea045fd344cb15c164e9ffc8b8cffb6883df8475 upstream. SAT SCSI/ATA Translation specification requires SCSI SYNCHRONIZE CACHE (10) and (16) commands both shall be translated to ATA flush command. Also, ZBC Zoned Block Commands specification mandates SYNCHRONIZE CACHE (16) command support. However, libata translates only SYNCHRONIZE CACHE (10). This results in SYNCHRONIZE CACHE (16) command failures on SATA drives and then libata translation does not conform to ZBC. To avoid the failure, add support for SYNCHRONIZE CACHE (16). Signed-off-by: Shin'ichiro Kawasaki Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Damien Le Moal Signed-off-by: Greg Kroah-Hartman --- drivers/ata/libata-scsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index a0e788b64821..459ece666c62 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -3303,6 +3303,7 @@ static unsigned int ata_scsiop_maint_in(struct ata_scsi_args *args, u8 *rbuf) case REPORT_LUNS: case REQUEST_SENSE: case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: case REZERO_UNIT: case SEEK_6: case SEEK_10: @@ -3969,6 +3970,7 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) return ata_scsi_write_same_xlat; case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: if (ata_try_flush_cache(dev)) return ata_scsi_flush_xlat; break; @@ -4215,6 +4217,7 @@ void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd) * turning this into a no-op. */ case SYNCHRONIZE_CACHE: + case SYNCHRONIZE_CACHE_16: fallthrough; /* no-op's, complete with success */ From 1d4ff73062096c21b47954d2996b4df259777bda Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 29 Oct 2022 13:49:12 +0900 Subject: [PATCH 69/98] nilfs2: fix deadlock in nilfs_count_free_blocks() commit 8ac932a4921a96ca52f61935dbba64ea87bbd5dc upstream. A semaphore deadlock can occur if nilfs_get_block() detects metadata corruption while locating data blocks and a superblock writeback occurs at the same time: task 1 task 2 ------ ------ * A file operation * nilfs_truncate() nilfs_get_block() down_read(rwsem A) <-- nilfs_bmap_lookup_contig() ... generic_shutdown_super() nilfs_put_super() * Prepare to write superblock * down_write(rwsem B) <-- nilfs_cleanup_super() * Detect b-tree corruption * nilfs_set_log_cursor() nilfs_bmap_convert_error() nilfs_count_free_blocks() __nilfs_error() down_read(rwsem A) <-- nilfs_set_error() down_write(rwsem B) <-- *** DEADLOCK *** Here, nilfs_get_block() readlocks rwsem A (= NILFS_MDT(dat_inode)->mi_sem) and then calls nilfs_bmap_lookup_contig(), but if it fails due to metadata corruption, __nilfs_error() is called from nilfs_bmap_convert_error() inside the lock section. Since __nilfs_error() calls nilfs_set_error() unless the filesystem is read-only and nilfs_set_error() attempts to writelock rwsem B (= nilfs->ns_sem) to write back superblock exclusively, hierarchical lock acquisition occurs in the order rwsem A -> rwsem B. Now, if another task starts updating the superblock, it may writelock rwsem B during the lock sequence above, and can deadlock trying to readlock rwsem A in nilfs_count_free_blocks(). However, there is actually no need to take rwsem A in nilfs_count_free_blocks() because it, within the lock section, only reads a single integer data on a shared struct with nilfs_sufile_get_ncleansegs(). This has been the case after commit aa474a220180 ("nilfs2: add local variable to cache the number of clean segments"), that is, even before this bug was introduced. So, this resolves the deadlock problem by just not taking the semaphore in nilfs_count_free_blocks(). Link: https://lkml.kernel.org/r/20221029044912.9139-1-konishi.ryusuke@gmail.com Fixes: e828949e5b42 ("nilfs2: call nilfs_error inside bmap routines") Signed-off-by: Ryusuke Konishi Reported-by: syzbot+45d6ce7b7ad7ef455d03@syzkaller.appspotmail.com Tested-by: Ryusuke Konishi Cc: [2.6.38+ Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/the_nilfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index c20ebecd7bc2..ce103dd39b89 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -690,9 +690,7 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { unsigned long ncleansegs; - down_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); - up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; return 0; } From 4feedde5486c07ea79787839153a71ca71329c7d Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 4 Nov 2022 23:29:59 +0900 Subject: [PATCH 70/98] nilfs2: fix use-after-free bug of ns_writer on remount commit 8cccf05fe857a18ee26e20d11a8455a73ffd4efd upstream. If a nilfs2 filesystem is downgraded to read-only due to metadata corruption on disk and is remounted read/write, or if emergency read-only remount is performed, detaching a log writer and synchronizing the filesystem can be done at the same time. In these cases, use-after-free of the log writer (hereinafter nilfs->ns_writer) can happen as shown in the scenario below: Task1 Task2 -------------------------------- ------------------------------ nilfs_construct_segment nilfs_segctor_sync init_wait init_waitqueue_entry add_wait_queue schedule nilfs_remount (R/W remount case) nilfs_attach_log_writer nilfs_detach_log_writer nilfs_segctor_destroy kfree finish_wait _raw_spin_lock_irqsave __raw_spin_lock_irqsave do_raw_spin_lock debug_spin_lock_before <-- use-after-free While Task1 is sleeping, nilfs->ns_writer is freed by Task2. After Task1 waked up, Task1 accesses nilfs->ns_writer which is already freed. This scenario diagram is based on the Shigeru Yoshida's post [1]. This patch fixes the issue by not detaching nilfs->ns_writer on remount so that this UAF race doesn't happen. Along with this change, this patch also inserts a few necessary read-only checks with superblock instance where only the ns_writer pointer was used to check if the filesystem is read-only. Link: https://syzkaller.appspot.com/bug?id=79a4c002e960419ca173d55e863bd09e8112df8b Link: https://lkml.kernel.org/r/20221103141759.1836312-1-syoshida@redhat.com [1] Link: https://lkml.kernel.org/r/20221104142959.28296-1-konishi.ryusuke@gmail.com Signed-off-by: Ryusuke Konishi Reported-by: syzbot+f816fa82f8783f7a02bb@syzkaller.appspotmail.com Reported-by: Shigeru Yoshida Tested-by: Ryusuke Konishi Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- fs/nilfs2/segment.c | 15 ++++++++------- fs/nilfs2/super.c | 2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 545f764d70b1..5ee4973525f0 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -322,7 +322,7 @@ void nilfs_relax_pressure_in_lock(struct super_block *sb) struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_sc_info *sci = nilfs->ns_writer; - if (!sci || !sci->sc_flush_request) + if (sb_rdonly(sb) || unlikely(!sci) || !sci->sc_flush_request) return; set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags); @@ -2248,7 +2248,7 @@ int nilfs_construct_segment(struct super_block *sb) struct nilfs_transaction_info *ti; int err; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; /* A call inside transactions causes a deadlock. */ @@ -2287,7 +2287,7 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, struct nilfs_transaction_info ti; int err = 0; - if (!sci) + if (sb_rdonly(sb) || unlikely(!sci)) return -EROFS; nilfs_transaction_lock(sb, &ti, 0); @@ -2783,11 +2783,12 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) if (nilfs->ns_writer) { /* - * This happens if the filesystem was remounted - * read/write after nilfs_error degenerated it into a - * read-only mount. + * This happens if the filesystem is made read-only by + * __nilfs_error or nilfs_remount and then remounted + * read/write. In these cases, reuse the existing + * writer. */ - nilfs_detach_log_writer(sb); + return 0; } nilfs->ns_writer = nilfs_segctor_new(sb, root); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index b9d30e8c43b0..7a41c9727c9e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1133,8 +1133,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) goto out; if (*flags & SB_RDONLY) { - /* Shutting down log writer */ - nilfs_detach_log_writer(sb); sb->s_flags |= SB_RDONLY; /* From b5ee579fcb147c0939017eb78b6c75f7d79a6719 Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 28 Oct 2022 16:50:26 +0100 Subject: [PATCH 71/98] drm/i915/dmabuf: fix sg_table handling in map_dma_buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f90daa975911961b65070ec72bd7dd8d448f9ef7 upstream. We need to iterate over the original entries here for the sg_table, pulling out the struct page for each one, to be remapped. However currently this incorrectly iterates over the final dma mapped entries, which is likely just one gigantic sg entry if the iommu is enabled, leading to us only mapping the first struct page (and any physically contiguous pages following it), even if there is potentially lots more data to follow. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7306 Fixes: 1286ff739773 ("i915: add dmabuf/prime buffer sharing support.") Signed-off-by: Matthew Auld Cc: Lionel Landwerlin Cc: Tvrtko Ursulin Cc: Ville Syrjälä Cc: Michael J. Ruhl Cc: # v3.5+ Reviewed-by: Michael J. Ruhl Link: https://patchwork.freedesktop.org/patch/msgid/20221028155029.494736-1-matthew.auld@intel.com (cherry picked from commit 28d52f99bbca7227008cf580c9194c9b3516968e) Signed-off-by: Tvrtko Ursulin Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c index 8dd295dbe241..dd35d3d7ad03 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c @@ -36,13 +36,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme goto err_unpin_pages; } - ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); + ret = sg_alloc_table(st, obj->mm.pages->orig_nents, GFP_KERNEL); if (ret) goto err_free; src = obj->mm.pages->sgl; dst = st->sgl; - for (i = 0; i < obj->mm.pages->nents; i++) { + for (i = 0; i < obj->mm.pages->orig_nents; i++) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); src = sg_next(src); From 8fa0c22ef8242f2fbe722f2f1ac39df6a95faa73 Mon Sep 17 00:00:00 2001 From: Jorge Lopez Date: Fri, 28 Oct 2022 10:55:27 -0500 Subject: [PATCH 72/98] platform/x86: hp_wmi: Fix rfkill causing soft blocked wifi commit 1598bfa8e1faa932de42e1ee7628a1c4c4263f0a upstream. After upgrading BIOS to U82 01.02.01 Rev.A, the console is flooded strange char "^@" which printed out every second and makes login nearly impossible. Also the below messages were shown both in console and journal/dmesg every second: usb 1-3: Device not responding to setup address. usb 1-3: device not accepting address 4, error -71 usb 1-3: device descriptor read/all, error -71 usb usb1-port3: unable to enumerate USB device Wifi is soft blocked by checking rfkill. When unblocked manually, after few seconds it would be soft blocked again. So I was suspecting something triggered rfkill to soft block wifi. At the end it was fixed by removing hp_wmi module. The root cause is the way hp-wmi driver handles command 1B on post-2009 BIOS. In pre-2009 BIOS, command 1Bh return 0x4 to indicate that BIOS no longer controls the power for the wireless devices. Signed-off-by: Jorge Lopez Link: https://bugzilla.kernel.org/show_bug.cgi?id=216468 Reviewed-by: Mario Limonciello Link: https://lore.kernel.org/r/20221028155527.7724-1-jorge.lopez2@hp.com Cc: stable@vger.kernel.org Reviewed-by: Hans de Goede Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/platform/x86/hp-wmi.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 012639f6d335..519b2ab84a63 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -900,8 +900,16 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) wwan_rfkill = NULL; rfkill2_count = 0; - if (hp_wmi_rfkill_setup(device)) - hp_wmi_rfkill2_setup(device); + /* + * In pre-2009 BIOS, command 1Bh return 0x4 to indicate that + * BIOS no longer controls the power for the wireless + * devices. All features supported by this command will no + * longer be supported. + */ + if (!hp_wmi_bios_2009_later()) { + if (hp_wmi_rfkill_setup(device)) + hp_wmi_rfkill2_setup(device); + } thermal_policy_setup(device); From 9c0accfa5a358b89afabd4d4e82ca6eaca06e72c Mon Sep 17 00:00:00 2001 From: Zhang Xiaoxu Date: Tue, 1 Nov 2022 10:53:54 +0800 Subject: [PATCH 73/98] btrfs: selftests: fix wrong error check in btrfs_free_dummy_root() commit 9b2f20344d450137d015b380ff0c2e2a6a170135 upstream. The btrfs_alloc_dummy_root() uses ERR_PTR as the error return value rather than NULL, if error happened, there will be a NULL pointer dereference: BUG: KASAN: null-ptr-deref in btrfs_free_dummy_root+0x21/0x50 [btrfs] Read of size 8 at addr 000000000000002c by task insmod/258926 CPU: 2 PID: 258926 Comm: insmod Tainted: G W 6.1.0-rc2+ #5 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014 Call Trace: dump_stack_lvl+0x34/0x44 kasan_report+0xb7/0x140 kasan_check_range+0x145/0x1a0 btrfs_free_dummy_root+0x21/0x50 [btrfs] btrfs_test_free_space_cache+0x1a8c/0x1add [btrfs] btrfs_run_sanity_tests+0x65/0x80 [btrfs] init_btrfs_fs+0xec/0x154 [btrfs] do_one_initcall+0x87/0x2a0 do_init_module+0xdf/0x320 load_module+0x3006/0x3390 __do_sys_finit_module+0x113/0x1b0 do_syscall_64+0x35/0x80 entry_SYSCALL_64_after_hwframe+0x46/0xb0 Fixes: aaedb55bc08f ("Btrfs: add tests for btrfs_get_extent") CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Anand Jain Signed-off-by: Zhang Xiaoxu Reviewed-by: David Sterba Signed-off-by: David Sterba Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/tests/btrfs-tests.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 999c14e5d0bd..0599566c66b0 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -192,7 +192,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info) void btrfs_free_dummy_root(struct btrfs_root *root) { - if (!root) + if (IS_ERR_OR_NULL(root)) return; /* Will be freed by btrfs_free_fs_roots */ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state))) From 4ea3aa3b983b18863719960bb03568fc7eff29a7 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 26 Oct 2022 12:42:06 -0700 Subject: [PATCH 74/98] mms: sdhci-esdhc-imx: Fix SDHCI_RESET_ALL for CQHCI commit fb1dec44c6750bb414f47b929c8c175a1a127c31 upstream. [[ NOTE: this is completely untested by the author, but included solely because, as noted in commit df57d73276b8 ("mmc: sdhci-pci: Fix SDHCI_RESET_ALL for CQHCI for Intel GLK-based controllers"), "other drivers using CQHCI might benefit from a similar change, if they also have CQHCI reset by SDHCI_RESET_ALL." We've now seen the same bug on at least MSM, Arasan, and Intel hardware. ]] SDHCI_RESET_ALL resets will reset the hardware CQE state, but we aren't tracking that properly in software. When out of sync, we may trigger various timeouts. It's not typical to perform resets while CQE is enabled, but this may occur in some suspend or error recovery scenarios. Include this fix by way of the new sdhci_and_cqhci_reset() helper. This patch depends on (and should not compile without) the patch entitled "mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI". Fixes: bb6e358169bf ("mmc: sdhci-esdhc-imx: add CMDQ support") Signed-off-by: Brian Norris Reviewed-by: Haibo Chen Acked-by: Adrian Hunter Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20221026124150.v4.4.I7d01f9ad11bacdc9213dee61b7918982aea39115@changeid Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index be4e5cdda1fa..de42b5473f97 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -26,6 +26,7 @@ #include #include #include +#include "sdhci-cqhci.h" #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" #include "cqhci.h" @@ -1243,7 +1244,7 @@ static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing) static void esdhc_reset(struct sdhci_host *host, u8 mask) { - sdhci_reset(host, mask); + sdhci_and_cqhci_reset(host, mask); sdhci_writel(host, host->ier, SDHCI_INT_ENABLE); sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE); From 03f9582a6a2ebd25a440896475c968428c4b63e7 Mon Sep 17 00:00:00 2001 From: ZhangPeng Date: Wed, 9 Nov 2022 01:35:42 +0000 Subject: [PATCH 75/98] udf: Fix a slab-out-of-bounds write bug in udf_find_entry() commit c8af247de385ce49afabc3bf1cf4fd455c94bfe8 upstream. Syzbot reported a slab-out-of-bounds Write bug: loop0: detected capacity change from 0 to 2048 ================================================================== BUG: KASAN: slab-out-of-bounds in udf_find_entry+0x8a5/0x14f0 fs/udf/namei.c:253 Write of size 105 at addr ffff8880123ff896 by task syz-executor323/3610 CPU: 0 PID: 3610 Comm: syz-executor323 Not tainted 6.1.0-rc2-syzkaller-00105-gb229b6ca5abb #0 Hardware name: Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x28e lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:284 print_report+0x107/0x1f0 mm/kasan/report.c:395 kasan_report+0xcd/0x100 mm/kasan/report.c:495 kasan_check_range+0x2a7/0x2e0 mm/kasan/generic.c:189 memcpy+0x3c/0x60 mm/kasan/shadow.c:66 udf_find_entry+0x8a5/0x14f0 fs/udf/namei.c:253 udf_lookup+0xef/0x340 fs/udf/namei.c:309 lookup_open fs/namei.c:3391 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x10e6/0x2df0 fs/namei.c:3710 do_filp_open+0x264/0x4f0 fs/namei.c:3740 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7ffab0d164d9 Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffe1a7e6bb8 EFLAGS: 00000246 ORIG_RAX: 0000000000000055 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007ffab0d164d9 RDX: 00007ffab0d164d9 RSI: 0000000000000000 RDI: 0000000020000180 RBP: 00007ffab0cd5a10 R08: 0000000000000000 R09: 0000000000000000 R10: 00005555573552c0 R11: 0000000000000246 R12: 00007ffab0cd5aa0 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Allocated by task 3610: kasan_save_stack mm/kasan/common.c:45 [inline] kasan_set_track+0x3d/0x60 mm/kasan/common.c:52 ____kasan_kmalloc mm/kasan/common.c:371 [inline] __kasan_kmalloc+0x97/0xb0 mm/kasan/common.c:380 kmalloc include/linux/slab.h:576 [inline] udf_find_entry+0x7b6/0x14f0 fs/udf/namei.c:243 udf_lookup+0xef/0x340 fs/udf/namei.c:309 lookup_open fs/namei.c:3391 [inline] open_last_lookups fs/namei.c:3481 [inline] path_openat+0x10e6/0x2df0 fs/namei.c:3710 do_filp_open+0x264/0x4f0 fs/namei.c:3740 do_sys_openat2+0x124/0x4e0 fs/open.c:1310 do_sys_open fs/open.c:1326 [inline] __do_sys_creat fs/open.c:1402 [inline] __se_sys_creat fs/open.c:1396 [inline] __x64_sys_creat+0x11f/0x160 fs/open.c:1396 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd The buggy address belongs to the object at ffff8880123ff800 which belongs to the cache kmalloc-256 of size 256 The buggy address is located 150 bytes inside of 256-byte region [ffff8880123ff800, ffff8880123ff900) The buggy address belongs to the physical page: page:ffffea000048ff80 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x123fe head:ffffea000048ff80 order:1 compound_mapcount:0 compound_pincount:0 flags: 0xfff00000010200(slab|head|node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000010200 ffffea00004b8500 dead000000000003 ffff888012041b40 raw: 0000000000000000 0000000080100010 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as allocated page last allocated via order 0, migratetype Unmovable, gfp_mask 0x0(), pid 1, tgid 1 (swapper/0), ts 1841222404, free_ts 0 create_dummy_stack mm/page_owner.c:67 [inline] register_early_stack+0x77/0xd0 mm/page_owner.c:83 init_page_owner+0x3a/0x731 mm/page_owner.c:93 kernel_init_freeable+0x41c/0x5d5 init/main.c:1629 kernel_init+0x19/0x2b0 init/main.c:1519 page_owner free stack trace missing Memory state around the buggy address: ffff8880123ff780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880123ff800: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff8880123ff880: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 06 ^ ffff8880123ff900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff8880123ff980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ================================================================== Fix this by changing the memory size allocated for copy_name from UDF_NAME_LEN(254) to UDF_NAME_LEN_CS0(255), because the total length (lfi) of subsequent memcpy can be up to 255. CC: stable@vger.kernel.org Reported-by: syzbot+69c9fdccc6dd08961d34@syzkaller.appspotmail.com Fixes: 066b9cded00b ("udf: Use separate buffer for copying split names") Signed-off-by: ZhangPeng Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20221109013542.442790-1-zhangpeng362@huawei.com Signed-off-by: Greg Kroah-Hartman --- fs/udf/namei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 9f3aced46c68..aff5ca32e4f6 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -241,7 +241,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, poffset - lfi); else { if (!copy_name) { - copy_name = kmalloc(UDF_NAME_LEN, + copy_name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS); if (!copy_name) { fi = ERR_PTR(-ENOMEM); From 0b692d41ee5c88097ecf5dbb37c59083044c996a Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Wed, 2 Nov 2022 11:07:28 -0500 Subject: [PATCH 76/98] mm/memremap.c: map FS_DAX device memory as decrypted commit 867400af90f1f953ff9e10b1b87ecaf9369a7eb8 upstream. virtio_pmem use devm_memremap_pages() to map the device memory. By default this memory is mapped as encrypted with SEV. Guest reboot changes the current encryption key and guest no longer properly decrypts the FSDAX device meta data. Mark the corresponding device memory region for FSDAX devices (mapped with memremap_pages) as decrypted to retain the persistent memory property. Link: https://lkml.kernel.org/r/20221102160728.3184016-1-pankaj.gupta@amd.com Fixes: b7b3c01b19159 ("mm/memremap_pages: support multiple ranges per invocation") Signed-off-by: Pankaj Gupta Cc: Dan Williams Cc: Tom Lendacky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- mm/memremap.c | 1 + 1 file changed, 1 insertion(+) diff --git a/mm/memremap.c b/mm/memremap.c index 2455bac89506..299aad0d26e5 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -348,6 +348,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid) WARN(1, "File system DAX not supported\n"); return ERR_PTR(-EINVAL); } + params.pgprot = pgprot_decrypted(params.pgprot); break; case MEMORY_DEVICE_GENERIC: break; From f8e0edeaa0f2b860bdbbf0aafb4492533043d650 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Fri, 4 Nov 2022 08:50:00 +0100 Subject: [PATCH 77/98] can: j1939: j1939_send_one(): fix missing CAN header initialization commit 3eb3d283e8579a22b81dd2ac3987b77465b2a22f upstream. The read access to struct canxl_frame::len inside of a j1939 created skbuff revealed a missing initialization of reserved and later filled elements in struct can_frame. This patch initializes the 8 byte CAN header with zero. Fixes: 9d71dd0c7009 ("can: add support of SAE J1939 protocol") Cc: Oleksij Rempel Link: https://lore.kernel.org/linux-can/20221104052235.GA6474@pengutronix.de Reported-by: syzbot+d168ec0caca4697e03b1@syzkaller.appspotmail.com Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/all/20221104075000.105414-1-socketcan@hartkopp.net Cc: stable@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- net/can/j1939/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index ca75d1b8f415..9da8fbc81c04 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -332,6 +332,9 @@ int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) /* re-claim the CAN_HDR from the SKB */ cf = skb_push(skb, J1939_CAN_HDR); + /* initialize header structure */ + memset(cf, 0, J1939_CAN_HDR); + /* make it a full can frame again */ skb_put(skb, J1939_CAN_FTR + (8 - dlc)); From d37dfb9357e935c8701e80429f5f7c416fb946a3 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jun 2022 13:18:39 -0700 Subject: [PATCH 78/98] cert host tools: Stop complaining about deprecated OpenSSL functions commit 6bfb56e93bcef41859c2d5ab234ffd80b691be35 upstream. OpenSSL 3.0 deprecated the OpenSSL's ENGINE API. That is as may be, but the kernel build host tools still use it. Disable the warning about deprecated declarations until somebody who cares fixes it. Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- scripts/extract-cert.c | 7 +++++++ scripts/sign-file.c | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/scripts/extract-cert.c b/scripts/extract-cert.c index 3bc48c726c41..79ecbbfe37cd 100644 --- a/scripts/extract-cert.c +++ b/scripts/extract-cert.c @@ -23,6 +23,13 @@ #include #include +/* + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. + * + * Remove this if/when that API is no longer used + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + #define PKEY_ID_PKCS7 2 static __attribute__((noreturn)) diff --git a/scripts/sign-file.c b/scripts/sign-file.c index fbd34b8e8f57..7434e9ea926e 100644 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -29,6 +29,13 @@ #include #include +/* + * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API. + * + * Remove this if/when that API is no longer used + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + /* * Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to * assume that it's not available and its header file is missing and that we From 4b51cce72ab7d173fd511917396c832b912b7634 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:35 +0300 Subject: [PATCH 79/98] dmaengine: at_hdmac: Fix at_lli struct definition commit f1171bbdd2ba2a50ee64bb198a78c268a5baf5f1 upstream. Those hardware registers are all of 32 bits, while dma_addr_t ca be of type u64 or u32 depending on CONFIG_ARCH_DMA_ADDR_T_64BIT. Force u32 to comply with what the hardware expects. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-2-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac_regs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 80fc2fe8c77e..8dc82c7b1dcf 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -164,13 +164,13 @@ /* LLI == Linked List Item; aka DMA buffer descriptor */ struct at_lli { /* values that are not changed by hardware */ - dma_addr_t saddr; - dma_addr_t daddr; + u32 saddr; + u32 daddr; /* value that may get written back: */ - u32 ctrla; + u32 ctrla; /* more values that are not changed by hardware */ - u32 ctrlb; - dma_addr_t dscr; /* chain to next lli */ + u32 ctrlb; + u32 dscr; /* chain to next lli */ }; /** From 24f9e93e506aa366172385ef2e367add05de422b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:36 +0300 Subject: [PATCH 80/98] dmaengine: at_hdmac: Don't start transactions at tx_submit level commit 7176a6a8982d311e50a7c1168868d26e65bbba19 upstream. tx_submit is supposed to push the current transaction descriptor to a pending queue, waiting for issue_pending() to be called. issue_pending() must start the transfer, not tx_submit(), thus remove atc_dostart() from atc_tx_submit(). Clients of at_xdmac that assume that tx_submit() starts the transfer must be updated and call dma_async_issue_pending() if they miss to call it. The vdbg print was moved to after the lock is released. It is desirable to do the prints without the lock held if possible, and because the if statement disappears there's no reason why to do the print while holding the lock. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-3-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7eaee5b705b1..1b0f753a4ba2 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -672,19 +672,11 @@ static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&atchan->lock, flags); cookie = dma_cookie_assign(tx); - if (list_empty(&atchan->active_list)) { - dev_vdbg(chan2dev(tx->chan), "tx_submit: started %u\n", - desc->txd.cookie); - atc_dostart(atchan, desc); - list_add_tail(&desc->desc_node, &atchan->active_list); - } else { - dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", - desc->txd.cookie); - list_add_tail(&desc->desc_node, &atchan->queue); - } - + list_add_tail(&desc->desc_node, &atchan->queue); spin_unlock_irqrestore(&atchan->lock, flags); + dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n", + desc->txd.cookie); return cookie; } From ad4cbe8e9c3a2e0b3c5eacd1f74ea6835835f38d Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:37 +0300 Subject: [PATCH 81/98] dmaengine: at_hdmac: Start transfer for cyclic channels in issue_pending commit 8a47221fc28417ff8a32a4f92d4448a56c3cf7e1 upstream. Cyclic channels must too call issue_pending in order to start a transfer. Start the transfer in issue_pending regardless of the type of channel. This wrongly worked before, because in the past the transfer was started at tx_submit level when only a desc in the transfer list. Fixes: 53830cc75974 ("dmaengine: at_hdmac: add cyclic DMA operation support") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-4-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 1b0f753a4ba2..bdbf7a65450b 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1509,10 +1509,6 @@ static void atc_issue_pending(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "issue_pending\n"); - /* Not needed for cyclic transfers */ - if (atc_chan_is_cyclic(atchan)) - return; - atc_advance_work(atchan); } From 7078e935b4102a63a2473dbeeeb13dd9fefc33fb Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:38 +0300 Subject: [PATCH 82/98] dmaengine: at_hdmac: Fix premature completion of desc in issue_pending commit fcd37565efdaffeac179d0f0ce980ac79bfdf569 upstream. Multiple calls to atc_issue_pending() could result in a premature completion of a descriptor from the atchan->active list, as the method always completed the first active descriptor from the list. Instead, issue_pending() should just take the first transaction descriptor from the pending queue, move it to active_list and start the transfer. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-5-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index bdbf7a65450b..d441c3ffcd63 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1500,16 +1500,26 @@ atc_tx_status(struct dma_chan *chan, } /** - * atc_issue_pending - try to finish work + * atc_issue_pending - takes the first transaction descriptor in the pending + * queue and starts the transfer. * @chan: target DMA channel */ static void atc_issue_pending(struct dma_chan *chan) { - struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_desc *desc; + unsigned long flags; dev_vdbg(chan2dev(chan), "issue_pending\n"); - atc_advance_work(atchan); + spin_lock_irqsave(&atchan->lock, flags); + if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue)) + return spin_unlock_irqrestore(&atchan->lock, flags); + + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + spin_unlock_irqrestore(&atchan->lock, flags); } /** From ee356822618ee86cfab8fac1bec0bf66fb98ea2e Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:39 +0300 Subject: [PATCH 83/98] dmaengine: at_hdmac: Do not call the complete callback on device_terminate_all commit f645f85ae1104f8bd882f962ac0a69a1070076dd upstream. The method was wrong because it violated the dmaengine API. For aborted transfers the complete callback should not be called. Fix the behavior and do not call the complete callback on device_terminate_all. Fixes: 808347f6a317 ("dmaengine: at_hdmac: add DMA slave transfers") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-6-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d441c3ffcd63..69d470ea7f13 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1410,11 +1410,8 @@ static int atc_terminate_all(struct dma_chan *chan) struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); int chan_id = atchan->chan_common.chan_id; - struct at_desc *desc, *_desc; unsigned long flags; - LIST_HEAD(list); - dev_vdbg(chan2dev(chan), "%s\n", __func__); /* @@ -1433,15 +1430,11 @@ static int atc_terminate_all(struct dma_chan *chan) cpu_relax(); /* active_list entries will end up before queued entries */ - list_splice_init(&atchan->queue, &list); - list_splice_init(&atchan->active_list, &list); + list_splice_tail_init(&atchan->queue, &atchan->free_list); + list_splice_tail_init(&atchan->active_list, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); - clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); From 9b69060a725dbc8ee3f3d270279d6b2d9e39079b Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:40 +0300 Subject: [PATCH 84/98] dmaengine: at_hdmac: Protect atchan->status with the channel lock commit 6e5ad28d16f082efeae3d0bd2e31f24bed218019 upstream. Now that the complete callback call was removed from device_terminate_all(), we can protect the atchan->status with the channel lock. The atomic bitops on atchan->status do not substitute proper locking on the status, as one could still modify the status after the lock was dropped in atc_terminate_all() but before the atomic bitops were executed. Fixes: 078a6506141a ("dmaengine: at_hdmac: Fix deadlocks") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-7-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 69d470ea7f13..99d6bb5634df 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1433,12 +1433,12 @@ static int atc_terminate_all(struct dma_chan *chan) list_splice_tail_init(&atchan->queue, &atchan->free_list); list_splice_tail_init(&atchan->active_list, &atchan->free_list); - spin_unlock_irqrestore(&atchan->lock, flags); - clear_bit(ATC_IS_PAUSED, &atchan->status); /* if channel dedicated to cyclic operations, free it */ clear_bit(ATC_IS_CYCLIC, &atchan->status); + spin_unlock_irqrestore(&atchan->lock, flags); + return 0; } From 1582cc3b480501b6144c724306d2c8d8dd4b84e3 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:41 +0300 Subject: [PATCH 85/98] dmaengine: at_hdmac: Fix concurrency problems by removing atc_complete_all() commit c6babed879fbe82796a601bf097649e07382db46 upstream. atc_complete_all() had concurrency bugs, thus remove it: 1/ atc_complete_all() in its entirety was buggy, as when the atchan->queue list (the one that contains descriptors that are not yet issued to the hardware) contained descriptors, it fired just the first from the atchan->queue, but moved all the desc from atchan->queue to atchan->active_list and considered them all as fired. This could result in calling the completion of a descriptor that was not yet issued to the hardware. 2/ when in tasklet at atc_advance_work() time, atchan->active_list was queried without holding the lock of the chan. This can result in atchan->active_list concurrency problems between the tasklet and issue_pending(). Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-8-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 49 ++++-------------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 99d6bb5634df..3f3b8c040499 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -466,42 +466,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) dma_run_dependencies(txd); } -/** - * atc_complete_all - finish work for all transactions - * @atchan: channel to complete transactions for - * - * Eventually submit queued descriptors if any - * - * Assume channel is idle while calling this function - * Called with atchan->lock held and bh disabled - */ -static void atc_complete_all(struct at_dma_chan *atchan) -{ - struct at_desc *desc, *_desc; - LIST_HEAD(list); - unsigned long flags; - - dev_vdbg(chan2dev(&atchan->chan_common), "complete all\n"); - - spin_lock_irqsave(&atchan->lock, flags); - - /* - * Submit queued descriptors ASAP, i.e. before we go through - * the completed ones. - */ - if (!list_empty(&atchan->queue)) - atc_dostart(atchan, atc_first_queued(atchan)); - /* empty active_list now it is completed */ - list_splice_init(&atchan->active_list, &list); - /* empty queue list by moving descriptors (if any) to active_list */ - list_splice_init(&atchan->queue, &atchan->active_list); - - spin_unlock_irqrestore(&atchan->lock, flags); - - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); -} - /** * atc_advance_work - at the end of a transaction, move forward * @atchan: channel where the transaction ended @@ -509,25 +473,20 @@ static void atc_complete_all(struct at_dma_chan *atchan) static void atc_advance_work(struct at_dma_chan *atchan) { unsigned long flags; - int ret; dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); spin_lock_irqsave(&atchan->lock, flags); - ret = atc_chan_is_enabled(atchan); + if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) + return spin_unlock_irqrestore(&atchan->lock, flags); spin_unlock_irqrestore(&atchan->lock, flags); - if (ret) - return; - - if (list_empty(&atchan->active_list) || - list_is_singular(&atchan->active_list)) - return atc_complete_all(atchan); atc_chain_complete(atchan, atc_first_active(atchan)); /* advance work */ spin_lock_irqsave(&atchan->lock, flags); - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) + atc_dostart(atchan, atc_first_active(atchan)); spin_unlock_irqrestore(&atchan->lock, flags); } From 7f07cecc7411d8f634fd23766368c92264fc2d73 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:42 +0300 Subject: [PATCH 86/98] dmaengine: at_hdmac: Fix concurrency over descriptor commit 06988949df8c3007ad82036d3606d8ae72ed9000 upstream. The descriptor was added to the free_list before calling the callback, which could result in reissuing of the same descriptor and calling of a single callback for both. Move the decriptor to the free list after the callback is invoked. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-9-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 3f3b8c040499..01dfc8dbfc83 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -450,11 +450,8 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) desc->memset_buffer = false; } - /* move children to free_list */ - list_splice_init(&desc->tx_list, &atchan->free_list); - /* move myself to free_list */ - list_move(&desc->desc_node, &atchan->free_list); - + /* Remove transfer node from the active list. */ + list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); dma_descriptor_unmap(txd); @@ -464,6 +461,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) dmaengine_desc_get_callback_invoke(txd, NULL); dma_run_dependencies(txd); + + spin_lock_irqsave(&atchan->lock, flags); + /* move children to free_list */ + list_splice_init(&desc->tx_list, &atchan->free_list); + /* add myself to free_list */ + list_add(&desc->desc_node, &atchan->free_list); + spin_unlock_irqrestore(&atchan->lock, flags); } /** From 0f603bf553a70af22ced122d706feeae2bb6ed76 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:43 +0300 Subject: [PATCH 87/98] dmaengine: at_hdmac: Free the memset buf without holding the chan lock commit 6ba826cbb57d675f447b59323204d1473bbd5593 upstream. There's no need to hold the channel lock when freeing the memset buf, as the operation has already completed. Free the memset buf without holding the channel lock. Fixes: 4d112426c344 ("dmaengine: hdmac: Add memset capabilities") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-10-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 01dfc8dbfc83..61e1c555cede 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -443,13 +443,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); - /* If the transfer was a memset, free our temporary buffer */ - if (desc->memset_buffer) { - dma_pool_free(atdma->memset_pool, desc->memset_vaddr, - desc->memset_paddr); - desc->memset_buffer = false; - } - /* Remove transfer node from the active list. */ list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); @@ -468,6 +461,13 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) /* add myself to free_list */ list_add(&desc->desc_node, &atchan->free_list); spin_unlock_irqrestore(&atchan->lock, flags); + + /* If the transfer was a memset, free our temporary buffer */ + if (desc->memset_buffer) { + dma_pool_free(atdma->memset_pool, desc->memset_vaddr, + desc->memset_paddr); + desc->memset_buffer = false; + } } /** From a35dd5dd98b67b98f57e55efb4374e55c2cb680f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:44 +0300 Subject: [PATCH 88/98] dmaengine: at_hdmac: Fix concurrency over the active list commit 03ed9ba357cc78116164b90b87f45eacab60b561 upstream. The tasklet (atc_advance_work()) did not held the channel lock when retrieving the first active descriptor, causing concurrency problems if issue_pending() was called in between. If issue_pending() was called exactly after the lock was released in the tasklet (atc_advance_work()), atc_chain_complete() could complete a descriptor for which the controller has not yet raised an interrupt. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-11-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 61e1c555cede..b5cff318461d 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -443,8 +443,6 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) if (!atc_chan_is_cyclic(atchan)) dma_cookie_complete(txd); - /* Remove transfer node from the active list. */ - list_del_init(&desc->desc_node); spin_unlock_irqrestore(&atchan->lock, flags); dma_descriptor_unmap(txd); @@ -476,6 +474,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) */ static void atc_advance_work(struct at_dma_chan *atchan) { + struct at_desc *desc; unsigned long flags; dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n"); @@ -483,9 +482,12 @@ static void atc_advance_work(struct at_dma_chan *atchan) spin_lock_irqsave(&atchan->lock, flags); if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list)) return spin_unlock_irqrestore(&atchan->lock, flags); - spin_unlock_irqrestore(&atchan->lock, flags); - atc_chain_complete(atchan, atc_first_active(atchan)); + desc = atc_first_active(atchan); + /* Remove the transfer node from the active list. */ + list_del_init(&desc->desc_node); + spin_unlock_irqrestore(&atchan->lock, flags); + atc_chain_complete(atchan, desc); /* advance work */ spin_lock_irqsave(&atchan->lock, flags); From 6be4ab08c8637bcea930ee399bd1001d2250b168 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:45 +0300 Subject: [PATCH 89/98] dmaengine: at_hdmac: Fix descriptor handling when issuing it to hardware commit ba2423633ba646e1df20e30cb3cf35495c16f173 upstream. As it was before, the descriptor was issued to the hardware without adding it to the active (issued) list. This could result in a completion of other descriptor, or/and in the descriptor never being completed. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-12-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index b5cff318461d..d4f92c8befc1 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -491,8 +491,11 @@ static void atc_advance_work(struct at_dma_chan *atchan) /* advance work */ spin_lock_irqsave(&atchan->lock, flags); - if (!list_empty(&atchan->active_list)) - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) { + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + } spin_unlock_irqrestore(&atchan->lock, flags); } @@ -504,6 +507,7 @@ static void atc_advance_work(struct at_dma_chan *atchan) static void atc_handle_error(struct at_dma_chan *atchan) { struct at_desc *bad_desc; + struct at_desc *desc; struct at_desc *child; unsigned long flags; @@ -521,8 +525,11 @@ static void atc_handle_error(struct at_dma_chan *atchan) list_splice_init(&atchan->queue, atchan->active_list.prev); /* Try to restart the controller */ - if (!list_empty(&atchan->active_list)) - atc_dostart(atchan, atc_first_active(atchan)); + if (!list_empty(&atchan->active_list)) { + desc = atc_first_queued(atchan); + list_move_tail(&desc->desc_node, &atchan->active_list); + atc_dostart(atchan, desc); + } /* * KERN_CRITICAL may seem harsh, but since this only happens From f4512855223c37aae610f667aed85fdfc701fc4f Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:46 +0300 Subject: [PATCH 90/98] dmaengine: at_hdmac: Fix completion of unissued descriptor in case of errors commit ef2cb4f0ce479f77607b04c4b0414bf32f863ee8 upstream. In case the controller detected an error, the code took the chance to move all the queued (submitted) descriptors to the active (issued) list. This was wrong as if there were any descriptors in the submitted list they were moved to the issued list without actually issuing them to the controller, thus a completion could be raised without even fireing the descriptor. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-13-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index d4f92c8befc1..2740c78649db 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -520,10 +520,6 @@ static void atc_handle_error(struct at_dma_chan *atchan) bad_desc = atc_first_active(atchan); list_del_init(&bad_desc->desc_node); - /* As we are stopped, take advantage to push queued descriptors - * in active_list */ - list_splice_init(&atchan->queue, atchan->active_list.prev); - /* Try to restart the controller */ if (!list_empty(&atchan->active_list)) { desc = atc_first_queued(atchan); From f53a233eaad61ddb7622bc279cf1145b022cff46 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:47 +0300 Subject: [PATCH 91/98] dmaengine: at_hdmac: Don't allow CPU to reorder channel enable commit 580ee84405c27d6ed419abe4d2b3de1968abdafd upstream. at_hdmac uses __raw_writel for register writes. In the absence of a barrier, the CPU may reorder the register operations. Introduce a write memory barrier so that the CPU does not reorder the channel enable, thus the start of the transfer, without making sure that all the pre-required register fields are already written. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Reported-by: Peter Rosin Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/13c6c9a2-6db5-c3bf-349b-4c127ad3496a@axentia.se/ Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-14-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 2740c78649db..128579d5e0d8 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -237,6 +237,8 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) ATC_SPIP_BOUNDARY(first->boundary)); channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) | ATC_DPIP_BOUNDARY(first->boundary)); + /* Don't allow CPU to reorder channel enable. */ + wmb(); dma_writel(atdma, CHER, atchan->mask); vdbg_dump_regs(atchan); From 85f97c97efc501191bb73020e98d822561d61828 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:48 +0300 Subject: [PATCH 92/98] dmaengine: at_hdmac: Fix impossible condition commit 28cbe5a0a46a6637adbda52337d7b2777fc04027 upstream. The iterator can not be greater than ATC_MAX_DSCR_TRIALS, as the for loop will stop when i == ATC_MAX_DSCR_TRIALS. While here, use the common "i" name for the iterator. Fixes: 93dce3a6434f ("dmaengine: at_hdmac: fix residue computation") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-15-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 128579d5e0d8..e3a825098140 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -299,7 +299,8 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) struct at_desc *desc_first = atc_first_active(atchan); struct at_desc *desc; int ret; - u32 ctrla, dscr, trials; + u32 ctrla, dscr; + unsigned int i; /* * If the cookie doesn't match to the currently running transfer then @@ -369,7 +370,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) dscr = channel_readl(atchan, DSCR); rmb(); /* ensure DSCR is read before CTRLA */ ctrla = channel_readl(atchan, CTRLA); - for (trials = 0; trials < ATC_MAX_DSCR_TRIALS; ++trials) { + for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) { u32 new_dscr; rmb(); /* ensure DSCR is read after CTRLA */ @@ -395,7 +396,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie) rmb(); /* ensure DSCR is read before CTRLA */ ctrla = channel_readl(atchan, CTRLA); } - if (unlikely(trials >= ATC_MAX_DSCR_TRIALS)) + if (unlikely(i == ATC_MAX_DSCR_TRIALS)) return -ETIMEDOUT; /* for the first descriptor we can be more accurate */ From 367bc0fa988fde83dbb6f654067c0d76c86faf68 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Oct 2022 12:02:49 +0300 Subject: [PATCH 93/98] dmaengine: at_hdmac: Check return code of dma_async_device_register commit c47e6403fa099f200868d6b106701cb42d181d2b upstream. dma_async_device_register() can fail, check the return code and display an error. Fixes: dc78baa2b90b ("dmaengine: at_hdmac: new driver for the Atmel AHB DMA Controller") Signed-off-by: Tudor Ambarus Cc: stable@vger.kernel.org Acked-by: Nicolas Ferre Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com Link: https://lore.kernel.org/r/20221025090306.297886-16-tudor.ambarus@microchip.com Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- drivers/dma/at_hdmac.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index e3a825098140..6a4f9697b574 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -1901,7 +1901,11 @@ static int __init at_dma_probe(struct platform_device *pdev) dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) ? "slave " : "", plat_dat->nr_channels); - dma_async_device_register(&atdma->dma_common); + err = dma_async_device_register(&atdma->dma_common); + if (err) { + dev_err(&pdev->dev, "Unable to register: %d.\n", err); + goto err_dma_async_device_register; + } /* * Do not return an error if the dmac node is not present in order to @@ -1921,6 +1925,7 @@ static int __init at_dma_probe(struct platform_device *pdev) err_of_dma_controller_register: dma_async_device_unregister(&atdma->dma_common); +err_dma_async_device_register: dma_pool_destroy(atdma->memset_pool); err_memset_pool_create: dma_pool_destroy(atdma->dma_desc_pool); From 534762e261c84d43e5d56a780e40278b94c20540 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 7 Nov 2022 18:00:11 +0000 Subject: [PATCH 94/98] net: tun: call napi_schedule_prep() to ensure we own a napi commit 07d120aa33cc9d9115753d159f64d20c94458781 upstream. A recent patch exposed another issue in napi_get_frags() caught by syzbot [1] Before feeding packets to GRO, and calling napi_complete() we must first grab NAPI_STATE_SCHED. [1] WARNING: CPU: 0 PID: 3612 at net/core/dev.c:6076 napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Modules linked in: CPU: 0 PID: 3612 Comm: syz-executor408 Not tainted 6.1.0-rc3-syzkaller-00175-g1118b2049d77 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 RIP: 0010:napi_complete_done+0x45b/0x880 net/core/dev.c:6076 Code: c1 ea 03 0f b6 14 02 4c 89 f0 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 24 04 00 00 41 89 5d 1c e9 73 fc ff ff e8 b5 53 22 fa <0f> 0b e9 82 fe ff ff e8 a9 53 22 fa 48 8b 5c 24 08 31 ff 48 89 de RSP: 0018:ffffc90003c4f920 EFLAGS: 00010293 RAX: 0000000000000000 RBX: 0000000000000030 RCX: 0000000000000000 RDX: ffff8880251c0000 RSI: ffffffff875a58db RDI: 0000000000000007 RBP: 0000000000000001 R08: 0000000000000007 R09: 0000000000000000 R10: 0000000000000001 R11: 0000000000000001 R12: ffff888072d02628 R13: ffff888072d02618 R14: ffff888072d02634 R15: 0000000000000000 FS: 0000555555f13300(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000055c44d3892b8 CR3: 00000000172d2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: napi_complete include/linux/netdevice.h:510 [inline] tun_get_user+0x206d/0x3a60 drivers/net/tun.c:1980 tun_chr_write_iter+0xdb/0x200 drivers/net/tun.c:2027 call_write_iter include/linux/fs.h:2191 [inline] do_iter_readv_writev+0x20b/0x3b0 fs/read_write.c:735 do_iter_write+0x182/0x700 fs/read_write.c:861 vfs_writev+0x1aa/0x630 fs/read_write.c:934 do_writev+0x133/0x2f0 fs/read_write.c:977 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f37021a3c19 Fixes: 1118b2049d77 ("net: tun: Fix memory leaks of napi_get_frags") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Wang Yufen Link: https://lore.kernel.org/r/20221107180011.188437-1-edumazet@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman --- drivers/net/tun.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 83662f616b67..cb42fdbfeb32 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1986,18 +1986,25 @@ drop: skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { + WARN_ON_ONCE(1); + err = -ENOMEM; this_cpu_inc(tun->pcpu_stats->rx_dropped); +napi_busy: napi_free_frags(&tfile->napi); rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); - WARN_ON(1); - return -ENOMEM; + return err; } - local_bh_disable(); - napi_gro_frags(&tfile->napi); - napi_complete(&tfile->napi); - local_bh_enable(); + if (likely(napi_schedule_prep(&tfile->napi))) { + local_bh_disable(); + napi_gro_frags(&tfile->napi); + napi_complete(&tfile->napi); + local_bh_enable(); + } else { + err = -EBUSY; + goto napi_busy; + } mutex_unlock(&tfile->napi_mutex); } else if (tfile->napi_enabled) { struct sk_buff_head *queue = &tfile->sk.sk_write_queue; From e7294b01de40eb34bed65f376986181fcdeca0cd Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 17 Nov 2020 08:37:50 -0300 Subject: [PATCH 95/98] mmc: sdhci-esdhc-imx: Convert the driver to DT-only commit 854a22997ad5d6c9860a2d695c40cd4004151d5b upstream. Since 5.10-rc1 i.MX is a devicetree-only platform, so simplify the code by removing the unused non-DT support. Signed-off-by: Fabio Estevam Reviewed-by: Haibo Chen Link: https://lore.kernel.org/r/20201117113750.25053-1-festevam@gmail.com Signed-off-by: Ulf Hansson Signed-off-by: Greg Kroah-Hartman --- drivers/mmc/host/sdhci-esdhc-imx.c | 86 +----------------------------- 1 file changed, 1 insertion(+), 85 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index de42b5473f97..449562122adc 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -295,22 +295,6 @@ struct pltfm_imx_data { struct pm_qos_request pm_qos_req; }; -static const struct platform_device_id imx_esdhc_devtype[] = { - { - .name = "sdhci-esdhc-imx25", - .driver_data = (kernel_ulong_t) &esdhc_imx25_data, - }, { - .name = "sdhci-esdhc-imx35", - .driver_data = (kernel_ulong_t) &esdhc_imx35_data, - }, { - .name = "sdhci-esdhc-imx51", - .driver_data = (kernel_ulong_t) &esdhc_imx51_data, - }, { - /* sentinel */ - } -}; -MODULE_DEVICE_TABLE(platform, imx_esdhc_devtype); - static const struct of_device_id imx_esdhc_dt_ids[] = { { .compatible = "fsl,imx25-esdhc", .data = &esdhc_imx25_data, }, { .compatible = "fsl,imx35-esdhc", .data = &esdhc_imx35_data, }, @@ -1546,72 +1530,6 @@ sdhci_esdhc_imx_probe_dt(struct platform_device *pdev, } #endif -static int sdhci_esdhc_imx_probe_nondt(struct platform_device *pdev, - struct sdhci_host *host, - struct pltfm_imx_data *imx_data) -{ - struct esdhc_platform_data *boarddata = &imx_data->boarddata; - int err; - - if (!host->mmc->parent->platform_data) { - dev_err(mmc_dev(host->mmc), "no board data!\n"); - return -EINVAL; - } - - imx_data->boarddata = *((struct esdhc_platform_data *) - host->mmc->parent->platform_data); - /* write_protect */ - if (boarddata->wp_type == ESDHC_WP_GPIO) { - host->mmc->caps2 |= MMC_CAP2_RO_ACTIVE_HIGH; - - err = mmc_gpiod_request_ro(host->mmc, "wp", 0, 0); - if (err) { - dev_err(mmc_dev(host->mmc), - "failed to request write-protect gpio!\n"); - return err; - } - } - - /* card_detect */ - switch (boarddata->cd_type) { - case ESDHC_CD_GPIO: - err = mmc_gpiod_request_cd(host->mmc, "cd", 0, false, 0); - if (err) { - dev_err(mmc_dev(host->mmc), - "failed to request card-detect gpio!\n"); - return err; - } - fallthrough; - - case ESDHC_CD_CONTROLLER: - /* we have a working card_detect back */ - host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; - break; - - case ESDHC_CD_PERMANENT: - host->mmc->caps |= MMC_CAP_NONREMOVABLE; - break; - - case ESDHC_CD_NONE: - break; - } - - switch (boarddata->max_bus_width) { - case 8: - host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_4_BIT_DATA; - break; - case 4: - host->mmc->caps |= MMC_CAP_4_BIT_DATA; - break; - case 1: - default: - host->quirks |= SDHCI_QUIRK_FORCE_1_BIT_DATA; - break; - } - - return 0; -} - static int sdhci_esdhc_imx_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -1631,8 +1549,7 @@ static int sdhci_esdhc_imx_probe(struct platform_device *pdev) imx_data = sdhci_pltfm_priv(pltfm_host); - imx_data->socdata = of_id ? of_id->data : (struct esdhc_soc_data *) - pdev->id_entry->driver_data; + imx_data->socdata = of_id->data; if (imx_data->socdata->flags & ESDHC_FLAG_PMQOS) cpu_latency_qos_add_request(&imx_data->pm_qos_req, 0); @@ -1944,7 +1861,6 @@ static struct platform_driver sdhci_esdhc_imx_driver = { .of_match_table = imx_esdhc_dt_ids, .pm = &sdhci_esdhc_pmops, }, - .id_table = imx_esdhc_devtype, .probe = sdhci_esdhc_imx_probe, .remove = sdhci_esdhc_imx_remove, }; From 154d744fbefcd13648ff036db2d185319afa74dc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 14 Nov 2022 12:44:01 +0100 Subject: [PATCH 96/98] x86/cpu: Restore AMD's DE_CFG MSR after resume commit 2632daebafd04746b4b96c2f26a6021bc38f6209 upstream. DE_CFG contains the LFENCE serializing bit, restore it on resume too. This is relevant to older families due to the way how they do S3. Unify and correct naming while at it. Fixes: e4d0e84e4907 ("x86/cpu/AMD: Make LFENCE a serializing instruction") Reported-by: Andrew Cooper Reported-by: Pawan Gupta Signed-off-by: Borislav Petkov Cc: Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/msr-index.h | 8 +++++--- arch/x86/kernel/cpu/amd.c | 6 ++---- arch/x86/kernel/cpu/hygon.c | 4 ++-- arch/x86/kvm/svm/svm.c | 10 +++++----- arch/x86/kvm/x86.c | 2 +- arch/x86/power/cpu.c | 1 + tools/arch/x86/include/asm/msr-index.h | 8 +++++--- 7 files changed, 21 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 144dc164b759..5a8ee3b83af2 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -489,6 +489,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -565,9 +570,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8b9e3277a6ce..ec3fa4dc9031 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -822,8 +822,6 @@ static void init_amd_gh(struct cpuinfo_x86 *c) set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); } -#define MSR_AMD64_DE_CFG 0xC0011029 - static void init_amd_ln(struct cpuinfo_x86 *c) { /* @@ -1018,8 +1016,8 @@ static void init_amd(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 774ca6bfda9f..205fa420ee7c 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -342,8 +342,8 @@ static void init_hygon(struct cpuinfo_x86 *c) * msr_set_bit() uses the safe accessors, too, even if the MSR * is not present. */ - msr_set_bit(MSR_F10H_DECFG, - MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT); + msr_set_bit(MSR_AMD64_DE_CFG, + MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT); /* A serializing LFENCE stops RDTSC speculation */ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a0512a91760d..2b7528821577 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2475,9 +2475,9 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) msr->data = 0; switch (msr->index) { - case MSR_F10H_DECFG: - if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) - msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE; + case MSR_AMD64_DE_CFG: + if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) + msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; case MSR_IA32_PERF_CAPABILITIES: return 0; @@ -2584,7 +2584,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x1E; } break; - case MSR_F10H_DECFG: + case MSR_AMD64_DE_CFG: msr_info->data = svm->msr_decfg; break; default: @@ -2764,7 +2764,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) case MSR_VM_IGNNE: vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; - case MSR_F10H_DECFG: { + case MSR_AMD64_DE_CFG: { struct kvm_msr_entry msr_entry; msr_entry.index = msr->index; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0ac80b3ff0f5..23d7c563e012 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1362,7 +1362,7 @@ static const u32 msr_based_features_all[] = { MSR_IA32_VMX_EPT_VPID_CAP, MSR_IA32_VMX_VMFUNC, - MSR_F10H_DECFG, + MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d023c85e3c53..61581c45788e 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -522,6 +522,7 @@ static void pm_save_spec_msr(void) MSR_TSX_FORCE_ABORT, MSR_IA32_MCU_OPT_CTRL, MSR_AMD64_LS_CFG, + MSR_AMD64_DE_CFG, }; msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id)); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 144dc164b759..8fb925676813 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -489,6 +489,11 @@ #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 + +#define MSR_AMD64_DE_CFG 0xc0011029 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT) + #define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 @@ -565,9 +570,6 @@ #define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL #define FAM10H_MMIO_CONF_BASE_SHIFT 20 #define MSR_FAM10H_NODE_ID 0xc001100c -#define MSR_F10H_DECFG 0xc0011029 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 -#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a From 0f544353fec8e717d37724d95b92538e1de79e86 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 16 Nov 2022 09:43:39 +0100 Subject: [PATCH 97/98] io_uring: kill goto error handling in io_sqpoll_wait_sq() Hunk extracted from commit 70aacfe66136809d7f080f89c492c278298719f4 upstream. If the sqpoll thread has died, the out condition doesn't remove the waiting task from the waitqueue. The goto and check are not needed, just make it a break condition after setting the error value. That ensures that we always remove ourselves from sqo_sq_wait waitqueue. Reported-by: Xingyuan Mo Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/io_uring.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 05f360b66b07..d1cb1addea96 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -9038,7 +9038,7 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) if (unlikely(ctx->sqo_dead)) { ret = -EOWNERDEAD; - goto out; + break; } if (!io_sqring_full(ctx)) @@ -9048,7 +9048,6 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) } while (!signal_pending(current)); finish_wait(&ctx->sqo_sq_wait, &wait); -out: return ret; } From 41217963b1d97ec170f24fc4155953a2b0835191 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 16 Nov 2022 09:57:20 +0100 Subject: [PATCH 98/98] Linux 5.10.155 Link: https://lore.kernel.org/r/20221114124442.530286937@linuxfoundation.org Tested-by: Jon Hunter Tested-by: Pavel Machek (CIP) Tested-by: Shuah Khan Tested-by: Slade Watkins Tested-by: Florian Fainelli Tested-by: Guenter Roeck Tested-by: Salvatore Bonaccorso Tested-by: Sudip Mukherjee Tested-by: Linux Kernel Functional Testing Tested-by: Rudi Heitbaum Tested-by: Hulk Robot Signed-off-by: Greg Kroah-Hartman --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 43fecb404581..8ccf902b3609 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 154 +SUBLEVEL = 155 EXTRAVERSION = NAME = Dare mighty things