From aad4b41a34191b07ad0cb78b4009b46fd23382f7 Mon Sep 17 00:00:00 2001 From: Leo Stone Date: Tue, 22 Oct 2024 21:12:01 -0700 Subject: [PATCH 0001/1386] Documentation: ieee802154: fix grammar Fix grammar where it improves readability. Signed-off-by: Leo Stone Reviewed-by: Miquel Raynal Reviewed-by: Simon Horman Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/20241023041203.35313-1-leocstone@gmail.com Signed-off-by: Stefan Schmidt --- Documentation/networking/ieee802154.rst | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Documentation/networking/ieee802154.rst b/Documentation/networking/ieee802154.rst index c652d383fe10..743c0a80e309 100644 --- a/Documentation/networking/ieee802154.rst +++ b/Documentation/networking/ieee802154.rst @@ -72,7 +72,8 @@ exports a management (e.g. MLME) and data API. possibly with some kinds of acceleration like automatic CRC computation and comparison, automagic ACK handling, address matching, etc. -Those types of devices require different approach to be hooked into Linux kernel. +Each type of device requires a different approach to be hooked into the Linux +kernel. HardMAC ------- @@ -81,10 +82,10 @@ See the header include/net/ieee802154_netdev.h. You have to implement Linux net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family code via plain sk_buffs. On skb reception skb->cb must contain additional info as described in the struct ieee802154_mac_cb. During packet transmission -the skb->cb is used to provide additional data to device's header_ops->create -function. Be aware that this data can be overridden later (when socket code -submits skb to qdisc), so if you need something from that cb later, you should -store info in the skb->data on your own. +the skb->cb is used to provide additional data to the device's +header_ops->create function. 
Be aware that this data can be overridden later +(when socket code submits skb to qdisc), so if you need something from that cb +later, you should store info in the skb->data on your own. To hook the MLME interface you have to populate the ml_priv field of your net_device with a pointer to struct ieee802154_mlme_ops instance. The fields @@ -94,8 +95,9 @@ All other fields are required. SoftMAC ------- -The MAC is the middle layer in the IEEE 802.15.4 Linux stack. This moment it -provides interface for drivers registration and management of slave interfaces. +The MAC is the middle layer in the IEEE 802.15.4 Linux stack. At the moment, it +provides an interface for driver registration and management of slave +interfaces. NOTE: Currently the only monitor device type is supported - it's IEEE 802.15.4 stack interface for network sniffers (e.g. WireShark). From 34cd3bdffa111eb483c12c0a55d77ad0634bb5f3 Mon Sep 17 00:00:00 2001 From: Balaji Pothunoori Date: Tue, 29 Oct 2024 14:03:40 +0530 Subject: [PATCH 0002/1386] wifi: ath11k: Suspend hardware before firmware mode off for WCN6750 During rmmod, the ath11k host driver sends a QMI MODE OFF command to firmware. As part of this command, firmware initiates WLAN de-initialization and accesses certain UMAC registers during this process. Currently, on WCN6750 WLAN hardware, the system is in a sleep state when firmware receives the QMI MODE OFF command. This results in a firmware/hardware reset while accessing the UMAC hardware registers during sleep state. To avoid this, add logic to send WCN6750 hardware specific WMI_PDEV_SUSPEND_AND_DISABLE_INTR command to firmware prior to sending the QMI MODE OFF command. This will cause firmware to cease all activities and put the device in a powered-on state that prevents access to registers which have been powered off. 
Signed-off-by: Balaji Pothunoori Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241029083340.3010798-1-quic_bpothuno@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 45 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath11k/hw.h | 1 + 2 files changed, 46 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index be67382c00f6..a9aefb1a705d 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -123,6 +123,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, .support_dual_stations = false, + .pdev_suspend = false, }, { .hw_rev = ATH11K_HW_IPQ6018_HW10, @@ -207,6 +208,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = false, .support_dual_stations = false, + .pdev_suspend = false, }, { .name = "qca6390 hw2.0", @@ -296,6 +298,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = true, .support_dual_stations = true, + .pdev_suspend = false, }, { .name = "qcn9074 hw1.0", @@ -379,6 +382,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = false, .support_dual_stations = false, + .pdev_suspend = false, }, { .name = "wcn6855 hw2.0", @@ -468,6 +472,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = true, .support_dual_stations = true, + .pdev_suspend = false, }, { .name = "wcn6855 hw2.1", @@ -555,6 +560,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = true, .support_dual_stations = true, + .pdev_suspend = false, }, { .name = "wcn6750 hw1.0", @@ -637,6 +643,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { 
.smp2p_wow_exit = true, .support_fw_mac_sequence = true, .support_dual_stations = false, + .pdev_suspend = true, }, { .hw_rev = ATH11K_HW_IPQ5018_HW10, @@ -719,6 +726,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = false, .support_dual_stations = false, + .pdev_suspend = false, }, { .name = "qca2066 hw2.1", @@ -808,6 +816,7 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .smp2p_wow_exit = false, .support_fw_mac_sequence = true, .support_dual_stations = true, + .pdev_suspend = false, }, }; @@ -1669,11 +1678,47 @@ err_pdev_debug: return ret; } +static void ath11k_core_pdev_suspend_target(struct ath11k_base *ab) +{ + struct ath11k *ar; + struct ath11k_pdev *pdev; + unsigned long time_left; + int ret; + int i; + + if (!ab->hw_params.pdev_suspend) + return; + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + ar = pdev->ar; + + reinit_completion(&ab->htc_suspend); + + ret = ath11k_wmi_pdev_suspend(ar, WMI_PDEV_SUSPEND_AND_DISABLE_INTR, + pdev->pdev_id); + if (ret) { + ath11k_warn(ab, "could not suspend target :%d\n", ret); + /* pointless to try other pdevs */ + return; + } + + time_left = wait_for_completion_timeout(&ab->htc_suspend, 3 * HZ); + + if (!time_left) { + ath11k_warn(ab, "suspend timed out - target pause event never came\n"); + /* pointless to try other pdevs */ + return; + } + } +} + static void ath11k_core_pdev_destroy(struct ath11k_base *ab) { ath11k_spectral_deinit(ab); ath11k_thermal_unregister(ab); ath11k_mac_unregister(ab); + ath11k_core_pdev_suspend_target(ab); ath11k_hif_irq_disable(ab); ath11k_dp_pdev_free(ab); ath11k_debugfs_pdev_destroy(ab); diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h index 300322535766..52d9f4c13b13 100644 --- a/drivers/net/wireless/ath/ath11k/hw.h +++ b/drivers/net/wireless/ath/ath11k/hw.h @@ -227,6 +227,7 @@ struct ath11k_hw_params { bool smp2p_wow_exit; bool support_fw_mac_sequence; 
bool support_dual_stations; + bool pdev_suspend; }; struct ath11k_hw_ops { From 38db1ae301c37b3c9599f4f67cd495b6f3a9010e Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Mon, 28 Oct 2024 07:08:40 -0700 Subject: [PATCH 0003/1386] wifi: ath12k: mark QMI driver event helpers as noinline As described in [1], compiling the ath12k driver using clang with KASAN enabled warns about some functions with excessive stack usage, with the worst case being: drivers/net/wireless/ath/ath12k/qmi.c:3546:13: warning: stack frame size (2456) exceeds limit (1024) in 'ath12k_qmi_driver_event_work' [-Wframe-larger-than] Nathan [2] highlighted work done by Arnd [3] to address similar issues in other portions of the kernel. ath12k_qmi_driver_event_work() itself is a pretty lightweight function, but it dispatches to several other functions which do the real work: ath12k_qmi_driver_event_work() ath12k_qmi_event_server_arrive() ath12k_qmi_host_cap_send() ath12k_qmi_event_mem_request() ath12k_qmi_respond_fw_mem_request() ath12k_qmi_event_load_bdf() ath12k_qmi_request_target_cap() ath12k_qmi_load_bdf_qmi() ath12k_qmi_wlanfw_m3_info_send() Mark all of those underlying functions as 'noinline_for_stack' to prevent them from being inlined in ath12k_qmi_driver_event_work(), thereby eliminating the excessive stack usage. 
Link: https://msgid.link/bc214795-1c51-4cb7-922f-67d6ef98bff2@quicinc.com # [1] Link: https://msgid.link/20241025223321.GA3647469@thelio-3990X # [2] Link: https://lore.kernel.org/all/?q=f:arnd@kernel.org+Wframe-larger-than # [3] Acked-by: Kalle Valo Link: https://patch.msgid.link/20241028-ath12k_qmi_driver_event_work-v1-1-0d532eb593fa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 34 ++++++++++++++++++++------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index b93ce9f87f61..d2d9d03c7a28 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2066,7 +2066,9 @@ static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, req->mlo_chip_info_valid = 1; } -static int ath12k_qmi_host_cap_send(struct ath12k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_host_cap_send(struct ath12k_base *ab) { struct qmi_wlanfw_host_cap_req_msg_v01 req = {}; struct qmi_wlanfw_host_cap_resp_msg_v01 resp = {}; @@ -2275,7 +2277,9 @@ resp_out: return ret; } -static int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_respond_fw_mem_request(struct ath12k_base *ab) { struct qmi_wlanfw_respond_mem_req_msg_v01 *req; struct qmi_wlanfw_respond_mem_resp_msg_v01 resp = {}; @@ -2433,7 +2437,9 @@ this_chunk_done: return 0; } -static int ath12k_qmi_request_target_cap(struct ath12k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_request_target_cap(struct ath12k_base *ab) { struct qmi_wlanfw_cap_req_msg_v01 req = {}; struct qmi_wlanfw_cap_resp_msg_v01 resp = {}; @@ -2619,8 +2625,10 @@ out: return ret; } -static int ath12k_qmi_load_bdf_qmi(struct ath12k_base *ab, - enum ath12k_qmi_bdf_type type) +/* clang stack 
usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_load_bdf_qmi(struct ath12k_base *ab, + enum ath12k_qmi_bdf_type type) { struct device *dev = ab->dev; char filename[ATH12K_QMI_MAX_BDF_FILE_NAME_SIZE]; @@ -2791,7 +2799,9 @@ out: return ret; } -static int ath12k_qmi_wlanfw_m3_info_send(struct ath12k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_wlanfw_m3_info_send(struct ath12k_base *ab) { struct m3_mem_region *m3_mem = &ab->qmi.m3_mem; struct qmi_wlanfw_m3_info_req_msg_v01 req = {}; @@ -3079,7 +3089,9 @@ ath12k_qmi_driver_event_post(struct ath12k_qmi *qmi, return 0; } -static int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) { struct ath12k_base *ab = qmi->ab; int ret; @@ -3101,7 +3113,9 @@ static int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) return ret; } -static int ath12k_qmi_event_mem_request(struct ath12k_qmi *qmi) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_event_mem_request(struct ath12k_qmi *qmi) { struct ath12k_base *ab = qmi->ab; int ret; @@ -3115,7 +3129,9 @@ static int ath12k_qmi_event_mem_request(struct ath12k_qmi *qmi) return ret; } -static int ath12k_qmi_event_load_bdf(struct ath12k_qmi *qmi) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath12k_qmi_event_load_bdf(struct ath12k_qmi *qmi) { struct ath12k_base *ab = qmi->ab; int ret; From 757cc46520091ca103b9a948ddbdfa660a10879d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 1 Nov 2024 17:16:58 +0200 Subject: [PATCH 0004/1386] wifi: ath12k: ath12k_mac_vdev_create(): use goto for error handling In commit 477cabfdb776 ("wifi: ath12k: modify link arvif creation and removal for MLO") I had accidentally left one personal TODO comment about using goto instead of ret. 
Switch to use goto to be consistent with the error handling in the function. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index d493ec812055..fa48200f012e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -7034,8 +7034,7 @@ err_peer_del: ret = ath12k_wait_for_peer_delete_done(ar, arvif->vdev_id, arvif->bssid); if (ret) - /* KVALO: why not goto err? */ - return ret; + goto err_vdev_del; ar->num_peers--; } From 1ea0cdee6fb3a498e7413cb2a28a918464d33d48 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:16:59 +0200 Subject: [PATCH 0005/1386] wifi: ath12k: MLO vdev bringup changes Add changes to add the link vdevs dynamically whenever a channel is assigned from mac80211 for a link vdev. During vdev create, update ML address of the vdev to firmware using the new WMI parameter (WMI_TAG_MLO_VDEV_CREATE_PARAMS). During vdev start, notify the firmware that this link vdev is newly added and also indicate all its known partners so that the firmware can take necessary actions to internally update the partners on the new link being added. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Co-developed-by: Rameshkumar Sundaram Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 87 ++++++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/wmi.c | 85 +++++++++++++++++++++++++- drivers/net/wireless/ath/ath12k/wmi.h | 63 +++++++++++++++++++ 3 files changed, 230 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index fa48200f012e..1a97eafaa3d2 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -648,6 +648,18 @@ struct ath12k *ath12k_mac_get_ar_by_pdev_id(struct ath12k_base *ab, u32 pdev_id) return NULL; } +static bool ath12k_mac_is_ml_arvif(struct ath12k_link_vif *arvif) +{ + struct ath12k_vif *ahvif = arvif->ahvif; + + lockdep_assert_wiphy(ahvif->ah->hw->wiphy); + + if (ahvif->vif->valid_links & BIT(arvif->link_id)) + return true; + + return false; +} + static struct ath12k *ath12k_mac_get_ar_by_chan(struct ieee80211_hw *hw, struct ieee80211_channel *channel) { @@ -1498,7 +1510,8 @@ static int ath12k_mac_setup_bcn_tmpl_ema(struct ath12k_link_vif *arvif) tx_ahvif = ath12k_vif_to_ahvif(ahvif->vif->mbssid_tx_vif); tx_arvif = &tx_ahvif->deflink; beacons = ieee80211_beacon_get_template_ema_list(ath12k_ar_to_hw(tx_arvif->ar), - tx_ahvif->vif, 0); + tx_ahvif->vif, + tx_arvif->link_id); if (!beacons || !beacons->cnt) { ath12k_warn(arvif->ar->ab, "failed to get ema beacon templates from mac80211\n"); @@ -1563,7 +1576,7 @@ static int ath12k_mac_setup_bcn_tmpl(struct ath12k_link_vif *arvif) } bcn = ieee80211_beacon_get_template(ath12k_ar_to_hw(tx_arvif->ar), tx_ahvif->vif, - &offs, 0); + &offs, 
tx_arvif->link_id); if (!bcn) { ath12k_warn(ab, "failed to get beacon template from mac80211\n"); return -EPERM; @@ -1644,7 +1657,7 @@ static void ath12k_control_beaconing(struct ath12k_link_vif *arvif, ahvif->aid = 0; - ether_addr_copy(arvif->bssid, info->bssid); + ether_addr_copy(arvif->bssid, info->addr); params.vdev_id = arvif->vdev_id; params.aid = ahvif->aid; @@ -6658,6 +6671,8 @@ static int ath12k_mac_setup_vdev_create_arg(struct ath12k_link_vif *arvif, struct ath12k_vif *ahvif = arvif->ahvif; int ret; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + arg->if_id = arvif->vdev_id; arg->type = ahvif->vdev_type; arg->subtype = ahvif->vdev_subtype; @@ -6689,6 +6704,17 @@ static int ath12k_mac_setup_vdev_create_arg(struct ath12k_link_vif *arvif, } arg->if_stats_id = ath12k_mac_get_vdev_stats_id(arvif); + + if (ath12k_mac_is_ml_arvif(arvif)) { + if (hweight16(ahvif->vif->valid_links) > ATH12K_WMI_MLO_MAX_LINKS) { + ath12k_warn(ar->ab, "too many MLO links during setting up vdev: %d", + ahvif->vif->valid_links); + return -EINVAL; + } + + ether_addr_copy(arg->mld_addr, ahvif->vif->addr); + } + return 0; } @@ -7626,6 +7652,58 @@ ath12k_mac_check_down_grade_phy_mode(struct ath12k *ar, return down_mode; } +static void +ath12k_mac_mlo_get_vdev_args(struct ath12k_link_vif *arvif, + struct wmi_ml_arg *ml_arg) +{ + struct ath12k_vif *ahvif = arvif->ahvif; + struct wmi_ml_partner_info *partner_info; + struct ieee80211_bss_conf *link_conf; + struct ath12k_link_vif *arvif_p; + unsigned long links; + u8 link_id; + + lockdep_assert_wiphy(ahvif->ah->hw->wiphy); + + if (!ath12k_mac_is_ml_arvif(arvif)) + return; + + if (hweight16(ahvif->vif->valid_links) > ATH12K_WMI_MLO_MAX_LINKS) + return; + + ml_arg->enabled = true; + + /* Driver always add a new link via VDEV START, FW takes + * care of internally adding this link to existing + * link vdevs which are advertised as partners below + */ + ml_arg->link_add = true; + partner_info = ml_arg->partner_info; + + links = 
ahvif->links_map; + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif_p = wiphy_dereference(ahvif->ah->hw->wiphy, ahvif->link[link_id]); + + if (WARN_ON(!arvif_p)) + continue; + + if (arvif == arvif_p) + continue; + + link_conf = wiphy_dereference(ahvif->ah->hw->wiphy, + ahvif->vif->link_conf[arvif_p->link_id]); + + if (!link_conf) + continue; + + partner_info->vdev_id = arvif_p->vdev_id; + partner_info->hw_link_id = arvif_p->ar->pdev->hw_link_id; + ether_addr_copy(partner_info->addr, link_conf->addr); + ml_arg->num_partner_links++; + partner_info++; + } +} + static int ath12k_mac_vdev_start_restart(struct ath12k_link_vif *arvif, struct ieee80211_chanctx_conf *ctx, @@ -7704,6 +7782,9 @@ ath12k_mac_vdev_start_restart(struct ath12k_link_vif *arvif, arg.passive |= !!(chandef->chan->flags & IEEE80211_CHAN_NO_IR); + if (!restart) + ath12k_mac_mlo_get_vdev_args(arvif, &arg.ml); + ath12k_dbg(ab, ATH12K_DBG_MAC, "mac vdev %d start center_freq %d phymode %s punct_bitmap 0x%x\n", arg.vdev_id, arg.freq, diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index dced2aa9ba1a..e089b58bbea1 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -821,6 +821,8 @@ int ath12k_wmi_vdev_create(struct ath12k *ar, u8 *macaddr, struct wmi_vdev_create_cmd *cmd; struct sk_buff *skb; struct ath12k_wmi_vdev_txrx_streams_params *txrx_streams; + bool is_ml_vdev = is_valid_ether_addr(args->mld_addr); + struct wmi_vdev_create_mlo_params *ml_params; struct wmi_tlv *tlv; int ret, len; void *ptr; @@ -830,7 +832,8 @@ int ath12k_wmi_vdev_create(struct ath12k *ar, u8 *macaddr, * both the bands. */ len = sizeof(*cmd) + TLV_HDR_SIZE + - (WMI_NUM_SUPPORTED_BAND_MAX * sizeof(*txrx_streams)); + (WMI_NUM_SUPPORTED_BAND_MAX * sizeof(*txrx_streams)) + + (is_ml_vdev ? 
TLV_HDR_SIZE + sizeof(*ml_params) : 0); skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); if (!skb) @@ -879,6 +882,21 @@ int ath12k_wmi_vdev_create(struct ath12k *ar, u8 *macaddr, txrx_streams->supported_rx_streams = cpu_to_le32(args->chains[NL80211_BAND_5GHZ].rx); + ptr += WMI_NUM_SUPPORTED_BAND_MAX * sizeof(*txrx_streams); + + if (is_ml_vdev) { + tlv = ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*ml_params)); + ptr += TLV_HDR_SIZE; + ml_params = ptr; + + ml_params->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_VDEV_CREATE_PARAMS, + sizeof(*ml_params)); + ether_addr_copy(ml_params->mld_macaddr.addr, args->mld_addr); + } + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "WMI vdev create: id %d type %d subtype %d macaddr %pM pdevid %d\n", args->if_id, args->type, args->subtype, @@ -1020,19 +1038,27 @@ static void ath12k_wmi_put_wmi_channel(struct ath12k_wmi_channel_params *chan, int ath12k_wmi_vdev_start(struct ath12k *ar, struct wmi_vdev_start_req_arg *arg, bool restart) { + struct wmi_vdev_start_mlo_params *ml_params; + struct wmi_partner_link_info *partner_info; struct ath12k_wmi_pdev *wmi = ar->wmi; struct wmi_vdev_start_request_cmd *cmd; struct sk_buff *skb; struct ath12k_wmi_channel_params *chan; struct wmi_tlv *tlv; void *ptr; - int ret, len; + int ret, len, i, ml_arg_size = 0; if (WARN_ON(arg->ssid_len > sizeof(cmd->ssid.ssid))) return -EINVAL; len = sizeof(*cmd) + sizeof(*chan) + TLV_HDR_SIZE; + if (!restart && arg->ml.enabled) { + ml_arg_size = TLV_HDR_SIZE + sizeof(*ml_params) + + TLV_HDR_SIZE + (arg->ml.num_partner_links * + sizeof(*partner_info)); + len += ml_arg_size; + } skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); if (!skb) return -ENOMEM; @@ -1085,6 +1111,61 @@ int ath12k_wmi_vdev_start(struct ath12k *ar, struct wmi_vdev_start_req_arg *arg, ptr += sizeof(*tlv); + if (ml_arg_size) { + tlv = ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*ml_params)); + ptr += TLV_HDR_SIZE; + + ml_params = ptr; + + 
ml_params->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_VDEV_START_PARAMS, + sizeof(*ml_params)); + + ml_params->flags = le32_encode_bits(arg->ml.enabled, + ATH12K_WMI_FLAG_MLO_ENABLED) | + le32_encode_bits(arg->ml.assoc_link, + ATH12K_WMI_FLAG_MLO_ASSOC_LINK) | + le32_encode_bits(arg->ml.mcast_link, + ATH12K_WMI_FLAG_MLO_MCAST_VDEV) | + le32_encode_bits(arg->ml.link_add, + ATH12K_WMI_FLAG_MLO_LINK_ADD); + + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "vdev %d start ml flags 0x%x\n", + arg->vdev_id, ml_params->flags); + + ptr += sizeof(*ml_params); + + tlv = ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + arg->ml.num_partner_links * + sizeof(*partner_info)); + ptr += TLV_HDR_SIZE; + + partner_info = ptr; + + for (i = 0; i < arg->ml.num_partner_links; i++) { + partner_info->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_PARTNER_LINK_PARAMS, + sizeof(*partner_info)); + partner_info->vdev_id = + cpu_to_le32(arg->ml.partner_info[i].vdev_id); + partner_info->hw_link_id = + cpu_to_le32(arg->ml.partner_info[i].hw_link_id); + ether_addr_copy(partner_info->vdev_addr.addr, + arg->ml.partner_info[i].addr); + + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "partner vdev %d hw_link_id %d macaddr%pM\n", + partner_info->vdev_id, partner_info->hw_link_id, + partner_info->vdev_addr.addr); + + partner_info++; + } + + ptr = partner_info; + } + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "vdev %s id 0x%x freq 0x%x mode 0x%x\n", restart ? 
"restart" : "start", arg->vdev_id, arg->freq, arg->mode); diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 6f55dbdf629d..0ddd7ce97385 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -1929,6 +1929,19 @@ enum wmi_tlv_tag { WMI_TAG_REGULATORY_RULE_EXT_STRUCT = 0x3A9, WMI_TAG_REG_CHAN_LIST_CC_EXT_EVENT, WMI_TAG_EHT_RATE_SET = 0x3C4, + WMI_TAG_DCS_AWGN_INT_TYPE = 0x3C5, + WMI_TAG_MLO_TX_SEND_PARAMS, + WMI_TAG_MLO_PARTNER_LINK_PARAMS, + WMI_TAG_MLO_PARTNER_LINK_PARAMS_PEER_ASSOC, + WMI_TAG_MLO_SETUP_CMD = 0x3C9, + WMI_TAG_MLO_SETUP_COMPLETE_EVENT, + WMI_TAG_MLO_READY_CMD, + WMI_TAG_MLO_TEARDOWN_CMD, + WMI_TAG_MLO_TEARDOWN_COMPLETE, + WMI_TAG_MLO_PEER_ASSOC_PARAMS = 0x3D0, + WMI_TAG_MLO_PEER_CREATE_PARAMS = 0x3D5, + WMI_TAG_MLO_VDEV_START_PARAMS = 0x3D6, + WMI_TAG_MLO_VDEV_CREATE_PARAMS = 0x3D7, WMI_TAG_PDEV_SET_BIOS_SAR_TABLE_CMD = 0x3D8, WMI_TAG_PDEV_SET_BIOS_GEO_TABLE_CMD = 0x3D9, WMI_TAG_PDEV_SET_BIOS_INTERFACE_CMD = 0x3FB, @@ -2740,6 +2753,7 @@ struct ath12k_wmi_vdev_create_arg { u8 if_stats_id; u32 mbssid_flags; u32 mbssid_tx_vdev_id; + u8 mld_addr[ETH_ALEN]; }; #define ATH12K_MAX_VDEV_STATS_ID 0x30 @@ -2766,6 +2780,33 @@ struct ath12k_wmi_vdev_txrx_streams_params { __le32 supported_rx_streams; } __packed; +struct wmi_vdev_create_mlo_params { + __le32 tlv_header; + struct ath12k_wmi_mac_addr_params mld_macaddr; +} __packed; + +#define ATH12K_WMI_FLAG_MLO_ENABLED BIT(0) +#define ATH12K_WMI_FLAG_MLO_ASSOC_LINK BIT(1) +#define ATH12K_WMI_FLAG_MLO_PRIMARY_UMAC BIT(2) +#define ATH12K_WMI_FLAG_MLO_LOGICAL_LINK_IDX_VALID BIT(3) +#define ATH12K_WMI_FLAG_MLO_PEER_ID_VALID BIT(4) +#define ATH12K_WMI_FLAG_MLO_MCAST_VDEV BIT(5) +#define ATH12K_WMI_FLAG_MLO_EMLSR_SUPPORT BIT(6) +#define ATH12K_WMI_FLAG_MLO_FORCED_INACTIVE BIT(7) +#define ATH12K_WMI_FLAG_MLO_LINK_ADD BIT(8) + +struct wmi_vdev_start_mlo_params { + __le32 tlv_header; + __le32 flags; +} __packed; + +struct 
wmi_partner_link_info { + __le32 tlv_header; + __le32 vdev_id; + __le32 hw_link_id; + struct ath12k_wmi_mac_addr_params vdev_addr; +} __packed; + struct wmi_vdev_delete_cmd { __le32 tlv_header; __le32 vdev_id; @@ -2909,6 +2950,27 @@ enum wmi_phy_mode { MODE_MAX = 33, }; +#define ATH12K_WMI_MLO_MAX_LINKS 4 + +struct wmi_ml_partner_info { + u32 vdev_id; + u32 hw_link_id; + u8 addr[ETH_ALEN]; + bool assoc_link; + bool primary_umac; + bool logical_link_idx_valid; + u32 logical_link_idx; +}; + +struct wmi_ml_arg { + bool enabled; + bool assoc_link; + bool mcast_link; + bool link_add; + u8 num_partner_links; + struct wmi_ml_partner_info partner_info[ATH12K_WMI_MLO_MAX_LINKS]; +}; + struct wmi_vdev_start_req_arg { u32 vdev_id; u32 freq; @@ -2946,6 +3008,7 @@ struct wmi_vdev_start_req_arg { u32 mbssid_flags; u32 mbssid_tx_vdev_id; u32 punct_bitmap; + struct wmi_ml_arg ml; }; struct ath12k_wmi_peer_create_arg { From c8a98ed160e54ae629364a1efd5379bed839d633 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:17:00 +0200 Subject: [PATCH 0006/1386] wifi: ath12k: Refactor sta state machine Refactor ath12k_mac_op_sta_state(), with generic wrappers which can be used for both multi link stations and non-ML stations. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 3 + drivers/net/wireless/ath/ath12k/mac.c | 343 +++++++++++++++++-------- 2 files changed, 244 insertions(+), 102 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 3bf31ee5b9fa..23e27d119859 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -469,6 +469,9 @@ struct ath12k_link_sta { struct ath12k_link_vif *arvif; struct ath12k_sta *ahsta; + /* link address similar to ieee80211_link_sta */ + u8 addr[ETH_ALEN]; + /* the following are protected by ar->data_lock */ u32 changed; /* IEEE80211_RC_* */ u32 bw; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1a97eafaa3d2..73a80b55b229 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4505,10 +4505,10 @@ ath12k_mac_set_peer_vht_fixed_rate(struct ath12k_link_vif *arvif, return ret; } -static int ath12k_station_assoc(struct ath12k *ar, - struct ath12k_link_vif *arvif, - struct ath12k_link_sta *arsta, - bool reassoc) +static int ath12k_mac_station_assoc(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta, + bool reassoc) { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); @@ -4595,28 +4595,19 @@ static int ath12k_station_assoc(struct ath12k *ar, return 0; } -static int ath12k_station_disassoc(struct ath12k *ar, - struct ath12k_link_vif *arvif, - struct ath12k_link_sta *arsta) +static int ath12k_mac_station_disassoc(struct 
ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta) { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); if (!sta->wme) { arvif->num_legacy_stations--; - ret = ath12k_recalc_rtscts_prot(arvif); - if (ret) - return ret; + return ath12k_recalc_rtscts_prot(arvif); } - ret = ath12k_clear_peer_keys(arvif, sta->addr); - if (ret) { - ath12k_warn(ar->ab, "failed to clear all peer keys for vdev %i: %d\n", - arvif->vdev_id, ret); - return ret; - } return 0; } @@ -4812,6 +4803,147 @@ static void ath12k_mac_dec_num_stations(struct ath12k_link_vif *arvif, ar->num_stations--; } +static void ath12k_mac_station_post_remove(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta) +{ + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + struct ath12k_sta *ahsta = arsta->ahsta; + struct ath12k_peer *peer; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + ath12k_mac_dec_num_stations(arvif, arsta); + + spin_lock_bh(&ar->ab->base_lock); + + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); + if (peer && peer->sta == sta) { + ath12k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", + vif->addr, arvif->vdev_id); + peer->sta = NULL; + list_del(&peer->list); + kfree(peer); + ar->num_peers--; + } + + spin_unlock_bh(&ar->ab->base_lock); + + kfree(arsta->rx_stats); + arsta->rx_stats = NULL; + + if (arsta->link_id < IEEE80211_MLD_MAX_NUM_LINKS) { + ahsta->links_map &= ~(BIT(arsta->link_id)); + rcu_assign_pointer(ahsta->link[arsta->link_id], NULL); + synchronize_rcu(); + arsta->link_id = ATH12K_INVALID_LINK_ID; + arsta->ahsta = NULL; + } +} + +static int ath12k_mac_station_unauthorize(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta) +{ + struct ath12k_peer *peer; + int ret; + + 
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + spin_lock_bh(&ar->ab->base_lock); + + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, arsta->addr); + if (peer) + peer->is_authorized = false; + + spin_unlock_bh(&ar->ab->base_lock); + + /* Driver must clear the keys during the state change from + * IEEE80211_STA_AUTHORIZED to IEEE80211_STA_ASSOC, since after + * returning from here, mac80211 is going to delete the keys + * in __sta_info_destroy_part2(). This will ensure that the driver does + * not retain stale key references after mac80211 deletes the keys. + */ + ret = ath12k_clear_peer_keys(arvif, arsta->addr); + if (ret) { + ath12k_warn(ar->ab, "failed to clear all peer keys for vdev %i: %d\n", + arvif->vdev_id, ret); + return ret; + } + + return 0; +} + +static int ath12k_mac_station_authorize(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta) +{ + struct ath12k_peer *peer; + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + int ret; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + spin_lock_bh(&ar->ab->base_lock); + + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); + if (peer) + peer->is_authorized = true; + + spin_unlock_bh(&ar->ab->base_lock); + + if (vif->type == NL80211_IFTYPE_STATION && arvif->is_up) { + ret = ath12k_wmi_set_peer_param(ar, sta->addr, + arvif->vdev_id, + WMI_PEER_AUTHORIZE, + 1); + if (ret) { + ath12k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n", + sta->addr, arvif->vdev_id, ret); + return ret; + } + } + + return 0; +} + +static int ath12k_mac_station_remove(struct ath12k *ar, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta) +{ + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + struct ath12k_vif *ahvif = arvif->ahvif; + int ret; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + wiphy_work_cancel(ar->ah->hw->wiphy, &arsta->update_wk); + + if 
(ahvif->vdev_type == WMI_VDEV_TYPE_STA) { + ath12k_bss_disassoc(ar, arvif); + ret = ath12k_mac_vdev_stop(arvif); + if (ret) + ath12k_warn(ar->ab, "failed to stop vdev %i: %d\n", + arvif->vdev_id, ret); + } + + ath12k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); + + ret = ath12k_peer_delete(ar, arvif->vdev_id, sta->addr); + if (ret) + ath12k_warn(ar->ab, "Failed to delete peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + else + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n", + sta->addr, arvif->vdev_id); + + ath12k_mac_station_post_remove(ar, arvif, arsta); + + return ret; +} + static int ath12k_mac_station_add(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta) @@ -4919,31 +5051,37 @@ static u32 ath12k_mac_ieee80211_sta_bw_to_wmi(struct ath12k *ar, return bw; } -static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - enum ieee80211_sta_state old_state, - enum ieee80211_sta_state new_state) +static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, + struct ath12k_link_vif *arvif, + struct ath12k_link_sta *arsta, + enum ieee80211_sta_state old_state, + enum ieee80211_sta_state new_state) { - struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); - struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); - struct ath12k *ar; - struct ath12k_link_vif *arvif; - struct ath12k_link_sta *arsta; - struct ath12k_peer *peer; + struct ath12k *ar = arvif->ar; + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + struct ath12k_sta *ahsta = arsta->ahsta; int ret = 0; lockdep_assert_wiphy(hw->wiphy); - arvif = &ahvif->deflink; - arsta = &ahsta->deflink; + /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: Remove the station + * from driver + */ + if ((old_state == IEEE80211_STA_NONE && + new_state == IEEE80211_STA_NOTEXIST)) { + /* ML sta needs separate handling */ + if 
(sta->mlo) + return 0; - ar = ath12k_get_ar_by_vif(hw, vif); - if (!ar) { - WARN_ON_ONCE(1); - return -EINVAL; + ret = ath12k_mac_station_remove(ar, arvif, arsta); + if (ret) { + ath12k_warn(ar->ab, "Failed to remove station: %pM for VDEV: %d\n", + arsta->addr, arvif->vdev_id); + } } + /* IEEE80211_STA_NOTEXIST -> IEEE80211_STA_NONE: Add new station to driver */ if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { memset(arsta, 0, sizeof(*arsta)); @@ -4961,56 +5099,16 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, if (ret) ath12k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n", sta->addr, arvif->vdev_id); - } else if ((old_state == IEEE80211_STA_NONE && - new_state == IEEE80211_STA_NOTEXIST)) { - wiphy_work_cancel(hw->wiphy, &arsta->update_wk); - if (ahvif->vdev_type == WMI_VDEV_TYPE_STA) { - ath12k_bss_disassoc(ar, arvif); - ret = ath12k_mac_vdev_stop(arvif); - if (ret) - ath12k_warn(ar->ab, "failed to stop vdev %i: %d\n", - arvif->vdev_id, ret); - } - ath12k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); - - ret = ath12k_peer_delete(ar, arvif->vdev_id, sta->addr); - if (ret) - ath12k_warn(ar->ab, "Failed to delete peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); - else - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); - - ath12k_mac_dec_num_stations(arvif, arsta); - spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); - if (peer && peer->sta == sta) { - ath12k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", - vif->addr, arvif->vdev_id); - peer->sta = NULL; - list_del(&peer->list); - kfree(peer); - ar->num_peers--; - } - spin_unlock_bh(&ar->ab->base_lock); - - kfree(arsta->rx_stats); - arsta->rx_stats = NULL; - - if (arsta->link_id < IEEE80211_MLD_MAX_NUM_LINKS) { - rcu_assign_pointer(ahsta->link[arsta->link_id], NULL); - synchronize_rcu(); - ahsta->links_map &= 
~(BIT(arsta->link_id)); - arsta->link_id = ATH12K_INVALID_LINK_ID; - arsta->ahsta = NULL; - } + /* IEEE80211_STA_AUTH -> IEEE80211_STA_ASSOC: Send station assoc command for + * peer associated to AP/Mesh/ADHOC vif type. + */ } else if (old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { - ret = ath12k_station_assoc(ar, arvif, arsta, false); + ret = ath12k_mac_station_assoc(ar, arvif, arsta, false); if (ret) ath12k_warn(ar->ab, "Failed to associate station: %pM\n", sta->addr); @@ -5021,40 +5119,32 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, arsta->bw_prev = sta->deflink.bandwidth; spin_unlock_bh(&ar->data_lock); + + /* IEEE80211_STA_ASSOC -> IEEE80211_STA_AUTHORIZED: set peer status as + * authorized + */ } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTHORIZED) { - spin_lock_bh(&ar->ab->base_lock); + ret = ath12k_mac_station_authorize(ar, arvif, arsta); + if (ret) + ath12k_warn(ar->ab, "Failed to authorize station: %pM\n", + sta->addr); - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); - if (peer) - peer->is_authorized = true; - - spin_unlock_bh(&ar->ab->base_lock); - - if (vif->type == NL80211_IFTYPE_STATION && arvif->is_up) { - ret = ath12k_wmi_set_peer_param(ar, sta->addr, - arvif->vdev_id, - WMI_PEER_AUTHORIZE, - 1); - if (ret) - ath12k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n", - sta->addr, arvif->vdev_id, ret); - } + /* IEEE80211_STA_AUTHORIZED -> IEEE80211_STA_ASSOC: station may be in removal, + * deauthorize it. 
+ */ } else if (old_state == IEEE80211_STA_AUTHORIZED && new_state == IEEE80211_STA_ASSOC) { - spin_lock_bh(&ar->ab->base_lock); - - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); - if (peer) - peer->is_authorized = false; - - spin_unlock_bh(&ar->ab->base_lock); + ath12k_mac_station_unauthorize(ar, arvif, arsta); + /* IEEE80211_STA_ASSOC -> IEEE80211_STA_AUTH: disassoc peer connected to + * AP/mesh/ADHOC vif type. + */ } else if (old_state == IEEE80211_STA_ASSOC && new_state == IEEE80211_STA_AUTH && (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_ADHOC)) { - ret = ath12k_station_disassoc(ar, arvif, arsta); + ret = ath12k_mac_station_disassoc(ar, arvif, arsta); if (ret) ath12k_warn(ar->ab, "Failed to disassociate station: %pM\n", sta->addr); @@ -5063,6 +5153,55 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, return ret; } +static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + enum ieee80211_sta_state old_state, + enum ieee80211_sta_state new_state) +{ + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_link_vif *arvif; + struct ath12k_link_sta *arsta; + int ret; + u8 link_id = 0; + + lockdep_assert_wiphy(hw->wiphy); + + if (ieee80211_vif_is_mld(vif) && sta->valid_links) { + WARN_ON(!sta->mlo && hweight16(sta->valid_links) != 1); + link_id = ffs(sta->valid_links) - 1; + } + + /* Handle for non-ML station */ + if (!sta->mlo) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + arsta = &ahsta->deflink; + arsta->ahsta = ahsta; + + if (WARN_ON(!arvif || !arsta)) { + ret = -EINVAL; + goto exit; + } + + /* vdev might be in deleted */ + if (WARN_ON(!arvif->ar)) { + ret = -EINVAL; + goto exit; + } + + ret = ath12k_mac_handle_link_sta_state(hw, arvif, arsta, + old_state, new_state); + if (ret) + goto exit; + } + + ret = 0; + +exit: + return 
ret; +} + static int ath12k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) From a2189d2b8005cd9f3a440512af087eb9b62c103e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Fri, 1 Nov 2024 17:17:01 +0200 Subject: [PATCH 0007/1386] wifi: ath12k: introduce ath12k_hw_warn() In the following patch we need to use ath12k_warn() but don't easily have access to struct ath12k_base (ab) but do have access to struct ath12k_hw (ah). So add a new warning helper ath12k_hw_warn() which takes the latter; the log output is still identical, but it uses the struct device pointer stored in struct ath12k_hw. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 2 ++ drivers/net/wireless/ath/ath12k/debug.c | 6 +++--- drivers/net/wireless/ath/ath12k/debug.h | 5 ++++- drivers/net/wireless/ath/ath12k/mac.c | 2 ++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 23e27d119859..e084e231f753 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -691,6 +691,8 @@ struct ath12k { struct ath12k_hw { struct ieee80211_hw *hw; + struct device *dev; + /* Protect the write operation of the hardware state ath12k_hw::state * between hardware start<=>reconfigure<=>stop transitions. */ diff --git a/drivers/net/wireless/ath/ath12k/debug.c b/drivers/net/wireless/ath/ath12k/debug.c index fe5a732ba9ec..ff6eaeafa092 100644 --- a/drivers/net/wireless/ath/ath12k/debug.c +++ b/drivers/net/wireless/ath/ath12k/debug.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2021 The Linux Foundation. All rights reserved.
- * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -36,7 +36,7 @@ void ath12k_err(struct ath12k_base *ab, const char *fmt, ...) va_end(args); } -void ath12k_warn(struct ath12k_base *ab, const char *fmt, ...) +void __ath12k_warn(struct device *dev, const char *fmt, ...) { struct va_format vaf = { .fmt = fmt, @@ -45,7 +45,7 @@ void ath12k_warn(struct ath12k_base *ab, const char *fmt, ...) va_start(args, fmt); vaf.va = &args; - dev_warn_ratelimited(ab->dev, "%pV", &vaf); + dev_warn_ratelimited(dev, "%pV", &vaf); /* TODO: Trace the log */ va_end(args); } diff --git a/drivers/net/wireless/ath/ath12k/debug.h b/drivers/net/wireless/ath/ath12k/debug.h index f7005917362c..90e801136bc6 100644 --- a/drivers/net/wireless/ath/ath12k/debug.h +++ b/drivers/net/wireless/ath/ath12k/debug.h @@ -31,7 +31,10 @@ enum ath12k_debug_mask { __printf(2, 3) void ath12k_info(struct ath12k_base *ab, const char *fmt, ...); __printf(2, 3) void ath12k_err(struct ath12k_base *ab, const char *fmt, ...); -__printf(2, 3) void ath12k_warn(struct ath12k_base *ab, const char *fmt, ...); +__printf(2, 3) void __ath12k_warn(struct device *dev, const char *fmt, ...); + +#define ath12k_warn(ab, fmt, ...) __ath12k_warn((ab)->dev, fmt, ##__VA_ARGS__) +#define ath12k_hw_warn(ah, fmt, ...) 
__ath12k_warn((ah)->dev, fmt, ##__VA_ARGS__) extern unsigned int ath12k_debug_mask; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 73a80b55b229..5b8d4aa0eefd 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10179,6 +10179,8 @@ int ath12k_mac_allocate(struct ath12k_base *ab) goto err; } + ah->dev = ab->dev; + ab->ah[i] = ah; } From 7fd8b4cbde65bf65d32e1a6615ae8160cf305ef8 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:17:02 +0200 Subject: [PATCH 0008/1386] wifi: ath12k: Add helpers for multi link peer creation and deletion Add helper functions for multi link peer addition and deletion. And add address validation to ensure we are not creating link peers (belonging to different clients) with same MLD address. To aid in this validation for faster lookup, add a new list of ML peers to struct ath12k_hw::ml_peers and use the same for parsing for the above address validation use cases. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-6-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 13 ++++ drivers/net/wireless/ath/ath12k/mac.c | 2 + drivers/net/wireless/ath/ath12k/peer.c | 98 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/peer.h | 8 +++ 4 files changed, 121 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index e084e231f753..7324dae3fcb8 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -63,6 +63,13 @@ #define ATH12K_RECONFIGURE_TIMEOUT_HZ (10 * HZ) #define ATH12K_RECOVER_START_TIMEOUT_HZ (20 * HZ) +#define ATH12K_MAX_SOCS 3 +#define ATH12K_INVALID_GROUP_ID 0xFF +#define ATH12K_INVALID_DEVICE_ID 0xFF + +#define ATH12K_MAX_MLO_PEERS 256 +#define ATH12K_MLO_PEER_ID_INVALID 0xFFFF + enum ath12k_bdf_search { ATH12K_BDF_SEARCH_DEFAULT, ATH12K_BDF_SEARCH_BUS_AND_BOARD, @@ -496,6 +503,7 @@ struct ath12k_sta { struct ath12k_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; /* indicates bitmap of link sta created in FW */ u16 links_map; + u16 ml_peer_id; }; #define ATH12K_MIN_5G_FREQ 4150 @@ -703,6 +711,11 @@ struct ath12k_hw { u8 num_radio; + DECLARE_BITMAP(free_ml_peer_id_map, ATH12K_MAX_MLO_PEERS); + + /* protected by wiphy_lock() */ + struct list_head ml_peers; + /* Keep last */ struct ath12k radio[] __aligned(sizeof(void *)); }; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 5b8d4aa0eefd..61d05ffa364a 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5091,6 +5091,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, 
ahsta->links_map = BIT(arsta->link_id); arsta->ahsta = ahsta; arsta->arvif = arvif; + ether_addr_copy(arsta->addr, sta->addr); wiphy_work_init(&arsta->update_wk, ath12k_sta_rc_update_wk); synchronize_rcu(); @@ -10110,6 +10111,7 @@ static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_base *ab, ah->num_radio = num_pdev_map; mutex_init(&ah->hw_mutex); + INIT_LIST_HEAD(&ah->ml_peers); for (i = 0; i < num_pdev_map; i++) { ab = pdev_map[i].ab; diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 7a62665b8af9..2ad19baf0664 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -8,6 +8,22 @@ #include "peer.h" #include "debug.h" +static struct ath12k_ml_peer *ath12k_peer_ml_find(struct ath12k_hw *ah, const u8 *addr) +{ + struct ath12k_ml_peer *ml_peer; + + lockdep_assert_wiphy(ah->hw->wiphy); + + list_for_each_entry(ml_peer, &ah->ml_peers, list) { + if (!ether_addr_equal(ml_peer->addr, addr)) + continue; + + return ml_peer; + } + + return NULL; +} + struct ath12k_peer *ath12k_peer_find(struct ath12k_base *ab, int vdev_id, const u8 *addr) { @@ -341,3 +357,85 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, return 0; } + +static u16 ath12k_peer_ml_alloc(struct ath12k_hw *ah) +{ + u16 ml_peer_id; + + lockdep_assert_wiphy(ah->hw->wiphy); + + for (ml_peer_id = 0; ml_peer_id < ATH12K_MAX_MLO_PEERS; ml_peer_id++) { + if (test_bit(ml_peer_id, ah->free_ml_peer_id_map)) + continue; + + set_bit(ml_peer_id, ah->free_ml_peer_id_map); + break; + } + + if (ml_peer_id == ATH12K_MAX_MLO_PEERS) + ml_peer_id = ATH12K_MLO_PEER_ID_INVALID; + + return ml_peer_id; +} + +int ath12k_peer_ml_create(struct ath12k_hw *ah, struct ieee80211_sta *sta) +{ + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_ml_peer *ml_peer; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (!sta->mlo) + return -EINVAL; + + ml_peer = ath12k_peer_ml_find(ah, sta->addr); + if 
(ml_peer) { + ath12k_hw_warn(ah, "ML peer %d exists already, unable to add new entry for %pM", + ml_peer->id, sta->addr); + return -EEXIST; + } + + ml_peer = kzalloc(sizeof(*ml_peer), GFP_ATOMIC); + if (!ml_peer) + return -ENOMEM; + + ahsta->ml_peer_id = ath12k_peer_ml_alloc(ah); + + if (ahsta->ml_peer_id == ATH12K_MLO_PEER_ID_INVALID) { + ath12k_hw_warn(ah, "unable to allocate ML peer id for sta %pM", + sta->addr); + kfree(ml_peer); + return -ENOMEM; + } + + ether_addr_copy(ml_peer->addr, sta->addr); + ml_peer->id = ahsta->ml_peer_id; + list_add(&ml_peer->list, &ah->ml_peers); + + return 0; +} + +int ath12k_peer_ml_delete(struct ath12k_hw *ah, struct ieee80211_sta *sta) +{ + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_ml_peer *ml_peer; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (!sta->mlo) + return -EINVAL; + + clear_bit(ahsta->ml_peer_id, ah->free_ml_peer_id_map); + ahsta->ml_peer_id = ATH12K_MLO_PEER_ID_INVALID; + + ml_peer = ath12k_peer_ml_find(ah, sta->addr); + if (!ml_peer) { + ath12k_hw_warn(ah, "ML peer for %pM not found", sta->addr); + return -EINVAL; + } + + list_del(&ml_peer->list); + kfree(ml_peer); + + return 0; +} diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index b955f0cdf598..085246ca938d 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -49,6 +49,12 @@ struct ath12k_peer { bool dp_setup_done; }; +struct ath12k_ml_peer { + struct list_head list; + u8 addr[ETH_ALEN]; + u16 id; +}; + void ath12k_peer_unmap_event(struct ath12k_base *ab, u16 peer_id); void ath12k_peer_map_event(struct ath12k_base *ab, u8 vdev_id, u16 peer_id, u8 *mac_addr, u16 ast_hash, u16 hw_peer_id); @@ -66,5 +72,7 @@ int ath12k_wait_for_peer_delete_done(struct ath12k *ar, u32 vdev_id, const u8 *addr); bool ath12k_peer_exist_by_vdev_id(struct ath12k_base *ab, int vdev_id); struct ath12k_peer *ath12k_peer_find_by_ast(struct ath12k_base *ab, int ast_hash); 
+int ath12k_peer_ml_create(struct ath12k_hw *ah, struct ieee80211_sta *sta); +int ath12k_peer_ml_delete(struct ath12k_hw *ah, struct ieee80211_sta *sta); #endif /* _PEER_H_ */ From 0660e1e2ed5ff493f1e383a32d28db2b7d8490f7 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:17:03 +0200 Subject: [PATCH 0009/1386] wifi: ath12k: add multi-link flag in peer create command Driver should indicate to firmware whether a peer is multi-link or not in peer create command using multi-link flag. Add changes to support WMI_TAG_MLO_PEER_CREATE_PARAMS in WMI_PEER_CREATE_CMDID. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-7-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 5 +++-- drivers/net/wireless/ath/ath12k/wmi.c | 27 +++++++++++++++++++++++---- drivers/net/wireless/ath/ath12k/wmi.h | 6 ++++++ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 61d05ffa364a..076e4da4875f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4969,8 +4969,9 @@ static int ath12k_mac_station_add(struct ath12k *ar, } peer_param.vdev_id = arvif->vdev_id; - peer_param.peer_addr = sta->addr; + peer_param.peer_addr = arsta->addr; peer_param.peer_type = WMI_PEER_TYPE_DEFAULT; + peer_param.ml_enabled = sta->mlo; ret = ath12k_peer_create(ar, arvif, sta, &peer_param); if (ret) { @@ -7005,7 +7006,7 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); struct ath12k_wmi_vdev_create_arg vdev_arg = {0}; - struct ath12k_wmi_peer_create_arg 
peer_param; + struct ath12k_wmi_peer_create_arg peer_param = {0}; struct ieee80211_bss_conf *link_conf; u32 param_id, param_value; u16 nss; diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index e089b58bbea1..0583d832fac7 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -1230,9 +1230,14 @@ int ath12k_wmi_send_peer_create_cmd(struct ath12k *ar, struct ath12k_wmi_pdev *wmi = ar->wmi; struct wmi_peer_create_cmd *cmd; struct sk_buff *skb; - int ret; + int ret, len; + struct wmi_peer_create_mlo_params *ml_param; + void *ptr; + struct wmi_tlv *tlv; - skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd)); + len = sizeof(*cmd) + TLV_HDR_SIZE + sizeof(*ml_param); + + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); if (!skb) return -ENOMEM; @@ -1244,9 +1249,23 @@ int ath12k_wmi_send_peer_create_cmd(struct ath12k *ar, cmd->peer_type = cpu_to_le32(arg->peer_type); cmd->vdev_id = cpu_to_le32(arg->vdev_id); + ptr = skb->data + sizeof(*cmd); + tlv = ptr; + tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, + sizeof(*ml_param)); + ptr += TLV_HDR_SIZE; + ml_param = ptr; + ml_param->tlv_header = + ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_PEER_CREATE_PARAMS, + sizeof(*ml_param)); + if (arg->ml_enabled) + ml_param->flags = cpu_to_le32(ATH12K_WMI_FLAG_MLO_ENABLED); + + ptr += sizeof(*ml_param); + ath12k_dbg(ar->ab, ATH12K_DBG_WMI, - "WMI peer create vdev_id %d peer_addr %pM\n", - arg->vdev_id, arg->peer_addr); + "WMI peer create vdev_id %d peer_addr %pM ml_flags 0x%x\n", + arg->vdev_id, arg->peer_addr, ml_param->flags); ret = ath12k_wmi_cmd_send(wmi, skb, WMI_PEER_CREATE_CMDID); if (ret) { diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 0ddd7ce97385..2378d94b2409 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -3015,6 +3015,12 @@ struct ath12k_wmi_peer_create_arg { const u8 *peer_addr; u32 peer_type; 
u32 vdev_id; + bool ml_enabled; +}; + +struct wmi_peer_create_mlo_params { + __le32 tlv_header; + __le32 flags; }; struct ath12k_wmi_pdev_set_regdomain_arg { From c20dbc8c68b38fe702e9fbc4748aa117194f8963 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:17:04 +0200 Subject: [PATCH 0010/1386] wifi: ath12k: add helper to find multi-link station Multi-link stations are identified in driver using the multi-link peer id and they have ATH12K_PEER_ML_ID_VALID bit set in the id. Add a helper to find multi-link station using the multi-link peer id. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-8-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/peer.c | 17 +++++++++++++++++ drivers/net/wireless/ath/ath12k/peer.h | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 2ad19baf0664..0e86847edd6e 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -79,6 +79,20 @@ struct ath12k_peer *ath12k_peer_find_by_addr(struct ath12k_base *ab, return NULL; } +static struct ath12k_peer *ath12k_peer_find_by_ml_id(struct ath12k_base *ab, + int ml_peer_id) +{ + struct ath12k_peer *peer; + + lockdep_assert_held(&ab->base_lock); + + list_for_each_entry(peer, &ab->peers, list) + if (ml_peer_id == peer->ml_id) + return peer; + + return NULL; +} + struct ath12k_peer *ath12k_peer_find_by_id(struct ath12k_base *ab, int peer_id) { @@ -86,6 +100,9 @@ struct ath12k_peer *ath12k_peer_find_by_id(struct ath12k_base *ab, lockdep_assert_held(&ab->base_lock); + if (peer_id & ATH12K_PEER_ML_ID_VALID) + return ath12k_peer_find_by_ml_id(ab, peer_id); + 
list_for_each_entry(peer, &ab->peers, list) if (peer_id == peer->peer_id) return peer; diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index 085246ca938d..c28aca5d88a0 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -19,6 +19,8 @@ struct ppdu_user_delayba { u32 resp_rate_flags; }; +#define ATH12K_PEER_ML_ID_VALID BIT(13) + struct ath12k_peer { struct list_head list; struct ieee80211_sta *sta; @@ -47,6 +49,8 @@ struct ath12k_peer { /* protected by ab->data_lock */ bool dp_setup_done; + + u16 ml_id; }; struct ath12k_ml_peer { From 507f8e730100822b75290bbf96135f0e789da9cc Mon Sep 17 00:00:00 2001 From: Sriram R Date: Fri, 1 Nov 2024 17:17:05 +0200 Subject: [PATCH 0011/1386] wifi: ath12k: Add MLO peer assoc command support Add changes to send MLO peer assoc command with partner link details and primary umac details. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241101151705.165987-9-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 7 +++ drivers/net/wireless/ath/ath12k/mac.c | 62 ++++++++++++++++++++ drivers/net/wireless/ath/ath12k/wmi.c | 79 ++++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/wmi.h | 46 +++++++++++++++ 4 files changed, 188 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 7324dae3fcb8..8dbdf6818f58 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -495,9 +495,16 @@ struct ath12k_link_sta { struct ath12k_rx_peer_stats *rx_stats; struct ath12k_wbm_tx_stats *wbm_tx_stats; u32 bw_prev; + + /* For now the assoc link will be considered primary */ + 
bool is_assoc_link; + + /* for firmware use only */ + u8 link_idx; }; struct ath12k_sta { + struct ath12k_vif *ahvif; enum hal_pn_type pn_type; struct ath12k_link_sta deflink; struct ath12k_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 076e4da4875f..ad27a2552a2c 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -2859,6 +2859,67 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, arg->punct_bitmap = ~arvif->punct_bitmap; } +static void ath12k_peer_assoc_h_mlo(struct ath12k_link_sta *arsta, + struct ath12k_wmi_peer_assoc_arg *arg) +{ + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); + struct peer_assoc_mlo_params *ml = &arg->ml; + struct ath12k_sta *ahsta = arsta->ahsta; + struct ath12k_link_sta *arsta_p; + struct ath12k_link_vif *arvif; + unsigned long links; + u8 link_id; + int i; + + if (!sta->mlo || ahsta->ml_peer_id == ATH12K_MLO_PEER_ID_INVALID) + return; + + ml->enabled = true; + ml->assoc_link = arsta->is_assoc_link; + + /* For now considering the primary umac based on assoc link */ + ml->primary_umac = arsta->is_assoc_link; + ml->peer_id_valid = true; + ml->logical_link_idx_valid = true; + + ether_addr_copy(ml->mld_addr, sta->addr); + ml->logical_link_idx = arsta->link_idx; + ml->ml_peer_id = ahsta->ml_peer_id; + ml->ieee_link_id = arsta->link_id; + ml->num_partner_links = 0; + links = ahsta->links_map; + + rcu_read_lock(); + + i = 0; + + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + if (i >= ATH12K_WMI_MLO_MAX_LINKS) + break; + + arsta_p = rcu_dereference(ahsta->link[link_id]); + arvif = rcu_dereference(ahsta->ahvif->link[link_id]); + + if (arsta_p == arsta) + continue; + + if (!arvif->is_started) + continue; + + ml->partner_info[i].vdev_id = arvif->vdev_id; + ml->partner_info[i].hw_link_id = arvif->ar->pdev->hw_link_id; + ml->partner_info[i].assoc_link = 
arsta_p->is_assoc_link; + ml->partner_info[i].primary_umac = arsta_p->is_assoc_link; + ml->partner_info[i].logical_link_idx_valid = true; + ml->partner_info[i].logical_link_idx = arsta_p->link_idx; + ml->num_partner_links++; + + i++; + } + + rcu_read_unlock(); +} + static void ath12k_peer_assoc_prepare(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta, @@ -2883,6 +2944,7 @@ static void ath12k_peer_assoc_prepare(struct ath12k *ar, ath12k_peer_assoc_h_qos(ar, arvif, arsta, arg); ath12k_peer_assoc_h_phymode(ar, arvif, arsta, arg); ath12k_peer_assoc_h_smps(arsta, arg); + ath12k_peer_assoc_h_mlo(arsta, arg); /* TODO: amsdu_disable req? */ } diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 0583d832fac7..50ed7e72f178 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -2101,12 +2101,15 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, struct ath12k_wmi_vht_rate_set_params *mcs; struct ath12k_wmi_he_rate_set_params *he_mcs; struct ath12k_wmi_eht_rate_set_params *eht_mcs; + struct wmi_peer_assoc_mlo_params *ml_params; + struct wmi_peer_assoc_mlo_partner_info_params *partner_info; struct sk_buff *skb; struct wmi_tlv *tlv; void *ptr; u32 peer_legacy_rates_align; u32 peer_ht_rates_align; int i, ret, len; + __le32 v; peer_legacy_rates_align = roundup(arg->peer_legacy_rates.num_rates, sizeof(u32)); @@ -2118,8 +2121,13 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, TLV_HDR_SIZE + (peer_ht_rates_align * sizeof(u8)) + sizeof(*mcs) + TLV_HDR_SIZE + (sizeof(*he_mcs) * arg->peer_he_mcs_count) + - TLV_HDR_SIZE + (sizeof(*eht_mcs) * arg->peer_eht_mcs_count) + - TLV_HDR_SIZE + TLV_HDR_SIZE; + TLV_HDR_SIZE + (sizeof(*eht_mcs) * arg->peer_eht_mcs_count); + + if (arg->ml.enabled) + len += TLV_HDR_SIZE + sizeof(*ml_params) + + TLV_HDR_SIZE + (arg->ml.num_partner_links * sizeof(*partner_info)); + else + len += (2 * TLV_HDR_SIZE); skb = 
ath12k_wmi_alloc_skb(wmi->wmi_ab, len); if (!skb) @@ -2243,12 +2251,38 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, ptr += sizeof(*he_mcs); } - /* MLO header tag with 0 length */ - len = 0; tlv = ptr; + len = arg->ml.enabled ? sizeof(*ml_params) : 0; tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, len); ptr += TLV_HDR_SIZE; + if (!len) + goto skip_ml_params; + ml_params = ptr; + ml_params->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_PEER_ASSOC_PARAMS, + len); + ml_params->flags = cpu_to_le32(ATH12K_WMI_FLAG_MLO_ENABLED); + + if (arg->ml.assoc_link) + ml_params->flags |= cpu_to_le32(ATH12K_WMI_FLAG_MLO_ASSOC_LINK); + + if (arg->ml.primary_umac) + ml_params->flags |= cpu_to_le32(ATH12K_WMI_FLAG_MLO_PRIMARY_UMAC); + + if (arg->ml.logical_link_idx_valid) + ml_params->flags |= + cpu_to_le32(ATH12K_WMI_FLAG_MLO_LOGICAL_LINK_IDX_VALID); + + if (arg->ml.peer_id_valid) + ml_params->flags |= cpu_to_le32(ATH12K_WMI_FLAG_MLO_PEER_ID_VALID); + + ether_addr_copy(ml_params->mld_addr.addr, arg->ml.mld_addr); + ml_params->logical_link_idx = cpu_to_le32(arg->ml.logical_link_idx); + ml_params->ml_peer_id = cpu_to_le32(arg->ml.ml_peer_id); + ml_params->ieee_link_id = cpu_to_le32(arg->ml.ieee_link_id); + ptr += sizeof(*ml_params); + +skip_ml_params: /* Loop through the EHT rate set */ len = arg->peer_eht_mcs_count * sizeof(*eht_mcs); tlv = ptr; @@ -2265,12 +2299,45 @@ int ath12k_wmi_send_peer_assoc_cmd(struct ath12k *ar, ptr += sizeof(*eht_mcs); } - /* ML partner links tag with 0 length */ - len = 0; tlv = ptr; + len = arg->ml.enabled ? 
arg->ml.num_partner_links * sizeof(*partner_info) : 0; + /* fill ML Partner links */ tlv->header = ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_STRUCT, len); ptr += TLV_HDR_SIZE; + if (len == 0) + goto send; + + for (i = 0; i < arg->ml.num_partner_links; i++) { + u32 cmd = WMI_TAG_MLO_PARTNER_LINK_PARAMS_PEER_ASSOC; + + partner_info = ptr; + partner_info->tlv_header = ath12k_wmi_tlv_cmd_hdr(cmd, + sizeof(*partner_info)); + partner_info->vdev_id = cpu_to_le32(arg->ml.partner_info[i].vdev_id); + partner_info->hw_link_id = + cpu_to_le32(arg->ml.partner_info[i].hw_link_id); + partner_info->flags = cpu_to_le32(ATH12K_WMI_FLAG_MLO_ENABLED); + + if (arg->ml.partner_info[i].assoc_link) + partner_info->flags |= + cpu_to_le32(ATH12K_WMI_FLAG_MLO_ASSOC_LINK); + + if (arg->ml.partner_info[i].primary_umac) + partner_info->flags |= + cpu_to_le32(ATH12K_WMI_FLAG_MLO_PRIMARY_UMAC); + + if (arg->ml.partner_info[i].logical_link_idx_valid) { + v = cpu_to_le32(ATH12K_WMI_FLAG_MLO_LINK_ID_VALID); + partner_info->flags |= v; + } + + partner_info->logical_link_idx = + cpu_to_le32(arg->ml.partner_info[i].logical_link_idx); + ptr += sizeof(*partner_info); + } + +send: ath12k_dbg(ar->ab, ATH12K_DBG_WMI, "wmi peer assoc vdev id %d assoc id %d peer mac %pM peer_flags %x rate_caps %x peer_caps %x listen_intval %d ht_caps %x max_mpdu %d nss %d phymode %d peer_mpdu_density %d vht_caps %x he cap_info %x he ops %x he cap_info_ext %x he phy %x %x %x peer_bw_rxnss_override %x peer_flags_ext %x eht mac_cap %x %x eht phy_cap %x %x %x\n", cmd->vdev_id, cmd->peer_associd, arg->peer_mac, diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 2378d94b2409..05aa9754118a 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -3687,6 +3687,24 @@ struct wmi_vdev_install_key_arg { #define WMI_HECAP_TXRX_MCS_NSS_IDX_160 1 #define WMI_HECAP_TXRX_MCS_NSS_IDX_80_80 2 +#define ATH12K_WMI_MLO_MAX_PARTNER_LINKS \ + (ATH12K_WMI_MLO_MAX_LINKS + 
ATH12K_MAX_NUM_BRIDGE_LINKS - 1) + +struct peer_assoc_mlo_params { + bool enabled; + bool assoc_link; + bool primary_umac; + bool peer_id_valid; + bool logical_link_idx_valid; + bool bridge_peer; + u8 mld_addr[ETH_ALEN]; + u32 logical_link_idx; + u32 ml_peer_id; + u32 ieee_link_id; + u8 num_partner_links; + struct wmi_ml_partner_info partner_info[ATH12K_WMI_MLO_MAX_LINKS]; +}; + struct wmi_rate_set_arg { u32 num_rates; u8 rates[WMI_MAX_SUPPORTED_RATES]; @@ -3761,8 +3779,36 @@ struct ath12k_wmi_peer_assoc_arg { u32 peer_eht_tx_mcs_set[WMI_MAX_EHTCAP_RATE_SET]; struct ath12k_wmi_ppe_threshold_arg peer_eht_ppet; u32 punct_bitmap; + bool is_assoc; + struct peer_assoc_mlo_params ml; }; +#define ATH12K_WMI_FLAG_MLO_ENABLED BIT(0) +#define ATH12K_WMI_FLAG_MLO_ASSOC_LINK BIT(1) +#define ATH12K_WMI_FLAG_MLO_PRIMARY_UMAC BIT(2) +#define ATH12K_WMI_FLAG_MLO_LINK_ID_VALID BIT(3) +#define ATH12K_WMI_FLAG_MLO_PEER_ID_VALID BIT(4) + +struct wmi_peer_assoc_mlo_partner_info_params { + __le32 tlv_header; + __le32 vdev_id; + __le32 hw_link_id; + __le32 flags; + __le32 logical_link_idx; +} __packed; + +struct wmi_peer_assoc_mlo_params { + __le32 tlv_header; + __le32 flags; + struct ath12k_wmi_mac_addr_params mld_addr; + __le32 logical_link_idx; + __le32 ml_peer_id; + __le32 ieee_link_id; + __le32 emlsr_trans_timeout_us; + __le32 emlsr_trans_delay_us; + __le32 emlsr_padding_delay_us; +} __packed; + struct wmi_peer_assoc_complete_cmd { __le32 tlv_header; struct ath12k_wmi_mac_addr_params peer_macaddr; From 1053987a6bce68127504cda476ee56b97c9109d9 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 11 Nov 2024 13:47:24 +0300 Subject: [PATCH 0012/1386] wifi: ath9k: miscellaneous spelling fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Correct spelling here and there as suggested by codespell. 
Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241111104724.484586-1-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/antenna.c | 2 +- drivers/net/wireless/ath/ath9k/ar9002_hw.c | 2 +- drivers/net/wireless/ath/ath9k/ar9003_hw.c | 2 +- drivers/net/wireless/ath/ath9k/ar9003_mci.c | 4 ++-- drivers/net/wireless/ath/ath9k/ar9003_phy.h | 2 +- drivers/net/wireless/ath/ath9k/channel.c | 2 +- drivers/net/wireless/ath/ath9k/common-spectral.c | 2 +- drivers/net/wireless/ath/ath9k/dfs.c | 2 +- drivers/net/wireless/ath/ath9k/hif_usb.c | 2 +- drivers/net/wireless/ath/ath9k/hw.c | 4 ++-- drivers/net/wireless/ath/ath9k/hw.h | 2 +- drivers/net/wireless/ath/ath9k/mac.h | 2 +- drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/ath/ath9k/wow.c | 6 +++--- drivers/net/wireless/ath/ath9k/xmit.c | 2 +- 15 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/antenna.c b/drivers/net/wireless/ath/ath9k/antenna.c index acc84e6711b0..e5e274bc9e68 100644 --- a/drivers/net/wireless/ath/ath9k/antenna.c +++ b/drivers/net/wireless/ath/ath9k/antenna.c @@ -193,7 +193,7 @@ static void ath_lnaconf_alt_good_scan(struct ath_ant_comb *antcomb, static void ath_ant_set_alt_ratio(struct ath_ant_comb *antcomb, struct ath_hw_antcomb_conf *conf) { - /* set alt to the conf with maximun ratio */ + /* set alt to the conf with maximum ratio */ if (antcomb->first_ratio && antcomb->second_ratio) { if (antcomb->rssi_second > antcomb->rssi_third) { /* first alt*/ diff --git a/drivers/net/wireless/ath/ath9k/ar9002_hw.c b/drivers/net/wireless/ath/ath9k/ar9002_hw.c index d08ea0b28530..b26224480041 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_hw.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_hw.c @@ -395,7 +395,7 @@ static void ar9002_hw_init_hang_checks(struct ath_hw *ah) ah->config.hw_hang_checks |= HW_MAC_HANG; } -/* Sets up the AR5008/AR9001/AR9002 hardware familiy callbacks */ 
+/* Sets up the AR5008/AR9001/AR9002 hardware family callbacks */ int ar9002_hw_attach_ops(struct ath_hw *ah) { struct ath_hw_private_ops *priv_ops = ath9k_hw_private_ops(ah); diff --git a/drivers/net/wireless/ath/ath9k/ar9003_hw.c b/drivers/net/wireless/ath/ath9k/ar9003_hw.c index e9bd13eeee92..6595eca74997 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_hw.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_hw.c @@ -1170,7 +1170,7 @@ exit: return false; } -/* Sets up the AR9003 hardware familiy callbacks */ +/* Sets up the AR9003 hardware family callbacks */ void ar9003_hw_attach_ops(struct ath_hw *ah) { struct ath_hw_private_ops *priv_ops = ath9k_hw_private_ops(ah); diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mci.c b/drivers/net/wireless/ath/ath9k/ar9003_mci.c index 2b9c07961cd7..3f0543e55d9b 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_mci.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_mci.c @@ -637,7 +637,7 @@ static u32 ar9003_mci_wait_for_gpm(struct ath_hw *ah, u8 gpm_type, * same time. Since BT's calibration doesn't happen * that often, we'll let BT completes calibration then * we continue to wait for cal_grant from BT. - * Orginal: Wait BT_CAL_GRANT. + * Original: Wait BT_CAL_GRANT. * New: Receive BT_CAL_REQ -> send WLAN_CAL_GRANT->wait * BT_CAL_DONE -> Wait BT_CAL_GRANT. */ @@ -747,7 +747,7 @@ int ar9003_mci_end_reset(struct ath_hw *ah, struct ath9k_channel *chan, * BT is sleeping. Check if BT wakes up during * WLAN calibration. If BT wakes up during * WLAN calibration, need to go through all - * message exchanges again and recal. + * message exchanges again and recalibrate. 
*/ REG_WRITE(ah, AR_MCI_INTERRUPT_RX_MSG_RAW, (AR_MCI_INTERRUPT_RX_MSG_REMOTE_RESET | diff --git a/drivers/net/wireless/ath/ath9k/ar9003_phy.h b/drivers/net/wireless/ath/ath9k/ar9003_phy.h index ad72a30b67c3..e13873fb8e2f 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_phy.h +++ b/drivers/net/wireless/ath/ath9k/ar9003_phy.h @@ -246,7 +246,7 @@ /* - * MRC Feild Definitions + * MRC Field Definitions */ #define AR_PHY_SGI_DSC_MAN 0x0007FFF0 #define AR_PHY_SGI_DSC_MAN_S 4 diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index 571062f2e82a..02237d106f8c 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -17,7 +17,7 @@ #include "ath9k.h" /* Set/change channels. If the channel is really being changed, it's done - * by reseting the chip. To accomplish this we must first cleanup any pending + * by resetting the chip. To accomplish this we must first cleanup any pending * DMA, then restart stuff. */ static int ath_set_channel(struct ath_softc *sc) diff --git a/drivers/net/wireless/ath/ath9k/common-spectral.c b/drivers/net/wireless/ath/ath9k/common-spectral.c index 4b27445a5fb8..628eeec4b82f 100644 --- a/drivers/net/wireless/ath/ath9k/common-spectral.c +++ b/drivers/net/wireless/ath/ath9k/common-spectral.c @@ -734,7 +734,7 @@ void ath9k_cmn_spectral_scan_trigger(struct ath_common *common, ATH9K_RX_FILTER_PHYRADAR | ATH9K_RX_FILTER_PHYERR); - /* TODO: usually this should not be neccesary, but for some reason + /* TODO: usually this should not be necessary, but for some reason * (or in some mode?) 
the trigger must be called after the * configuration, otherwise the register will have its values reset * (on my ar9220 to value 0x01002310) diff --git a/drivers/net/wireless/ath/ath9k/dfs.c b/drivers/net/wireless/ath/ath9k/dfs.c index 3689e12db9f7..2fb73a5e1d51 100644 --- a/drivers/net/wireless/ath/ath9k/dfs.c +++ b/drivers/net/wireless/ath/ath9k/dfs.c @@ -79,7 +79,7 @@ static int ath9k_get_max_index_ht40(struct ath9k_dfs_fft_40 *fft, const int DFS_UPPER_BIN_OFFSET = 64; /* if detected radar on both channels, select the significant one */ if (is_ctl && is_ext) { - /* first check wether channels have 'strong' bins */ + /* first check whether channels have 'strong' bins */ is_ctl = fft_bitmap_weight(fft->lower_bins) != 0; is_ext = fft_bitmap_weight(fft->upper_bins) != 0; diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 7265766cddbd..fe9abe8cd268 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -1198,7 +1198,7 @@ static int ath9k_hif_request_firmware(struct hif_device_usb *hif_dev, filename = FIRMWARE_AR9271; /* expected fw locations: - * - htc_9271.fw (stable version 1.3, depricated) + * - htc_9271.fw (stable version 1.3, deprecated) */ snprintf(hif_dev->fw_name, sizeof(hif_dev->fw_name), "%s", filename); diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index e2bef099adb3..a25eacabc664 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -2149,7 +2149,7 @@ static void ath9k_set_power_network_sleep(struct ath_hw *ah) /* When chip goes into network sleep, it could be waken * up by MCI_INT interrupt caused by BT's HW messages - * (LNA_xxx, CONT_xxx) which chould be in a very fast + * (LNA_xxx, CONT_xxx) which could be in a very fast * rate (~100us). 
This will cause chip to leave and * re-enter network sleep mode frequently, which in * consequence will have WLAN MCI HW to generate lots of @@ -2544,7 +2544,7 @@ int ath9k_hw_fill_cap_info(struct ath_hw *ah) pCap->tx_chainmask = ah->eep_ops->get_eeprom(ah, EEP_TX_MASK); /* - * For AR9271 we will temporarilly uses the rx chainmax as read from + * For AR9271 we will temporarily use the rx chainmax as read from * the EEPROM. */ if ((ah->hw_version.devid == AR5416_DEVID_PCI) && diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index 450ab19b1d4e..e2cbf3f00da0 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -282,7 +282,7 @@ enum ath9k_hw_caps { * an exact user defined pattern or de-authentication/disassoc pattern. * @ATH9K_HW_WOW_PATTERN_MATCH_DWORD: device requires the first four * bytes of the pattern for user defined pattern, de-authentication and - * disassociation patterns for all types of possible frames recieved + * disassociation patterns for all types of possible frames received * of those types. */ diff --git a/drivers/net/wireless/ath/ath9k/mac.h b/drivers/net/wireless/ath/ath9k/mac.h index f03d792732da..16203e7ecf29 100644 --- a/drivers/net/wireless/ath/ath9k/mac.h +++ b/drivers/net/wireless/ath/ath9k/mac.h @@ -251,7 +251,7 @@ struct ath_desc { * when the descriptor is specifically marked to generate * an interrupt with this flag. Descriptors should be * marked periodically to insure timely replenishing of the - * supply needed for sending frames. Defering interrupts + * supply needed for sending frames. Deferring interrupts * reduces system load and potentially allows more concurrent * work to be done but if done to aggressively can cause * senders to backup. 
When the hardware queue is left too diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b92c89dad8de..dd7910782858 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1001,7 +1001,7 @@ static bool ath9k_uses_beacons(int type) static void ath9k_vif_iter_set_beacon(struct ath9k_vif_iter_data *iter_data, struct ieee80211_vif *vif) { - /* Use the first (configured) interface, but prefering AP interfaces. */ + /* Use the first (configured) interface, but preferring AP interfaces. */ if (!iter_data->primary_beacon_vif) { iter_data->primary_beacon_vif = vif; } else { diff --git a/drivers/net/wireless/ath/ath9k/wow.c b/drivers/net/wireless/ath/ath9k/wow.c index 8d0b1730a9d5..ed4152cd44f0 100644 --- a/drivers/net/wireless/ath/ath9k/wow.c +++ b/drivers/net/wireless/ath/ath9k/wow.c @@ -60,7 +60,7 @@ static int ath9k_wow_add_disassoc_deauth_pattern(struct ath_softc *sc) memset(dis_deauth_mask, 0, MAX_PATTERN_SIZE); /* - * Create Dissassociate / Deauthenticate packet filter + * Create Disassociate / Deauthenticate packet filter * * 2 bytes 2 byte 6 bytes 6 bytes 6 bytes * +--------------+----------+---------+--------+--------+---- @@ -70,7 +70,7 @@ static int ath9k_wow_add_disassoc_deauth_pattern(struct ath_softc *sc) * The above is the management frame format for disassociate/ * deauthenticate pattern, from this we need to match the first byte * of 'Frame Control' and DA, SA, and BSSID fields - * (skipping 2nd byte of FC and Duration feild. + * (skipping 2nd byte of FC and Duration field. * * Disassociate pattern * -------------------- @@ -225,7 +225,7 @@ int ath9k_suspend(struct ieee80211_hw *hw, ath9k_stop_btcoex(sc); /* - * Enable wake up on recieving disassoc/deauth + * Enable wake up on receiving disassoc/deauth * frame by default. 
*/ ret = ath9k_wow_add_disassoc_deauth_pattern(sc); diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 35aa47a9db90..0a24439dd30d 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -557,7 +557,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, /* * AR5416 can become deaf/mute when BA * issue happens. Chip needs to be reset. - * But AP code may have sychronization issues + * But AP code may have synchronization issues * when perform internal reset in this routine. * Only enable reset in STA mode for now. */ From eb8c0534713865d190856f10bfc97cf0b88475b1 Mon Sep 17 00:00:00 2001 From: Karol Przybylski Date: Tue, 5 Nov 2024 11:11:31 +0100 Subject: [PATCH 0013/1386] wifi: ath12k: Fix for out-of bound access error Selfgen stats are placed in a buffer using print_array_to_buf_index() function. Array length parameter passed to the function is too big, resulting in possible out-of bound memory error. Decreasing buffer size by one fixes faulty upper bound of passed array. 
Discovered in coverity scan, CID 1600742 and CID 1600758 Signed-off-by: Karol Przybylski Acked-by: Kalle Valo Link: https://patch.msgid.link/20241105101132.374372-1-karprzy7@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index c9980c0193d1..43ea87e981f4 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -1562,7 +1562,8 @@ ath12k_htt_print_tx_selfgen_ac_stats_tlv(const void *tag_buf, u16 tag_len, le32_to_cpu(htt_stats_buf->ac_mu_mimo_ndp)); len += print_array_to_buf_index(buf, len, "ac_mu_mimo_brpollX_tried = ", 1, htt_stats_buf->ac_mu_mimo_brpoll, - ATH12K_HTT_TX_NUM_AC_MUMIMO_USER_STATS, "\n\n"); + ATH12K_HTT_TX_NUM_AC_MUMIMO_USER_STATS - 1, + "\n\n"); stats_req->buf_len = len; } @@ -1590,7 +1591,7 @@ ath12k_htt_print_tx_selfgen_ax_stats_tlv(const void *tag_buf, u16 tag_len, le32_to_cpu(htt_stats_buf->ax_mu_mimo_ndp)); len += print_array_to_buf_index(buf, len, "ax_mu_mimo_brpollX_tried = ", 1, htt_stats_buf->ax_mu_mimo_brpoll, - ATH12K_HTT_TX_NUM_AX_MUMIMO_USER_STATS, "\n"); + ATH12K_HTT_TX_NUM_AX_MUMIMO_USER_STATS - 1, "\n"); len += scnprintf(buf + len, buf_len - len, "ax_basic_trigger = %u\n", le32_to_cpu(htt_stats_buf->ax_basic_trigger)); len += scnprintf(buf + len, buf_len - len, "ax_ulmumimo_total_trigger = %u\n", From 78e154d42f2c72905fe66a400847e1b2b101b7b2 Mon Sep 17 00:00:00 2001 From: Balaji Pothunoori Date: Wed, 30 Oct 2024 17:16:25 +0530 Subject: [PATCH 0014/1386] wifi: ath11k: Fix unexpected return buffer manager error for WCN6750/WCN6855 The following error messages were encountered while parsing fragmented RX packets for WCN6750/WCN6855: ath11k 17a10040.wifi: invalid return buffer manager 4 This issue arose due to a hardcoded check for 
HAL_RX_BUF_RBM_SW3_BM introduced in 'commit 71c748b5e01e ("ath11k: Fix unexpected return buffer manager error for QCA6390")' For WCN6750 and WCN6855, the return buffer manager ID should be HAL_RX_BUF_RBM_SW1_BM. The incorrect conditional check caused fragmented packets to be dropped, resulting in the above error log. Fix this by adding a check for HAL_RX_BUF_RBM_SW1_BM. Tested-on: WCN6750 hw1.0 AHB WLAN.MSL.2.0.c2-00258-QCAMSLSWPL-1 Tested-on: WCN6855 hw2.1 WLAN.HSP.1.1-04479-QCAHSPSWPL_V1_V2_SILICONZ_IOE-1 Fixes: 71c748b5e01e ("ath11k: Fix unexpected return buffer manager error for QCA6390") Signed-off-by: Balaji Pothunoori Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241030114625.2416942-1-quic_bpothuno@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 1 + drivers/net/wireless/ath/ath11k/hal_rx.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index c087d8a0f5b2..176bbc5d95a6 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -3872,6 +3872,7 @@ int ath11k_dp_process_rx_err(struct ath11k_base *ab, struct napi_struct *napi, ath11k_hal_rx_msdu_link_info_get(link_desc_va, &num_msdus, msdu_cookies, &rbm); if (rbm != HAL_RX_BUF_RBM_WBM_IDLE_DESC_LIST && + rbm != HAL_RX_BUF_RBM_SW1_BM && rbm != HAL_RX_BUF_RBM_SW3_BM) { ab->soc_stats.invalid_rbm++; ath11k_warn(ab, "invalid return buffer manager %d\n", rbm); diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c index 8f7dd43dc1bd..753bd93f0212 100644 --- a/drivers/net/wireless/ath/ath11k/hal_rx.c +++ b/drivers/net/wireless/ath/ath11k/hal_rx.c @@ -372,7 +372,8 @@ int ath11k_hal_wbm_desc_parse_err(struct ath11k_base *ab, void *desc, ret_buf_mgr = FIELD_GET(BUFFER_ADDR_INFO1_RET_BUF_MGR, wbm_desc->buf_addr_info.info1); - if (ret_buf_mgr != HAL_RX_BUF_RBM_SW3_BM) { 
+ if (ret_buf_mgr != HAL_RX_BUF_RBM_SW1_BM && + ret_buf_mgr != HAL_RX_BUF_RBM_SW3_BM) { ab->soc_stats.invalid_rbm++; return -EINVAL; } From 7b5ce65d90187f0944e70dc5741aa0edfac926f4 Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Wed, 6 Nov 2024 15:55:31 +0200 Subject: [PATCH 0015/1386] wifi: rtw88: 8821au: Add additional devices to the USB_DEVICE list These are the entries that Nick Morrow provided. From https://github.com/morrownr/8821au-20210708 Signed-off-by: Larry Finger Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/da05b866-a9ff-428c-a008-35e8cf200a98@gmail.com --- .../net/wireless/realtek/rtw88/rtw8821au.c | 52 ++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821au.c b/drivers/net/wireless/realtek/rtw88/rtw8821au.c index 730018773e1c..a01744b64e8d 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821au.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821au.c @@ -9,8 +9,58 @@ #include "usb.h" static const struct usb_device_id rtw_8821au_id_table[] = { - { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x011e, 0xff, 0xff, 0xff), + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x0811, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x0820, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x0821, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x8822, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x0823, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0xa811, 0xff, 0xff, 0xff), + .driver_info = 
(kernel_ulong_t)&(rtw8821a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x0411, 0x0242, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Buffalo */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0411, 0x029b, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Buffalo */ + { USB_DEVICE_AND_INTERFACE_INFO(0x04bb, 0x0953, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* I-O DATA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x056e, 0x4007, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* ELECOM */ + { USB_DEVICE_AND_INTERFACE_INFO(0x056e, 0x400e, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* ELECOM */ + { USB_DEVICE_AND_INTERFACE_INFO(0x056e, 0x400f, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* ELECOM */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0846, 0x9052, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Netgear */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0e66, 0x0023, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* HAWKING */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3314, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3318, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xab32, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Planex */ + { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x804b, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* TRENDnet */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x011e, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* TP Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x011f, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* TP Link */ + { 
USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0120, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* TP Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x3823, 0x6249, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Obihai */ + { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xa811, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Edimax */ + { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xa812, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Edimax */ + { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xa813, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Edimax */ + { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xb611, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8821a_hw_spec) }, /* Edimax */ {}, }; MODULE_DEVICE_TABLE(usb, rtw_8821au_id_table); From 1ee6ff9ae3c1a9eda9081f9db04f85d3a7352d38 Mon Sep 17 00:00:00 2001 From: Nick Morrow Date: Wed, 6 Nov 2024 15:57:10 +0200 Subject: [PATCH 0016/1386] wifi: rtw88: 8812au: Add more device IDs From https://github.com/morrownr/8812au-20210820. 
Signed-off-by: Nick Morrow Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/31b7ee6b-f96d-43e0-a32f-a9eb1174a0c1@gmail.com --- .../net/wireless/realtek/rtw88/rtw8812au.c | 68 ++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8812au.c b/drivers/net/wireless/realtek/rtw88/rtw8812au.c index 4da69590a423..e18995f4cc78 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8812au.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8812au.c @@ -9,8 +9,74 @@ #include "usb.h" static const struct usb_device_id rtw_8812au_id_table[] = { - { USB_DEVICE_AND_INTERFACE_INFO(0x2604, 0x0012, 0xff, 0xff, 0xff), + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x8812, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x881a, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x881b, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(RTW_USB_VENDOR_ID_REALTEK, 0x881c, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, + { USB_DEVICE_AND_INTERFACE_INFO(0x0409, 0x0408, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* NEC */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0411, 0x025d, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Buffalo */ + { USB_DEVICE_AND_INTERFACE_INFO(0x04bb, 0x0952, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* I-O DATA */ + { USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1106, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Belkin */ + { USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1109, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Belkin */ + { 
USB_DEVICE_AND_INTERFACE_INFO(0x0586, 0x3426, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* ZyXEL */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0789, 0x016e, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Logitec */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07b8, 0x8812, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Abocom */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0846, 0x9051, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Netgear */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0b05, 0x17d2, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* ASUS */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0df6, 0x0074, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Sitecom */ + { USB_DEVICE_AND_INTERFACE_INFO(0x0e66, 0x0022, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Hawking */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1058, 0x0632, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* WD */ + { USB_DEVICE_AND_INTERFACE_INFO(0x13b1, 0x003f, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Linksys */ + { USB_DEVICE_AND_INTERFACE_INFO(0x148f, 0x9097, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Amped Wireless */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1740, 0x0100, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* EnGenius */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330e, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3313, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3315, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3316, 0xff, 0xff, 
0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* D-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xab30, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Planex */ + { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x805b, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TRENDnet */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0101, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0103, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x010d, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x010e, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x010f, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0122, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* TP-Link */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2604, 0x0012, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Tenda */ + { USB_DEVICE_AND_INTERFACE_INFO(0x7392, 0xa822, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8812a_hw_spec) }, /* Edimax */ {}, }; MODULE_DEVICE_TABLE(usb, rtw_8812au_id_table); From d4c4903508f9e1b2bfec88f777718484e27343fb Mon Sep 17 00:00:00 2001 From: Nick Morrow Date: Thu, 7 Nov 2024 08:28:46 +0800 Subject: [PATCH 0017/1386] wifi: rtw88: Add additional USB IDs for RTL8812BU Add three additional USB IDs found in https://github.com/morrownr/88x2bu-20210702 to support more RTL8812BU devices. 
Signed-off-by: Nick Morrow Signed-off-by: Zenm Chen Reviewed-by: Mikhail Novosyolov Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107002846.13748-1-zenmchen@gmail.com --- drivers/net/wireless/realtek/rtw88/rtw8822bu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822bu.c b/drivers/net/wireless/realtek/rtw88/rtw8822bu.c index ab620a0b1dfc..8883300fc6ad 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822bu.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822bu.c @@ -67,6 +67,12 @@ static const struct usb_device_id rtw_8822bu_id_table[] = { .driver_info = (kernel_ulong_t)&(rtw8822b_hw_spec) }, /* LiteOn */ { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x808a, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&(rtw8822b_hw_spec) }, /* TRENDnet TEW-808UBM */ + { USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x805a, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8822b_hw_spec) }, /* TRENDnet TEW-805UBH */ + { USB_DEVICE_AND_INTERFACE_INFO(0x056e, 0x4011, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8822b_hw_spec) }, /* ELECOM WDB-867DU3S */ + { USB_DEVICE_AND_INTERFACE_INFO(0x2c4e, 0x0107, 0xff, 0xff, 0xff), + .driver_info = (kernel_ulong_t)&(rtw8822b_hw_spec) }, /* Mercusys MA30H */ {}, }; MODULE_DEVICE_TABLE(usb, rtw_8822bu_id_table); From 5e5903a442bb889a62a0f5d89ac33e53ab08592c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 6 Nov 2024 15:46:42 +0000 Subject: [PATCH 0018/1386] wifi: rtlwifi: rtl8821ae: phy: restore removed code to fix infinite loop A previous clean-up fix removed the assignment of v2 inside a while loop that turned it into an infinite loop. Fix this by restoring the assignment of v2 from array[] so that v2 is updated inside the loop. 
Fixes: cda37445718d ("wifi: rtlwifi: rtl8821ae: phy: remove some useless code") Signed-off-by: Colin Ian King Tested-by: Ping-Ke Shih Reviewed-by: Su Hui Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241106154642.1627886-1-colin.i.king@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index 1be51ea3f3c8..9eddbada8af1 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -2033,8 +2033,10 @@ static bool _rtl8821ae_phy_config_bb_with_pgheaderfile(struct ieee80211_hw *hw, if (!_rtl8821ae_check_condition(hw, v1)) { i += 2; /* skip the pair of expression*/ v2 = array[i+1]; - while (v2 != 0xDEAD) + while (v2 != 0xDEAD) { i += 3; + v2 = array[i + 1]; + } } } } From e73e11d303940119e41850a0452a0deda2cc4eb5 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Nov 2024 10:33:18 -0300 Subject: [PATCH 0019/1386] wifi: rtlwifi: do not complete firmware loading needlessly The only code waiting for completion is driver removal, which will not be called when probe returns a failure. So this completion is unnecessary. 
Fixes: b0302aba812b ("rtlwifi: Convert to asynchronous firmware load") Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107133322.855112-2-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/pci.c | 1 - drivers/net/wireless/realtek/rtlwifi/usb.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 11709b6c83f1..40fc3c297a8a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -2266,7 +2266,6 @@ fail2: pci_iounmap(pdev, (void __iomem *)rtlpriv->io.pci_mem_start); pci_release_regions(pdev); - complete(&rtlpriv->firmware_loading_complete); fail1: if (hw) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index d37a017b2b81..c3aa0cd9ff21 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1040,7 +1040,6 @@ error_out: error_out2: _rtl_usb_io_handler_release(hw); usb_put_dev(udev); - complete(&rtlpriv->firmware_loading_complete); kfree(rtlpriv->usb_data); ieee80211_free_hw(hw); return -ENODEV; From 8559a9e0c457729fe3edb3176bbf7c7874f482b0 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Nov 2024 10:33:19 -0300 Subject: [PATCH 0020/1386] wifi: rtlwifi: rtl8192se: rise completion of firmware loading as last step Just like in commit 4dfde294b979 ("rtlwifi: rise completion at the last step of firmware callback"), only signal completion once the function is finished. Otherwise, the module removal waiting for the completion could free the memory that the callback will still use before returning. 
Fixes: b0302aba812b ("rtlwifi: Convert to asynchronous firmware load") Signed-off-by: Thadeu Lima de Souza Cascardo Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107133322.855112-3-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index bbf8ff63dced..e63c67b1861b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -64,22 +64,23 @@ static void rtl92se_fw_cb(const struct firmware *firmware, void *context) rtl_dbg(rtlpriv, COMP_ERR, DBG_LOUD, "Firmware callback routine entered!\n"); - complete(&rtlpriv->firmware_loading_complete); if (!firmware) { pr_err("Firmware %s not available\n", fw_name); rtlpriv->max_fw_size = 0; - return; + goto exit; } if (firmware->size > rtlpriv->max_fw_size) { pr_err("Firmware is too big!\n"); rtlpriv->max_fw_size = 0; release_firmware(firmware); - return; + goto exit; } pfirmware = (struct rt_firmware *)rtlpriv->rtlhal.pfirmware; memcpy(pfirmware->sz_fw_tmpbuffer, firmware->data, firmware->size); pfirmware->sz_fw_tmpbufferlen = firmware->size; release_firmware(firmware); +exit: + complete(&rtlpriv->firmware_loading_complete); } static int rtl92s_init_sw_vars(struct ieee80211_hw *hw) From b4b26642b31ef282df6ff7ea8531985edfdef12a Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Nov 2024 10:33:20 -0300 Subject: [PATCH 0021/1386] wifi: rtlwifi: wait for firmware loading before releasing memory At probe error path, the firmware loading work may have already been queued. In such a case, it will try to access memory allocated by the probe function, which is about to be released. In such paths, wait for the firmware worker to finish before releasing memory. 
Fixes: a7f7c15e945a ("rtlwifi: rtl8192cu: Free ieee80211_hw if probing fails") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107133322.855112-4-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/usb.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index c3aa0cd9ff21..c27b116ccdff 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1028,13 +1028,15 @@ int rtl_usb_probe(struct usb_interface *intf, err = ieee80211_register_hw(hw); if (err) { pr_err("Can't register mac80211 hw.\n"); - goto error_out; + goto error_init_vars; } rtlpriv->mac80211.mac80211_registered = 1; set_bit(RTL_STATUS_INTERFACE_START, &rtlpriv->status); return 0; +error_init_vars: + wait_for_completion(&rtlpriv->firmware_loading_complete); error_out: rtl_deinit_core(hw); error_out2: From 00260350aed80c002df270c805ca443ec9a719a6 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Nov 2024 10:33:21 -0300 Subject: [PATCH 0022/1386] wifi: rtlwifi: fix init_sw_vars leak when probe fails If ieee80211_register_hw fails, the memory allocated for the firmware will not be released. Call deinit_sw_vars as the function that undoes the allocations done by init_sw_vars. 
Fixes: cefe3dfdb9f5 ("rtl8192cu: Call ieee80211_register_hw from rtl_usb_probe") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107133322.855112-5-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index c27b116ccdff..8ec687fab572 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1037,6 +1037,7 @@ int rtl_usb_probe(struct usb_interface *intf, error_init_vars: wait_for_completion(&rtlpriv->firmware_loading_complete); + rtlpriv->cfg->ops->deinit_sw_vars(hw); error_out: rtl_deinit_core(hw); error_out2: From f79bc5c67867c19ce2762e7934c20dbb835ed82c Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Thu, 7 Nov 2024 10:33:22 -0300 Subject: [PATCH 0023/1386] wifi: rtlwifi: usb: fix workqueue leak when probe fails rtl_init_core creates a workqueue that is then assigned to rtl_wq. rtl_deinit_core does not destroy it. It is left to rtl_usb_deinit, which must be called in the probe error path. 
Fixes: 2ca20f79e0d8 ("rtlwifi: Add usb driver") Fixes: 851639fdaeac ("rtlwifi: Modify some USB de-initialize code.") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107133322.855112-6-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 8ec687fab572..0368ecea2e81 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -1039,6 +1039,7 @@ error_init_vars: wait_for_completion(&rtlpriv->firmware_loading_complete); rtlpriv->cfg->ops->deinit_sw_vars(hw); error_out: + rtl_usb_deinit(hw); rtl_deinit_core(hw); error_out2: _rtl_usb_io_handler_release(hw); From 9c1df813e08832c3836c254bc8a2f83ff22dbc06 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 11 Nov 2024 14:38:35 +0800 Subject: [PATCH 0024/1386] wifi: rtw89: pci: disable PCIE wake bit when PCIE deinit The PCIE wake bit is to control PCIE wake signal to host. When PCIE is going down, clear this bit to prevent waking up host unexpectedly. 
Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241111063835.15454-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/pci.c | 16 +++++++++++++--- drivers/net/wireless/realtek/rtw89/pci.h | 9 +++++++++ drivers/net/wireless/realtek/rtw89/pci_be.c | 1 + 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index f923bec03d41..c3a027735d0f 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -2516,7 +2516,7 @@ static int rtw89_pci_dphy_delay(struct rtw89_dev *rtwdev) PCIE_DPHY_DLY_25US, PCIE_PHY_GEN1); } -static void rtw89_pci_power_wake(struct rtw89_dev *rtwdev, bool pwr_up) +static void rtw89_pci_power_wake_ax(struct rtw89_dev *rtwdev, bool pwr_up) { if (pwr_up) rtw89_write32_set(rtwdev, R_AX_HCI_OPT_CTRL, BIT_WAKE_CTRL); @@ -2825,6 +2825,8 @@ static int rtw89_pci_ops_deinit(struct rtw89_dev *rtwdev) { const struct rtw89_pci_info *info = rtwdev->pci_info; + rtw89_pci_power_wake(rtwdev, false); + if (rtwdev->chip->chip_id == RTL8852A) { /* ltr sw trigger */ rtw89_write32_set(rtwdev, R_AX_LTR_CTRL_0, B_AX_APP_LTR_IDLE); @@ -2867,7 +2869,7 @@ static int rtw89_pci_ops_mac_pre_init_ax(struct rtw89_dev *rtwdev) return ret; } - rtw89_pci_power_wake(rtwdev, true); + rtw89_pci_power_wake_ax(rtwdev, true); rtw89_pci_autoload_hang(rtwdev); rtw89_pci_l12_vmain(rtwdev); rtw89_pci_gen2_force_ib(rtwdev); @@ -2912,6 +2914,13 @@ static int rtw89_pci_ops_mac_pre_init_ax(struct rtw89_dev *rtwdev) return 0; } +static int rtw89_pci_ops_mac_pre_deinit_ax(struct rtw89_dev *rtwdev) +{ + rtw89_pci_power_wake_ax(rtwdev, false); + + return 0; +} + int rtw89_pci_ltr_set(struct rtw89_dev *rtwdev, bool en) { u32 val; @@ -4325,7 +4334,7 @@ const struct rtw89_pci_gen_def rtw89_pci_gen_ax = { B_AX_RDU_INT}, .mac_pre_init = rtw89_pci_ops_mac_pre_init_ax, - .mac_pre_deinit = NULL, + .mac_pre_deinit = rtw89_pci_ops_mac_pre_deinit_ax, 
.mac_post_init = rtw89_pci_ops_mac_post_init_ax, .clr_idx_all = rtw89_pci_clr_idx_all_ax, @@ -4343,6 +4352,7 @@ const struct rtw89_pci_gen_def rtw89_pci_gen_ax = { .l1ss_set = rtw89_pci_l1ss_set_ax, .disable_eq = rtw89_pci_disable_eq_ax, + .power_wake = rtw89_pci_power_wake_ax, }; EXPORT_SYMBOL(rtw89_pci_gen_ax); diff --git a/drivers/net/wireless/realtek/rtw89/pci.h b/drivers/net/wireless/realtek/rtw89/pci.h index b68e2d82eea9..d52db4ca1b99 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.h +++ b/drivers/net/wireless/realtek/rtw89/pci.h @@ -1290,6 +1290,7 @@ struct rtw89_pci_gen_def { void (*l1ss_set)(struct rtw89_dev *rtwdev, bool enable); void (*disable_eq)(struct rtw89_dev *rtwdev); + void (*power_wake)(struct rtw89_dev *rtwdev, bool pwr_up); }; #define RTW89_PCI_SSID(v, d, ssv, ssd, cust) \ @@ -1805,4 +1806,12 @@ static inline void rtw89_pci_disable_eq(struct rtw89_dev *rtwdev) gen_def->disable_eq(rtwdev); } +static inline void rtw89_pci_power_wake(struct rtw89_dev *rtwdev, bool pwr_up) +{ + const struct rtw89_pci_info *info = rtwdev->pci_info; + const struct rtw89_pci_gen_def *gen_def = info->gen_def; + + gen_def->power_wake(rtwdev, pwr_up); +} + #endif diff --git a/drivers/net/wireless/realtek/rtw89/pci_be.c b/drivers/net/wireless/realtek/rtw89/pci_be.c index 34154506f5d4..cd39eebe8186 100644 --- a/drivers/net/wireless/realtek/rtw89/pci_be.c +++ b/drivers/net/wireless/realtek/rtw89/pci_be.c @@ -691,5 +691,6 @@ const struct rtw89_pci_gen_def rtw89_pci_gen_be = { .l1ss_set = rtw89_pci_l1ss_set_be, .disable_eq = rtw89_pci_disable_eq_be, + .power_wake = _patch_pcie_power_wake_be, }; EXPORT_SYMBOL(rtw89_pci_gen_be); From abb541d1e52f1573f40bff2173fe6f8465b0f26c Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Mon, 11 Nov 2024 14:51:30 +0800 Subject: [PATCH 0025/1386] wifi: rtw89: sar: tweak 6GHz SAR subbands span Given that the 6GHz subband edges are not aligned, specific frequencies can span two adjacent subbands. 
We considered the need for this functionality outside of SAR and moved it to a common function. No logic change for existing chips. Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241111065132.19587-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 47 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/core.h | 9 ++++ drivers/net/wireless/realtek/rtw89/sar.c | 55 +++-------------------- 3 files changed, 61 insertions(+), 50 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index e5b2968c1431..f73704fc5f85 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -203,6 +203,53 @@ static const struct ieee80211_iface_combination rtw89_iface_combs[] = { }, }; +#define RTW89_6GHZ_SPAN_HEAD 6145 +#define RTW89_6GHZ_SPAN_IDX(center_freq) \ + ((((int)(center_freq) - RTW89_6GHZ_SPAN_HEAD) / 5) / 2) + +#define RTW89_DECL_6GHZ_SPAN(center_freq, subband_l, subband_h) \ + [RTW89_6GHZ_SPAN_IDX(center_freq)] = { \ + .sar_subband_low = RTW89_SAR_6GHZ_ ## subband_l, \ + .sar_subband_high = RTW89_SAR_6GHZ_ ## subband_h, \ + } + +/* Since 6GHz subbands are not edge aligned, some cases span two subbands. + * In the following, we describe each of them with rtw89_6ghz_span. 
+ */ +static const struct rtw89_6ghz_span rtw89_overlapping_6ghz[] = { + RTW89_DECL_6GHZ_SPAN(6145, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_6GHZ_SPAN(6165, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_6GHZ_SPAN(6185, SUBBAND_5_L, SUBBAND_5_H), + RTW89_DECL_6GHZ_SPAN(6505, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_6GHZ_SPAN(6525, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_6GHZ_SPAN(6545, SUBBAND_6, SUBBAND_7_L), + RTW89_DECL_6GHZ_SPAN(6665, SUBBAND_7_L, SUBBAND_7_H), + RTW89_DECL_6GHZ_SPAN(6705, SUBBAND_7_L, SUBBAND_7_H), + RTW89_DECL_6GHZ_SPAN(6825, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_6GHZ_SPAN(6865, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_6GHZ_SPAN(6875, SUBBAND_7_H, SUBBAND_8), + RTW89_DECL_6GHZ_SPAN(6885, SUBBAND_7_H, SUBBAND_8), +}; + +const struct rtw89_6ghz_span * +rtw89_get_6ghz_span(struct rtw89_dev *rtwdev, u32 center_freq) +{ + int idx; + + if (center_freq >= RTW89_6GHZ_SPAN_HEAD) { + idx = RTW89_6GHZ_SPAN_IDX(center_freq); + /* To decrease size of rtw89_overlapping_6ghz[], + * RTW89_6GHZ_SPAN_IDX() truncates the leading NULLs + * to make first span as index 0 of the table. So, if center + * frequency is less than the first one, it will get netative. 
+ */ + if (idx >= 0 && idx < ARRAY_SIZE(rtw89_overlapping_6ghz)) + return &rtw89_overlapping_6ghz[idx]; + } + + return NULL; +} + bool rtw89_ra_report_to_bitrate(struct rtw89_dev *rtwdev, u8 rpt_rate, u16 *bitrate) { struct ieee80211_rate rate; diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 5ad32eacd0d5..f76c05513d3c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4597,6 +4597,13 @@ struct rtw89_sar_info { }; }; +struct rtw89_6ghz_span { + enum rtw89_sar_subband sar_subband_low; + enum rtw89_sar_subband sar_subband_high; +}; + +#define RTW89_SAR_SPAN_VALID(span) ((span)->sar_subband_high) + enum rtw89_tas_state { RTW89_TAS_STATE_DPR_OFF, RTW89_TAS_STATE_DPR_ON, @@ -6908,6 +6915,8 @@ struct rtw89_sta_link *rtw89_sta_set_link(struct rtw89_sta *rtwsta, unsigned int link_id); void rtw89_sta_unset_link(struct rtw89_sta *rtwsta, unsigned int link_id); void rtw89_core_set_chip_txpwr(struct rtw89_dev *rtwdev); +const struct rtw89_6ghz_span * +rtw89_get_6ghz_span(struct rtw89_dev *rtwdev, u32 center_freq); void rtw89_get_default_chandef(struct cfg80211_chan_def *chandef); void rtw89_get_channel_params(const struct cfg80211_chan_def *chandef, struct rtw89_chan *chan); diff --git a/drivers/net/wireless/realtek/rtw89/sar.c b/drivers/net/wireless/realtek/rtw89/sar.c index bcc287771b2a..871f45a6508c 100644 --- a/drivers/net/wireless/realtek/rtw89/sar.c +++ b/drivers/net/wireless/realtek/rtw89/sar.c @@ -42,7 +42,7 @@ static enum rtw89_sar_subband rtw89_sar_get_subband(struct rtw89_dev *rtwdev, /* freq 6875 (ch 185, 20MHz) spans RTW89_SAR_6GHZ_SUBBAND_7_H * and RTW89_SAR_6GHZ_SUBBAND_8, so directly describe it with - * struct rtw89_sar_span in the following. + * struct rtw89_6ghz_span. */ case 6895 ... 
7115: @@ -50,63 +50,18 @@ static enum rtw89_sar_subband rtw89_sar_get_subband(struct rtw89_dev *rtwdev, } } -struct rtw89_sar_span { - enum rtw89_sar_subband subband_low; - enum rtw89_sar_subband subband_high; -}; - -#define RTW89_SAR_SPAN_VALID(span) ((span)->subband_high) - -#define RTW89_SAR_6GHZ_SPAN_HEAD 6145 -#define RTW89_SAR_6GHZ_SPAN_IDX(center_freq) \ - ((((int)(center_freq) - RTW89_SAR_6GHZ_SPAN_HEAD) / 5) / 2) - -#define RTW89_DECL_SAR_6GHZ_SPAN(center_freq, subband_l, subband_h) \ - [RTW89_SAR_6GHZ_SPAN_IDX(center_freq)] = { \ - .subband_low = RTW89_SAR_6GHZ_ ## subband_l, \ - .subband_high = RTW89_SAR_6GHZ_ ## subband_h, \ - } - -/* Since 6GHz SAR subbands are not edge aligned, some cases span two SAR - * subbands. In the following, we describe each of them with rtw89_sar_span. - */ -static const struct rtw89_sar_span rtw89_sar_overlapping_6ghz[] = { - RTW89_DECL_SAR_6GHZ_SPAN(6145, SUBBAND_5_L, SUBBAND_5_H), - RTW89_DECL_SAR_6GHZ_SPAN(6165, SUBBAND_5_L, SUBBAND_5_H), - RTW89_DECL_SAR_6GHZ_SPAN(6185, SUBBAND_5_L, SUBBAND_5_H), - RTW89_DECL_SAR_6GHZ_SPAN(6505, SUBBAND_6, SUBBAND_7_L), - RTW89_DECL_SAR_6GHZ_SPAN(6525, SUBBAND_6, SUBBAND_7_L), - RTW89_DECL_SAR_6GHZ_SPAN(6545, SUBBAND_6, SUBBAND_7_L), - RTW89_DECL_SAR_6GHZ_SPAN(6665, SUBBAND_7_L, SUBBAND_7_H), - RTW89_DECL_SAR_6GHZ_SPAN(6705, SUBBAND_7_L, SUBBAND_7_H), - RTW89_DECL_SAR_6GHZ_SPAN(6825, SUBBAND_7_H, SUBBAND_8), - RTW89_DECL_SAR_6GHZ_SPAN(6865, SUBBAND_7_H, SUBBAND_8), - RTW89_DECL_SAR_6GHZ_SPAN(6875, SUBBAND_7_H, SUBBAND_8), - RTW89_DECL_SAR_6GHZ_SPAN(6885, SUBBAND_7_H, SUBBAND_8), -}; - static int rtw89_query_sar_config_common(struct rtw89_dev *rtwdev, u32 center_freq, s32 *cfg) { struct rtw89_sar_cfg_common *rtwsar = &rtwdev->sar.cfg_common; - const struct rtw89_sar_span *span = NULL; enum rtw89_sar_subband subband_l, subband_h; - int idx; + const struct rtw89_6ghz_span *span; - if (center_freq >= RTW89_SAR_6GHZ_SPAN_HEAD) { - idx = RTW89_SAR_6GHZ_SPAN_IDX(center_freq); - /* To decrease 
size of rtw89_sar_overlapping_6ghz[], - * RTW89_SAR_6GHZ_SPAN_IDX() truncates the leading NULLs - * to make first span as index 0 of the table. So, if center - * frequency is less than the first one, it will get netative. - */ - if (idx >= 0 && idx < ARRAY_SIZE(rtw89_sar_overlapping_6ghz)) - span = &rtw89_sar_overlapping_6ghz[idx]; - } + span = rtw89_get_6ghz_span(rtwdev, center_freq); if (span && RTW89_SAR_SPAN_VALID(span)) { - subband_l = span->subband_low; - subband_h = span->subband_high; + subband_l = span->sar_subband_low; + subband_h = span->sar_subband_high; } else { subband_l = rtw89_sar_get_subband(rtwdev, center_freq); subband_h = subband_l; From f0f08a4456b5040e45282a59d9c4ea9f39cd2ef5 Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Mon, 11 Nov 2024 14:51:31 +0800 Subject: [PATCH 0026/1386] wifi: rtw89: introduce dynamic antenna gain feature Dynamic Antenna Gain (DAG) adjusts the transmit power based on the platform's antenna gain. This allows for higher transmit power when the antenna gain is lower, while still complying with regulatory limits. The driver reads the Realtek Antenna Gain (RTAG) data from BIOS, and DAG is only enabled when the regulatory domain allows it. Currently, it only supports 8852BE/8852BTE/8852CE. 
Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241111065132.19587-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/acpi.c | 47 ++++ drivers/net/wireless/realtek/rtw89/acpi.h | 9 + drivers/net/wireless/realtek/rtw89/core.c | 3 + drivers/net/wireless/realtek/rtw89/core.h | 33 +++ drivers/net/wireless/realtek/rtw89/debug.c | 4 + drivers/net/wireless/realtek/rtw89/phy.c | 216 +++++++++++++++++- drivers/net/wireless/realtek/rtw89/phy.h | 3 + drivers/net/wireless/realtek/rtw89/rtw8851b.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852a.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852b.c | 1 + .../net/wireless/realtek/rtw89/rtw8852bt.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8922a.c | 1 + 13 files changed, 317 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/acpi.c b/drivers/net/wireless/realtek/rtw89/acpi.c index 908e980a4b72..f5dedb12c129 100644 --- a/drivers/net/wireless/realtek/rtw89/acpi.c +++ b/drivers/net/wireless/realtek/rtw89/acpi.c @@ -148,3 +148,50 @@ int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev, ACPI_FREE(obj); return ret; } + +int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, + struct rtw89_acpi_rtag_result *res) +{ + struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_handle root, handle; + union acpi_object *obj; + acpi_status status; + u32 buf_len; + int ret = 0; + + root = ACPI_HANDLE(rtwdev->dev); + if (!root) + return -EOPNOTSUPP; + + status = acpi_get_handle(root, (acpi_string)"RTAG", &handle); + if (ACPI_FAILURE(status)) + return -EIO; + + status = acpi_evaluate_object(handle, NULL, NULL, &buf); + if (ACPI_FAILURE(status)) + return -EIO; + + obj = buf.pointer; + if (obj->type != ACPI_TYPE_BUFFER) { + rtw89_debug(rtwdev, RTW89_DBG_ACPI, + "acpi: expect buffer but type: %d\n", obj->type); + ret = -EINVAL; + goto out; + } + + buf_len = obj->buffer.length; + if (buf_len != sizeof(*res)) { + 
rtw89_debug(rtwdev, RTW89_DBG_ACPI, "%s: invalid buffer length: %u\n", + __func__, buf_len); + ret = -EINVAL; + goto out; + } + + *res = *(struct rtw89_acpi_rtag_result *)obj->buffer.pointer; + + rtw89_hex_dump(rtwdev, RTW89_DBG_ACPI, "antenna_gain: ", res, sizeof(*res)); + +out: + ACPI_FREE(obj); + return ret; +} diff --git a/drivers/net/wireless/realtek/rtw89/acpi.h b/drivers/net/wireless/realtek/rtw89/acpi.h index d274be1775bf..b43ab106e44d 100644 --- a/drivers/net/wireless/realtek/rtw89/acpi.h +++ b/drivers/net/wireless/realtek/rtw89/acpi.h @@ -63,8 +63,17 @@ struct rtw89_acpi_dsm_result { } u; }; +struct rtw89_acpi_rtag_result { + u8 tag[4]; + u8 revision; + __le32 domain; + u8 ant_gain_table[RTW89_ANT_GAIN_CHAIN_NUM][RTW89_ANT_GAIN_SUBBAND_NR]; +} __packed; + int rtw89_acpi_evaluate_dsm(struct rtw89_dev *rtwdev, enum rtw89_acpi_dsm_func func, struct rtw89_acpi_dsm_result *res); +int rtw89_acpi_evaluate_rtag(struct rtw89_dev *rtwdev, + struct rtw89_acpi_rtag_result *res); #endif diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index f73704fc5f85..34034f44c050 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -211,6 +211,8 @@ static const struct ieee80211_iface_combination rtw89_iface_combs[] = { [RTW89_6GHZ_SPAN_IDX(center_freq)] = { \ .sar_subband_low = RTW89_SAR_6GHZ_ ## subband_l, \ .sar_subband_high = RTW89_SAR_6GHZ_ ## subband_h, \ + .ant_gain_subband_low = RTW89_ANT_GAIN_6GHZ_ ## subband_l, \ + .ant_gain_subband_high = RTW89_ANT_GAIN_6GHZ_ ## subband_h, \ } /* Since 6GHz subbands are not edge aligned, some cases span two subbands. 
@@ -4802,6 +4804,7 @@ int rtw89_core_init(struct rtw89_dev *rtwdev) rtw89_ser_init(rtwdev); rtw89_entity_init(rtwdev); rtw89_tas_init(rtwdev); + rtw89_phy_ant_gain_init(rtwdev); return 0; } diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index f76c05513d3c..bf7aff426896 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4255,6 +4255,7 @@ struct rtw89_chip_info { u16 support_bandwidths; bool support_unii4; bool support_rnr; + bool support_ant_gain; bool ul_tb_waveform_ctrl; bool ul_tb_pwr_diff; bool hw_sec_hdr; @@ -4597,12 +4598,43 @@ struct rtw89_sar_info { }; }; +enum rtw89_ant_gain_subband { + RTW89_ANT_GAIN_2GHZ_SUBBAND, + RTW89_ANT_GAIN_5GHZ_SUBBAND_1, /* U-NII-1 */ + RTW89_ANT_GAIN_5GHZ_SUBBAND_2, /* U-NII-2 */ + RTW89_ANT_GAIN_5GHZ_SUBBAND_2E, /* U-NII-2-Extended */ + RTW89_ANT_GAIN_5GHZ_SUBBAND_3_4, /* U-NII-3 and U-NII-4 */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_5_L, /* U-NII-5 lower part */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_5_H, /* U-NII-5 higher part */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_6, /* U-NII-6 */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_7_L, /* U-NII-7 lower part */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_7_H, /* U-NII-7 higher part */ + RTW89_ANT_GAIN_6GHZ_SUBBAND_8, /* U-NII-8 */ + + RTW89_ANT_GAIN_SUBBAND_NR, +}; + +enum rtw89_ant_gain_domain_type { + RTW89_ANT_GAIN_ETSI = 0, + + RTW89_ANT_GAIN_DOMAIN_NUM, +}; + +#define RTW89_ANT_GAIN_CHAIN_NUM 2 +struct rtw89_ant_gain_info { + s8 offset[RTW89_ANT_GAIN_CHAIN_NUM][RTW89_ANT_GAIN_SUBBAND_NR]; + u32 regd_enabled; +}; + struct rtw89_6ghz_span { enum rtw89_sar_subband sar_subband_low; enum rtw89_sar_subband sar_subband_high; + enum rtw89_ant_gain_subband ant_gain_subband_low; + enum rtw89_ant_gain_subband ant_gain_subband_high; }; #define RTW89_SAR_SPAN_VALID(span) ((span)->sar_subband_high) +#define RTW89_ANT_GAIN_SPAN_VALID(span) ((span)->ant_gain_subband_high) enum rtw89_tas_state { RTW89_TAS_STATE_DPR_OFF, @@ -5643,6 +5675,7 @@ 
struct rtw89_dev { struct rtw89_regulatory_info regulatory; struct rtw89_sar_info sar; struct rtw89_tas_info tas; + struct rtw89_ant_gain_info ant_gain; struct rtw89_btc btc; enum rtw89_ps_mode ps_mode; diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index 6abd88fa80ba..fd86752d86f3 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -9,6 +9,7 @@ #include "fw.h" #include "mac.h" #include "pci.h" +#include "phy.h" #include "ps.h" #include "reg.h" #include "sar.h" @@ -882,6 +883,9 @@ static int rtw89_debug_priv_txpwr_table_get(struct seq_file *m, void *v) seq_puts(m, "[TAS]\n"); rtw89_print_tas(m, rtwdev); + seq_puts(m, "[DAG]\n"); + rtw89_print_ant_gain(m, rtwdev, chan); + tbl = dbgfs_txpwr_tables[chip_gen]; if (!tbl) { ret = -EOPNOTSUPP; diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index f24aca663cf0..e9a635c43a81 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -2,6 +2,7 @@ /* Copyright(c) 2019-2020 Realtek Corporation */ +#include "acpi.h" #include "chan.h" #include "coex.h" #include "debug.h" @@ -1854,6 +1855,211 @@ void rtw89_phy_write_reg3_tbl(struct rtw89_dev *rtwdev, } EXPORT_SYMBOL(rtw89_phy_write_reg3_tbl); +static u8 rtw89_phy_ant_gain_domain_to_regd(struct rtw89_dev *rtwdev, u8 ant_gain_regd) +{ + switch (ant_gain_regd) { + case RTW89_ANT_GAIN_ETSI: + return RTW89_ETSI; + default: + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, + "unknown antenna gain domain: %d\n", + ant_gain_regd); + return RTW89_REGD_NUM; + } +} + +/* antenna gain in unit of 0.25 dbm */ +#define RTW89_ANT_GAIN_2GHZ_MIN -8 +#define RTW89_ANT_GAIN_2GHZ_MAX 14 +#define RTW89_ANT_GAIN_5GHZ_MIN -8 +#define RTW89_ANT_GAIN_5GHZ_MAX 20 +#define RTW89_ANT_GAIN_6GHZ_MIN -8 +#define RTW89_ANT_GAIN_6GHZ_MAX 20 + +#define RTW89_ANT_GAIN_REF_2GHZ 14 +#define RTW89_ANT_GAIN_REF_5GHZ 20 +#define 
RTW89_ANT_GAIN_REF_6GHZ 20 + +void rtw89_phy_ant_gain_init(struct rtw89_dev *rtwdev) +{ + struct rtw89_ant_gain_info *ant_gain = &rtwdev->ant_gain; + const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_acpi_rtag_result res = {}; + u32 domain; + int ret; + u8 i, j; + u8 regd; + u8 val; + + if (!chip->support_ant_gain) + return; + + ret = rtw89_acpi_evaluate_rtag(rtwdev, &res); + if (ret) { + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, + "acpi: cannot eval rtag: %d\n", ret); + return; + } + + if (res.revision != 0) { + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, + "unknown rtag revision: %d\n", res.revision); + return; + } + + domain = get_unaligned_le32(&res.domain); + + for (i = 0; i < RTW89_ANT_GAIN_DOMAIN_NUM; i++) { + if (!(domain & BIT(i))) + continue; + + regd = rtw89_phy_ant_gain_domain_to_regd(rtwdev, i); + if (regd >= RTW89_REGD_NUM) + continue; + ant_gain->regd_enabled |= BIT(regd); + } + + for (i = 0; i < RTW89_ANT_GAIN_CHAIN_NUM; i++) { + for (j = 0; j < RTW89_ANT_GAIN_SUBBAND_NR; j++) { + val = res.ant_gain_table[i][j]; + switch (j) { + default: + case RTW89_ANT_GAIN_2GHZ_SUBBAND: + val = RTW89_ANT_GAIN_REF_2GHZ - + clamp_t(s8, val, + RTW89_ANT_GAIN_2GHZ_MIN, + RTW89_ANT_GAIN_2GHZ_MAX); + break; + case RTW89_ANT_GAIN_5GHZ_SUBBAND_1: + case RTW89_ANT_GAIN_5GHZ_SUBBAND_2: + case RTW89_ANT_GAIN_5GHZ_SUBBAND_2E: + case RTW89_ANT_GAIN_5GHZ_SUBBAND_3_4: + val = RTW89_ANT_GAIN_REF_5GHZ - + clamp_t(s8, val, + RTW89_ANT_GAIN_5GHZ_MIN, + RTW89_ANT_GAIN_5GHZ_MAX); + break; + case RTW89_ANT_GAIN_6GHZ_SUBBAND_5_L: + case RTW89_ANT_GAIN_6GHZ_SUBBAND_5_H: + case RTW89_ANT_GAIN_6GHZ_SUBBAND_6: + case RTW89_ANT_GAIN_6GHZ_SUBBAND_7_L: + case RTW89_ANT_GAIN_6GHZ_SUBBAND_7_H: + case RTW89_ANT_GAIN_6GHZ_SUBBAND_8: + val = RTW89_ANT_GAIN_REF_6GHZ - + clamp_t(s8, val, + RTW89_ANT_GAIN_6GHZ_MIN, + RTW89_ANT_GAIN_6GHZ_MAX); + } + ant_gain->offset[i][j] = val; + } + } +} + +static +enum rtw89_ant_gain_subband rtw89_phy_ant_gain_get_subband(struct rtw89_dev *rtwdev, + u32 
center_freq) +{ + switch (center_freq) { + default: + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, + "center freq: %u to antenna gain subband is unhandled\n", + center_freq); + fallthrough; + case 2412 ... 2484: + return RTW89_ANT_GAIN_2GHZ_SUBBAND; + case 5180 ... 5240: + return RTW89_ANT_GAIN_5GHZ_SUBBAND_1; + case 5250 ... 5320: + return RTW89_ANT_GAIN_5GHZ_SUBBAND_2; + case 5500 ... 5720: + return RTW89_ANT_GAIN_5GHZ_SUBBAND_2E; + case 5745 ... 5885: + return RTW89_ANT_GAIN_5GHZ_SUBBAND_3_4; + case 5955 ... 6155: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_5_L; + case 6175 ... 6415: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_5_H; + case 6435 ... 6515: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_6; + case 6535 ... 6695: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_7_L; + case 6715 ... 6855: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_7_H; + + /* freq 6875 (ch 185, 20MHz) spans RTW89_ANT_GAIN_6GHZ_SUBBAND_7_H + * and RTW89_ANT_GAIN_6GHZ_SUBBAND_8, so directly describe it with + * struct rtw89_6ghz_span. + */ + + case 6895 ... 7115: + return RTW89_ANT_GAIN_6GHZ_SUBBAND_8; + } +} + +static s8 rtw89_phy_ant_gain_query(struct rtw89_dev *rtwdev, + enum rtw89_rf_path path, u32 center_freq) +{ + struct rtw89_ant_gain_info *ant_gain = &rtwdev->ant_gain; + enum rtw89_ant_gain_subband subband_l, subband_h; + const struct rtw89_6ghz_span *span; + + span = rtw89_get_6ghz_span(rtwdev, center_freq); + + if (span && RTW89_ANT_GAIN_SPAN_VALID(span)) { + subband_l = span->ant_gain_subband_low; + subband_h = span->ant_gain_subband_high; + } else { + subband_l = rtw89_phy_ant_gain_get_subband(rtwdev, center_freq); + subband_h = subband_l; + } + + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, + "center_freq %u: antenna gain subband {%u, %u}\n", + center_freq, subband_l, subband_h); + + return min(ant_gain->offset[path][subband_l], + ant_gain->offset[path][subband_h]); +} + +static s8 rtw89_phy_ant_gain_offset(struct rtw89_dev *rtwdev, u8 band, u32 center_freq) +{ + struct rtw89_ant_gain_info *ant_gain = &rtwdev->ant_gain; + const 
struct rtw89_chip_info *chip = rtwdev->chip; + u8 regd = rtw89_regd_get(rtwdev, band); + s8 offset_patha, offset_pathb; + + if (!chip->support_ant_gain) + return 0; + + if (!(ant_gain->regd_enabled & BIT(regd))) + return 0; + + offset_patha = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_A, center_freq); + offset_pathb = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_B, center_freq); + + return max(offset_patha, offset_pathb); +} + +void rtw89_print_ant_gain(struct seq_file *m, struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan) +{ + struct rtw89_ant_gain_info *ant_gain = &rtwdev->ant_gain; + const struct rtw89_chip_info *chip = rtwdev->chip; + u8 regd = rtw89_regd_get(rtwdev, chan->band_type); + s8 offset_patha, offset_pathb; + + if (!chip->support_ant_gain || !(ant_gain->regd_enabled & BIT(regd))) { + seq_puts(m, "no DAG is applied\n"); + return; + } + + offset_patha = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_A, chan->freq); + offset_pathb = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_B, chan->freq); + + seq_printf(m, "ChainA offset: %d dBm\n", offset_patha); + seq_printf(m, "ChainB offset: %d dBm\n", offset_pathb); +} + static const u8 rtw89_rs_idx_num_ax[] = { [RTW89_RS_CCK] = RTW89_RATE_CCK_NUM, [RTW89_RS_OFDM] = RTW89_RATE_OFDM_NUM, @@ -2027,7 +2233,7 @@ s8 rtw89_phy_read_txpwr_limit(struct rtw89_dev *rtwdev, u8 band, u8 ch_idx = rtw89_channel_to_idx(rtwdev, band, ch); u8 regd = rtw89_regd_get(rtwdev, band); u8 reg6 = regulatory->reg_6ghz_power; - s8 lmt = 0, sar; + s8 lmt = 0, sar, offset; s8 cstr; switch (band) { @@ -2059,7 +2265,8 @@ s8 rtw89_phy_read_txpwr_limit(struct rtw89_dev *rtwdev, u8 band, return 0; } - lmt = rtw89_phy_txpwr_rf_to_mac(rtwdev, lmt); + offset = rtw89_phy_ant_gain_offset(rtwdev, band, freq); + lmt = rtw89_phy_txpwr_rf_to_mac(rtwdev, lmt + offset); sar = rtw89_query_sar(rtwdev, freq); cstr = rtw89_phy_get_tpe_constraint(rtwdev, band); @@ -2286,7 +2493,7 @@ s8 rtw89_phy_read_txpwr_limit_ru(struct rtw89_dev *rtwdev, u8 band, u8 ch_idx = 
rtw89_channel_to_idx(rtwdev, band, ch); u8 regd = rtw89_regd_get(rtwdev, band); u8 reg6 = regulatory->reg_6ghz_power; - s8 lmt_ru = 0, sar; + s8 lmt_ru = 0, sar, offset; s8 cstr; switch (band) { @@ -2318,7 +2525,8 @@ s8 rtw89_phy_read_txpwr_limit_ru(struct rtw89_dev *rtwdev, u8 band, return 0; } - lmt_ru = rtw89_phy_txpwr_rf_to_mac(rtwdev, lmt_ru); + offset = rtw89_phy_ant_gain_offset(rtwdev, band, freq); + lmt_ru = rtw89_phy_txpwr_rf_to_mac(rtwdev, lmt_ru + offset); sar = rtw89_query_sar(rtwdev, freq); cstr = rtw89_phy_get_tpe_constraint(rtwdev, band); diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index c683f4d7d29b..2720cabfafe4 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -826,6 +826,9 @@ s8 *rtw89_phy_raw_byr_seek(struct rtw89_dev *rtwdev, const struct rtw89_rate_desc *desc); s8 rtw89_phy_read_txpwr_byrate(struct rtw89_dev *rtwdev, u8 band, u8 bw, const struct rtw89_rate_desc *rate_desc); +void rtw89_phy_ant_gain_init(struct rtw89_dev *rtwdev); +void rtw89_print_ant_gain(struct seq_file *m, struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan); void rtw89_phy_load_txpwr_byrate(struct rtw89_dev *rtwdev, const struct rtw89_txpwr_table *tbl); s8 rtw89_phy_read_txpwr_limit(struct rtw89_dev *rtwdev, u8 band, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 68c67a763f4d..29c697a251d3 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2479,6 +2479,7 @@ const struct rtw89_chip_info rtw8851b_chip_info = { BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), .support_unii4 = true, + .support_ant_gain = false, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, .hw_sec_hdr = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index e647759ebd69..fd59ee9f3626 100644 
--- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2196,6 +2196,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = { BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), .support_unii4 = false, + .support_ant_gain = false, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = false, .hw_sec_hdr = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 49a319128316..76667d4f3631 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -834,6 +834,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = { BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), .support_unii4 = true, + .support_ant_gain = true, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, .hw_sec_hdr = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index 876725133228..d8c0fb87b625 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -767,6 +767,7 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { BIT(NL80211_CHAN_WIDTH_40) | BIT(NL80211_CHAN_WIDTH_80), .support_unii4 = true, + .support_ant_gain = true, .ul_tb_waveform_ctrl = true, .ul_tb_pwr_diff = false, .hw_sec_hdr = false, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index cde34f8e1e67..b63971010751 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -2976,6 +2976,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = { BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_160), .support_unii4 = true, + .support_ant_gain = true, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = true, .hw_sec_hdr = true, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c 
index 9a4db04a1967..b3879b485918 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2746,6 +2746,7 @@ const struct rtw89_chip_info rtw8922a_chip_info = { BIT(NL80211_CHAN_WIDTH_80) | BIT(NL80211_CHAN_WIDTH_160), .support_unii4 = true, + .support_ant_gain = false, .ul_tb_waveform_ctrl = false, .ul_tb_pwr_diff = false, .hw_sec_hdr = true, From 50191eace88c4163c8990984a8a156f09f226d39 Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Mon, 11 Nov 2024 14:51:32 +0800 Subject: [PATCH 0027/1386] wifi: rtw89: handle different TX power between RF path The dynamic antenna gain (DAG) may independently apply different TX powers for each RF path. This can be accomplished by using the larger TX power as the reference path and adjusting the TX power of the other path based on the difference. Currently only 8852BE/8852BTE/ 8852CE are supported. Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241111065132.19587-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/phy.c | 31 ++++++------ drivers/net/wireless/realtek/rtw89/phy.h | 23 +++++++++ drivers/net/wireless/realtek/rtw89/rtw8851b.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852a.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852b.c | 1 + .../wireless/realtek/rtw89/rtw8852b_common.c | 46 +++++++++++------- .../net/wireless/realtek/rtw89/rtw8852bt.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 47 ++++++++++++------- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 1 + 10 files changed, 107 insertions(+), 46 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index bf7aff426896..ecccb51184be 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4297,6 +4297,7 @@ struct rtw89_chip_info { const struct rtw89_rfe_parms *dflt_parms; const struct 
rtw89_chanctx_listener *chanctx_listener; + u8 txpwr_factor_bb; u8 txpwr_factor_rf; u8 txpwr_factor_mac; diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index e9a635c43a81..be2f5338c3a0 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -2040,6 +2040,23 @@ static s8 rtw89_phy_ant_gain_offset(struct rtw89_dev *rtwdev, u8 band, u32 cente return max(offset_patha, offset_pathb); } +s16 rtw89_phy_ant_gain_pwr_offset(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan) +{ + struct rtw89_ant_gain_info *ant_gain = &rtwdev->ant_gain; + u8 regd = rtw89_regd_get(rtwdev, chan->band_type); + s8 offset_patha, offset_pathb; + + if (!(ant_gain->regd_enabled & BIT(regd))) + return 0; + + offset_patha = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_A, chan->freq); + offset_pathb = rtw89_phy_ant_gain_query(rtwdev, RF_PATH_B, chan->freq); + + return rtw89_phy_txpwr_rf_to_bb(rtwdev, offset_patha - offset_pathb); +} +EXPORT_SYMBOL(rtw89_phy_ant_gain_pwr_offset); + void rtw89_print_ant_gain(struct seq_file *m, struct rtw89_dev *rtwdev, const struct rtw89_chan *chan) { @@ -2123,20 +2140,6 @@ void rtw89_phy_load_txpwr_byrate(struct rtw89_dev *rtwdev, } EXPORT_SYMBOL(rtw89_phy_load_txpwr_byrate); -static s8 rtw89_phy_txpwr_rf_to_mac(struct rtw89_dev *rtwdev, s8 txpwr_rf) -{ - const struct rtw89_chip_info *chip = rtwdev->chip; - - return txpwr_rf >> (chip->txpwr_factor_rf - chip->txpwr_factor_mac); -} - -static s8 rtw89_phy_txpwr_dbm_to_mac(struct rtw89_dev *rtwdev, s8 dbm) -{ - const struct rtw89_chip_info *chip = rtwdev->chip; - - return clamp_t(s16, dbm << chip->txpwr_factor_mac, -64, 63); -} - static s8 rtw89_phy_txpwr_dbm_without_tolerance(s8 dbm) { const u8 tssi_deviation_point = 0; diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index 2720cabfafe4..f4ef7f5fb081 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ 
b/drivers/net/wireless/realtek/rtw89/phy.h @@ -827,6 +827,8 @@ s8 *rtw89_phy_raw_byr_seek(struct rtw89_dev *rtwdev, s8 rtw89_phy_read_txpwr_byrate(struct rtw89_dev *rtwdev, u8 band, u8 bw, const struct rtw89_rate_desc *rate_desc); void rtw89_phy_ant_gain_init(struct rtw89_dev *rtwdev); +s16 rtw89_phy_ant_gain_pwr_offset(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan); void rtw89_print_ant_gain(struct seq_file *m, struct rtw89_dev *rtwdev, const struct rtw89_chan *chan); void rtw89_phy_load_txpwr_byrate(struct rtw89_dev *rtwdev, @@ -899,6 +901,27 @@ void rtw89_phy_set_txpwr_limit_ru(struct rtw89_dev *rtwdev, phy->set_txpwr_limit_ru(rtwdev, chan, phy_idx); } +static inline s8 rtw89_phy_txpwr_rf_to_bb(struct rtw89_dev *rtwdev, s8 txpwr_rf) +{ + const struct rtw89_chip_info *chip = rtwdev->chip; + + return txpwr_rf << (chip->txpwr_factor_bb - chip->txpwr_factor_rf); +} + +static inline s8 rtw89_phy_txpwr_rf_to_mac(struct rtw89_dev *rtwdev, s8 txpwr_rf) +{ + const struct rtw89_chip_info *chip = rtwdev->chip; + + return txpwr_rf >> (chip->txpwr_factor_rf - chip->txpwr_factor_mac); +} + +static inline s8 rtw89_phy_txpwr_dbm_to_mac(struct rtw89_dev *rtwdev, s8 dbm) +{ + const struct rtw89_chip_info *chip = rtwdev->chip; + + return clamp_t(s16, dbm << chip->txpwr_factor_mac, -64, 63); +} + void rtw89_phy_ra_assoc(struct rtw89_dev *rtwdev, struct rtw89_sta_link *rtwsta_link); void rtw89_phy_ra_update(struct rtw89_dev *rtwdev); void rtw89_phy_ra_update_sta(struct rtw89_dev *rtwdev, struct ieee80211_sta *sta, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 29c697a251d3..1ed4e64cbd2c 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2464,6 +2464,7 @@ const struct rtw89_chip_info rtw8851b_chip_info = { .nctl_post_table = &rtw8851b_nctl_post_defs_tbl, .dflt_parms = &rtw89_8851b_dflt_parms, .rfe_parms_conf = rtw89_8851b_rfe_parms_conf, + 
.txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = NULL, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index fd59ee9f3626..a7105a288bc4 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2181,6 +2181,7 @@ const struct rtw89_chip_info rtw8852a_chip_info = { .nctl_post_table = NULL, .dflt_parms = &rtw89_8852a_dflt_parms, .rfe_parms_conf = NULL, + .txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = &rtw89_8852a_phy_dig_table, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index 76667d4f3631..ebc853a905dd 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -819,6 +819,7 @@ const struct rtw89_chip_info rtw8852b_chip_info = { .nctl_post_table = NULL, .dflt_parms = &rtw89_8852b_dflt_parms, .rfe_parms_conf = NULL, + .txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = NULL, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c b/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c index f4aa4437fb75..012739d97f71 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c @@ -1206,24 +1206,25 @@ void __rtw8852bx_set_channel_bb(struct rtw89_dev *rtwdev, const struct rtw89_cha } static u32 rtw8852bx_bb_cal_txpwr_ref(struct rtw89_dev *rtwdev, - enum rtw89_phy_idx phy_idx, s16 ref) + enum rtw89_phy_idx phy_idx, + s16 ref, u16 pwr_ofst_decrease) { const u16 tssi_16dbm_cw = 0x12c; const u8 base_cw_0db = 0x27; - const s8 ofst_int = 0; s16 pwr_s10_3; s16 rf_pwr_cw; u16 bb_pwr_cw; u32 pwr_cw; u32 tssi_ofst_cw; - pwr_s10_3 = (ref << 1) + (s16)(ofst_int) + (s16)(base_cw_0db << 3); + pwr_s10_3 = (ref << 1) + (s16)(base_cw_0db << 3) - pwr_ofst_decrease; bb_pwr_cw = 
u16_get_bits(pwr_s10_3, GENMASK(2, 0)); rf_pwr_cw = u16_get_bits(pwr_s10_3, GENMASK(8, 3)); rf_pwr_cw = clamp_t(s16, rf_pwr_cw, 15, 63); pwr_cw = (rf_pwr_cw << 3) | bb_pwr_cw; - tssi_ofst_cw = (u32)((s16)tssi_16dbm_cw + (ref << 1) - (16 << 3)); + tssi_ofst_cw = (u32)((s16)tssi_16dbm_cw + (ref << 1) - (16 << 3)) - + pwr_ofst_decrease; rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] tssi_ofst_cw=%d rf_cw=0x%x bb_cw=0x%x\n", tssi_ofst_cw, rf_pwr_cw, bb_pwr_cw); @@ -1234,10 +1235,11 @@ static u32 rtw8852bx_bb_cal_txpwr_ref(struct rtw89_dev *rtwdev, } static void rtw8852bx_set_txpwr_ref(struct rtw89_dev *rtwdev, - enum rtw89_phy_idx phy_idx) + enum rtw89_phy_idx phy_idx, s16 pwr_ofst) { static const u32 addr[RF_PATH_NUM_8852BX] = {0x5800, 0x7800}; const u32 mask = B_DPD_TSSI_CW | B_DPD_PWR_CW | B_DPD_REF; + u16 ofst_dec[RF_PATH_NUM_8852BX]; const u8 ofst_ofdm = 0x4; const u8 ofst_cck = 0x8; const s16 ref_ofdm = 0; @@ -1250,19 +1252,20 @@ static void rtw8852bx_set_txpwr_ref(struct rtw89_dev *rtwdev, rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_AX_PWR_RATE_CTRL, B_AX_PWR_REF, 0x0); - rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb ofdm txpwr ref\n"); - val = rtw8852bx_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_ofdm); + ofst_dec[RF_PATH_A] = pwr_ofst > 0 ? 0 : abs(pwr_ofst); + ofst_dec[RF_PATH_B] = pwr_ofst > 0 ? 
pwr_ofst : 0; - for (i = 0; i < RF_PATH_NUM_8852BX; i++) - rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_ofdm, mask, val, - phy_idx); + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb ofdm txpwr ref\n"); + for (i = 0; i < RF_PATH_NUM_8852BX; i++) { + val = rtw8852bx_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_ofdm, ofst_dec[i]); + rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_ofdm, mask, val, phy_idx); + } rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb cck txpwr ref\n"); - val = rtw8852bx_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_cck); - - for (i = 0; i < RF_PATH_NUM_8852BX; i++) - rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_cck, mask, val, - phy_idx); + for (i = 0; i < RF_PATH_NUM_8852BX; i++) { + val = rtw8852bx_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_cck, ofst_dec[i]); + rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_cck, mask, val, phy_idx); + } } static void rtw8852bx_bb_set_tx_shape_dfir(struct rtw89_dev *rtwdev, @@ -1333,6 +1336,16 @@ static void rtw8852bx_set_tx_shape(struct rtw89_dev *rtwdev, tx_shape_ofdm); } +static void rtw8852bx_set_txpwr_diff(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + s16 pwr_ofst; + + pwr_ofst = rtw89_phy_ant_gain_pwr_offset(rtwdev, chan); + rtw8852bx_set_txpwr_ref(rtwdev, phy_idx, pwr_ofst); +} + static void __rtw8852bx_set_txpwr(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx) @@ -1342,12 +1355,13 @@ static void __rtw8852bx_set_txpwr(struct rtw89_dev *rtwdev, rtw8852bx_set_tx_shape(rtwdev, chan, phy_idx); rtw89_phy_set_txpwr_limit(rtwdev, chan, phy_idx); rtw89_phy_set_txpwr_limit_ru(rtwdev, chan, phy_idx); + rtw8852bx_set_txpwr_diff(rtwdev, chan, phy_idx); } static void __rtw8852bx_set_txpwr_ctrl(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) { - rtw8852bx_set_txpwr_ref(rtwdev, phy_idx); + rtw8852bx_set_txpwr_ref(rtwdev, phy_idx, 0); } static diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c 
b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index d8c0fb87b625..cd1385ff8003 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -752,6 +752,7 @@ const struct rtw89_chip_info rtw8852bt_chip_info = { .nctl_post_table = NULL, .dflt_parms = NULL, .rfe_parms_conf = NULL, + .txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = NULL, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index b63971010751..c7d39499ca75 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -1882,9 +1882,9 @@ static void rtw8852c_rfk_track(struct rtw89_dev *rtwdev) } static u32 rtw8852c_bb_cal_txpwr_ref(struct rtw89_dev *rtwdev, - enum rtw89_phy_idx phy_idx, s16 ref) + enum rtw89_phy_idx phy_idx, + s16 ref, u16 pwr_ofst_decrease) { - s8 ofst_int = 0; u8 base_cw_0db = 0x27; u16 tssi_16dbm_cw = 0x12c; s16 pwr_s10_3 = 0; @@ -1893,13 +1893,14 @@ static u32 rtw8852c_bb_cal_txpwr_ref(struct rtw89_dev *rtwdev, u32 pwr_cw = 0; u32 tssi_ofst_cw = 0; - pwr_s10_3 = (ref << 1) + (s16)(ofst_int) + (s16)(base_cw_0db << 3); + pwr_s10_3 = (ref << 1) + (s16)(base_cw_0db << 3) - pwr_ofst_decrease; bb_pwr_cw = FIELD_GET(GENMASK(2, 0), pwr_s10_3); rf_pwr_cw = FIELD_GET(GENMASK(8, 3), pwr_s10_3); rf_pwr_cw = clamp_t(s16, rf_pwr_cw, 15, 63); pwr_cw = (rf_pwr_cw << 3) | bb_pwr_cw; - tssi_ofst_cw = (u32)((s16)tssi_16dbm_cw + (ref << 1) - (16 << 3)); + tssi_ofst_cw = (u32)((s16)tssi_16dbm_cw + (ref << 1) - (16 << 3)) - + pwr_ofst_decrease; rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] tssi_ofst_cw=%d rf_cw=0x%x bb_cw=0x%x\n", tssi_ofst_cw, rf_pwr_cw, bb_pwr_cw); @@ -1943,9 +1944,10 @@ void rtw8852c_set_txpwr_ul_tb_offset(struct rtw89_dev *rtwdev, } static void rtw8852c_set_txpwr_ref(struct rtw89_dev *rtwdev, - enum rtw89_phy_idx phy_idx) + enum rtw89_phy_idx phy_idx, s16 pwr_ofst) { static const u32 
addr[RF_PATH_NUM_8852C] = {0x5800, 0x7800}; + u16 ofst_dec[RF_PATH_NUM_8852C]; const u32 mask = 0x7FFFFFF; const u8 ofst_ofdm = 0x4; const u8 ofst_cck = 0x8; @@ -1959,19 +1961,20 @@ static void rtw8852c_set_txpwr_ref(struct rtw89_dev *rtwdev, rtw89_mac_txpwr_write32_mask(rtwdev, phy_idx, R_AX_PWR_RATE_CTRL, GENMASK(27, 10), 0x0); - rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb ofdm txpwr ref\n"); - val = rtw8852c_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_ofdm); + ofst_dec[RF_PATH_A] = pwr_ofst > 0 ? 0 : abs(pwr_ofst); + ofst_dec[RF_PATH_B] = pwr_ofst > 0 ? pwr_ofst : 0; - for (i = 0; i < RF_PATH_NUM_8852C; i++) - rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_ofdm, mask, val, - phy_idx); + rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb ofdm txpwr ref\n"); + for (i = 0; i < RF_PATH_NUM_8852C; i++) { + val = rtw8852c_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_ofdm, ofst_dec[i]); + rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_ofdm, mask, val, phy_idx); + } rtw89_debug(rtwdev, RTW89_DBG_TXPWR, "[TXPWR] set bb cck txpwr ref\n"); - val = rtw8852c_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_cck); - - for (i = 0; i < RF_PATH_NUM_8852C; i++) - rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_cck, mask, val, - phy_idx); + for (i = 0; i < RF_PATH_NUM_8852C; i++) { + val = rtw8852c_bb_cal_txpwr_ref(rtwdev, phy_idx, ref_cck, ofst_dec[i]); + rtw89_phy_write32_idx(rtwdev, addr[i] + ofst_cck, mask, val, phy_idx); + } } static void rtw8852c_bb_set_tx_shape_dfir(struct rtw89_dev *rtwdev, @@ -2052,6 +2055,16 @@ static void rtw8852c_set_tx_shape(struct rtw89_dev *rtwdev, B_P1_DAC_COMP_POST_DPD_EN); } +static void rtw8852c_set_txpwr_diff(struct rtw89_dev *rtwdev, + const struct rtw89_chan *chan, + enum rtw89_phy_idx phy_idx) +{ + s16 pwr_ofst; + + pwr_ofst = rtw89_phy_ant_gain_pwr_offset(rtwdev, chan); + rtw8852c_set_txpwr_ref(rtwdev, phy_idx, pwr_ofst); +} + static void rtw8852c_set_txpwr(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan, enum rtw89_phy_idx phy_idx) @@ -2061,12 +2074,13 
@@ static void rtw8852c_set_txpwr(struct rtw89_dev *rtwdev, rtw8852c_set_tx_shape(rtwdev, chan, phy_idx); rtw89_phy_set_txpwr_limit(rtwdev, chan, phy_idx); rtw89_phy_set_txpwr_limit_ru(rtwdev, chan, phy_idx); + rtw8852c_set_txpwr_diff(rtwdev, chan, phy_idx); } static void rtw8852c_set_txpwr_ctrl(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy_idx) { - rtw8852c_set_txpwr_ref(rtwdev, phy_idx); + rtw8852c_set_txpwr_ref(rtwdev, phy_idx, 0); } static void @@ -2959,6 +2973,7 @@ const struct rtw89_chip_info rtw8852c_chip_info = { .dflt_parms = &rtw89_8852c_dflt_parms, .rfe_parms_conf = NULL, .chanctx_listener = &rtw8852c_chanctx_listener, + .txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = NULL, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index b3879b485918..a5333099668a 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2729,6 +2729,7 @@ const struct rtw89_chip_info rtw8922a_chip_info = { .nctl_post_table = NULL, .dflt_parms = NULL, /* load parm from fw */ .rfe_parms_conf = NULL, /* load parm from fw */ + .txpwr_factor_bb = 3, .txpwr_factor_rf = 2, .txpwr_factor_mac = 1, .dig_table = NULL, From 31be3175bd7be89e39c82b3973c9d4ff55a17583 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 7 Nov 2024 15:08:33 +0100 Subject: [PATCH 0028/1386] wifi: rtl8xxxu: add more missing rtl8192cu USB IDs The rtl8xxxu has all the rtl8192cu USB IDs from rtlwifi/rtl8192cu/sw.c except for the following 10, add these to the untested section so they can be used with the rtl8xxxu as the rtl8192cu are well supported. This fixes these wifi modules not working on distributions which have disabled CONFIG_RTL8192CU replacing it with CONFIG_RTL8XXXU_UNTESTED, like Fedora. 
Closes: https://bugzilla.redhat.com/show_bug.cgi?id=2321540 Cc: stable@vger.kernel.org Cc: Peter Robinson Signed-off-by: Hans de Goede Reviewed-by: Peter Robinson Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241107140833.274986-1-hdegoede@redhat.com --- drivers/net/wireless/realtek/rtl8xxxu/core.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/core.c b/drivers/net/wireless/realtek/rtl8xxxu/core.c index f95898f68d68..4ce0c05c5129 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/core.c @@ -8147,6 +8147,8 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x817e, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8186, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818a, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x317f, 0xff, 0xff, 0xff), @@ -8157,12 +8159,18 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1102, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x11f2, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x06f8, 0xe033, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x07b8, 0x8188, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x07b8, 0x8189, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x0846, 0x9041, 0xff, 0xff, 0xff), .driver_info = 
(unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x0846, 0x9043, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x0b05, 0x17ba, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x1e1e, 0xff, 0xff, 0xff), @@ -8179,6 +8187,10 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3357, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3358, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3359, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0x4902, 0xff, 0xff, 0xff), @@ -8193,6 +8205,8 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4856, 0x0091, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x9846, 0x9041, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0xcdab, 0x8010, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x04f2, 0xaff7, 0xff, 0xff, 0xff), @@ -8218,6 +8232,8 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x0586, 0x341f, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x06f8, 0xe033, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x06f8, 0xe035, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, 
{USB_DEVICE_AND_INTERFACE_INFO(0x0b05, 0x17ab, 0xff, 0xff, 0xff), @@ -8226,6 +8242,8 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x0df6, 0x0070, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x0df6, 0x0077, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x0789, 0x016d, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x07aa, 0x0056, 0xff, 0xff, 0xff), @@ -8248,6 +8266,8 @@ static const struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330a, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330d, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xab2b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x624d, 0xff, 0xff, 0xff), From b5f871ab4913b2403a7cdcbcde16d39d0b071fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 6 Nov 2024 13:41:44 +0100 Subject: [PATCH 0029/1386] wifi: ath9k: Add RX inactivity detection and reset chip when it occurs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some ath9k chips can, seemingly at random, end up in a state which can be described as "deaf". No or nearly no interrupts are generated anymore for incoming packets. Existing links break down after a while, and new links will not be established. The circumstances leading to this "deafness" are still unclear, but some particular chips (especially 2-stream 11n SoCs, but also others) can go 'deaf' when running AP or mesh (or both) after some time.
It's probably a hardware issue, and doing a channel scan to trigger a chip reset (which one normally can't do on an AP interface) recovers the hardware. The only way the driver can detect this state is by detecting if there has been no RX activity for a while. In this case we can proactively reset the chip (which only takes a small number of milliseconds, so shouldn't interrupt things too much if it has been idle for several seconds), which functions as a workaround. OpenWrt and various derivatives have been carrying versions of this workaround for years that were never upstreamed. One version[0], written by Felix Fietkau, used a simple counter and only reset if there was precisely zero RX activity for a long period of time. This had the problem that in some cases a small number of interrupts would appear even if the device was otherwise not responsive. For this reason, another version[1], written by Simon Wunderlich and Sven Eckelmann, used a time-based approach to calculate the average number of RX interrupts over a longer (four-second) interval, and reset the chip when seeing fewer than one interrupt per second over this period. However, that version relied on debugfs counters to keep track of the number of interrupts, which means it didn't work at all if debugfs was not enabled. This patch unifies the two versions: it uses the same approach as Felix' patch to count the number of RX handler invocations, but uses the same time-based windowing approach as Simon and Sven's patch to still handle the case where occasional interrupts appear but the device is otherwise deaf. Since this is based on ideas by all three people, but not actually directly derived from any of the patches, I'm including Suggested-by tags from Simon, Sven and Felix below, which should hopefully serve as proper credit.
[0] https://patchwork.kernel.org/project/linux-wireless/patch/20170125163654.66431-3-nbd@nbd.name/ [1] https://patchwork.kernel.org/project/linux-wireless/patch/20161117083614.19188-2-sven.eckelmann@open-mesh.com/ Suggested-by: Simon Wunderlich Suggested-by: Sven Eckelmann Suggested-by: Felix Fietkau Signed-off-by: Toke Høiland-Jørgensen Tested-by: Sven Eckelmann Reviewed-by: Sven Eckelmann Tested-by: Issam Hamdi Acked-by: Simon Wunderlich Link: https://patch.msgid.link/20241106-ath9k-deaf-detection-v1-1-736a150d2425@redhat.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/ath9k.h | 2 ++ drivers/net/wireless/ath/ath9k/debug.c | 1 + drivers/net/wireless/ath/ath9k/debug.h | 1 + drivers/net/wireless/ath/ath9k/link.c | 33 ++++++++++++++++++++++++-- drivers/net/wireless/ath/ath9k/main.c | 1 + 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 29ca65a732a6..bcfc8df0efe5 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -1018,6 +1018,8 @@ struct ath_softc { u8 gtt_cnt; u32 intrstatus; + u32 rx_active_check_time; + u32 rx_active_count; u16 ps_flags; /* PS_* */ bool ps_enabled; bool ps_idle; diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index eff894958a73..74a0134075cf 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -750,6 +750,7 @@ static int read_file_reset(struct seq_file *file, void *data) [RESET_TYPE_CALIBRATION] = "Calibration error", [RESET_TX_DMA_ERROR] = "Tx DMA stop error", [RESET_RX_DMA_ERROR] = "Rx DMA stop error", + [RESET_TYPE_RX_INACTIVE] = "Rx path inactive", }; int i; diff --git a/drivers/net/wireless/ath/ath9k/debug.h b/drivers/net/wireless/ath/ath9k/debug.h index 389459c04d14..cb3e75969875 100644 --- a/drivers/net/wireless/ath/ath9k/debug.h +++ b/drivers/net/wireless/ath/ath9k/debug.h @@ -53,6 
+53,7 @@ enum ath_reset_type { RESET_TYPE_CALIBRATION, RESET_TX_DMA_ERROR, RESET_RX_DMA_ERROR, + RESET_TYPE_RX_INACTIVE, __RESET_TYPE_MAX }; diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index d1e5767aab3c..d078a59d7d3c 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -50,7 +50,36 @@ reset: "tx hung, resetting the chip\n"); ath9k_queue_reset(sc, RESET_TYPE_TX_HANG); return false; +} +#define RX_INACTIVE_CHECK_INTERVAL (4 * MSEC_PER_SEC) + +static bool ath_hw_rx_inactive_check(struct ath_softc *sc) +{ + struct ath_common *common = ath9k_hw_common(sc->sc_ah); + u32 interval, count; + + interval = jiffies_to_msecs(jiffies - sc->rx_active_check_time); + count = sc->rx_active_count; + + if (interval < RX_INACTIVE_CHECK_INTERVAL) + return true; /* too soon to check */ + + sc->rx_active_count = 0; + sc->rx_active_check_time = jiffies; + + /* Need at least one interrupt per second, and we should only react if + * we are within a factor two of the expected interval + */ + if (interval > RX_INACTIVE_CHECK_INTERVAL * 2 || + count >= interval / MSEC_PER_SEC) + return true; + + ath_dbg(common, RESET, + "RX inactivity detected. 
Schedule chip reset\n"); + ath9k_queue_reset(sc, RESET_TYPE_RX_INACTIVE); + + return false; } void ath_hw_check_work(struct work_struct *work) @@ -58,8 +87,8 @@ void ath_hw_check_work(struct work_struct *work) struct ath_softc *sc = container_of(work, struct ath_softc, hw_check_work.work); - if (!ath_hw_check(sc) || - !ath_tx_complete_check(sc)) + if (!ath_hw_check(sc) || !ath_tx_complete_check(sc) || + !ath_hw_rx_inactive_check(sc)) return; ieee80211_queue_delayed_work(sc->hw, &sc->hw_check_work, diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index dd7910782858..5a3309bda1bd 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -453,6 +453,7 @@ void ath9k_tasklet(struct tasklet_struct *t) ath_rx_tasklet(sc, 0, true); ath_rx_tasklet(sc, 0, false); + sc->rx_active_count++; } if (status & ATH9K_INT_TX) { From a93d125ebfdd530ea1980a45b7ad2e9471b82c87 Mon Sep 17 00:00:00 2001 From: Dinesh Karthikeyan Date: Fri, 15 Nov 2024 11:58:51 +0530 Subject: [PATCH 0030/1386] wifi: ath12k: Support Downlink Pager Stats Add support to request downlink pager stats from firmware through HTT stats type 36. These stats give paging information like number of pages, their timestamp, number of locked and free pages, synchronous and asynchronous locked pages. Note: MCC firmware version - WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 responds to the event requesting stats, but it does not give any data. 
Sample output: ------------- echo 36 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats HTT_DLPAGER_STATS_TLV: ASYNC locked pages = 2 SYNC locked pages = 0 Total locked pages = 2 Total free pages = 127 LOCKED PAGES HISTORY last_locked_page_idx = 0 Index - 0 ; Page Number - 8495 ; Num of pages - 1 ; Timestamp - 4031009360us Index - 1 ; Page Number - 7219 ; Num of pages - 2 ; Timestamp - 885379515us Index - 2 ; Page Number - 0 ; Num of pages - 0 ; Timestamp - 0us Index - 3 ; Page Number - 0 ; Num of pages - 0 ; Timestamp - 0us ..... UNLOCKED PAGES HISTORY last_unlocked_page_idx = 0 Index - 0 ; Page Number - 7144 ; Num of pages - 2 ; Timestamp - 4032070008us Index - 1 ; Page Number - 7214 ; Num of pages - 2 ; Timestamp - 885379512us Index - 2 ; Page Number - 0 ; Num of pages - 0 ; Timestamp - 0us Index - 3 ; Page Number - 0 ; Num of pages - 0 ; Timestamp - 0us ..... Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Dinesh Karthikeyan Signed-off-by: Roopni Devanathan Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241115062854.1919672-2-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 86 ++++++++++++++++++- .../wireless/ath/ath12k/debugfs_htt_stats.h | 31 +++++++ 2 files changed, 116 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 43ea87e981f4..ba06aed3d880 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -2543,6 +2543,88 @@ ath12k_htt_print_pdev_obss_pd_stats_tlv(const void *tag_buf, u16 tag_len, stats_req->buf_len = len; } +static void ath12k_htt_print_dlpager_entry(const struct ath12k_htt_pgs_info *pg_info, + int idx, char *str_buf) +{ + u64 page_timestamp; + u16 index = 0; + + page_timestamp = 
ath12k_le32hilo_to_u64(pg_info->ts_msb, pg_info->ts_lsb); + + index += snprintf(&str_buf[index], ATH12K_HTT_MAX_STRING_LEN - index, + "Index - %u ; Page Number - %u ; ", + idx, le32_to_cpu(pg_info->page_num)); + index += snprintf(&str_buf[index], ATH12K_HTT_MAX_STRING_LEN - index, + "Num of pages - %u ; Timestamp - %lluus\n", + le32_to_cpu(pg_info->num_pgs), page_timestamp); +} + +static void +ath12k_htt_print_dlpager_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_dl_pager_stats_tlv *stat_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u32 dword_lock, dword_unlock; + int i; + u8 *buf = stats_req->buf; + u8 pg_locked; + u8 pg_unlock; + char str_buf[ATH12K_HTT_MAX_STRING_LEN] = {0}; + + if (tag_len < sizeof(*stat_buf)) + return; + + dword_lock = le32_get_bits(stat_buf->info2, + ATH12K_HTT_DLPAGER_TOTAL_LOCK_PAGES_INFO2); + dword_unlock = le32_get_bits(stat_buf->info2, + ATH12K_HTT_DLPAGER_TOTAL_FREE_PAGES_INFO2); + + pg_locked = ATH12K_HTT_STATS_PAGE_LOCKED; + pg_unlock = ATH12K_HTT_STATS_PAGE_UNLOCKED; + + len += scnprintf(buf + len, buf_len - len, "HTT_DLPAGER_STATS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "ASYNC locked pages = %u\n", + le32_get_bits(stat_buf->info0, + ATH12K_HTT_DLPAGER_ASYNC_LOCK_PG_CNT_INFO0)); + len += scnprintf(buf + len, buf_len - len, "SYNC locked pages = %u\n", + le32_get_bits(stat_buf->info0, + ATH12K_HTT_DLPAGER_SYNC_LOCK_PG_CNT_INFO0)); + len += scnprintf(buf + len, buf_len - len, "Total locked pages = %u\n", + le32_get_bits(stat_buf->info1, + ATH12K_HTT_DLPAGER_TOTAL_LOCK_PAGES_INFO1)); + len += scnprintf(buf + len, buf_len - len, "Total free pages = %u\n", + le32_get_bits(stat_buf->info1, + ATH12K_HTT_DLPAGER_TOTAL_FREE_PAGES_INFO1)); + + len += scnprintf(buf + len, buf_len - len, "\nLOCKED PAGES HISTORY\n"); + len += scnprintf(buf + len, buf_len - len, "last_locked_page_idx = %u\n", + dword_lock ? 
dword_lock - 1 : (ATH12K_PAGER_MAX - 1)); + + for (i = 0; i < ATH12K_PAGER_MAX; i++) { + memset(str_buf, 0x0, ATH12K_HTT_MAX_STRING_LEN); + ath12k_htt_print_dlpager_entry(&stat_buf->pgs_info[pg_locked][i], + i, str_buf); + len += scnprintf(buf + len, buf_len - len, "%s", str_buf); + } + + len += scnprintf(buf + len, buf_len - len, "\nUNLOCKED PAGES HISTORY\n"); + len += scnprintf(buf + len, buf_len - len, "last_unlocked_page_idx = %u\n", + dword_unlock ? dword_unlock - 1 : ATH12K_PAGER_MAX - 1); + + for (i = 0; i < ATH12K_PAGER_MAX; i++) { + memset(str_buf, 0x0, ATH12K_HTT_MAX_STRING_LEN); + ath12k_htt_print_dlpager_entry(&stat_buf->pgs_info[pg_unlock][i], + i, str_buf); + len += scnprintf(buf + len, buf_len - len, "%s", str_buf); + } + + len += scnprintf(buf + len, buf_len - len, "\n"); + + stats_req->buf_len = len; +} + static void ath12k_htt_print_dmac_reset_stats_tlv(const void *tag_buf, u16 tag_len, struct debug_htt_stats_req *stats_req) @@ -2562,7 +2644,6 @@ ath12k_htt_print_dmac_reset_stats_tlv(const void *tag_buf, u16 tag_len, time = ath12k_le32hilo_to_u64(htt_stats_buf->reset_time_hi_ms, htt_stats_buf->reset_time_lo_ms); len += scnprintf(buf + len, buf_len - len, "reset_time_ms = %llu\n", time); - time = ath12k_le32hilo_to_u64(htt_stats_buf->disengage_time_hi_ms, htt_stats_buf->disengage_time_lo_ms); len += scnprintf(buf + len, buf_len - len, "disengage_time_ms = %llu\n", time); @@ -2870,6 +2951,9 @@ static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, case HTT_STATS_PDEV_OBSS_PD_TAG: ath12k_htt_print_pdev_obss_pd_stats_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_DLPAGER_STATS_TAG: + ath12k_htt_print_dlpager_stats_tlv(tag_buf, len, stats_req); + break; case HTT_STATS_DMAC_RESET_STATS_TAG: ath12k_htt_print_dmac_reset_stats_tlv(tag_buf, len, stats_req); break; diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index ac86cab234ec..dfb6538585d5 100644 --- 
a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -135,6 +135,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_EXT_STATS_PDEV_TX_MU = 17, ATH12K_DBG_HTT_EXT_STATS_PDEV_CCA_STATS = 19, ATH12K_DBG_HTT_EXT_STATS_PDEV_OBSS_PD_STATS = 23, + ATH12K_DBG_HTT_EXT_STATS_DLPAGER_STATS = 36, ATH12K_DBG_HTT_EXT_STATS_SOC_ERROR = 45, ATH12K_DBG_HTT_EXT_STATS_PDEV_SCHED_ALGO = 49, ATH12K_DBG_HTT_EXT_STATS_MANDATORY_MUOFDMA = 51, @@ -194,6 +195,7 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_PDEV_CTRL_PATH_TX_STATS_TAG = 102, HTT_STATS_TX_SELFGEN_AC_SCHED_STATUS_STATS_TAG = 111, HTT_STATS_TX_SELFGEN_AX_SCHED_STATUS_STATS_TAG = 112, + HTT_STATS_DLPAGER_STATS_TAG = 120, HTT_STATS_MU_PPDU_DIST_TAG = 129, HTT_STATS_TX_PDEV_MUMIMO_GRP_STATS_TAG = 130, HTT_STATS_TX_PDEV_RATE_STATS_BE_OFDMA_TAG = 135, @@ -1054,6 +1056,35 @@ struct ath12k_htt_pdev_obss_pd_stats_tlv { __le32 num_sr_ppdu_abort_flush_cnt; } __packed; +enum ath12k_htt_stats_page_lock_state { + ATH12K_HTT_STATS_PAGE_LOCKED = 0, + ATH12K_HTT_STATS_PAGE_UNLOCKED = 1, + ATH12K_NUM_PG_LOCK_STATE +}; + +#define ATH12K_PAGER_MAX 10 + +#define ATH12K_HTT_DLPAGER_ASYNC_LOCK_PG_CNT_INFO0 GENMASK(7, 0) +#define ATH12K_HTT_DLPAGER_SYNC_LOCK_PG_CNT_INFO0 GENMASK(15, 8) +#define ATH12K_HTT_DLPAGER_TOTAL_LOCK_PAGES_INFO1 GENMASK(15, 0) +#define ATH12K_HTT_DLPAGER_TOTAL_FREE_PAGES_INFO1 GENMASK(31, 16) +#define ATH12K_HTT_DLPAGER_TOTAL_LOCK_PAGES_INFO2 GENMASK(15, 0) +#define ATH12K_HTT_DLPAGER_TOTAL_FREE_PAGES_INFO2 GENMASK(31, 16) + +struct ath12k_htt_pgs_info { + __le32 page_num; + __le32 num_pgs; + __le32 ts_lsb; + __le32 ts_msb; +} __packed; + +struct ath12k_htt_dl_pager_stats_tlv { + __le32 info0; + __le32 info1; + __le32 info2; + struct ath12k_htt_pgs_info pgs_info[ATH12K_NUM_PG_LOCK_STATE][ATH12K_PAGER_MAX]; +} __packed; + struct ath12k_htt_dmac_reset_stats_tlv { __le32 reset_count; __le32 reset_time_lo_ms; From c3527cdfcdf70dcd613c4582283e28e4f81df9ea Mon Sep 17 
00:00:00 2001 From: Dinesh Karthikeyan Date: Fri, 15 Nov 2024 11:58:52 +0530 Subject: [PATCH 0031/1386] wifi: ath12k: Support phy counter and TPC stats Add support to request counters and Transmission Power Control (TPC) stats through HTT stats type 37. These stats give information about counters like received packet count, CRC pass count, error count, transmit abort count, etc., about counter reset like reset cause, channel frequency, number and mode, channel flags, etc., about TPC like transmit power scale, maximum transmit power, gain cap, EIRP, etc. Note: MCC firmware version - WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 does not support HTT stats type 37, i.e., the firmware does not respond to the command requesting stats. Sample output: ------------- echo 37 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats HTT_PHY_STATS_TLV: bdf_nf_chain[0] = -92 bdf_nf_chain[1] = -94 bdf_nf_chain[2] = -94 bdf_nf_chain[3] = -93 ..... HTT_PHY_COUNTERS_TLV: rx_ofdma_timing_err_cnt = 18068 rx_cck_fail_cnt = 0 mactx_abort_cnt = 2612 macrx_abort_cnt = 0 ..... HTT_PHY_RESET_STATS_TLV: pdev_id = 0 chan_mhz = 0 chan_band_center_freq1 = 0 chan_band_center_freq2 = 0 ..... HTT_PHY_RESET_COUNTERS_TLV: pdev_id = 0 cf_active_low_fail_cnt = 0 cf_active_low_pass_cnt = 0 phy_off_through_vreg_cnt = 0 ..... HTT_PHY_TPC_STATS_TLV: pdev_id = 0 tx_power_scale = 0 tx_power_scale_db = 0 min_negative_tx_power = 0 ..... 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Dinesh Karthikeyan Signed-off-by: Roopni Devanathan Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241115062854.1919672-3-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 248 ++++++++++++++++++ .../wireless/ath/ath12k/debugfs_htt_stats.h | 98 +++++++ 2 files changed, 346 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index ba06aed3d880..78b9d8efa957 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -2625,6 +2625,239 @@ ath12k_htt_print_dlpager_stats_tlv(const void *tag_buf, u16 tag_len, stats_req->buf_len = len; } +static void +ath12k_htt_print_phy_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_phy_stats_tlv *htt_stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf, i; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + len += scnprintf(buf + len, buf_len - len, "HTT_PHY_STATS_TLV:\n"); + for (i = 0; i < ATH12K_HTT_STATS_MAX_CHAINS; i++) + len += scnprintf(buf + len, buf_len - len, "bdf_nf_chain[%d] = %d\n", + i, a_sle32_to_cpu(htt_stats_buf->nf_chain[i])); + for (i = 0; i < ATH12K_HTT_STATS_MAX_CHAINS; i++) + len += scnprintf(buf + len, buf_len - len, "runtime_nf_chain[%d] = %d\n", + i, a_sle32_to_cpu(htt_stats_buf->runtime_nf_chain[i])); + len += scnprintf(buf + len, buf_len - len, "false_radar_cnt = %u / %u (mins)\n", + le32_to_cpu(htt_stats_buf->false_radar_cnt), + le32_to_cpu(htt_stats_buf->fw_run_time)); + len += scnprintf(buf + len, buf_len - len, "radar_cs_cnt = %u\n", + le32_to_cpu(htt_stats_buf->radar_cs_cnt)); + len += scnprintf(buf + len, buf_len - len, "ani_level = %d\n\n", + 
a_sle32_to_cpu(htt_stats_buf->ani_level)); + + stats_req->buf_len = len; +} + +static void +ath12k_htt_print_phy_counters_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_phy_counters_tlv *htt_stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + len += scnprintf(buf + len, buf_len - len, "HTT_PHY_COUNTERS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "rx_ofdma_timing_err_cnt = %u\n", + le32_to_cpu(htt_stats_buf->rx_ofdma_timing_err_cnt)); + len += scnprintf(buf + len, buf_len - len, "rx_cck_fail_cnt = %u\n", + le32_to_cpu(htt_stats_buf->rx_cck_fail_cnt)); + len += scnprintf(buf + len, buf_len - len, "mactx_abort_cnt = %u\n", + le32_to_cpu(htt_stats_buf->mactx_abort_cnt)); + len += scnprintf(buf + len, buf_len - len, "macrx_abort_cnt = %u\n", + le32_to_cpu(htt_stats_buf->macrx_abort_cnt)); + len += scnprintf(buf + len, buf_len - len, "phytx_abort_cnt = %u\n", + le32_to_cpu(htt_stats_buf->phytx_abort_cnt)); + len += scnprintf(buf + len, buf_len - len, "phyrx_abort_cnt = %u\n", + le32_to_cpu(htt_stats_buf->phyrx_abort_cnt)); + len += scnprintf(buf + len, buf_len - len, "phyrx_defer_abort_cnt = %u\n", + le32_to_cpu(htt_stats_buf->phyrx_defer_abort_cnt)); + len += scnprintf(buf + len, buf_len - len, "rx_gain_adj_lstf_event_cnt = %u\n", + le32_to_cpu(htt_stats_buf->rx_gain_adj_lstf_event_cnt)); + len += scnprintf(buf + len, buf_len - len, "rx_gain_adj_non_legacy_cnt = %u\n", + le32_to_cpu(htt_stats_buf->rx_gain_adj_non_legacy_cnt)); + len += print_array_to_buf(buf, len, "rx_pkt_cnt", htt_stats_buf->rx_pkt_cnt, + ATH12K_HTT_MAX_RX_PKT_CNT, "\n"); + len += print_array_to_buf(buf, len, "rx_pkt_crc_pass_cnt", + htt_stats_buf->rx_pkt_crc_pass_cnt, + ATH12K_HTT_MAX_RX_PKT_CRC_PASS_CNT, "\n"); + len += print_array_to_buf(buf, len, "per_blk_err_cnt", + htt_stats_buf->per_blk_err_cnt, + 
ATH12K_HTT_MAX_PER_BLK_ERR_CNT, "\n"); + len += print_array_to_buf(buf, len, "rx_ota_err_cnt", + htt_stats_buf->rx_ota_err_cnt, + ATH12K_HTT_MAX_RX_OTA_ERR_CNT, "\n\n"); + + stats_req->buf_len = len; +} + +static void +ath12k_htt_print_phy_reset_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_phy_reset_stats_tlv *htt_stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + len += scnprintf(buf + len, buf_len - len, "HTT_PHY_RESET_STATS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "pdev_id = %u\n", + le32_to_cpu(htt_stats_buf->pdev_id)); + len += scnprintf(buf + len, buf_len - len, "chan_mhz = %u\n", + le32_to_cpu(htt_stats_buf->chan_mhz)); + len += scnprintf(buf + len, buf_len - len, "chan_band_center_freq1 = %u\n", + le32_to_cpu(htt_stats_buf->chan_band_center_freq1)); + len += scnprintf(buf + len, buf_len - len, "chan_band_center_freq2 = %u\n", + le32_to_cpu(htt_stats_buf->chan_band_center_freq2)); + len += scnprintf(buf + len, buf_len - len, "chan_phy_mode = %u\n", + le32_to_cpu(htt_stats_buf->chan_phy_mode)); + len += scnprintf(buf + len, buf_len - len, "chan_flags = 0x%0x\n", + le32_to_cpu(htt_stats_buf->chan_flags)); + len += scnprintf(buf + len, buf_len - len, "chan_num = %u\n", + le32_to_cpu(htt_stats_buf->chan_num)); + len += scnprintf(buf + len, buf_len - len, "reset_cause = 0x%0x\n", + le32_to_cpu(htt_stats_buf->reset_cause)); + len += scnprintf(buf + len, buf_len - len, "prev_reset_cause = 0x%0x\n", + le32_to_cpu(htt_stats_buf->prev_reset_cause)); + len += scnprintf(buf + len, buf_len - len, "phy_warm_reset_src = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_warm_reset_src)); + len += scnprintf(buf + len, buf_len - len, "rx_gain_tbl_mode = %d\n", + le32_to_cpu(htt_stats_buf->rx_gain_tbl_mode)); + len += scnprintf(buf + len, buf_len - len, "xbar_val = 0x%0x\n", 
+ le32_to_cpu(htt_stats_buf->xbar_val)); + len += scnprintf(buf + len, buf_len - len, "force_calibration = %u\n", + le32_to_cpu(htt_stats_buf->force_calibration)); + len += scnprintf(buf + len, buf_len - len, "phyrf_mode = %u\n", + le32_to_cpu(htt_stats_buf->phyrf_mode)); + len += scnprintf(buf + len, buf_len - len, "phy_homechan = %u\n", + le32_to_cpu(htt_stats_buf->phy_homechan)); + len += scnprintf(buf + len, buf_len - len, "phy_tx_ch_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_tx_ch_mask)); + len += scnprintf(buf + len, buf_len - len, "phy_rx_ch_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_rx_ch_mask)); + len += scnprintf(buf + len, buf_len - len, "phybb_ini_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phybb_ini_mask)); + len += scnprintf(buf + len, buf_len - len, "phyrf_ini_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phyrf_ini_mask)); + len += scnprintf(buf + len, buf_len - len, "phy_dfs_en_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_dfs_en_mask)); + len += scnprintf(buf + len, buf_len - len, "phy_sscan_en_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_sscan_en_mask)); + len += scnprintf(buf + len, buf_len - len, "phy_synth_sel_mask = 0x%0x\n", + le32_to_cpu(htt_stats_buf->phy_synth_sel_mask)); + len += scnprintf(buf + len, buf_len - len, "phy_adfs_freq = %u\n", + le32_to_cpu(htt_stats_buf->phy_adfs_freq)); + len += scnprintf(buf + len, buf_len - len, "cck_fir_settings = 0x%0x\n", + le32_to_cpu(htt_stats_buf->cck_fir_settings)); + len += scnprintf(buf + len, buf_len - len, "phy_dyn_pri_chan = %u\n", + le32_to_cpu(htt_stats_buf->phy_dyn_pri_chan)); + len += scnprintf(buf + len, buf_len - len, "cca_thresh = 0x%0x\n", + le32_to_cpu(htt_stats_buf->cca_thresh)); + len += scnprintf(buf + len, buf_len - len, "dyn_cca_status = %u\n", + le32_to_cpu(htt_stats_buf->dyn_cca_status)); + len += scnprintf(buf + len, buf_len - len, "rxdesense_thresh_hw = 0x%x\n", + le32_to_cpu(htt_stats_buf->rxdesense_thresh_hw)); + len += scnprintf(buf + len, buf_len - 
len, "rxdesense_thresh_sw = 0x%x\n\n", + le32_to_cpu(htt_stats_buf->rxdesense_thresh_sw)); + + stats_req->buf_len = len; +} + +static void +ath12k_htt_print_phy_reset_counters_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_phy_reset_counters_tlv *htt_stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + len += scnprintf(buf + len, buf_len - len, "HTT_PHY_RESET_COUNTERS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "pdev_id = %u\n", + le32_to_cpu(htt_stats_buf->pdev_id)); + len += scnprintf(buf + len, buf_len - len, "cf_active_low_fail_cnt = %u\n", + le32_to_cpu(htt_stats_buf->cf_active_low_fail_cnt)); + len += scnprintf(buf + len, buf_len - len, "cf_active_low_pass_cnt = %u\n", + le32_to_cpu(htt_stats_buf->cf_active_low_pass_cnt)); + len += scnprintf(buf + len, buf_len - len, "phy_off_through_vreg_cnt = %u\n", + le32_to_cpu(htt_stats_buf->phy_off_through_vreg_cnt)); + len += scnprintf(buf + len, buf_len - len, "force_calibration_cnt = %u\n", + le32_to_cpu(htt_stats_buf->force_calibration_cnt)); + len += scnprintf(buf + len, buf_len - len, "rf_mode_switch_phy_off_cnt = %u\n", + le32_to_cpu(htt_stats_buf->rf_mode_switch_phy_off_cnt)); + len += scnprintf(buf + len, buf_len - len, "temperature_recal_cnt = %u\n\n", + le32_to_cpu(htt_stats_buf->temperature_recal_cnt)); + + stats_req->buf_len = len; +} + +static void +ath12k_htt_print_phy_tpc_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_phy_tpc_stats_tlv *htt_stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + len += scnprintf(buf + len, buf_len - len, "HTT_PHY_TPC_STATS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "pdev_id = 
%u\n", + le32_to_cpu(htt_stats_buf->pdev_id)); + len += scnprintf(buf + len, buf_len - len, "tx_power_scale = %u\n", + le32_to_cpu(htt_stats_buf->tx_power_scale)); + len += scnprintf(buf + len, buf_len - len, "tx_power_scale_db = %u\n", + le32_to_cpu(htt_stats_buf->tx_power_scale_db)); + len += scnprintf(buf + len, buf_len - len, "min_negative_tx_power = %d\n", + le32_to_cpu(htt_stats_buf->min_negative_tx_power)); + len += scnprintf(buf + len, buf_len - len, "reg_ctl_domain = %u\n", + le32_to_cpu(htt_stats_buf->reg_ctl_domain)); + len += scnprintf(buf + len, buf_len - len, "twice_max_rd_power = %u\n", + le32_to_cpu(htt_stats_buf->twice_max_rd_power)); + len += scnprintf(buf + len, buf_len - len, "max_tx_power = %u\n", + le32_to_cpu(htt_stats_buf->max_tx_power)); + len += scnprintf(buf + len, buf_len - len, "home_max_tx_power = %u\n", + le32_to_cpu(htt_stats_buf->home_max_tx_power)); + len += scnprintf(buf + len, buf_len - len, "psd_power = %d\n", + le32_to_cpu(htt_stats_buf->psd_power)); + len += scnprintf(buf + len, buf_len - len, "eirp_power = %u\n", + le32_to_cpu(htt_stats_buf->eirp_power)); + len += scnprintf(buf + len, buf_len - len, "power_type_6ghz = %u\n", + le32_to_cpu(htt_stats_buf->power_type_6ghz)); + len += print_array_to_buf(buf, len, "max_reg_allowed_power", + htt_stats_buf->max_reg_allowed_power, + ATH12K_HTT_STATS_MAX_CHAINS, "\n"); + len += print_array_to_buf(buf, len, "max_reg_allowed_power_6ghz", + htt_stats_buf->max_reg_allowed_power_6ghz, + ATH12K_HTT_STATS_MAX_CHAINS, "\n"); + len += print_array_to_buf(buf, len, "sub_band_cfreq", + htt_stats_buf->sub_band_cfreq, + ATH12K_HTT_MAX_CH_PWR_INFO_SIZE, "\n"); + len += print_array_to_buf(buf, len, "sub_band_txpower", + htt_stats_buf->sub_band_txpower, + ATH12K_HTT_MAX_CH_PWR_INFO_SIZE, "\n\n"); + + stats_req->buf_len = len; +} + static void ath12k_htt_print_dmac_reset_stats_tlv(const void *tag_buf, u16 tag_len, struct debug_htt_stats_req *stats_req) @@ -2954,6 +3187,21 @@ static int 
ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, case HTT_STATS_DLPAGER_STATS_TAG: ath12k_htt_print_dlpager_stats_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_PHY_STATS_TAG: + ath12k_htt_print_phy_stats_tlv(tag_buf, len, stats_req); + break; + case HTT_STATS_PHY_COUNTERS_TAG: + ath12k_htt_print_phy_counters_tlv(tag_buf, len, stats_req); + break; + case HTT_STATS_PHY_RESET_STATS_TAG: + ath12k_htt_print_phy_reset_stats_tlv(tag_buf, len, stats_req); + break; + case HTT_STATS_PHY_RESET_COUNTERS_TAG: + ath12k_htt_print_phy_reset_counters_tlv(tag_buf, len, stats_req); + break; + case HTT_STATS_PHY_TPC_STATS_TAG: + ath12k_htt_print_phy_tpc_stats_tlv(tag_buf, len, stats_req); + break; case HTT_STATS_DMAC_RESET_STATS_TAG: ath12k_htt_print_dmac_reset_stats_tlv(tag_buf, len, stats_req); break; diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index dfb6538585d5..10d7ca9c02f4 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -136,6 +136,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_EXT_STATS_PDEV_CCA_STATS = 19, ATH12K_DBG_HTT_EXT_STATS_PDEV_OBSS_PD_STATS = 23, ATH12K_DBG_HTT_EXT_STATS_DLPAGER_STATS = 36, + ATH12K_DBG_HTT_EXT_PHY_COUNTERS_AND_PHY_STATS = 37, ATH12K_DBG_HTT_EXT_STATS_SOC_ERROR = 45, ATH12K_DBG_HTT_EXT_STATS_PDEV_SCHED_ALGO = 49, ATH12K_DBG_HTT_EXT_STATS_MANDATORY_MUOFDMA = 51, @@ -196,6 +197,10 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_TX_SELFGEN_AC_SCHED_STATUS_STATS_TAG = 111, HTT_STATS_TX_SELFGEN_AX_SCHED_STATUS_STATS_TAG = 112, HTT_STATS_DLPAGER_STATS_TAG = 120, + HTT_STATS_PHY_COUNTERS_TAG = 121, + HTT_STATS_PHY_STATS_TAG = 122, + HTT_STATS_PHY_RESET_COUNTERS_TAG = 123, + HTT_STATS_PHY_RESET_STATS_TAG = 124, HTT_STATS_MU_PPDU_DIST_TAG = 129, HTT_STATS_TX_PDEV_MUMIMO_GRP_STATS_TAG = 130, HTT_STATS_TX_PDEV_RATE_STATS_BE_OFDMA_TAG = 135, @@ -203,6 +208,7 @@ enum ath12k_dbg_htt_tlv_tag { 
HTT_STATS_TX_SELFGEN_BE_STATS_TAG = 138, HTT_STATS_TX_SELFGEN_BE_SCHED_STATUS_STATS_TAG = 139, HTT_STATS_DMAC_RESET_STATS_TAG = 155, + HTT_STATS_PHY_TPC_STATS_TAG = 157, HTT_STATS_PDEV_SCHED_ALGO_OFDMA_STATS_TAG = 165, HTT_STATS_MAX_TAG, @@ -1085,6 +1091,98 @@ struct ath12k_htt_dl_pager_stats_tlv { struct ath12k_htt_pgs_info pgs_info[ATH12K_NUM_PG_LOCK_STATE][ATH12K_PAGER_MAX]; } __packed; +#define ATH12K_HTT_STATS_MAX_CHAINS 8 +#define ATH12K_HTT_MAX_RX_PKT_CNT 8 +#define ATH12K_HTT_MAX_RX_PKT_CRC_PASS_CNT 8 +#define ATH12K_HTT_MAX_PER_BLK_ERR_CNT 20 +#define ATH12K_HTT_MAX_RX_OTA_ERR_CNT 14 +#define ATH12K_HTT_MAX_CH_PWR_INFO_SIZE 16 + +struct ath12k_htt_phy_stats_tlv { + a_sle32 nf_chain[ATH12K_HTT_STATS_MAX_CHAINS]; + __le32 false_radar_cnt; + __le32 radar_cs_cnt; + a_sle32 ani_level; + __le32 fw_run_time; + a_sle32 runtime_nf_chain[ATH12K_HTT_STATS_MAX_CHAINS]; +} __packed; + +struct ath12k_htt_phy_counters_tlv { + __le32 rx_ofdma_timing_err_cnt; + __le32 rx_cck_fail_cnt; + __le32 mactx_abort_cnt; + __le32 macrx_abort_cnt; + __le32 phytx_abort_cnt; + __le32 phyrx_abort_cnt; + __le32 phyrx_defer_abort_cnt; + __le32 rx_gain_adj_lstf_event_cnt; + __le32 rx_gain_adj_non_legacy_cnt; + __le32 rx_pkt_cnt[ATH12K_HTT_MAX_RX_PKT_CNT]; + __le32 rx_pkt_crc_pass_cnt[ATH12K_HTT_MAX_RX_PKT_CRC_PASS_CNT]; + __le32 per_blk_err_cnt[ATH12K_HTT_MAX_PER_BLK_ERR_CNT]; + __le32 rx_ota_err_cnt[ATH12K_HTT_MAX_RX_OTA_ERR_CNT]; +} __packed; + +struct ath12k_htt_phy_reset_stats_tlv { + __le32 pdev_id; + __le32 chan_mhz; + __le32 chan_band_center_freq1; + __le32 chan_band_center_freq2; + __le32 chan_phy_mode; + __le32 chan_flags; + __le32 chan_num; + __le32 reset_cause; + __le32 prev_reset_cause; + __le32 phy_warm_reset_src; + __le32 rx_gain_tbl_mode; + __le32 xbar_val; + __le32 force_calibration; + __le32 phyrf_mode; + __le32 phy_homechan; + __le32 phy_tx_ch_mask; + __le32 phy_rx_ch_mask; + __le32 phybb_ini_mask; + __le32 phyrf_ini_mask; + __le32 phy_dfs_en_mask; + __le32 
phy_sscan_en_mask; + __le32 phy_synth_sel_mask; + __le32 phy_adfs_freq; + __le32 cck_fir_settings; + __le32 phy_dyn_pri_chan; + __le32 cca_thresh; + __le32 dyn_cca_status; + __le32 rxdesense_thresh_hw; + __le32 rxdesense_thresh_sw; +} __packed; + +struct ath12k_htt_phy_reset_counters_tlv { + __le32 pdev_id; + __le32 cf_active_low_fail_cnt; + __le32 cf_active_low_pass_cnt; + __le32 phy_off_through_vreg_cnt; + __le32 force_calibration_cnt; + __le32 rf_mode_switch_phy_off_cnt; + __le32 temperature_recal_cnt; +} __packed; + +struct ath12k_htt_phy_tpc_stats_tlv { + __le32 pdev_id; + __le32 tx_power_scale; + __le32 tx_power_scale_db; + __le32 min_negative_tx_power; + __le32 reg_ctl_domain; + __le32 max_reg_allowed_power[ATH12K_HTT_STATS_MAX_CHAINS]; + __le32 max_reg_allowed_power_6ghz[ATH12K_HTT_STATS_MAX_CHAINS]; + __le32 twice_max_rd_power; + __le32 max_tx_power; + __le32 home_max_tx_power; + __le32 psd_power; + __le32 eirp_power; + __le32 power_type_6ghz; + __le32 sub_band_cfreq[ATH12K_HTT_MAX_CH_PWR_INFO_SIZE]; + __le32 sub_band_txpower[ATH12K_HTT_MAX_CH_PWR_INFO_SIZE]; +} __packed; + struct ath12k_htt_dmac_reset_stats_tlv { __le32 reset_count; __le32 reset_time_lo_ms; From 3a660e7fa44d556f41cbef6d2430f7227ef3f3ef Mon Sep 17 00:00:00 2001 From: Dinesh Karthikeyan Date: Fri, 15 Nov 2024 11:58:53 +0530 Subject: [PATCH 0032/1386] wifi: ath12k: Support SoC Common Stats Add support to request SoC stat from firmware through HTT stat type 38. This stat gives drop count of SoC. Note: MCC firmware version - WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 does not support tag HTT_STATS_SOC_TXRX_STATS_COMMON_TAG(125). 
Sample output: ------------- echo 38 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats HTT_SOC_COMMON_STATS_TLV: soc_drop_count = 0x0000000000000000 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Dinesh Karthikeyan Signed-off-by: Roopni Devanathan Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241115062854.1919672-4-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 26 +++++++++++++++++++ .../wireless/ath/ath12k/debugfs_htt_stats.h | 7 +++++ 2 files changed, 33 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 78b9d8efa957..f310e95a65f7 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -2858,6 +2858,29 @@ ath12k_htt_print_phy_tpc_stats_tlv(const void *tag_buf, u16 tag_len, stats_req->buf_len = len; } +static void +ath12k_htt_print_soc_txrx_stats_common_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_t2h_soc_txrx_stats_common_tlv *htt_stats_buf = tag_buf; + u64 drop_count; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u8 *buf = stats_req->buf; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + drop_count = ath12k_le32hilo_to_u64(htt_stats_buf->inv_peers_msdu_drop_count_hi, + htt_stats_buf->inv_peers_msdu_drop_count_lo); + + len += scnprintf(buf + len, buf_len - len, "HTT_SOC_COMMON_STATS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "soc_drop_count = %llu\n\n", + drop_count); + + stats_req->buf_len = len; +} + static void ath12k_htt_print_dmac_reset_stats_tlv(const void *tag_buf, u16 tag_len, struct debug_htt_stats_req *stats_req) @@ -3202,6 +3225,9 @@ static int ath12k_dbg_htt_ext_stats_parse(struct 
ath12k_base *ab, case HTT_STATS_PHY_TPC_STATS_TAG: ath12k_htt_print_phy_tpc_stats_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_SOC_TXRX_STATS_COMMON_TAG: + ath12k_htt_print_soc_txrx_stats_common_tlv(tag_buf, len, stats_req); + break; case HTT_STATS_DMAC_RESET_STATS_TAG: ath12k_htt_print_dmac_reset_stats_tlv(tag_buf, len, stats_req); break; diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index 10d7ca9c02f4..c07b60636c22 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -137,6 +137,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_EXT_STATS_PDEV_OBSS_PD_STATS = 23, ATH12K_DBG_HTT_EXT_STATS_DLPAGER_STATS = 36, ATH12K_DBG_HTT_EXT_PHY_COUNTERS_AND_PHY_STATS = 37, + ATH12K_DBG_HTT_EXT_VDEVS_TXRX_STATS = 38, ATH12K_DBG_HTT_EXT_STATS_SOC_ERROR = 45, ATH12K_DBG_HTT_EXT_STATS_PDEV_SCHED_ALGO = 49, ATH12K_DBG_HTT_EXT_STATS_MANDATORY_MUOFDMA = 51, @@ -201,6 +202,7 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_PHY_STATS_TAG = 122, HTT_STATS_PHY_RESET_COUNTERS_TAG = 123, HTT_STATS_PHY_RESET_STATS_TAG = 124, + HTT_STATS_SOC_TXRX_STATS_COMMON_TAG = 125, HTT_STATS_MU_PPDU_DIST_TAG = 129, HTT_STATS_TX_PDEV_MUMIMO_GRP_STATS_TAG = 130, HTT_STATS_TX_PDEV_RATE_STATS_BE_OFDMA_TAG = 135, @@ -1183,6 +1185,11 @@ struct ath12k_htt_phy_tpc_stats_tlv { __le32 sub_band_txpower[ATH12K_HTT_MAX_CH_PWR_INFO_SIZE]; } __packed; +struct ath12k_htt_t2h_soc_txrx_stats_common_tlv { + __le32 inv_peers_msdu_drop_count_hi; + __le32 inv_peers_msdu_drop_count_lo; +} __packed; + struct ath12k_htt_dmac_reset_stats_tlv { __le32 reset_count; __le32 reset_time_lo_ms; From c8f314703bcbade1bcef8dfee9ffc6a1d66b9d8f Mon Sep 17 00:00:00 2001 From: Dinesh Karthikeyan Date: Fri, 15 Nov 2024 11:58:54 +0530 Subject: [PATCH 0033/1386] wifi: ath12k: Support Transmit PER Rate Stats Add support to request per rate stats through HTT stats type 40. 
These stats give information about rates of PPDUs and MPDUs for single user and for OFDMA and MUMIMO technologies corresponding to multiple users. Sample output: ------------- echo 40 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats HTT_TX_PER_STATS: PER_STATS_SU: PER per BW: ppdus_tried_su = 0:0 1:0 2:0 3:0 4:0 ppdus_ack_failed_su = 0:0 1:0 2:0 3:0 4:0 mpdus_tried_su = 0:0 1:0 2:0 3:0 4:0 mpdus_failed_su = 0:0 1:0 2:0 3:0 4:0 PER per NSS: ppdus_tried_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 ppdus_ack_failed_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 mpdus_tried_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 mpdus_failed_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 PER per MCS: ppdus_tried_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 8:0 9:0 10:0 11:0 12:0 13:0 ppdus_ack_failed_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 8:0 9:0 10:0 11:0 12:0 13:0 mpdus_tried_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 8:0 9:0 10:0 11:0 12:0 13:0 mpdus_failed_su = 0:0 1:0 2:0 3:0 4:0 5:0 6:0 7:0 8:0 9:0 10:0 11:0 12:0 13:0 ..... 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 Signed-off-by: Dinesh Karthikeyan Signed-off-by: Roopni Devanathan Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241115062854.1919672-5-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 267 +++++++++++++++++- .../wireless/ath/ath12k/debugfs_htt_stats.h | 68 ++++- 2 files changed, 332 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index f310e95a65f7..92ff53767b2f 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -48,6 +48,28 @@ print_array_to_buf(u8 *buf, u32 offset, const char *header, footer); } +static const char *ath12k_htt_ax_tx_rx_ru_size_to_str(u8 ru_size) +{ + switch (ru_size) { + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_26: + return "26"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_52: + return "52"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_106: + return "106"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_242: + return "242"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_484: + return "484"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_996: + return "996"; + case ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_996x2: + return "996x2"; + default: + return "unknown"; + } +} + static const char *ath12k_htt_be_tx_rx_ru_size_to_str(u8 ru_size) { switch (ru_size) { @@ -88,6 +110,17 @@ static const char *ath12k_htt_be_tx_rx_ru_size_to_str(u8 ru_size) } } +static const char* +ath12k_tx_ru_size_to_str(enum ath12k_htt_stats_ru_type ru_type, u8 ru_size) +{ + if (ru_type == ATH12K_HTT_STATS_RU_TYPE_SINGLE_RU_ONLY) + return ath12k_htt_ax_tx_rx_ru_size_to_str(ru_size); + else if (ru_type == ATH12K_HTT_STATS_RU_TYPE_SINGLE_AND_MULTI_RU) + return 
ath12k_htt_be_tx_rx_ru_size_to_str(ru_size); + else + return "unknown"; +} + static void htt_print_tx_pdev_stats_cmn_tlv(const void *tag_buf, u16 tag_len, struct debug_htt_stats_req *stats_req) @@ -2881,6 +2914,235 @@ ath12k_htt_print_soc_txrx_stats_common_tlv(const void *tag_buf, u16 tag_len, stats_req->buf_len = len; } +static void +ath12k_htt_print_tx_per_rate_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_tx_per_rate_stats_tlv *stats_buf = tag_buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u32 ru_size_cnt = 0; + u32 rc_mode, ru_type; + u8 *buf = stats_req->buf, i; + const char *mode_prefix; + + if (tag_len < sizeof(*stats_buf)) + return; + + rc_mode = le32_to_cpu(stats_buf->rc_mode); + ru_type = le32_to_cpu(stats_buf->ru_type); + + switch (rc_mode) { + case ATH12K_HTT_STATS_RC_MODE_DLSU: + len += scnprintf(buf + len, buf_len - len, "HTT_TX_PER_STATS:\n"); + len += scnprintf(buf + len, buf_len - len, "\nPER_STATS_SU:\n"); + mode_prefix = "su"; + break; + case ATH12K_HTT_STATS_RC_MODE_DLMUMIMO: + len += scnprintf(buf + len, buf_len - len, "\nPER_STATS_DL_MUMIMO:\n"); + mode_prefix = "mu"; + break; + case ATH12K_HTT_STATS_RC_MODE_DLOFDMA: + len += scnprintf(buf + len, buf_len - len, "\nPER_STATS_DL_OFDMA:\n"); + mode_prefix = "ofdma"; + if (ru_type == ATH12K_HTT_STATS_RU_TYPE_SINGLE_RU_ONLY) + ru_size_cnt = ATH12K_HTT_TX_RX_PDEV_STATS_NUM_AX_RU_SIZE_CNTRS; + else if (ru_type == ATH12K_HTT_STATS_RU_TYPE_SINGLE_AND_MULTI_RU) + ru_size_cnt = ATH12K_HTT_TX_RX_PDEV_NUM_BE_RU_SIZE_CNTRS; + break; + case ATH12K_HTT_STATS_RC_MODE_ULMUMIMO: + len += scnprintf(buf + len, buf_len - len, "HTT_RX_PER_STATS:\n"); + len += scnprintf(buf + len, buf_len - len, "\nPER_STATS_UL_MUMIMO:\n"); + mode_prefix = "ulmu"; + break; + case ATH12K_HTT_STATS_RC_MODE_ULOFDMA: + len += scnprintf(buf + len, buf_len - len, "\nPER_STATS_UL_OFDMA:\n"); + mode_prefix = "ulofdma"; + if (ru_type == 
ATH12K_HTT_STATS_RU_TYPE_SINGLE_RU_ONLY) + ru_size_cnt = ATH12K_HTT_TX_RX_PDEV_STATS_NUM_AX_RU_SIZE_CNTRS; + else if (ru_type == ATH12K_HTT_STATS_RU_TYPE_SINGLE_AND_MULTI_RU) + ru_size_cnt = ATH12K_HTT_TX_RX_PDEV_NUM_BE_RU_SIZE_CNTRS; + break; + default: + return; + } + + len += scnprintf(buf + len, buf_len - len, "\nPER per BW:\n"); + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_tried_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_bw[i].ppdus_tried)); + len += scnprintf(buf + len, buf_len - len, " %u:%u\n", i, + le32_to_cpu(stats_buf->per_bw320.ppdus_tried)); + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "non_data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_ack_failed_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_bw[i].ppdus_ack_failed)); + len += scnprintf(buf + len, buf_len - len, " %u:%u\n", i, + le32_to_cpu(stats_buf->per_bw320.ppdus_ack_failed)); + + len += scnprintf(buf + len, buf_len - len, "mpdus_tried_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_bw[i].mpdus_tried)); + len += scnprintf(buf + len, buf_len - len, " %u:%u\n", i, + le32_to_cpu(stats_buf->per_bw320.mpdus_tried)); + + len += scnprintf(buf + len, buf_len - len, "mpdus_failed_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS; i++) + len += scnprintf(buf + len, 
buf_len - len, " %u:%u", i, + le32_to_cpu(stats_buf->per_bw[i].mpdus_failed)); + len += scnprintf(buf + len, buf_len - len, " %u:%u\n", i, + le32_to_cpu(stats_buf->per_bw320.mpdus_failed)); + + len += scnprintf(buf + len, buf_len - len, "\nPER per NSS:\n"); + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_tried_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i + 1, + le32_to_cpu(stats_buf->per_nss[i].ppdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "non_data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_ack_failed_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i + 1, + le32_to_cpu(stats_buf->per_nss[i].ppdus_ack_failed)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_tried_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i + 1, + le32_to_cpu(stats_buf->per_nss[i].mpdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_failed_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i + 1, + le32_to_cpu(stats_buf->per_nss[i].mpdus_failed)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "\nPER per MCS:\n"); + if (rc_mode == 
ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_tried_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_mcs[i].ppdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULMUMIMO) + len += scnprintf(buf + len, buf_len - len, "non_data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, "ppdus_ack_failed_%s = ", + mode_prefix); + for (i = 0; i < ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_mcs[i].ppdus_ack_failed)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_tried_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_mcs[i].mpdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_failed_%s = ", mode_prefix); + for (i = 0; i < ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS; i++) + len += scnprintf(buf + len, buf_len - len, " %u:%u ", i, + le32_to_cpu(stats_buf->per_mcs[i].mpdus_failed)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + if ((rc_mode == ATH12K_HTT_STATS_RC_MODE_DLOFDMA || + rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA) && + ru_type != ATH12K_HTT_STATS_RU_TYPE_INVALID) { + len += scnprintf(buf + len, buf_len - len, "\nPER per RU:\n"); + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA) + len += scnprintf(buf + len, buf_len - len, "data_ppdus_%s = ", + mode_prefix); + else + len += scnprintf(buf + 
len, buf_len - len, "ppdus_tried_%s = ", + mode_prefix); + for (i = 0; i < ru_size_cnt; i++) + len += scnprintf(buf + len, buf_len - len, " %s:%u ", + ath12k_tx_ru_size_to_str(ru_type, i), + le32_to_cpu(stats_buf->ru[i].ppdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_ULOFDMA) + len += scnprintf(buf + len, buf_len - len, + "non_data_ppdus_%s = ", mode_prefix); + else + len += scnprintf(buf + len, buf_len - len, + "ppdus_ack_failed_%s = ", mode_prefix); + for (i = 0; i < ru_size_cnt; i++) + len += scnprintf(buf + len, buf_len - len, " %s:%u ", + ath12k_tx_ru_size_to_str(ru_type, i), + le32_to_cpu(stats_buf->ru[i].ppdus_ack_failed)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_tried_%s = ", + mode_prefix); + for (i = 0; i < ru_size_cnt; i++) + len += scnprintf(buf + len, buf_len - len, " %s:%u ", + ath12k_tx_ru_size_to_str(ru_type, i), + le32_to_cpu(stats_buf->ru[i].mpdus_tried)); + len += scnprintf(buf + len, buf_len - len, "\n"); + + len += scnprintf(buf + len, buf_len - len, "mpdus_failed_%s = ", + mode_prefix); + for (i = 0; i < ru_size_cnt; i++) + len += scnprintf(buf + len, buf_len - len, " %s:%u ", + ath12k_tx_ru_size_to_str(ru_type, i), + le32_to_cpu(stats_buf->ru[i].mpdus_failed)); + len += scnprintf(buf + len, buf_len - len, "\n\n"); + } + + if (rc_mode == ATH12K_HTT_STATS_RC_MODE_DLMUMIMO) { + len += scnprintf(buf + len, buf_len - len, "\nlast_probed_bw = %u\n", + le32_to_cpu(stats_buf->last_probed_bw)); + len += scnprintf(buf + len, buf_len - len, "last_probed_nss = %u\n", + le32_to_cpu(stats_buf->last_probed_nss)); + len += scnprintf(buf + len, buf_len - len, "last_probed_mcs = %u\n", + le32_to_cpu(stats_buf->last_probed_mcs)); + len += print_array_to_buf(buf, len, "MU Probe count per RC MODE", + stats_buf->probe_cnt, + ATH12K_HTT_RC_MODE_2D_COUNT, "\n\n"); + } + + stats_req->buf_len = len; +} + static void 
ath12k_htt_print_dmac_reset_stats_tlv(const void *tag_buf, u16 tag_len, struct debug_htt_stats_req *stats_req) @@ -3018,7 +3280,7 @@ ath12k_htt_print_tx_pdev_rate_stats_be_ofdma_tlv(const void *tag_buf, u16 tag_le len += scnprintf(buf + len, buf_len - len, "\n"); len += print_array_to_buf_index(buf, len, "be_ofdma_tx_nss = ", 1, htt_stats_buf->be_ofdma_tx_nss, - ATH12K_HTT_TX_PDEV_STATS_NUM_SPATIAL_STREAMS, + ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS, "\n"); len += print_array_to_buf(buf, len, "be_ofdma_tx_bw", htt_stats_buf->be_ofdma_tx_bw, @@ -3228,6 +3490,9 @@ static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, case HTT_STATS_SOC_TXRX_STATS_COMMON_TAG: ath12k_htt_print_soc_txrx_stats_common_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_PER_RATE_STATS_TAG: + ath12k_htt_print_tx_per_rate_stats_tlv(tag_buf, len, stats_req); + break; case HTT_STATS_DMAC_RESET_STATS_TAG: ath12k_htt_print_dmac_reset_stats_tlv(tag_buf, len, stats_req); break; diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index c07b60636c22..d199bed0a9d1 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -138,6 +138,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_EXT_STATS_DLPAGER_STATS = 36, ATH12K_DBG_HTT_EXT_PHY_COUNTERS_AND_PHY_STATS = 37, ATH12K_DBG_HTT_EXT_VDEVS_TXRX_STATS = 38, + ATH12K_DBG_HTT_EXT_PDEV_PER_STATS = 40, ATH12K_DBG_HTT_EXT_STATS_SOC_ERROR = 45, ATH12K_DBG_HTT_EXT_STATS_PDEV_SCHED_ALGO = 49, ATH12K_DBG_HTT_EXT_STATS_MANDATORY_MUOFDMA = 51, @@ -203,6 +204,7 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_PHY_RESET_COUNTERS_TAG = 123, HTT_STATS_PHY_RESET_STATS_TAG = 124, HTT_STATS_SOC_TXRX_STATS_COMMON_TAG = 125, + HTT_STATS_PER_RATE_STATS_TAG = 128, HTT_STATS_MU_PPDU_DIST_TAG = 129, HTT_STATS_TX_PDEV_MUMIMO_GRP_STATS_TAG = 130, HTT_STATS_TX_PDEV_RATE_STATS_BE_OFDMA_TAG = 135, @@ -1221,6 +1223,10 @@ struct 
ath12k_htt_pdev_sched_algo_ofdma_stats_tlv { __le32 dlofdma_disabled_consec_no_mpdus_success[ATH12K_HTT_NUM_AC_WMM]; } __packed; +#define ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS 4 +#define ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS 8 +#define ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS 14 + enum ATH12K_HTT_TX_RX_PDEV_STATS_BE_RU_SIZE { ATH12K_HTT_TX_RX_PDEV_STATS_BE_RU_SIZE_26, ATH12K_HTT_TX_RX_PDEV_STATS_BE_RU_SIZE_52, @@ -1241,7 +1247,65 @@ enum ATH12K_HTT_TX_RX_PDEV_STATS_BE_RU_SIZE { ATH12K_HTT_TX_RX_PDEV_NUM_BE_RU_SIZE_CNTRS, }; -#define ATH12K_HTT_TX_PDEV_STATS_NUM_SPATIAL_STREAMS 8 +enum ATH12K_HTT_RC_MODE { + ATH12K_HTT_RC_MODE_SU_OL, + ATH12K_HTT_RC_MODE_SU_BF, + ATH12K_HTT_RC_MODE_MU1_INTF, + ATH12K_HTT_RC_MODE_MU2_INTF, + ATH12K_HTT_RC_MODE_MU3_INTF, + ATH12K_HTT_RC_MODE_MU4_INTF, + ATH12K_HTT_RC_MODE_MU5_INTF, + ATH12K_HTT_RC_MODE_MU6_INTF, + ATH12K_HTT_RC_MODE_MU7_INTF, + ATH12K_HTT_RC_MODE_2D_COUNT +}; + +enum ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE { + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_26, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_52, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_106, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_242, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_484, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_996, + ATH12K_HTT_TX_RX_PDEV_STATS_AX_RU_SIZE_996x2, + ATH12K_HTT_TX_RX_PDEV_STATS_NUM_AX_RU_SIZE_CNTRS +}; + +enum ath12k_htt_stats_rc_mode { + ATH12K_HTT_STATS_RC_MODE_DLSU = 0, + ATH12K_HTT_STATS_RC_MODE_DLMUMIMO = 1, + ATH12K_HTT_STATS_RC_MODE_DLOFDMA = 2, + ATH12K_HTT_STATS_RC_MODE_ULMUMIMO = 3, + ATH12K_HTT_STATS_RC_MODE_ULOFDMA = 4, +}; + +enum ath12k_htt_stats_ru_type { + ATH12K_HTT_STATS_RU_TYPE_INVALID, + ATH12K_HTT_STATS_RU_TYPE_SINGLE_RU_ONLY, + ATH12K_HTT_STATS_RU_TYPE_SINGLE_AND_MULTI_RU, +}; + +struct ath12k_htt_tx_rate_stats { + __le32 ppdus_tried; + __le32 ppdus_ack_failed; + __le32 mpdus_tried; + __le32 mpdus_failed; +} __packed; + +struct ath12k_htt_tx_per_rate_stats_tlv { + __le32 rc_mode; + __le32 last_probed_mcs; + __le32 
last_probed_nss; + __le32 last_probed_bw; + struct ath12k_htt_tx_rate_stats per_bw[ATH12K_HTT_TX_PDEV_STATS_NUM_BW_CNTRS]; + struct ath12k_htt_tx_rate_stats per_nss[ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS]; + struct ath12k_htt_tx_rate_stats per_mcs[ATH12K_HTT_TXBF_RATE_STAT_NUM_MCS_CNTRS]; + struct ath12k_htt_tx_rate_stats per_bw320; + __le32 probe_cnt[ATH12K_HTT_RC_MODE_2D_COUNT]; + __le32 ru_type; + struct ath12k_htt_tx_rate_stats ru[ATH12K_HTT_TX_RX_PDEV_NUM_BE_RU_SIZE_CNTRS]; +} __packed; + #define ATH12K_HTT_TX_PDEV_NUM_BE_MCS_CNTRS 16 #define ATH12K_HTT_TX_PDEV_NUM_BE_BW_CNTRS 5 #define ATH12K_HTT_TX_PDEV_NUM_EHT_SIG_MCS_CNTRS 4 @@ -1251,7 +1315,7 @@ struct ath12k_htt_tx_pdev_rate_stats_be_ofdma_tlv { __le32 mac_id__word; __le32 be_ofdma_tx_ldpc; __le32 be_ofdma_tx_mcs[ATH12K_HTT_TX_PDEV_NUM_BE_MCS_CNTRS]; - __le32 be_ofdma_tx_nss[ATH12K_HTT_TX_PDEV_STATS_NUM_SPATIAL_STREAMS]; + __le32 be_ofdma_tx_nss[ATH12K_HTT_PDEV_STAT_NUM_SPATIAL_STREAMS]; __le32 be_ofdma_tx_bw[ATH12K_HTT_TX_PDEV_NUM_BE_BW_CNTRS]; __le32 gi[ATH12K_HTT_TX_PDEV_NUM_GI_CNTRS][ATH12K_HTT_TX_PDEV_NUM_BE_MCS_CNTRS]; __le32 be_ofdma_tx_ru_size[ATH12K_HTT_TX_RX_PDEV_NUM_BE_RU_SIZE_CNTRS]; From ea58aae8458480d6391c404e62f1a4b30462b9c3 Mon Sep 17 00:00:00 2001 From: Sidhanta Sahu Date: Tue, 5 Nov 2024 10:58:54 +0530 Subject: [PATCH 0034/1386] wifi: ath12k: Support MBSSID Control Frame Stats Add support to request MBSSID control frame stats from firmware through HTT stats type 54. These stats give information such as basic trigger, BSR trigger, multi-user RTS and uplink MUMIMO trigger within and across various BSS. Note: WCN7850 firmware version - WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 does not support HTT stats type 54. 
Sample output: ------------- echo 54 > /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats_type cat /sys/kernel/debug/ath12k/pci-0000\:06\:00.0/mac0/htt_stats HTT_MBSSID_CTRL_FRAME_STATS_TLV: mac_id = 0 basic_trigger_across_bss = 0 basic_trigger_within_bss = 0 bsr_trigger_across_bss = 0 bsr_trigger_within_bss = 0 mu_rts_across_bss = 0 mu_rts_within_bss = 0 ul_mumimo_trigger_across_bss = 0 ul_mumimo_trigger_within_bss = 0 Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.1.1-00214-QCAHKSWPL_SILICONZ-1 Signed-off-by: Sidhanta Sahu Signed-off-by: Roopni Devanathan Acked-by: Kalle Valo Link: https://patch.msgid.link/20241105052854.2118987-1-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- .../wireless/ath/ath12k/debugfs_htt_stats.c | 43 +++++++++++++++++++ .../wireless/ath/ath12k/debugfs_htt_stats.h | 14 ++++++ 2 files changed, 57 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 92ff53767b2f..0bc945c7a93a 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -3296,6 +3296,45 @@ ath12k_htt_print_tx_pdev_rate_stats_be_ofdma_tlv(const void *tag_buf, u16 tag_le stats_req->buf_len = len; } +static void +ath12k_htt_print_pdev_mbssid_ctrl_frame_stats_tlv(const void *tag_buf, u16 tag_len, + struct debug_htt_stats_req *stats_req) +{ + const struct ath12k_htt_pdev_mbssid_ctrl_frame_tlv *htt_stats_buf = tag_buf; + u8 *buf = stats_req->buf; + u32 len = stats_req->buf_len; + u32 buf_len = ATH12K_HTT_STATS_BUF_SIZE; + u32 mac_id_word; + + if (tag_len < sizeof(*htt_stats_buf)) + return; + + mac_id_word = le32_to_cpu(htt_stats_buf->mac_id__word); + + len += scnprintf(buf + len, buf_len - len, "HTT_MBSSID_CTRL_FRAME_STATS_TLV:\n"); + len += scnprintf(buf + len, buf_len - len, "mac_id = %u\n", + u32_get_bits(mac_id_word, ATH12K_HTT_STATS_MAC_ID)); + len += scnprintf(buf + len, buf_len - len, "basic_trigger_across_bss = 
%u\n", + le32_to_cpu(htt_stats_buf->basic_trigger_across_bss)); + len += scnprintf(buf + len, buf_len - len, "basic_trigger_within_bss = %u\n", + le32_to_cpu(htt_stats_buf->basic_trigger_within_bss)); + len += scnprintf(buf + len, buf_len - len, "bsr_trigger_across_bss = %u\n", + le32_to_cpu(htt_stats_buf->bsr_trigger_across_bss)); + len += scnprintf(buf + len, buf_len - len, "bsr_trigger_within_bss = %u\n", + le32_to_cpu(htt_stats_buf->bsr_trigger_within_bss)); + len += scnprintf(buf + len, buf_len - len, "mu_rts_across_bss = %u\n", + le32_to_cpu(htt_stats_buf->mu_rts_across_bss)); + len += scnprintf(buf + len, buf_len - len, "mu_rts_within_bss = %u\n", + le32_to_cpu(htt_stats_buf->mu_rts_within_bss)); + len += scnprintf(buf + len, buf_len - len, "ul_mumimo_trigger_across_bss = %u\n", + le32_to_cpu(htt_stats_buf->ul_mumimo_trigger_across_bss)); + len += scnprintf(buf + len, buf_len - len, + "ul_mumimo_trigger_within_bss = %u\n\n", + le32_to_cpu(htt_stats_buf->ul_mumimo_trigger_within_bss)); + + stats_req->buf_len = len; +} + static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, u16 tag, u16 len, const void *tag_buf, void *user_data) @@ -3502,6 +3541,10 @@ static int ath12k_dbg_htt_ext_stats_parse(struct ath12k_base *ab, case HTT_STATS_TX_PDEV_RATE_STATS_BE_OFDMA_TAG: ath12k_htt_print_tx_pdev_rate_stats_be_ofdma_tlv(tag_buf, len, stats_req); break; + case HTT_STATS_PDEV_MBSSID_CTRL_FRAME_STATS_TAG: + ath12k_htt_print_pdev_mbssid_ctrl_frame_stats_tlv(tag_buf, len, + stats_req); + break; default: break; } diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h index d199bed0a9d1..cf3c88f8d1b2 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.h @@ -142,6 +142,7 @@ enum ath12k_dbg_htt_ext_stats_type { ATH12K_DBG_HTT_EXT_STATS_SOC_ERROR = 45, ATH12K_DBG_HTT_EXT_STATS_PDEV_SCHED_ALGO = 49, 
ATH12K_DBG_HTT_EXT_STATS_MANDATORY_MUOFDMA = 51, + ATH12K_DGB_HTT_EXT_STATS_PDEV_MBSSID_CTRL_FRAME = 54, /* keep this last */ ATH12K_DBG_HTT_NUM_EXT_STATS, @@ -214,6 +215,7 @@ enum ath12k_dbg_htt_tlv_tag { HTT_STATS_DMAC_RESET_STATS_TAG = 155, HTT_STATS_PHY_TPC_STATS_TAG = 157, HTT_STATS_PDEV_SCHED_ALGO_OFDMA_STATS_TAG = 165, + HTT_STATS_PDEV_MBSSID_CTRL_FRAME_STATS_TAG = 176, HTT_STATS_MAX_TAG, }; @@ -1322,4 +1324,16 @@ struct ath12k_htt_tx_pdev_rate_stats_be_ofdma_tlv { __le32 be_ofdma_eht_sig_mcs[ATH12K_HTT_TX_PDEV_NUM_EHT_SIG_MCS_CNTRS]; } __packed; +struct ath12k_htt_pdev_mbssid_ctrl_frame_tlv { + __le32 mac_id__word; + __le32 basic_trigger_across_bss; + __le32 basic_trigger_within_bss; + __le32 bsr_trigger_across_bss; + __le32 bsr_trigger_within_bss; + __le32 mu_rts_across_bss; + __le32 mu_rts_within_bss; + __le32 ul_mumimo_trigger_across_bss; + __le32 ul_mumimo_trigger_within_bss; +} __packed; + #endif From 82a35723a67c29f685d7b518962154a73b7163a2 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 14 Nov 2024 17:46:08 +0200 Subject: [PATCH 0035/1386] wifi: rtw88: usb: Support USB 3 with RTL8812AU Add the function to automatically switch the RTL8812AU into USB 3 mode. 
Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/19cda72b-f1f1-4b69-8369-0e4376b646bf@gmail.com --- drivers/net/wireless/realtek/rtw88/usb.c | 44 ++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 8d6db68246f1..db1769a4b617 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -930,6 +930,32 @@ static void rtw_usb_intf_deinit(struct rtw_dev *rtwdev, usb_set_intfdata(intf, NULL); } +static int rtw_usb_switch_mode_old(struct rtw_dev *rtwdev) +{ + struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); + enum usb_device_speed cur_speed = rtwusb->udev->speed; + u8 hci_opt; + + if (cur_speed == USB_SPEED_HIGH) { + hci_opt = rtw_read8(rtwdev, REG_HCI_OPT_CTRL); + + if ((hci_opt & (BIT(2) | BIT(3))) != BIT(3)) { + rtw_write8(rtwdev, REG_HCI_OPT_CTRL, 0x8); + rtw_write8(rtwdev, REG_SYS_SDIO_CTRL, 0x2); + rtw_write8(rtwdev, REG_ACLK_MON, 0x1); + rtw_write8(rtwdev, 0x3d, 0x3); + /* usb disconnect */ + rtw_write8(rtwdev, REG_SYS_PW_CTRL + 1, 0x80); + return 1; + } + } else if (cur_speed == USB_SPEED_SUPER) { + rtw_write8_clr(rtwdev, REG_SYS_SDIO_CTRL, BIT(1)); + rtw_write8_clr(rtwdev, REG_ACLK_MON, BIT(0)); + } + + return 0; +} + static int rtw_usb_switch_mode_new(struct rtw_dev *rtwdev) { enum usb_device_speed cur_speed; @@ -979,11 +1005,22 @@ static int rtw_usb_switch_mode_new(struct rtw_dev *rtwdev) return 1; } +static bool rtw_usb3_chip_old(u8 chip_id) +{ + return chip_id == RTW_CHIP_TYPE_8812A; +} + +static bool rtw_usb3_chip_new(u8 chip_id) +{ + return chip_id == RTW_CHIP_TYPE_8822C || + chip_id == RTW_CHIP_TYPE_8822B; +} + static int rtw_usb_switch_mode(struct rtw_dev *rtwdev) { u8 id = rtwdev->chip->id; - if (id != RTW_CHIP_TYPE_8822C && id != RTW_CHIP_TYPE_8822B) + if (!rtw_usb3_chip_new(id) && !rtw_usb3_chip_old(id)) return 0; if 
(!rtwdev->efuse.usb_mode_switch) { @@ -998,7 +1035,10 @@ static int rtw_usb_switch_mode(struct rtw_dev *rtwdev) return 0; } - return rtw_usb_switch_mode_new(rtwdev); + if (rtw_usb3_chip_old(id)) + return rtw_usb_switch_mode_old(rtwdev); + else + return rtw_usb_switch_mode_new(rtwdev); } int rtw_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) From ce5dea83ee8f945203144fb891fdcb978216e45a Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 14 Nov 2024 17:48:09 +0200 Subject: [PATCH 0036/1386] wifi: rtw88: usb: Enable RX aggregation for 8821au/8812au USB RX aggregation improves the RX speed on certain ARM systems, like the NanoPi NEO Core2. With RTL8811AU, before: 30 Mbps, after: 224 Mbps. The out-of-tree driver uses aggregation size of 7 in USB 3 mode, but that doesn't work here. rtw88 advertises support for receiving AMSDU in AMPDU, so the AP sends larger frames, up to ~5100 bytes. With a size of 7 RTL8812AU frequently tries to aggregate more frames than will fit in 32768 bytes. Use a size of 6 instead. 
Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/63012163-a425-4b15-b830-43f279c06b73@gmail.com --- drivers/net/wireless/realtek/rtw88/usb.c | 28 ++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index db1769a4b617..be193c7add77 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -789,6 +789,30 @@ static void rtw_usb_dynamic_rx_agg_v1(struct rtw_dev *rtwdev, bool enable) rtw_write16(rtwdev, REG_RXDMA_AGG_PG_TH, val16); } +static void rtw_usb_dynamic_rx_agg_v2(struct rtw_dev *rtwdev, bool enable) +{ + struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); + u8 size, timeout; + u16 val16; + + if (!enable) { + size = 0x0; + timeout = 0x1; + } else if (rtwusb->udev->speed == USB_SPEED_SUPER) { + size = 0x6; + timeout = 0x1a; + } else { + size = 0x5; + timeout = 0x20; + } + + val16 = u16_encode_bits(size, BIT_RXDMA_AGG_PG_TH) | + u16_encode_bits(timeout, BIT_DMA_AGG_TO_V1); + + rtw_write16(rtwdev, REG_RXDMA_AGG_PG_TH, val16); + rtw_write8_set(rtwdev, REG_TXDMA_PQ_MAP, BIT_RXDMA_AGG_EN); +} + static void rtw_usb_dynamic_rx_agg(struct rtw_dev *rtwdev, bool enable) { switch (rtwdev->chip->id) { @@ -797,6 +821,10 @@ static void rtw_usb_dynamic_rx_agg(struct rtw_dev *rtwdev, bool enable) case RTW_CHIP_TYPE_8821C: rtw_usb_dynamic_rx_agg_v1(rtwdev, enable); break; + case RTW_CHIP_TYPE_8821A: + case RTW_CHIP_TYPE_8812A: + rtw_usb_dynamic_rx_agg_v2(rtwdev, enable); + break; case RTW_CHIP_TYPE_8723D: /* Doesn't like aggregation. */ break; From 1cfa6d4e5bd9bfb15d165d8d843163363929ba1b Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Tue, 19 Nov 2024 22:15:16 +0530 Subject: [PATCH 0037/1386] wifi: ath12k: Fix endianness issue in struct hal_tlv_64_hdr struct hal_tlv_64_hdr has a 64-bit member that should be in little-endian format, but the current definition uses host byte order. 
Fix this by changing the definition and updating the corresponding helper functions used for the byte order conversion. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Signed-off-by: P Praneesh Acked-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241119164516.756478-1-quic_ppranees@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 2 +- drivers/net/wireless/ath/ath12k/hal_desc.h | 2 +- drivers/net/wireless/ath/ath12k/hal_rx.c | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 9ae579e50557..0fb39c174475 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -3912,7 +3912,7 @@ void ath12k_dp_rx_process_reo_status(struct ath12k_base *ab) ath12k_hal_srng_access_begin(ab, srng); while ((hdr = ath12k_hal_srng_dst_get_next_entry(ab, srng))) { - tag = u64_get_bits(hdr->tl, HAL_SRNG_TLV_HDR_TAG); + tag = le64_get_bits(hdr->tl, HAL_SRNG_TLV_HDR_TAG); switch (tag) { case HAL_REO_GET_QUEUE_STATS_STATUS: diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index 739f73370015..a460d432288f 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -581,7 +581,7 @@ struct hal_tlv_hdr { #define HAL_TLV_64_HDR_LEN GENMASK(21, 10) struct hal_tlv_64_hdr { - u64 tl; + __le64 tl; u8 value[]; } __packed; diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.c b/drivers/net/wireless/ath/ath12k/hal_rx.c index f7c1aaa3b5d4..ac17d6223fa7 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.c +++ b/drivers/net/wireless/ath/ath12k/hal_rx.c @@ -26,8 +26,8 @@ static int ath12k_hal_reo_cmd_queue_stats(struct hal_tlv_64_hdr *tlv, { struct hal_reo_get_queue_stats *desc; - tlv->tl = u32_encode_bits(HAL_REO_GET_QUEUE_STATS, HAL_TLV_HDR_TAG) | - 
u32_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); + tlv->tl = le64_encode_bits(HAL_REO_GET_QUEUE_STATS, HAL_TLV_HDR_TAG) | + le64_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); desc = (struct hal_reo_get_queue_stats *)tlv->value; memset_startat(desc, 0, queue_addr_lo); @@ -59,8 +59,8 @@ static int ath12k_hal_reo_cmd_flush_cache(struct ath12k_hal *hal, hal->current_blk_index = avail_slot; } - tlv->tl = u32_encode_bits(HAL_REO_FLUSH_CACHE, HAL_TLV_HDR_TAG) | - u32_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); + tlv->tl = le64_encode_bits(HAL_REO_FLUSH_CACHE, HAL_TLV_HDR_TAG) | + le64_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); desc = (struct hal_reo_flush_cache *)tlv->value; memset_startat(desc, 0, cache_addr_lo); @@ -97,8 +97,8 @@ static int ath12k_hal_reo_cmd_update_rx_queue(struct hal_tlv_64_hdr *tlv, { struct hal_reo_update_rx_queue *desc; - tlv->tl = u32_encode_bits(HAL_REO_UPDATE_RX_REO_QUEUE, HAL_TLV_HDR_TAG) | - u32_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); + tlv->tl = le64_encode_bits(HAL_REO_UPDATE_RX_REO_QUEUE, HAL_TLV_HDR_TAG) | + le64_encode_bits(sizeof(*desc), HAL_TLV_HDR_LEN); desc = (struct hal_reo_update_rx_queue *)tlv->value; memset_startat(desc, 0, queue_addr_lo); From 6200d947f050efdba4090dfefd8a01981363d954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20Cz=C3=A9m=C3=A1n?= Date: Mon, 4 Nov 2024 21:00:35 +0100 Subject: [PATCH 0038/1386] wifi: wcn36xx: fix channel survey memory allocation size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KASAN reported a memory allocation issue in wcn->chan_survey due to incorrect size calculation. This commit uses kcalloc to allocate memory for wcn->chan_survey, ensuring proper initialization and preventing the use of uninitialized values when there are no frames on the channel. 
Fixes: 29696e0aa413 ("wcn36xx: Track SNR and RSSI for each RX frame") Signed-off-by: Barnabás Czémán Acked-by: Loic Poulain Reviewed-by: Bryan O'Donoghue Link: https://patch.msgid.link/20241104-wcn36xx-memory-allocation-v1-1-5ec901cf37b6@mainlining.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/wcn36xx/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 408776562a7e..cd36cab6db75 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -1590,7 +1590,10 @@ static int wcn36xx_probe(struct platform_device *pdev) } n_channels = wcn_band_2ghz.n_channels + wcn_band_5ghz.n_channels; - wcn->chan_survey = devm_kmalloc(wcn->dev, n_channels, GFP_KERNEL); + wcn->chan_survey = devm_kcalloc(wcn->dev, + n_channels, + sizeof(struct wcn36xx_chan_survey), + GFP_KERNEL); if (!wcn->chan_survey) { ret = -ENOMEM; goto out_wq; From 733a8c69ded704616b864d30d2531d090ee7a57e Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 11 Nov 2024 10:01:49 +0300 Subject: [PATCH 0039/1386] wifi: ath11k: cleanup struct ath11k_vif Remove set but otherwise unused 'tx_seq_no' member of 'struct ath11k_vif', adjust 'ath11k_control_beaconing()' accordingly. This field was actually unused since an initial commit of the driver. Compile tested only. 
Signed-off-by: Dmitry Antipov Acked-by: Kalle Valo Link: https://patch.msgid.link/20241111070152.85140-1-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 1 - drivers/net/wireless/ath/ath11k/mac.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 09c37e19a168..5ab1fdd21144 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -370,7 +370,6 @@ struct ath11k_vif { struct ath11k *ar; struct ieee80211_vif *vif; - u16 tx_seq_no; struct wmi_wmm_params_all_arg wmm_params; struct list_head list; union { diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e6acbff06749..9757ac4aae50 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1697,8 +1697,6 @@ static void ath11k_control_beaconing(struct ath11k_vif *arvif, return; } - arvif->tx_seq_no = 0x1000; - arvif->aid = 0; ether_addr_copy(arvif->bssid, info->bssid); From 95e5de4aae8ca1af851fc922a854bbe822bf2dd4 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 11 Nov 2024 10:01:50 +0300 Subject: [PATCH 0040/1386] wifi: ath11k: cleanup struct ath11k_reg_tpc_power_info Remove unused 'ap_constraint_power' field of 'struct ath11k_reg_tpc_power_info' and adjust related comment. Compile tested only. 
Fixes: 6f4e235be655 ("wifi: ath11k: add parse of transmit power envelope element") Signed-off-by: Dmitry Antipov Acked-by: Kalle Valo Link: https://patch.msgid.link/20241111070152.85140-2-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 5ab1fdd21144..b1440a70a886 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -340,7 +340,6 @@ struct ath11k_chan_power_info { * @ap_power_type: type of power (SP/LPI/VLP) * @num_pwr_levels: number of power levels * @reg_max: Array of maximum TX power (dBm) per PSD value - * @ap_constraint_power: AP constraint power (dBm) * @tpe: TPE values processed from TPE IE * @chan_power_info: power info to send to firmware */ @@ -350,7 +349,6 @@ struct ath11k_reg_tpc_power_info { enum wmi_reg_6ghz_ap_type ap_power_type; u8 num_pwr_levels; u8 reg_max[ATH11K_NUM_PWR_LEVELS]; - u8 ap_constraint_power; s8 tpe[ATH11K_NUM_PWR_LEVELS]; struct ath11k_chan_power_info chan_power_info[ATH11K_NUM_PWR_LEVELS]; }; From 93962446ef907cb169b089d0ff3f356e7ce004ab Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 11 Nov 2024 10:01:51 +0300 Subject: [PATCH 0041/1386] wifi: ath11k: cleanup struct ath11k_mon_data Remove initialized but otherwise unused 'rx_status_q' member of 'struct ath11k_mon_data' and adjust 'ath11k_dp_rx_pdev_mon_status_attach' accordingly. Compile tested only. 
Fixes: 67a9d399fcb0 ("ath11k: enable RX PPDU stats in monitor co-exist mode") Signed-off-by: Dmitry Antipov Acked-by: Kalle Valo Link: https://patch.msgid.link/20241111070152.85140-3-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp.h | 1 - drivers/net/wireless/ath/ath11k/dp_rx.c | 2 -- 2 files changed, 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h index 65d2bc0687c8..f777314db8b3 100644 --- a/drivers/net/wireless/ath/ath11k/dp.h +++ b/drivers/net/wireless/ath/ath11k/dp.h @@ -165,7 +165,6 @@ struct ath11k_mon_data { struct ath11k_pdev_mon_stats rx_mon_stats; /* lock for monitor data */ spinlock_t mon_lock; - struct sk_buff_head rx_status_q; }; struct ath11k_pdev_dp { diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 176bbc5d95a6..238b0e5e92a0 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -5703,8 +5703,6 @@ static int ath11k_dp_rx_pdev_mon_status_attach(struct ath11k *ar) struct ath11k_pdev_dp *dp = &ar->dp; struct ath11k_mon_data *pmon = (struct ath11k_mon_data *)&dp->mon_data; - skb_queue_head_init(&pmon->rx_status_q); - pmon->mon_ppdu_status = DP_PPDU_STATUS_START; memset(&pmon->rx_mon_stats, 0, From 8f5e8e7efb135fc648abbb572bd86d0c96819eaf Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 11 Nov 2024 10:01:52 +0300 Subject: [PATCH 0042/1386] wifi: ath11k: miscellaneous spelling fixes Correct spelling here and there as suggested by codespell. 
Signed-off-by: Dmitry Antipov Acked-by: Kalle Valo Link: https://patch.msgid.link/20241111070152.85140-4-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/hal.h | 6 +++--- drivers/net/wireless/ath/ath11k/mac.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h index dc8bbe073017..601542410c75 100644 --- a/drivers/net/wireless/ath/ath11k/hal.h +++ b/drivers/net/wireless/ath/ath11k/hal.h @@ -700,7 +700,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_FLG_UNBLK_RESOURCE BIT(7) #define HAL_REO_CMD_FLG_UNBLK_CACHE BIT(8) -/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO0_UPD_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO0_UPD_* fields */ #define HAL_REO_CMD_UPD0_RX_QUEUE_NUM BIT(8) #define HAL_REO_CMD_UPD0_VLD BIT(9) #define HAL_REO_CMD_UPD0_ALDC BIT(10) @@ -725,7 +725,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_UPD0_PN_VALID BIT(29) #define HAL_REO_CMD_UPD0_PN BIT(30) -/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO1_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO1_* fields */ #define HAL_REO_CMD_UPD1_VLD BIT(16) #define HAL_REO_CMD_UPD1_ALDC GENMASK(18, 17) #define HAL_REO_CMD_UPD1_DIS_DUP_DETECTION BIT(19) @@ -741,7 +741,7 @@ enum hal_rx_buf_return_buf_manager { #define HAL_REO_CMD_UPD1_PN_HANDLE_ENABLE BIT(30) #define HAL_REO_CMD_UPD1_IGNORE_AMPDU_FLG BIT(31) -/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO2_* feilds */ +/* Should be matching with HAL_REO_UPD_RX_QUEUE_INFO2_* fields */ #define HAL_REO_CMD_UPD2_SVLD BIT(10) #define HAL_REO_CMD_UPD2_SSN GENMASK(22, 11) #define HAL_REO_CMD_UPD2_SEQ_2K_ERR BIT(23) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 9757ac4aae50..31ae9b384a29 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2228,7 +2228,7 @@ static 
void ath11k_peer_assoc_h_vht(struct ath11k *ar, __le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map), vht_mcs_mask); /* In IPQ8074 platform, VHT mcs rate 10 and 11 is enabled by default. - * VHT mcs rate 10 and 11 is not suppoerted in 11ac standard. + * VHT mcs rate 10 and 11 is not supported in 11ac standard. * so explicitly disable the VHT MCS rate 10 and 11 in 11ac mode. */ arg->tx_mcs_set &= ~IEEE80211_VHT_MCS_SUPPORT_0_11_MASK; @@ -6950,7 +6950,7 @@ err_vdev_del: /* Recalc txpower for remaining vdev */ ath11k_mac_txpower_recalc(ar); - /* TODO: recal traffic pause state based on the available vdevs */ + /* TODO: recalc traffic pause state based on the available vdevs */ mutex_unlock(&ar->conf_mutex); } From e238638e6f24109c0c7639f4a2db023388bd1b76 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Thu, 31 Oct 2024 08:05:41 +0800 Subject: [PATCH 0043/1386] wifi: ath11k: add support for QCA6698AQ QCA6698AQ IP core is the same as WCN6855 hw2.1, they share the same PCI device ID, the same major and minor version numbers, the same register address, and same HAL descriptors, etc. The most significant difference is that QCA6698AQ has different RF, IPA, thermal, etc. Follow the approach done in commit 5dc9d1a55e95 ("wifi: ath11k: add support for QCA2066"), enumerate the subversion number to identify the specific card. 
Tested-on: QCA6698AQ hw2.1 PCI WLAN.HSP.1.1-04479-QCAHSPSWPL_V1_V2_SILICONZ_IOE-1 Signed-off-by: Miaoqing Pan Acked-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241031000541.3331606-1-quic_miaoqing@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/core.c | 87 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath11k/core.h | 1 + drivers/net/wireless/ath/ath11k/mhi.c | 1 + drivers/net/wireless/ath/ath11k/pci.c | 3 + drivers/net/wireless/ath/ath11k/pcic.c | 13 +++- 5 files changed, 104 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index a9aefb1a705d..c576bbba52bf 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -811,6 +811,93 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .end = 0x0177ffff, }, + .tcl_ring_retry = true, + .tx_ring_size = DP_TCL_DATA_RING_SIZE, + .smp2p_wow_exit = false, + .support_fw_mac_sequence = true, + .support_dual_stations = true, + }, + { + .name = "qca6698aq hw2.1", + .hw_rev = ATH11K_HW_QCA6698AQ_HW21, + .fw = { + .dir = "QCA6698AQ/hw2.1", + .board_size = 256 * 1024, + .cal_offset = 128 * 1024, + }, + .max_radios = 3, + .bdf_addr = 0x4B0C0000, + .hw_ops = &wcn6855_ops, + .ring_mask = &ath11k_hw_ring_mask_qca6390, + .internal_sleep_clock = true, + .regs = &wcn6855_regs, + .qmi_service_ins_id = ATH11K_QMI_WLFW_SERVICE_INS_ID_V01_QCA6390, + .host_ce_config = ath11k_host_ce_config_qca6390, + .ce_count = 9, + .target_ce_config = ath11k_target_ce_config_wlan_qca6390, + .target_ce_count = 9, + .svc_to_ce_map = ath11k_target_service_to_ce_map_wlan_qca6390, + .svc_to_ce_map_len = 14, + .single_pdev_only = true, + .rxdma1_enable = false, + .num_rxdma_per_pdev = 2, + .rx_mac_buf_ring = true, + .vdev_start_delay = true, + .htt_peer_map_v2 = false, + + .spectral = { + .fft_sz = 0, + .fft_pad_sz = 0, + .summary_pad_sz = 0, + .fft_hdr_len = 0, + .max_fft_bins = 0, 
+ .fragment_160mhz = false, + }, + + .interface_modes = BIT(NL80211_IFTYPE_STATION) | + BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO), + .supports_monitor = false, + .supports_shadow_regs = true, + .idle_ps = true, + .supports_sta_ps = true, + .coldboot_cal_mm = false, + .coldboot_cal_ftm = false, + .cbcal_restart_fw = false, + .fw_mem_mode = 0, + .num_vdevs = 2 + 1, + .num_peers = 512, + .supports_suspend = true, + .hal_desc_sz = sizeof(struct hal_rx_desc_wcn6855), + .supports_regdb = true, + .fix_l1ss = false, + .credit_flow = true, + .max_tx_ring = DP_TCL_NUM_RING_MAX_QCA6390, + .hal_params = &ath11k_hw_hal_params_qca6390, + .supports_dynamic_smps_6ghz = false, + .alloc_cacheable_memory = false, + .supports_rssi_stats = true, + .fw_wmi_diag_event = true, + .current_cc_support = true, + .dbr_debug_support = false, + .global_reset = true, + .bios_sar_capa = &ath11k_hw_sar_capa_wcn6855, + .m3_fw_support = true, + .fixed_bdf_addr = false, + .fixed_mem_region = false, + .static_window_map = false, + .hybrid_bus_type = false, + .fixed_fw_mem = false, + .support_off_channel_tx = true, + .supports_multi_bssid = true, + + .sram_dump = { + .start = 0x01400000, + .end = 0x0177ffff, + }, + .tcl_ring_retry = true, .tx_ring_size = DP_TCL_DATA_RING_SIZE, .smp2p_wow_exit = false, diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index b1440a70a886..a9dc7fe7765a 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -148,6 +148,7 @@ enum ath11k_hw_rev { ATH11K_HW_WCN6750_HW10, ATH11K_HW_IPQ5018_HW10, ATH11K_HW_QCA2066_HW21, + ATH11K_HW_QCA6698AQ_HW21, }; enum ath11k_firmware_mode { diff --git a/drivers/net/wireless/ath/ath11k/mhi.c b/drivers/net/wireless/ath/ath11k/mhi.c index 6974a551883f..6e45f464a429 100644 --- a/drivers/net/wireless/ath/ath11k/mhi.c +++ b/drivers/net/wireless/ath/ath11k/mhi.c @@ -398,6 +398,7 @@ 
int ath11k_mhi_register(struct ath11k_pci *ab_pci) case ATH11K_HW_WCN6855_HW20: case ATH11K_HW_WCN6855_HW21: case ATH11K_HW_QCA2066_HW21: + case ATH11K_HW_QCA6698AQ_HW21: ath11k_mhi_config = &ath11k_mhi_config_qca6390; break; default: diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index be9d2c69cc41..b93f04973ad7 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -846,6 +846,9 @@ static int ath11k_pci_probe(struct pci_dev *pdev, case 0x1019D0E1: ab->hw_rev = ATH11K_HW_QCA2066_HW21; break; + case 0x001e60e1: + ab->hw_rev = ATH11K_HW_QCA6698AQ_HW21; + break; default: ab->hw_rev = ATH11K_HW_WCN6855_HW21; } diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c index debe7c5919ef..3fe77310c71f 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2019-2021 The Linux Foundation. All rights reserved. - * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2021-2024 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ #include "core.h" @@ -126,6 +126,17 @@ static const struct ath11k_msi_config ath11k_msi_config[] = { }, .hw_rev = ATH11K_HW_QCA2066_HW21, }, + { + .total_vectors = 32, + .total_users = 4, + .users = (struct ath11k_msi_user[]) { + { .name = "MHI", .num_vectors = 3, .base_vector = 0 }, + { .name = "CE", .num_vectors = 10, .base_vector = 3 }, + { .name = "WAKE", .num_vectors = 1, .base_vector = 13 }, + { .name = "DP", .num_vectors = 18, .base_vector = 14 }, + }, + .hw_rev = ATH11K_HW_QCA6698AQ_HW21, + }, }; int ath11k_pcic_init_msi_config(struct ath11k_base *ab) From 47c7ebfba30f242a24a89317c74eada47adfaa95 Mon Sep 17 00:00:00 2001 From: Roopni Devanathan Date: Mon, 18 Nov 2024 09:27:22 +0530 Subject: [PATCH 0044/1386] wifi: ath12k: Fix inappropriate use of print_array_to_buf_index() Currently in ath12k_htt_print_tx_pdev_mumimo_grp_stats_tlv() the htt_stats_buf->ul_mumimo_grp_best_usrs array is printed using print_array_to_buf_index() with a stats_index of 1. This is meant to convey the semantic that first entry in ul_mumimo_grp_best_usrs is associated with user 1. However, unlike some of the other "usr" arrays which have that semantic, ul_mumimo_grp_best_usrs does not have that semantic. Instead the first entry corresponds to user 0. Fix the issue by calling the API - print_array_to_buf(), instead of print_array_to_buf_index(). 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Roopni Devanathan Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241118035722.1755373-1-quic_rdevanat@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c index 0bc945c7a93a..d8f137bfba7b 100644 --- a/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c +++ b/drivers/net/wireless/ath/ath12k/debugfs_htt_stats.c @@ -2310,9 +2310,9 @@ ath12k_htt_print_tx_pdev_mumimo_grp_stats_tlv(const void *tag_buf, u16 tag_len, len += print_array_to_buf(buf, len, "ul_mumimo_grp_best_grp_size", htt_stats_buf->ul_mumimo_grp_best_grp_size, ATH12K_HTT_STATS_NUM_MAX_MUMIMO_SZ, "\n"); - len += print_array_to_buf_index(buf, len, "ul_mumimo_grp_best_num_usrs = ", 1, - htt_stats_buf->ul_mumimo_grp_best_usrs, - ATH12K_HTT_TX_NUM_AX_MUMIMO_USER_STATS, "\n"); + len += print_array_to_buf(buf, len, "ul_mumimo_grp_best_num_usrs = ", + htt_stats_buf->ul_mumimo_grp_best_usrs, + ATH12K_HTT_TX_NUM_AX_MUMIMO_USER_STATS, "\n"); len += print_array_to_buf(buf, len, "ul_mumimo_grp_tputs_observed (per bin = 300 mbps)", htt_stats_buf->ul_mumimo_grp_tputs, From 8ea1d2072ad1a9c24b326b50ebdc2c810c4b2cce Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Nov 2024 07:47:38 -0800 Subject: [PATCH 0045/1386] wifi: ath11k: mark some QMI driver event helpers as noinline When compiling the ath11k driver using clang with KASAN enabled, the following warning is observed: drivers/net/wireless/ath/ath11k/qmi.c:3199:13: warning: stack frame size (1560) exceeds limit (1024) in 'ath11k_qmi_driver_event_work' [-Wframe-larger-than] This is similar to the issue found in ath12k/qmi.c that was discussed in [1] and fixed with [2]. The issue is that clang inlining can explode stack usage. 
Just as in ath12k, ath11k_qmi_driver_event_work() itself is a pretty lightweight function, but it dispatches to several other functions which do the real work: ath11k_qmi_driver_event_work() ath11k_qmi_event_server_arrive() ath11k_qmi_fw_ind_register_send() ath11k_qmi_host_cap_send() * ath11k_qmi_event_load_bdf() ath11k_qmi_event_mem_request() ath11k_qmi_respond_fw_mem_request() ath11k_qmi_event_load_bdf() ath11k_qmi_wlanfw_m3_info_send() * ath11k_qmi_m3_load() ath11k_qmi_process_coldboot_calibration() Of these, the two marked with * have non-trivial stack usage. Mark those functions as 'noinline_for_stack' to prevent them from being inlined in ath11k_qmi_driver_event_work(), thereby eliminating the excessive stack usage. Note that this approach is a bit more "surgical" than the ath12k approach as only the two functions with the largest stack usage are modified. Compile tested only. Link: https://msgid.link/bc214795-1c51-4cb7-922f-67d6ef98bff2@quicinc.com # [1] Link: https://patch.msgid.link/20241028-ath12k_qmi_driver_event_work-v1-1-0d532eb593fa@quicinc.com # [2] Acked-by: Kalle Valo Link: https://patch.msgid.link/20241119-ath11k-noinline-v1-1-4ec0a8aa30b2@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/qmi.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 7a22483b35cd..5759fc521316 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -1704,7 +1704,9 @@ static const struct qmi_elem_info qmi_wlfw_fw_init_done_ind_msg_v01_ei[] = { }, }; -static int ath11k_qmi_host_cap_send(struct ath11k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath11k_qmi_host_cap_send(struct ath11k_base *ab) { struct qmi_wlanfw_host_cap_req_msg_v01 req; struct qmi_wlanfw_host_cap_resp_msg_v01 resp; @@ -2570,7 +2572,9 @@ static void ath11k_qmi_m3_free(struct
ath11k_base *ab) m3_mem->size = 0; } -static int ath11k_qmi_wlanfw_m3_info_send(struct ath11k_base *ab) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +int ath11k_qmi_wlanfw_m3_info_send(struct ath11k_base *ab) { struct m3_mem_region *m3_mem = &ab->qmi.m3_mem; struct qmi_wlanfw_m3_info_req_msg_v01 req; From 500d7ec88652ba7316e7fba334754e39e3177e4a Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Nov 2024 07:47:39 -0800 Subject: [PATCH 0046/1386] wifi: ath11k: mark ath11k_dp_rx_mon_mpdu_pop() as noinline When compiling the ath11k driver using clang with KASAN enabled, the following warning is observed: drivers/net/wireless/ath/ath11k/dp_rx.c:5244:5: warning: stack frame size (1304) exceeds limit (1024) in 'ath11k_dp_rx_process_mon_status' [-Wframe-larger-than] This is similar to the issue found in ath12k/qmi.c that was discussed in [1] and fixed with [2]. The issue is that clang inlining can explode stack usage. ath11k_dp_rx_process_mon_status() itself is a pretty lightweight function, but it dispatches to several other functions which do the real work: ath11k_dp_rx_process_mon_status() ath11k_dp_rx_reap_mon_status_ring() ath11k_dp_rx_mon_dest_process() ath11k_dp_rx_mon_mpdu_pop() * ath11k_dp_rx_mon_deliver() ath11k_dp_rx_mon_merg_msdus() ath11k_dp_rx_deliver_msdu() ath11k_dp_rx_update_peer_stats() Of these, only ath11k_dp_rx_mon_mpdu_pop() has non-trivial stack usage, so mark that function as 'noinline_for_stack' to prevent it from being inlined in ath11k_dp_rx_process_mon_status(), thereby eliminating the excessive stack usage. Compile tested only. 
Link: https://msgid.link/bc214795-1c51-4cb7-922f-67d6ef98bff2@quicinc.com # [1] Link: https://patch.msgid.link/20241028-ath12k_qmi_driver_event_work-v1-1-0d532eb593fa@quicinc.com # [2] Acked-by: Kalle Valo Link: https://patch.msgid.link/20241119-ath11k-noinline-v1-2-4ec0a8aa30b2@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/dp_rx.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 238b0e5e92a0..8517994effce 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -4691,11 +4691,12 @@ static void ath11k_dp_mon_get_buf_len(struct hal_rx_msdu_desc_info *info, } } -static u32 -ath11k_dp_rx_mon_mpdu_pop(struct ath11k *ar, int mac_id, - void *ring_entry, struct sk_buff **head_msdu, - struct sk_buff **tail_msdu, u32 *npackets, - u32 *ppdu_id) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +u32 ath11k_dp_rx_mon_mpdu_pop(struct ath11k *ar, int mac_id, + void *ring_entry, struct sk_buff **head_msdu, + struct sk_buff **tail_msdu, u32 *npackets, + u32 *ppdu_id) { struct ath11k_pdev_dp *dp = &ar->dp; struct ath11k_mon_data *pmon = (struct ath11k_mon_data *)&dp->mon_data; From 4ba72ff2919cad90e1963b708ce23b92120613ff Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 19 Nov 2024 07:47:40 -0800 Subject: [PATCH 0047/1386] wifi: ath11k: mark ath11k_wow_convert_8023_to_80211() as noinline When compiling the ath11k driver using clang with KASAN enabled, the following warning is observed: drivers/net/wireless/ath/ath11k/wow.c:672:5: warning: stack frame size (1336) exceeds limit (1024) in 'ath11k_wow_op_suspend' [-Wframe-larger-than] This is similar to the issue found in ath12k/qmi.c that was discussed in [1] and fixed with [2]. The issue is that clang inlining can explode stack usage. 
ath11k_wow_op_suspend() itself is a pretty lightweight function, but it dispatches to several other functions which do the real work. One path in particular is: ath11k_wow_op_suspend() ath11k_wow_set_wakeups() ath11k_vif_wow_set_wakeups() ath11k_wow_convert_8023_to_80211() Of these, ath11k_wow_convert_8023_to_80211() has non-trivial stack usage, so mark it as 'noinline_for_stack' to prevent it from being inlined in ath11k_wow_op_suspend(), thereby eliminating the excessive stack usage. Compile tested only. Link: https://msgid.link/bc214795-1c51-4cb7-922f-67d6ef98bff2@quicinc.com # [1] Link: https://patch.msgid.link/20241028-ath12k_qmi_driver_event_work-v1-1-0d532eb593fa@quicinc.com # [2] Acked-by: Kalle Valo Link: https://patch.msgid.link/20241119-ath11k-noinline-v1-3-4ec0a8aa30b2@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath11k/wow.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/wow.c b/drivers/net/wireless/ath/ath11k/wow.c index 827085a926b2..b6f08755129f 100644 --- a/drivers/net/wireless/ath/ath11k/wow.c +++ b/drivers/net/wireless/ath/ath11k/wow.c @@ -148,8 +148,10 @@ static int ath11k_wow_cleanup(struct ath11k *ar) * 802.11: |4B|dest mac(6B)| 6B |src mac(6B)| 8B |type(2B)| body... 
| * +--+------------+----+-----------+---------------+-----------+ */ -static void ath11k_wow_convert_8023_to_80211(struct cfg80211_pkt_pattern *new, - const struct cfg80211_pkt_pattern *old) +/* clang stack usage explodes if this is inlined */ +static noinline_for_stack +void ath11k_wow_convert_8023_to_80211(struct cfg80211_pkt_pattern *new, + const struct cfg80211_pkt_pattern *old) { u8 hdr_8023_pattern[ETH_HLEN] = {}; u8 hdr_8023_bit_mask[ETH_HLEN] = {}; From 8e6f8bc286031291dac9ff09977e60c9a069bf80 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:57:59 +0200 Subject: [PATCH 0048/1386] wifi: ath12k: Add MLO station state change handling Add changes to handle multi-link station state change with proper link handling and add code changes for ML peer creation, peer deletion. In ath12k_mac_assign_link_sta() initialise all arsta fields first and only then call rcu_assign_pointer(). This is to make sure that readers don't have access to arsta which is still modified. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-2-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.h | 4 + drivers/net/wireless/ath/ath12k/mac.c | 227 ++++++++++++++++++++----- drivers/net/wireless/ath/ath12k/mac.h | 1 + drivers/net/wireless/ath/ath12k/peer.c | 83 ++++++++- drivers/net/wireless/ath/ath12k/peer.h | 1 + 5 files changed, 272 insertions(+), 44 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 8dbdf6818f58..c1d5e93b679a 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -510,7 +510,11 @@ struct ath12k_sta { struct ath12k_link_sta __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; /* indicates bitmap of link sta created in FW */ u16 
links_map; + u8 assoc_link_id; u16 ml_peer_id; + u8 num_peer; + + enum ieee80211_sta_state state; }; #define ATH12K_MIN_5G_FREQ 4150 diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ad27a2552a2c..d796185c8431 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -1251,7 +1251,7 @@ static int ath12k_mac_monitor_stop(struct ath12k *ar) return ret; } -static int ath12k_mac_vdev_stop(struct ath12k_link_vif *arvif) +int ath12k_mac_vdev_stop(struct ath12k_link_vif *arvif) { struct ath12k_vif *ahvif = arvif->ahvif; struct ath12k *ar = arvif->ar; @@ -4832,6 +4832,35 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) } } +static void ath12k_mac_free_unassign_link_sta(struct ath12k_hw *ah, + struct ath12k_sta *ahsta, + u8 link_id) +{ + struct ath12k_link_sta *arsta; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (WARN_ON(link_id >= IEEE80211_MLD_MAX_NUM_LINKS)) + return; + + arsta = wiphy_dereference(ah->hw->wiphy, ahsta->link[link_id]); + if (WARN_ON(!arsta)) + return; + + ahsta->links_map &= ~BIT(link_id); + rcu_assign_pointer(ahsta->link[link_id], NULL); + synchronize_rcu(); + + if (arsta == &ahsta->deflink) { + arsta->link_id = ATH12K_INVALID_LINK_ID; + arsta->ahsta = NULL; + arsta->arvif = NULL; + return; + } + + kfree(arsta); +} + static int ath12k_mac_inc_num_stations(struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta) { @@ -4871,7 +4900,6 @@ static void ath12k_mac_station_post_remove(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - struct ath12k_sta *ahsta = arsta->ahsta; struct ath12k_peer *peer; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4894,14 +4922,6 @@ static void ath12k_mac_station_post_remove(struct ath12k *ar, kfree(arsta->rx_stats); arsta->rx_stats = NULL; - - if (arsta->link_id < IEEE80211_MLD_MAX_NUM_LINKS) { - 
ahsta->links_map &= ~(BIT(arsta->link_id)); - rcu_assign_pointer(ahsta->link[arsta->link_id], NULL); - synchronize_rcu(); - arsta->link_id = ATH12K_INVALID_LINK_ID; - arsta->ahsta = NULL; - } } static int ath12k_mac_station_unauthorize(struct ath12k *ar, @@ -4977,7 +4997,7 @@ static int ath12k_mac_station_remove(struct ath12k *ar, { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct ath12k_vif *ahvif = arvif->ahvif; - int ret; + int ret = 0; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -4991,6 +5011,9 @@ static int ath12k_mac_station_remove(struct ath12k *ar, arvif->vdev_id, ret); } + if (sta->mlo) + return ret; + ath12k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); ret = ath12k_peer_delete(ar, arvif->vdev_id, sta->addr); @@ -5003,6 +5026,10 @@ static int ath12k_mac_station_remove(struct ath12k *ar, ath12k_mac_station_post_remove(ar, arvif, arsta); + if (sta->valid_links) + ath12k_mac_free_unassign_link_sta(ahvif->ah, + arsta->ahsta, arsta->link_id); + return ret; } @@ -5114,51 +5141,112 @@ static u32 ath12k_mac_ieee80211_sta_bw_to_wmi(struct ath12k *ar, return bw; } +static int ath12k_mac_assign_link_sta(struct ath12k_hw *ah, + struct ath12k_sta *ahsta, + struct ath12k_link_sta *arsta, + struct ath12k_vif *ahvif, + u8 link_id) +{ + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(ahsta); + struct ieee80211_link_sta *link_sta; + struct ath12k_link_vif *arvif; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (!arsta || link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return -EINVAL; + + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[link_id]); + if (!arvif) + return -EINVAL; + + memset(arsta, 0, sizeof(*arsta)); + + link_sta = wiphy_dereference(ah->hw->wiphy, sta->link[link_id]); + if (!link_sta) + return -EINVAL; + + ether_addr_copy(arsta->addr, link_sta->addr); + + /* logical index of the link sta in order of creation */ + arsta->link_idx = ahsta->num_peer++; + + arsta->link_id = link_id; + ahsta->links_map |= BIT(arsta->link_id); + 
arsta->arvif = arvif; + arsta->ahsta = ahsta; + wiphy_work_init(&arsta->update_wk, ath12k_sta_rc_update_wk); + + rcu_assign_pointer(ahsta->link[link_id], arsta); + + return 0; +} + +static void ath12k_mac_ml_station_remove(struct ath12k_vif *ahvif, + struct ath12k_sta *ahsta) +{ + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(ahsta); + struct ath12k_hw *ah = ahvif->ah; + struct ath12k_link_vif *arvif; + struct ath12k_link_sta *arsta; + unsigned long links; + struct ath12k *ar; + u8 link_id; + + lockdep_assert_wiphy(ah->hw->wiphy); + + ath12k_peer_mlo_link_peers_delete(ahvif, ahsta); + + /* validate link station removal and clear arsta links */ + links = ahsta->links_map; + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[link_id]); + arsta = wiphy_dereference(ah->hw->wiphy, ahsta->link[link_id]); + if (!arvif || !arsta) + continue; + + ar = arvif->ar; + + ath12k_mac_station_post_remove(ar, arvif, arsta); + + ath12k_mac_free_unassign_link_sta(ah, ahsta, link_id); + } + + ath12k_peer_ml_delete(ah, sta); +} + static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, struct ath12k_link_vif *arvif, struct ath12k_link_sta *arsta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state) { - struct ath12k *ar = arvif->ar; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - struct ath12k_sta *ahsta = arsta->ahsta; + struct ath12k *ar = arvif->ar; int ret = 0; lockdep_assert_wiphy(hw->wiphy); + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac handle link %u sta %pM state %d -> %d\n", + arsta->link_id, arsta->addr, old_state, new_state); + /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: Remove the station * from driver */ if ((old_state == IEEE80211_STA_NONE && new_state == IEEE80211_STA_NOTEXIST)) { - /* ML sta needs separate handling */ - if (sta->mlo) - return 0; - ret = ath12k_mac_station_remove(ar, arvif, 
arsta); if (ret) { ath12k_warn(ar->ab, "Failed to remove station: %pM for VDEV: %d\n", arsta->addr, arvif->vdev_id); + goto exit; } } /* IEEE80211_STA_NOTEXIST -> IEEE80211_STA_NONE: Add new station to driver */ if (old_state == IEEE80211_STA_NOTEXIST && new_state == IEEE80211_STA_NONE) { - memset(arsta, 0, sizeof(*arsta)); - rcu_assign_pointer(ahsta->link[0], arsta); - /* TODO use appropriate link id once MLO support is added */ - arsta->link_id = ATH12K_DEFAULT_LINK_ID; - ahsta->links_map = BIT(arsta->link_id); - arsta->ahsta = ahsta; - arsta->arvif = arvif; - ether_addr_copy(arsta->addr, sta->addr); - wiphy_work_init(&arsta->update_wk, ath12k_sta_rc_update_wk); - - synchronize_rcu(); - ret = ath12k_mac_station_add(ar, arvif, arsta); if (ret) ath12k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n", @@ -5200,6 +5288,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, } else if (old_state == IEEE80211_STA_AUTHORIZED && new_state == IEEE80211_STA_ASSOC) { ath12k_mac_station_unauthorize(ar, arvif, arsta); + /* IEEE80211_STA_ASSOC -> IEEE80211_STA_AUTH: disassoc peer connected to * AP/mesh/ADHOC vif type. 
*/ @@ -5214,6 +5303,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, sta->addr); } +exit: return ret; } @@ -5225,10 +5315,12 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_hw *ah = ath12k_hw_to_ah(hw); struct ath12k_link_vif *arvif; struct ath12k_link_sta *arsta; - int ret; + unsigned long valid_links; u8 link_id = 0; + int ret; lockdep_assert_wiphy(hw->wiphy); @@ -5237,32 +5329,83 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, link_id = ffs(sta->valid_links) - 1; } - /* Handle for non-ML station */ - if (!sta->mlo) { - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - arsta = &ahsta->deflink; - arsta->ahsta = ahsta; + /* IEEE80211_STA_NOTEXIST -> IEEE80211_STA_NONE: + * New station add received. If this is a ML station then + * ahsta->links_map will be zero and sta->valid_links will be 1. + * Assign default link to the first link sta. 
+ */ + if (old_state == IEEE80211_STA_NOTEXIST && + new_state == IEEE80211_STA_NONE) { + memset(ahsta, 0, sizeof(*ahsta)); - if (WARN_ON(!arvif || !arsta)) { - ret = -EINVAL; + arsta = &ahsta->deflink; + + /* ML sta */ + if (sta->mlo && !ahsta->links_map && + (hweight16(sta->valid_links) == 1)) { + ret = ath12k_peer_ml_create(ah, sta); + if (ret) { + ath12k_hw_warn(ah, "unable to create ML peer for sta %pM", + sta->addr); + goto exit; + } + } + + ret = ath12k_mac_assign_link_sta(ah, ahsta, arsta, ahvif, + link_id); + if (ret) { + ath12k_hw_warn(ah, "unable assign link %d for sta %pM", + link_id, sta->addr); goto exit; } + /* above arsta will get memset, hence do this after assign + * link sta + */ + if (sta->mlo) { + arsta->is_assoc_link = true; + ahsta->assoc_link_id = link_id; + } + } + + /* Handle all the other state transitions in generic way */ + valid_links = ahsta->links_map; + for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + arsta = wiphy_dereference(hw->wiphy, ahsta->link[link_id]); + /* some assumptions went wrong! */ + if (WARN_ON(!arvif || !arsta)) + continue; + /* vdev might be in deleted */ - if (WARN_ON(!arvif->ar)) { - ret = -EINVAL; - goto exit; - } + if (WARN_ON(!arvif->ar)) + continue; ret = ath12k_mac_handle_link_sta_state(hw, arvif, arsta, old_state, new_state); - if (ret) + if (ret) { + ath12k_hw_warn(ah, "unable to move link sta %d of sta %pM from state %d to %d", + link_id, arsta->addr, old_state, new_state); goto exit; + } } + /* IEEE80211_STA_NONE -> IEEE80211_STA_NOTEXIST: + * Remove the station from driver (handle ML sta here since that + * needs special handling. 
Normal sta will be handled in generic + * handler below + */ + if (old_state == IEEE80211_STA_NONE && + new_state == IEEE80211_STA_NOTEXIST && sta->mlo) + ath12k_mac_ml_station_remove(ahvif, ahsta); + ret = 0; exit: + /* update the state if everything went well */ + if (!ret) + ahsta->state = new_state; + return ret; } diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index d382337ba649..c13630ee479a 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -89,5 +89,6 @@ int ath12k_mac_vif_set_keepalive(struct ath12k_link_vif *arvif, enum wmi_sta_keepalive_method method, u32 interval); u8 ath12k_mac_get_target_pdev_id(struct ath12k *ar); +int ath12k_mac_vdev_stop(struct ath12k_link_vif *arvif); #endif diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 0e86847edd6e..ffbc1265ccc1 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -264,8 +264,9 @@ int ath12k_wait_for_peer_delete_done(struct ath12k *ar, u32 vdev_id, return 0; } -int ath12k_peer_delete(struct ath12k *ar, u32 vdev_id, u8 *addr) +static int ath12k_peer_delete_send(struct ath12k *ar, u32 vdev_id, const u8 *addr) { + struct ath12k_base *ab = ar->ab; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -274,12 +275,25 @@ int ath12k_peer_delete(struct ath12k *ar, u32 vdev_id, u8 *addr) ret = ath12k_wmi_send_peer_delete_cmd(ar, addr, vdev_id); if (ret) { - ath12k_warn(ar->ab, + ath12k_warn(ab, "failed to delete peer vdev_id %d addr %pM ret %d\n", vdev_id, addr, ret); return ret; } + return 0; +} + +int ath12k_peer_delete(struct ath12k *ar, u32 vdev_id, u8 *addr) +{ + int ret; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + ret = ath12k_peer_delete_send(ar, vdev_id, addr); + if (ret) + return ret; + ret = ath12k_wait_for_peer_delete_done(ar, vdev_id, addr); if (ret) return ret; @@ -456,3 +470,68 @@ int 
ath12k_peer_ml_delete(struct ath12k_hw *ah, struct ieee80211_sta *sta) return 0; } + +int ath12k_peer_mlo_link_peers_delete(struct ath12k_vif *ahvif, struct ath12k_sta *ahsta) +{ + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(ahsta); + struct ath12k_hw *ah = ahvif->ah; + struct ath12k_link_vif *arvif; + struct ath12k_link_sta *arsta; + unsigned long links; + struct ath12k *ar; + int ret, err_ret = 0; + u8 link_id; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (!sta->mlo) + return -EINVAL; + + /* FW expects delete of all link peers at once before waiting for reception + * of peer unmap or delete responses + */ + links = ahsta->links_map; + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[link_id]); + arsta = wiphy_dereference(ah->hw->wiphy, ahsta->link[link_id]); + if (!arvif || !arsta) + continue; + + ar = arvif->ar; + if (!ar) + continue; + + ath12k_dp_peer_cleanup(ar, arvif->vdev_id, arsta->addr); + + ret = ath12k_peer_delete_send(ar, arvif->vdev_id, arsta->addr); + if (ret) { + ath12k_warn(ar->ab, + "failed to delete peer vdev_id %d addr %pM ret %d\n", + arvif->vdev_id, arsta->addr, ret); + err_ret = ret; + continue; + } + } + + /* Ensure all link peers are deleted and unmapped */ + links = ahsta->links_map; + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[link_id]); + arsta = wiphy_dereference(ah->hw->wiphy, ahsta->link[link_id]); + if (!arvif || !arsta) + continue; + + ar = arvif->ar; + if (!ar) + continue; + + ret = ath12k_wait_for_peer_delete_done(ar, arvif->vdev_id, arsta->addr); + if (ret) { + err_ret = ret; + continue; + } + ar->num_peers--; + } + + return err_ret; +} diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index c28aca5d88a0..e398d5a3fdc8 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -78,5 +78,6 @@ bool 
ath12k_peer_exist_by_vdev_id(struct ath12k_base *ab, int vdev_id); struct ath12k_peer *ath12k_peer_find_by_ast(struct ath12k_base *ab, int ast_hash); int ath12k_peer_ml_create(struct ath12k_hw *ah, struct ieee80211_sta *sta); int ath12k_peer_ml_delete(struct ath12k_hw *ah, struct ieee80211_sta *sta); +int ath12k_peer_mlo_link_peers_delete(struct ath12k_vif *ahvif, struct ath12k_sta *ahsta); #endif /* _PEER_H_ */ From a27fa6148dacc79451e523c2694bc0a673b1be05 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:58:00 +0200 Subject: [PATCH 0049/1386] wifi: ath12k: support change_sta_links() mac80211 op Add ath12k_mac_op_change_sta_links() for adding and removing link station. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-3-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 97 ++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index d796185c8431..d92a5e0afe2e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5554,6 +5554,101 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, rcu_read_unlock(); } +static struct ath12k_link_sta *ath12k_mac_alloc_assign_link_sta(struct ath12k_hw *ah, + struct ath12k_sta *ahsta, + struct ath12k_vif *ahvif, + u8 link_id) +{ + struct ath12k_link_sta *arsta; + int ret; + + lockdep_assert_wiphy(ah->hw->wiphy); + + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + arsta = wiphy_dereference(ah->hw->wiphy, ahsta->link[link_id]); + if (arsta) + return NULL; + + arsta = kmalloc(sizeof(*arsta), GFP_KERNEL); + if (!arsta) + return NULL; + + ret = ath12k_mac_assign_link_sta(ah, ahsta, arsta, ahvif, 
link_id); + if (ret) { + kfree(arsta); + return NULL; + } + + return arsta; +} + +static int ath12k_mac_op_change_sta_links(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links) +{ + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + struct ath12k_hw *ah = hw->priv; + struct ath12k_link_vif *arvif; + struct ath12k_link_sta *arsta; + unsigned long valid_links; + struct ath12k *ar; + u8 link_id; + int ret; + + lockdep_assert_wiphy(hw->wiphy); + + if (!sta->valid_links) + return -EINVAL; + + /* Firmware does not support removal of one of link stas. All sta + * would be removed during ML STA delete in sta_state(), hence link + * sta removal is not handled here. + */ + if (new_links < old_links) + return 0; + + if (ahsta->ml_peer_id == ATH12K_MLO_PEER_ID_INVALID) { + ath12k_hw_warn(ah, "unable to add link for ml sta %pM", sta->addr); + return -EINVAL; + } + + /* this op is expected only after initial sta insertion with default link */ + if (WARN_ON(ahsta->links_map == 0)) + return -EINVAL; + + valid_links = new_links; + for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { + if (ahsta->links_map & BIT(link_id)) + continue; + + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + arsta = ath12k_mac_alloc_assign_link_sta(ah, ahsta, ahvif, link_id); + + if (!arvif || !arsta) { + ath12k_hw_warn(ah, "Failed to alloc/assign link sta"); + continue; + } + + ar = arvif->ar; + if (!ar) + continue; + + ret = ath12k_mac_station_add(ar, arvif, arsta); + if (ret) { + ath12k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n", + arsta->addr, arvif->vdev_id); + ath12k_mac_free_unassign_link_sta(ah, ahsta, link_id); + return ret; + } + } + + return 0; +} + static int ath12k_conf_tx_uapsd(struct ath12k_link_vif *arvif, u16 ac, bool enable) { @@ -9604,7 +9699,7 @@ static const struct ieee80211_ops ath12k_ops = { .sta_statistics = 
ath12k_mac_op_sta_statistics, .remain_on_channel = ath12k_mac_op_remain_on_channel, .cancel_remain_on_channel = ath12k_mac_op_cancel_remain_on_channel, - + .change_sta_links = ath12k_mac_op_change_sta_links, #ifdef CONFIG_PM .suspend = ath12k_wow_op_suspend, .resume = ath12k_wow_op_resume, From ea4192553850cc6b46d5676a9514f759ef3dee0d Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:58:01 +0200 Subject: [PATCH 0050/1386] wifi: ath12k: add primary link for data path operations In case of Multi-link operation, data path peer setup and tid setup should be done only for the primary link of a multi-link station. Add changes to introduce the primary link in the peer. Currently, the association link will be considered as the primary link. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-4-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/dp.c | 5 +++++ drivers/net/wireless/ath/ath12k/dp_rx.c | 10 ++++++++++ drivers/net/wireless/ath/ath12k/peer.c | 23 +++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/peer.h | 8 ++++++++ 4 files changed, 46 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index c99e9ceb1a6e..23326e2dfe8d 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -41,6 +41,11 @@ void ath12k_dp_peer_cleanup(struct ath12k *ar, int vdev_id, const u8 *addr) return; } + if (!peer->primary_link) { + spin_unlock_bh(&ab->base_lock); + return; + } + ath12k_dp_rx_peer_tid_cleanup(ar, peer); crypto_free_shash(peer->tfm_mmic); peer->dp_setup_done = false; diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 0fb39c174475..da3ebdf094c3 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ 
b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -940,6 +940,11 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ return -ENOENT; } + if (!peer->primary_link) { + spin_unlock_bh(&ab->base_lock); + return 0; + } + if (ab->hw_params->reoq_lut_support && !dp->reoq_lut.vaddr) { spin_unlock_bh(&ab->base_lock); ath12k_warn(ab, "reo qref table is not setup\n"); @@ -2781,6 +2786,11 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev return -ENOENT; } + if (!peer->primary_link) { + spin_unlock_bh(&ab->base_lock); + return 0; + } + for (i = 0; i <= IEEE80211_NUM_TIDS; i++) { rx_tid = &peer->rx_tid[i]; rx_tid->ab = ab; diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index ffbc1265ccc1..25905498e8fb 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -313,7 +313,11 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, struct ath12k_wmi_peer_create_arg *arg) { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ath12k_link_sta *arsta; + u8 link_id = arvif->link_id; struct ath12k_peer *peer; + struct ath12k_sta *ahsta; + u16 ml_peer_id; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -379,6 +383,25 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, arvif->ast_idx = peer->hw_peer_id; } + if (sta) { + ahsta = ath12k_sta_to_ahsta(sta); + arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahsta->link[link_id]); + + /* Fill ML info into created peer */ + if (sta->mlo) { + ml_peer_id = ahsta->ml_peer_id; + peer->ml_id = ml_peer_id | ATH12K_PEER_ML_ID_VALID; + ether_addr_copy(peer->ml_addr, sta->addr); + + /* the assoc link is considered primary for now */ + peer->primary_link = arsta->is_assoc_link; + } else { + peer->ml_id = ATH12K_MLO_PEER_ID_INVALID; + peer->primary_link = true; + } + } + peer->sec_type = HAL_ENCRYPT_TYPE_OPEN; 
peer->sec_type_grp = HAL_ENCRYPT_TYPE_OPEN; diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index e398d5a3fdc8..a39e943bd66b 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -51,6 +51,14 @@ struct ath12k_peer { bool dp_setup_done; u16 ml_id; + + /* any other ML info common for all partners can be added + * here and would be same for all partner peers. + */ + u8 ml_addr[ETH_ALEN]; + + /* To ensure only certain work related to dp is done once */ + bool primary_link; }; struct ath12k_ml_peer { From 061097e5732dc478ef0e57995fae307e1b95ed62 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:58:02 +0200 Subject: [PATCH 0051/1386] wifi: ath12k: use arsta instead of sta Currently, struct ieee80211_sta (sta) is used for many WMI and mac80211 ops, but for a multi-link station the driver should use struct ath12k_link_sta (arsta) instead of sta. Add changes to use the arsta object for WMI commands and other mac80211 ops. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-5-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 144 ++++++++++++++------------ 1 file changed, 77 insertions(+), 67 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index d92a5e0afe2e..01932aeab4f3 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -1771,7 +1771,7 @@ static void ath12k_peer_assoc_h_basic(struct ath12k *ar, else aid = sta->aid; - ether_addr_copy(arg->peer_mac, sta->addr); + ether_addr_copy(arg->peer_mac, arsta->addr); arg->vdev_id = arvif->vdev_id; arg->peer_associd = aid; arg->auth_flag = true; @@ -2163,7 +2163,7 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, arg->tx_max_mcs_nss = 0xFF; ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vht peer %pM max_mpdu %d flags 0x%x\n", - sta->addr, arg->peer_max_mpdu, arg->peer_flags); + arsta->addr, arg->peer_max_mpdu, arg->peer_flags); /* TODO: rxnss_override */ } @@ -2459,7 +2459,7 @@ static void ath12k_peer_assoc_h_qos(struct ath12k *ar, } ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac peer %pM qos %d\n", - sta->addr, arg->qos_flag); + arsta->addr, arg->qos_flag); } static int ath12k_peer_assoc_qos_ap(struct ath12k *ar, @@ -2499,26 +2499,26 @@ static int ath12k_peer_assoc_qos_ap(struct ath12k *ar, arg.param = WMI_AP_PS_PEER_PARAM_UAPSD; arg.value = uapsd; - ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, sta->addr, &arg); + ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, arsta->addr, &arg); if (ret) goto err; arg.param = WMI_AP_PS_PEER_PARAM_MAX_SP; arg.value = max_sp; - ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, sta->addr, &arg); + ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, arsta->addr, &arg); if (ret) goto 
err; /* TODO: revisit during testing */ arg.param = WMI_AP_PS_PEER_PARAM_SIFS_RESP_FRMTYPE; arg.value = DISABLE_SIFS_RESPONSE_TRIGGER; - ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, sta->addr, &arg); + ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, arsta->addr, &arg); if (ret) goto err; arg.param = WMI_AP_PS_PEER_PARAM_SIFS_RESP_UAPSD; arg.value = DISABLE_SIFS_RESPONSE_TRIGGER; - ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, sta->addr, &arg); + ret = ath12k_wmi_send_set_ap_ps_param_cmd(ar, arsta->addr, &arg); if (ret) goto err; @@ -2705,7 +2705,7 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, } ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac peer %pM phymode %s\n", - sta->addr, ath12k_mac_phymode_str(phymode)); + arsta->addr, ath12k_mac_phymode_str(phymode)); arg->peer_phymode = phymode; WARN_ON(phymode == MODE_UNKNOWN); @@ -4295,7 +4295,7 @@ static int ath12k_mac_set_key(struct ath12k *ar, enum set_key_cmd cmd, return 1; if (sta) - peer_addr = sta->addr; + peer_addr = arsta->addr; else if (ahvif->vdev_type == WMI_VDEV_TYPE_STA) peer_addr = vif->bss_conf.bssid; else @@ -4526,7 +4526,6 @@ ath12k_mac_set_peer_vht_fixed_rate(struct ath12k_link_vif *arvif, const struct cfg80211_bitrate_mask *mask, enum nl80211_band band) { - struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct ath12k *ar = arvif->ar; u8 vht_rate, nss; u32 rate_code; @@ -4545,24 +4544,24 @@ ath12k_mac_set_peer_vht_fixed_rate(struct ath12k_link_vif *arvif, if (!nss) { ath12k_warn(ar->ab, "No single VHT Fixed rate found to set for %pM", - sta->addr); + arsta->addr); return -EINVAL; } ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "Setting Fixed VHT Rate for peer %pM. 
Device will not switch to any other selected rates", - sta->addr); + arsta->addr); rate_code = ATH12K_HW_RATE_CODE(vht_rate, nss - 1, WMI_RATE_PREAMBLE_VHT); - ret = ath12k_wmi_set_peer_param(ar, sta->addr, + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_PARAM_FIXED_RATE, rate_code); if (ret) ath12k_warn(ar->ab, "failed to update STA %pM Fixed Rate %d: %d\n", - sta->addr, rate_code, ret); + arsta->addr, rate_code, ret); return ret; } @@ -4580,12 +4579,16 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, enum nl80211_band band; struct cfg80211_bitrate_mask *mask; u8 num_vht_rates; + u8 link_id = arvif->link_id; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) return -EPERM; + if (WARN_ON(!rcu_access_pointer(sta->link[link_id]))) + return -EINVAL; + band = def.chan->band; mask = &arvif->bitrate_mask; @@ -4599,13 +4602,13 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, ret = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); if (ret) { ath12k_warn(ar->ab, "failed to run peer assoc for STA %pM vdev %i: %d\n", - sta->addr, arvif->vdev_id, ret); + arsta->addr, arvif->vdev_id, ret); return ret; } if (!wait_for_completion_timeout(&ar->peer_assoc_done, 1 * HZ)) { ath12k_warn(ar->ab, "failed to get peer assoc conf event for %pM vdev %i\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); return -ETIMEDOUT; } @@ -4629,7 +4632,7 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, if (reassoc) return 0; - ret = ath12k_setup_peer_smps(ar, arvif, sta->addr, + ret = ath12k_setup_peer_smps(ar, arvif, arsta->addr, &sta->deflink.ht_cap, &sta->deflink.he_6ghz_capa); if (ret) { @@ -4649,7 +4652,7 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, ret = ath12k_peer_assoc_qos_ap(ar, arvif, arsta); if (ret) { ath12k_warn(ar->ab, "failed to set qos params for STA %pM for vdev %i: %d\n", - sta->addr, arvif->vdev_id, ret); + arsta->addr, arvif->vdev_id, ret); 
return ret; } } @@ -4732,65 +4735,65 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) * WMI_PEER_CHWIDTH */ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac bandwidth upgrade for sta %pM new %d old %d\n", - sta->addr, bw, bw_prev); - err = ath12k_wmi_set_peer_param(ar, sta->addr, + arsta->addr, bw, bw_prev); + err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_PHYMODE, peer_phymode); if (err) { ath12k_warn(ar->ab, "failed to update STA %pM to peer phymode %d: %d\n", - sta->addr, peer_phymode, err); + arsta->addr, peer_phymode, err); return; } - err = ath12k_wmi_set_peer_param(ar, sta->addr, + err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_CHWIDTH, bw); if (err) ath12k_warn(ar->ab, "failed to update STA %pM to peer bandwidth %d: %d\n", - sta->addr, bw, err); + arsta->addr, bw, err); } else { /* When we downgrade bandwidth this will conflict with phymode * and cause to trigger firmware crash. In this case we send * WMI_PEER_CHWIDTH followed by WMI_PEER_PHYMODE */ ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac bandwidth downgrade for sta %pM new %d old %d\n", - sta->addr, bw, bw_prev); - err = ath12k_wmi_set_peer_param(ar, sta->addr, + arsta->addr, bw, bw_prev); + err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_CHWIDTH, bw); if (err) { ath12k_warn(ar->ab, "failed to update STA %pM peer to bandwidth %d: %d\n", - sta->addr, bw, err); + arsta->addr, bw, err); return; } - err = ath12k_wmi_set_peer_param(ar, sta->addr, + err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_PHYMODE, peer_phymode); if (err) ath12k_warn(ar->ab, "failed to update STA %pM to peer phymode %d: %d\n", - sta->addr, peer_phymode, err); + arsta->addr, peer_phymode, err); } } if (changed & IEEE80211_RC_NSS_CHANGED) { ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac update sta %pM nss %d\n", - sta->addr, nss); + arsta->addr, nss); - err = ath12k_wmi_set_peer_param(ar, sta->addr, arvif->vdev_id, + 
err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_NSS, nss); if (err) ath12k_warn(ar->ab, "failed to update STA %pM nss %d: %d\n", - sta->addr, nss, err); + arsta->addr, nss, err); } if (changed & IEEE80211_RC_SMPS_CHANGED) { ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac update sta %pM smps %d\n", - sta->addr, smps); + arsta->addr, smps); - err = ath12k_wmi_set_peer_param(ar, sta->addr, arvif->vdev_id, + err = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_MIMO_PS_STATE, smps); if (err) ath12k_warn(ar->ab, "failed to update STA %pM smps %d: %d\n", - sta->addr, smps, err); + arsta->addr, smps, err); } if (changed & IEEE80211_RC_SUPP_RATES_CHANGED) { @@ -4823,11 +4826,11 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) err = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); if (err) ath12k_warn(ar->ab, "failed to run peer assoc for STA %pM vdev %i: %d\n", - sta->addr, arvif->vdev_id, err); + arsta->addr, arvif->vdev_id, err); if (!wait_for_completion_timeout(&ar->peer_assoc_done, 1 * HZ)) ath12k_warn(ar->ab, "failed to get peer assoc conf event for %pM vdev %i\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); } } } @@ -4908,7 +4911,7 @@ static void ath12k_mac_station_post_remove(struct ath12k *ar, spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, arsta->addr); if (peer && peer->sta == sta) { ath12k_warn(ar->ab, "Found peer entry %pM n vdev %i after it was supposedly removed\n", vif->addr, arvif->vdev_id); @@ -4963,27 +4966,26 @@ static int ath12k_mac_station_authorize(struct ath12k *ar, { struct ath12k_peer *peer; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); - struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, 
sta->addr); + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, arsta->addr); if (peer) peer->is_authorized = true; spin_unlock_bh(&ar->ab->base_lock); if (vif->type == NL80211_IFTYPE_STATION && arvif->is_up) { - ret = ath12k_wmi_set_peer_param(ar, sta->addr, + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_AUTHORIZE, 1); if (ret) { ath12k_warn(ar->ab, "Unable to authorize peer %pM vdev %d: %d\n", - sta->addr, arvif->vdev_id, ret); + arsta->addr, arvif->vdev_id, ret); return ret; } } @@ -5014,15 +5016,15 @@ static int ath12k_mac_station_remove(struct ath12k *ar, if (sta->mlo) return ret; - ath12k_dp_peer_cleanup(ar, arvif->vdev_id, sta->addr); + ath12k_dp_peer_cleanup(ar, arvif->vdev_id, arsta->addr); - ret = ath12k_peer_delete(ar, arvif->vdev_id, sta->addr); + ret = ath12k_peer_delete(ar, arvif->vdev_id, arsta->addr); if (ret) ath12k_warn(ar->ab, "Failed to delete peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); else ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "Removed peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); ath12k_mac_station_post_remove(ar, arvif, arsta); @@ -5040,7 +5042,7 @@ static int ath12k_mac_station_add(struct ath12k *ar, struct ath12k_base *ab = ar->ab; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - struct ath12k_wmi_peer_create_arg peer_param; + struct ath12k_wmi_peer_create_arg peer_param = {0}; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); @@ -5065,28 +5067,28 @@ static int ath12k_mac_station_add(struct ath12k *ar, ret = ath12k_peer_create(ar, arvif, sta, &peer_param); if (ret) { ath12k_warn(ab, "Failed to add peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); goto free_peer; } ath12k_dbg(ab, ATH12K_DBG_MAC, "Added peer: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); if 
(ieee80211_vif_is_mesh(vif)) { - ret = ath12k_wmi_set_peer_param(ar, sta->addr, + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_USE_4ADDR, 1); if (ret) { ath12k_warn(ab, "failed to STA %pM 4addr capability: %d\n", - sta->addr, ret); + arsta->addr, ret); goto free_peer; } } - ret = ath12k_dp_peer_setup(ar, arvif->vdev_id, sta->addr); + ret = ath12k_dp_peer_setup(ar, arvif->vdev_id, arsta->addr); if (ret) { ath12k_warn(ab, "failed to setup dp for peer %pM on vdev %i (%d)\n", - sta->addr, arvif->vdev_id, ret); + arsta->addr, arvif->vdev_id, ret); goto free_peer; } @@ -5103,7 +5105,7 @@ static int ath12k_mac_station_add(struct ath12k *ar, return 0; free_peer: - ath12k_peer_delete(ar, arvif->vdev_id, sta->addr); + ath12k_peer_delete(ar, arvif->vdev_id, arsta->addr); dec_num_station: ath12k_mac_dec_num_stations(arvif, arsta); exit: @@ -5175,6 +5177,8 @@ static int ath12k_mac_assign_link_sta(struct ath12k_hw *ah, ahsta->links_map |= BIT(arsta->link_id); arsta->arvif = arvif; arsta->ahsta = ahsta; + ahsta->ahvif = ahvif; + wiphy_work_init(&arsta->update_wk, ath12k_sta_rc_update_wk); rcu_assign_pointer(ahsta->link[link_id], arsta); @@ -5250,7 +5254,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, ret = ath12k_mac_station_add(ar, arvif, arsta); if (ret) ath12k_warn(ar->ab, "Failed to add station: %pM for VDEV: %d\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); /* IEEE80211_STA_AUTH -> IEEE80211_STA_ASSOC: Send station assoc command for * peer associated to AP/Mesh/ADHOC vif type. 
@@ -5263,7 +5267,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, ret = ath12k_mac_station_assoc(ar, arvif, arsta, false); if (ret) ath12k_warn(ar->ab, "Failed to associate station: %pM\n", - sta->addr); + arsta->addr); spin_lock_bh(&ar->data_lock); @@ -5280,7 +5284,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, ret = ath12k_mac_station_authorize(ar, arvif, arsta); if (ret) ath12k_warn(ar->ab, "Failed to authorize station: %pM\n", - sta->addr); + arsta->addr); /* IEEE80211_STA_AUTHORIZED -> IEEE80211_STA_ASSOC: station may be in removal, * deauthorize it. @@ -5300,7 +5304,7 @@ static int ath12k_mac_handle_link_sta_state(struct ieee80211_hw *hw, ret = ath12k_mac_station_disassoc(ar, arvif, arsta); if (ret) ath12k_warn(ar->ab, "Failed to disassociate station: %pM\n", - sta->addr); + arsta->addr); } exit: @@ -5413,16 +5417,22 @@ static int ath12k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta) { - struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); struct ath12k *ar; struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_link_vif *arvif; + struct ath12k_link_sta *arsta; + u8 link_id; int ret; s16 txpwr; lockdep_assert_wiphy(hw->wiphy); - arvif = &ahvif->deflink; + /* TODO: use link id from mac80211 once that's implemented */ + link_id = 0; + + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + arsta = wiphy_dereference(hw->wiphy, ahsta->link[link_id]); if (sta->deflink.txpwr.type == NL80211_TX_POWER_AUTOMATIC) { txpwr = 0; @@ -5439,9 +5449,9 @@ static int ath12k_mac_op_sta_set_txpwr(struct ieee80211_hw *hw, goto out; } - ar = ath12k_ah_to_ar(ah, 0); + ar = arvif->ar; - ret = ath12k_wmi_set_peer_param(ar, sta->addr, arvif->vdev_id, + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_USE_FIXED_PWR, txpwr); if (ret) { ath12k_warn(ar->ab, "failed to set tx power for station 
ret: %d\n", @@ -5494,12 +5504,12 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, } spin_lock_bh(&ar->ab->base_lock); - peer = ath12k_peer_find(ar->ab, arvif->vdev_id, sta->addr); + peer = ath12k_peer_find(ar->ab, arvif->vdev_id, arsta->addr); if (!peer) { spin_unlock_bh(&ar->ab->base_lock); rcu_read_unlock(); ath12k_warn(ar->ab, "mac sta rc update failed to find peer %pM on vdev %i\n", - sta->addr, arvif->vdev_id); + arsta->addr, arvif->vdev_id); return; } @@ -5507,7 +5517,7 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", - sta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, + arsta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, sta->deflink.smps_mode); spin_lock_bh(&ar->data_lock); @@ -5537,7 +5547,7 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, break; default: ath12k_warn(ar->ab, "Invalid smps %d in sta rc update for %pM\n", - sta->deflink.smps_mode, sta->addr); + sta->deflink.smps_mode, arsta->addr); smps = WMI_PEER_SMPS_PS_NONE; break; } @@ -9121,14 +9131,14 @@ static void ath12k_mac_disable_peer_fixed_rate(void *data, if (arsta->arvif != arvif) return; - ret = ath12k_wmi_set_peer_param(ar, sta->addr, + ret = ath12k_wmi_set_peer_param(ar, arsta->addr, arvif->vdev_id, WMI_PEER_PARAM_FIXED_RATE, WMI_FIXED_RATE_NONE); if (ret) ath12k_warn(ar->ab, "failed to disable peer fixed rate for STA %pM ret %d\n", - sta->addr, ret); + arsta->addr, ret); } static int From a0300e6bcfd4bc3ea9865856a3abad5f9dd6fd89 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Thu, 21 Nov 2024 17:58:03 +0200 Subject: [PATCH 0052/1386] wifi: ath12k: add reo queue lookup table for ML peers Currently reoqueue tid setup uses lookup table (LUT) during peer association, but for ML peer there will be multiple link peers (belonging to different underlying firmware) affiliated to each other. 
Hence the reo queue should be setup only on one of the links which is the primary link. Add changes to create separate ML reo queue lookup table for ML peers and use the same while setting up rx tid for ML peer's primary link. For ML peers use ml_peer_id instead of peer_id to setup/lookup the reo queue entry in the LUT. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-6-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/dp.c | 39 +++++++++++++++----- drivers/net/wireless/ath/ath12k/dp.h | 1 + drivers/net/wireless/ath/ath12k/dp_rx.c | 48 ++++++++++++++++++------- drivers/net/wireless/ath/ath12k/peer.c | 2 ++ drivers/net/wireless/ath/ath12k/peer.h | 2 +- 5 files changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 23326e2dfe8d..328be2c635d6 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1265,15 +1265,23 @@ static void ath12k_dp_reoq_lut_cleanup(struct ath12k_base *ab) if (!ab->hw_params->reoq_lut_support) return; - if (!dp->reoq_lut.vaddr) - return; + if (dp->reoq_lut.vaddr) { + ath12k_hif_write32(ab, + HAL_SEQ_WCSS_UMAC_REO_REG + + HAL_REO1_QDESC_LUT_BASE0(ab), 0); + dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, + dp->reoq_lut.vaddr, dp->reoq_lut.paddr); + dp->reoq_lut.vaddr = NULL; + } - dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, - dp->reoq_lut.vaddr, dp->reoq_lut.paddr); - dp->reoq_lut.vaddr = NULL; - - ath12k_hif_write32(ab, - HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE0(ab), 0); + if (dp->ml_reoq_lut.vaddr) { + ath12k_hif_write32(ab, + HAL_SEQ_WCSS_UMAC_REO_REG + + HAL_REO1_QDESC_LUT_BASE1(ab), 0); + dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, + dp->ml_reoq_lut.vaddr, dp->ml_reoq_lut.paddr); + 
dp->ml_reoq_lut.vaddr = NULL; + } } void ath12k_dp_free(struct ath12k_base *ab) @@ -1599,8 +1607,23 @@ static int ath12k_dp_reoq_lut_setup(struct ath12k_base *ab) return -ENOMEM; } + dp->ml_reoq_lut.vaddr = dma_alloc_coherent(ab->dev, + DP_REOQ_LUT_SIZE, + &dp->ml_reoq_lut.paddr, + GFP_KERNEL | __GFP_ZERO); + if (!dp->ml_reoq_lut.vaddr) { + ath12k_warn(ab, "failed to allocate memory for ML reoq table"); + dma_free_coherent(ab->dev, DP_REOQ_LUT_SIZE, + dp->reoq_lut.vaddr, dp->reoq_lut.paddr); + dp->reoq_lut.vaddr = NULL; + return -ENOMEM; + } + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE0(ab), dp->reoq_lut.paddr); + ath12k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_REO_REG + HAL_REO1_QDESC_LUT_BASE1(ab), + dp->ml_reoq_lut.paddr >> 8); + return 0; } diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 2e05fc19410e..a120b7a8477d 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -368,6 +368,7 @@ struct ath12k_dp { struct dp_rxdma_mon_ring rxdma_mon_buf_ring; struct dp_rxdma_mon_ring tx_mon_buf_ring; struct ath12k_reo_q_addr_lut reoq_lut; + struct ath12k_reo_q_addr_lut ml_reoq_lut; }; /* HTT definitions */ diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index da3ebdf094c3..70680f2124e5 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -740,15 +740,22 @@ static void ath12k_peer_rx_tid_qref_setup(struct ath12k_base *ab, u16 peer_id, u { struct ath12k_reo_queue_ref *qref; struct ath12k_dp *dp = &ab->dp; + bool ml_peer = false; if (!ab->hw_params->reoq_lut_support) return; - /* TODO: based on ML peer or not, select the LUT. 
below assumes non - * ML peer - */ - qref = (struct ath12k_reo_queue_ref *)dp->reoq_lut.vaddr + - (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); + if (peer_id & ATH12K_PEER_ML_ID_VALID) { + peer_id &= ~ATH12K_PEER_ML_ID_VALID; + ml_peer = true; + } + + if (ml_peer) + qref = (struct ath12k_reo_queue_ref *)dp->ml_reoq_lut.vaddr + + (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); + else + qref = (struct ath12k_reo_queue_ref *)dp->reoq_lut.vaddr + + (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); qref->info0 = u32_encode_bits(lower_32_bits(paddr), BUFFER_ADDR_INFO0_ADDR); @@ -761,15 +768,22 @@ static void ath12k_peer_rx_tid_qref_reset(struct ath12k_base *ab, u16 peer_id, u { struct ath12k_reo_queue_ref *qref; struct ath12k_dp *dp = &ab->dp; + bool ml_peer = false; if (!ab->hw_params->reoq_lut_support) return; - /* TODO: based on ML peer or not, select the LUT. below assumes non - * ML peer - */ - qref = (struct ath12k_reo_queue_ref *)dp->reoq_lut.vaddr + - (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); + if (peer_id & ATH12K_PEER_ML_ID_VALID) { + peer_id &= ~ATH12K_PEER_ML_ID_VALID; + ml_peer = true; + } + + if (ml_peer) + qref = (struct ath12k_reo_queue_ref *)dp->ml_reoq_lut.vaddr + + (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); + else + qref = (struct ath12k_reo_queue_ref *)dp->reoq_lut.vaddr + + (peer_id * (IEEE80211_NUM_TIDS + 1) + tid); qref->info0 = u32_encode_bits(0, BUFFER_ADDR_INFO0_ADDR); qref->info1 = u32_encode_bits(0, BUFFER_ADDR_INFO1_ADDR) | @@ -802,7 +816,10 @@ void ath12k_dp_rx_peer_tid_delete(struct ath12k *ar, rx_tid->vaddr = NULL; } - ath12k_peer_rx_tid_qref_reset(ar->ab, peer->peer_id, tid); + if (peer->mlo) + ath12k_peer_rx_tid_qref_reset(ar->ab, peer->ml_id, tid); + else + ath12k_peer_rx_tid_qref_reset(ar->ab, peer->peer_id, tid); rx_tid->active = false; } @@ -945,7 +962,8 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ return 0; } - if (ab->hw_params->reoq_lut_support && !dp->reoq_lut.vaddr) { + if 
(ab->hw_params->reoq_lut_support && + (!dp->reoq_lut.vaddr || !dp->ml_reoq_lut.vaddr)) { spin_unlock_bh(&ab->base_lock); ath12k_warn(ab, "reo qref table is not setup\n"); return -EINVAL; @@ -1026,7 +1044,11 @@ int ath12k_dp_rx_peer_tid_setup(struct ath12k *ar, const u8 *peer_mac, int vdev_ /* Update the REO queue LUT at the corresponding peer id * and tid with qaddr. */ - ath12k_peer_rx_tid_qref_setup(ab, peer->peer_id, tid, paddr); + if (peer->mlo) + ath12k_peer_rx_tid_qref_setup(ab, peer->ml_id, tid, paddr); + else + ath12k_peer_rx_tid_qref_setup(ab, peer->peer_id, tid, paddr); + spin_unlock_bh(&ab->base_lock); } else { spin_unlock_bh(&ab->base_lock); diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 25905498e8fb..5763c5a40cfc 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -396,9 +396,11 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, /* the assoc link is considered primary for now */ peer->primary_link = arsta->is_assoc_link; + peer->mlo = true; } else { peer->ml_id = ATH12K_MLO_PEER_ID_INVALID; peer->primary_link = true; + peer->mlo = false; } } diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index a39e943bd66b..7e6231cb2b52 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -46,7 +46,7 @@ struct ath12k_peer { struct ppdu_user_delayba ppdu_stats_delayba; bool delayba_flag; bool is_authorized; - + bool mlo; /* protected by ab->data_lock */ bool dp_setup_done; From aaac8850a07f9072ed62f54b0e5fcb14c8e0d044 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Thu, 21 Nov 2024 17:58:04 +0200 Subject: [PATCH 0053/1386] wifi: ath12k: modify chanctx iterators for MLO Currently ath12k's chanctx iterator functions use deflink of given ahvif and bss_conf of corresponding vif to make sure the iterator returns intended vif. 
An ML vif can have multiple affiliated links each having its own channel context, hence iterate through the links of the given ahvif and use the link objects (arvif and link_conf) to make sure the chan ctx iterator returns intended link of the given vif. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-7-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 80 +++++++++++++++++++-------- 1 file changed, 56 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 01932aeab4f3..956d43520510 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8309,19 +8309,32 @@ ath12k_mac_change_chanctx_cnt_iter(void *data, u8 *mac, { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_mac_change_chanctx_arg *arg = data; + struct ieee80211_bss_conf *link_conf; struct ath12k_link_vif *arvif; + unsigned long links_map; + u8 link_id; lockdep_assert_wiphy(ahvif->ah->hw->wiphy); - arvif = &ahvif->deflink; + links_map = ahvif->links_map; + for_each_set_bit(link_id, &links_map, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(ahvif->ah->hw->wiphy, ahvif->link[link_id]); + if (WARN_ON(!arvif)) + continue; - if (arvif->ar != arg->ar) - return; + if (arvif->ar != arg->ar) + continue; - if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != arg->ctx) - return; + link_conf = wiphy_dereference(ahvif->ah->hw->wiphy, + vif->link_conf[link_id]); + if (WARN_ON(!link_conf)) + continue; - arg->n_vifs++; + if (rcu_access_pointer(link_conf->chanctx_conf) != arg->ctx) + continue; + + arg->n_vifs++; + } } static void @@ -8330,27 +8343,41 @@ ath12k_mac_change_chanctx_fill_iter(void *data, u8 *mac, { struct ath12k_vif *ahvif = 
ath12k_vif_to_ahvif(vif); struct ath12k_mac_change_chanctx_arg *arg = data; + struct ieee80211_bss_conf *link_conf; struct ieee80211_chanctx_conf *ctx; struct ath12k_link_vif *arvif; + unsigned long links_map; + u8 link_id; lockdep_assert_wiphy(ahvif->ah->hw->wiphy); - arvif = &ahvif->deflink; + links_map = ahvif->links_map; + for_each_set_bit(link_id, &links_map, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(ahvif->ah->hw->wiphy, ahvif->link[link_id]); + if (WARN_ON(!arvif)) + continue; - if (arvif->ar != arg->ar) - return; + if (arvif->ar != arg->ar) + continue; - ctx = rcu_access_pointer(vif->bss_conf.chanctx_conf); - if (ctx != arg->ctx) - return; + link_conf = wiphy_dereference(ahvif->ah->hw->wiphy, + vif->link_conf[arvif->link_id]); + if (WARN_ON(!link_conf)) + continue; - if (WARN_ON(arg->next_vif == arg->n_vifs)) - return; + ctx = rcu_access_pointer(link_conf->chanctx_conf); + if (ctx != arg->ctx) + continue; - arg->vifs[arg->next_vif].vif = vif; - arg->vifs[arg->next_vif].old_ctx = ctx; - arg->vifs[arg->next_vif].new_ctx = ctx; - arg->next_vif++; + if (WARN_ON(arg->next_vif == arg->n_vifs)) + return; + + arg->vifs[arg->next_vif].vif = vif; + arg->vifs[arg->next_vif].old_ctx = ctx; + arg->vifs[arg->next_vif].new_ctx = ctx; + arg->vifs[arg->next_vif].link_conf = link_conf; + arg->next_vif++; + } } static u32 ath12k_mac_nlwidth_to_wmiwidth(enum nl80211_chan_width width) @@ -8410,10 +8437,12 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, int n_vifs) { struct ath12k_wmi_vdev_up_params params = {}; + struct ieee80211_bss_conf *link_conf; struct ath12k_base *ab = ar->ab; struct ath12k_link_vif *arvif; struct ieee80211_vif *vif; struct ath12k_vif *ahvif; + u8 link_id; int ret; int i; bool monitor_vif = false; @@ -8423,7 +8452,10 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, for (i = 0; i < n_vifs; i++) { vif = vifs[i].vif; ahvif = ath12k_vif_to_ahvif(vif); - arvif = &ahvif->deflink; + link_conf = vifs[i].link_conf; + link_id = link_conf->link_id; 
+ arvif = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahvif->link[link_id]); if (vif->type == NL80211_IFTYPE_MONITOR) monitor_vif = true; @@ -8476,13 +8508,13 @@ ath12k_mac_update_vif_chan(struct ath12k *ar, params.aid = ahvif->aid; params.bssid = arvif->bssid; if (vif->mbssid_tx_vif) { - struct ath12k_vif *ahvif = + struct ath12k_vif *tx_ahvif = ath12k_vif_to_ahvif(vif->mbssid_tx_vif); - struct ath12k_link_vif *arvif = &ahvif->deflink; + struct ath12k_link_vif *tx_arvif = &tx_ahvif->deflink; - params.tx_bssid = arvif->bssid; - params.nontx_profile_idx = vif->bss_conf.bssid_index; - params.nontx_profile_cnt = 1 << vif->bss_conf.bssid_indicator; + params.tx_bssid = tx_arvif->bssid; + params.nontx_profile_idx = link_conf->bssid_index; + params.nontx_profile_cnt = 1 << link_conf->bssid_indicator; } ret = ath12k_wmi_vdev_up(arvif->ar, &params); if (ret) { From 3952657848c035855007f7a430a753e123935b3a Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:58:05 +0200 Subject: [PATCH 0054/1386] wifi: ath12k: Use mac80211 vif's link_conf instead of bss_conf Currently mac80211 vif's bss_conf is used to fetch any vif related configurations in driver but with MLO multiple links are affiliated to a vif and corresponding link configs are present in vif->link_conf[]. Fetch link_conf for corresponding link from vif and use the same for configurations. Add ath12k_mac_get_link_bss_conf() helper to fetch link_conf from arvif.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121155806.1862733-8-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 149 ++++++++++++++++++++++---- drivers/net/wireless/ath/ath12k/wmi.c | 16 ++- 2 files changed, 141 insertions(+), 24 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 956d43520510..827a1700e8ba 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -501,6 +501,24 @@ static int ath12k_mac_vif_link_chan(struct ieee80211_vif *vif, u8 link_id, return 0; } +static struct ieee80211_bss_conf * +ath12k_mac_get_link_bss_conf(struct ath12k_link_vif *arvif) +{ + struct ieee80211_vif *vif = arvif->ahvif->vif; + struct ieee80211_bss_conf *link_conf; + struct ath12k *ar = arvif->ar; + + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + if (arvif->link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + link_conf = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + vif->link_conf[arvif->link_id]); + + return link_conf; +} + static bool ath12k_mac_bitrate_is_cck(int bitrate) { switch (bitrate) { @@ -1498,7 +1516,7 @@ static void ath12k_mac_set_arvif_ies(struct ath12k_link_vif *arvif, struct sk_bu static int ath12k_mac_setup_bcn_tmpl_ema(struct ath12k_link_vif *arvif) { struct ath12k_vif *ahvif = arvif->ahvif; - struct ieee80211_bss_conf *bss_conf = &ahvif->vif->bss_conf; + struct ieee80211_bss_conf *bss_conf; struct ath12k_wmi_bcn_tmpl_ema_arg ema_args; struct ieee80211_ema_beacons *beacons; struct ath12k_link_vif *tx_arvif; @@ -1507,6 +1525,14 @@ static int ath12k_mac_setup_bcn_tmpl_ema(struct ath12k_link_vif *arvif) int ret = 0; u8 i; + bss_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!bss_conf) { + 
ath12k_warn(arvif->ar->ab, + "failed to get link bss conf to update bcn tmpl for vif %pM link %u\n", + ahvif->vif->addr, arvif->link_id); + return -ENOLINK; + } + tx_ahvif = ath12k_vif_to_ahvif(ahvif->vif->mbssid_tx_vif); tx_arvif = &tx_ahvif->deflink; beacons = ieee80211_beacon_get_template_ema_list(ath12k_ar_to_hw(tx_arvif->ar), @@ -1553,6 +1579,7 @@ static int ath12k_mac_setup_bcn_tmpl(struct ath12k_link_vif *arvif) { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_bss_conf *link_conf; struct ath12k_link_vif *tx_arvif = arvif; struct ath12k *ar = arvif->ar; struct ath12k_base *ab = ar->ab; @@ -1565,13 +1592,20 @@ static int ath12k_mac_setup_bcn_tmpl(struct ath12k_link_vif *arvif) if (ahvif->vdev_type != WMI_VDEV_TYPE_AP) return 0; + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf to set bcn tmpl for vif %pM link %u\n", + vif->addr, arvif->link_id); + return -ENOLINK; + } + if (vif->mbssid_tx_vif) { tx_ahvif = ath12k_vif_to_ahvif(vif->mbssid_tx_vif); tx_arvif = &tx_ahvif->deflink; if (tx_arvif != arvif && arvif->is_up) return 0; - if (vif->bss_conf.ema_ap) + if (link_conf->ema_ap) return ath12k_mac_setup_bcn_tmpl_ema(arvif); } @@ -1586,7 +1620,7 @@ static int ath12k_mac_setup_bcn_tmpl(struct ath12k_link_vif *arvif) ath12k_mac_set_arvif_ies(arvif, bcn, 0, NULL); } else { ath12k_mac_set_arvif_ies(arvif, bcn, - ahvif->vif->bss_conf.bssid_index, + link_conf->bssid_index, &nontx_profile_found); if (!nontx_profile_found) ath12k_warn(ab, @@ -1762,6 +1796,7 @@ static void ath12k_peer_assoc_h_basic(struct ath12k *ar, struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct ieee80211_hw *hw = ath12k_ar_to_hw(ar); + struct ieee80211_bss_conf *bss_conf; u32 aid; lockdep_assert_wiphy(hw->wiphy); @@ -1778,7 +1813,15 @@ static void 
ath12k_peer_assoc_h_basic(struct ath12k *ar, /* TODO: STA WAR in ath10k for listen interval required? */ arg->peer_listen_intval = hw->conf.listen_interval; arg->peer_nss = 1; - arg->peer_caps = vif->bss_conf.assoc_capability; + + bss_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!bss_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf in peer assoc for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; + } + + arg->peer_caps = bss_conf->assoc_capability; } static void ath12k_peer_assoc_h_crypto(struct ath12k *ar, @@ -1788,7 +1831,7 @@ static void ath12k_peer_assoc_h_crypto(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - struct ieee80211_bss_conf *info = &vif->bss_conf; + struct ieee80211_bss_conf *info; struct cfg80211_chan_def def; struct cfg80211_bss *bss; struct ieee80211_hw *hw = ath12k_ar_to_hw(ar); @@ -1797,6 +1840,13 @@ static void ath12k_peer_assoc_h_crypto(struct ath12k *ar, lockdep_assert_wiphy(hw->wiphy); + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf for peer assoc crypto for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; + } + if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) return; @@ -2176,6 +2226,7 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); const struct ieee80211_sta_he_cap *he_cap = &sta->deflink.he_cap; + struct ieee80211_bss_conf *link_conf; int i; u8 ampdu_factor, max_nss; u8 rx_mcs_80 = IEEE80211_HE_MCS_NOT_SUPPORTED; @@ -2184,6 +2235,13 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, bool support_160; u16 v; + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf in peer assoc he for vif %pM link %u", + 
vif->addr, arvif->link_id); + return; + } + if (!he_cap->has_he) return; @@ -2227,7 +2285,7 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, sizeof(he_cap->he_cap_elem.mac_cap_info)); memcpy(&arg->peer_he_cap_phyinfo, he_cap->he_cap_elem.phy_cap_info, sizeof(he_cap->he_cap_elem.phy_cap_info)); - arg->peer_he_ops = vif->bss_conf.he_oper.params; + arg->peer_he_ops = link_conf->he_oper.params; /* the top most byte is used to indicate BSS color info */ arg->peer_he_ops &= 0xffffff; @@ -3133,6 +3191,7 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar, struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_hw *hw = ath12k_ar_to_hw(ar); const struct ieee80211_supported_band *sband; + struct ieee80211_bss_conf *bss_conf; u8 basic_rate_idx; int hw_rate_code; u32 vdev_param; @@ -3141,8 +3200,15 @@ static void ath12k_recalculate_mgmt_rate(struct ath12k *ar, lockdep_assert_wiphy(hw->wiphy); + bss_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!bss_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf in mgmt rate calc for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; + } + sband = hw->wiphy->bands[def->chan->band]; - basic_rate_idx = ffs(vif->bss_conf.basic_rates) - 1; + basic_rate_idx = ffs(bss_conf->basic_rates) - 1; bitrate = sband->bitrates[basic_rate_idx].bitrate; hw_rate_code = ath12k_mac_get_rate_hw_value(bitrate); @@ -3226,6 +3292,7 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); unsigned long links = ahvif->links_map; + struct ieee80211_bss_conf *info; struct ath12k_link_vif *arvif; struct ath12k *ar; u8 link_id; @@ -3246,10 +3313,15 @@ static void ath12k_mac_op_vif_cfg_changed(struct ieee80211_hw *hw, ar = arvif->ar; - if (vif->cfg.assoc) - ath12k_bss_assoc(ar, arvif, &vif->bss_conf); - else + if (vif->cfg.assoc) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) + continue; + + ath12k_bss_assoc(ar, arvif, info); 
+ } else { ath12k_bss_disassoc(ar, arvif); + } } } } @@ -3260,6 +3332,7 @@ static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) struct ieee80211_vif *vif = arvif->ahvif->vif; struct ieee80211_conf *conf = &ath12k_ar_to_hw(ar)->conf; enum wmi_sta_powersave_param param; + struct ieee80211_bss_conf *info; enum wmi_sta_ps_mode psmode; int ret; int timeout; @@ -3277,8 +3350,15 @@ static void ath12k_mac_vif_setup_ps(struct ath12k_link_vif *arvif) timeout = conf->dynamic_ps_timeout; if (timeout == 0) { + info = ath12k_mac_get_link_bss_conf(arvif); + if (!info) { + ath12k_warn(ar->ab, "unable to access bss link conf in setup ps for vif %pM link %u\n", + vif->addr, arvif->link_id); + return; + } + /* firmware doesn't like 0 */ - timeout = ieee80211_tu_to_usec(vif->bss_conf.beacon_int) / 1000; + timeout = ieee80211_tu_to_usec(info->beacon_int) / 1000; } ret = ath12k_wmi_set_sta_ps_param(ar, arvif->vdev_id, param, @@ -3389,8 +3469,8 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, if (changed & BSS_CHANGED_BEACON_ENABLED) { ath12k_control_beaconing(arvif, info); - if (arvif->is_up && vif->bss_conf.he_support && - vif->bss_conf.he_oper.params) { + if (arvif->is_up && info->he_support && + info->he_oper.params) { /* TODO: Extend to support 1024 BA Bitmap size */ ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, WMI_VDEV_PARAM_BA_MODE, @@ -3401,7 +3481,7 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, arvif->vdev_id); param_id = WMI_VDEV_PARAM_HEOPS_0_31; - param_value = vif->bss_conf.he_oper.params; + param_value = info->he_oper.params; ret = ath12k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id, param_id, param_value); ath12k_dbg(ar->ab, ATH12K_DBG_MAC, @@ -3493,12 +3573,12 @@ static void ath12k_mac_bss_info_changed(struct ath12k *ar, if (changed & BSS_CHANGED_MCAST_RATE && !ath12k_mac_vif_link_chan(vif, arvif->link_id, &def)) { band = def.chan->band; - mcast_rate = vif->bss_conf.mcast_rate[band]; + mcast_rate = info->mcast_rate[band]; 
if (mcast_rate > 0) rateidx = mcast_rate - 1; else - rateidx = ffs(vif->bss_conf.basic_rates) - 1; + rateidx = ffs(info->basic_rates) - 1; if (ar->pdev->cap.supported_bands & WMI_HOST_WLAN_5G_CAP) rateidx += ATH12K_MAC_FIRST_OFDM_RATE_IDX; @@ -4278,6 +4358,7 @@ static int ath12k_mac_set_key(struct ath12k *ar, enum set_key_cmd cmd, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ieee80211_bss_conf *link_conf; struct ieee80211_sta *sta = NULL; struct ath12k_base *ab = ar->ab; struct ath12k_peer *peer; @@ -4294,12 +4375,19 @@ static int ath12k_mac_set_key(struct ath12k *ar, enum set_key_cmd cmd, if (test_bit(ATH12K_FLAG_HW_CRYPTO_DISABLED, &ab->dev_flags)) return 1; + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ab, "unable to access bss link conf in set key for vif %pM link %u\n", + vif->addr, arvif->link_id); + return -ENOLINK; + } + if (sta) peer_addr = arsta->addr; else if (ahvif->vdev_type == WMI_VDEV_TYPE_STA) - peer_addr = vif->bss_conf.bssid; + peer_addr = link_conf->bssid; else - peer_addr = vif->addr; + peer_addr = link_conf->addr; key->hw_key_idx = key->keyidx; @@ -7086,6 +7174,7 @@ static int ath12k_mac_setup_vdev_params_mbssid(struct ath12k_link_vif *arvif, { struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *tx_vif = ahvif->vif->mbssid_tx_vif; + struct ieee80211_bss_conf *link_conf; struct ath12k *ar = arvif->ar; struct ath12k_link_vif *tx_arvif; struct ath12k_vif *tx_ahvif; @@ -7093,10 +7182,17 @@ static int ath12k_mac_setup_vdev_params_mbssid(struct ath12k_link_vif *arvif, if (!tx_vif) return 0; + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf in set mbssid params for vif %pM link %u\n", + ahvif->vif->addr, arvif->link_id); + return -ENOLINK; + } + tx_ahvif = ath12k_vif_to_ahvif(tx_vif); tx_arvif = &tx_ahvif->deflink; - if (ahvif->vif->bss_conf.nontransmitted) { + if 
(link_conf->nontransmitted) { if (ar->ah->hw->wiphy != ieee80211_vif_to_wdev(tx_vif)->wiphy) return -EINVAL; @@ -7108,7 +7204,7 @@ static int ath12k_mac_setup_vdev_params_mbssid(struct ath12k_link_vif *arvif, return -EINVAL; } - if (ahvif->vif->bss_conf.ema_ap) + if (link_conf->ema_ap) *flags |= WMI_VDEV_MBSSID_FLAGS_EMA_MODE; return 0; @@ -7476,7 +7572,7 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) break; } - arvif->txpower = vif->bss_conf.txpower; + arvif->txpower = link_conf->txpower; ret = ath12k_mac_txpower_recalc(ar); if (ret) goto err_peer_del; @@ -8165,11 +8261,18 @@ ath12k_mac_vdev_start_restart(struct ath12k_link_vif *arvif, struct wmi_vdev_start_req_arg arg = {}; const struct cfg80211_chan_def *chandef = &ctx->def; struct ath12k_vif *ahvif = arvif->ahvif; - int he_support = ahvif->vif->bss_conf.he_support; + struct ieee80211_bss_conf *link_conf; int ret; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + link_conf = ath12k_mac_get_link_bss_conf(arvif); + if (!link_conf) { + ath12k_warn(ar->ab, "unable to access bss link conf in vdev start for vif %pM link %u\n", + ahvif->vif->addr, arvif->link_id); + return -ENOLINK; + } + reinit_completion(&ar->vdev_setup_done); arg.vdev_id = arvif->vdev_id; @@ -8221,7 +8324,7 @@ ath12k_mac_vdev_start_restart(struct ath12k_link_vif *arvif, spin_unlock_bh(&ab->base_lock); /* TODO: Notify if secondary 80Mhz also needs radar detection */ - if (he_support) { + if (link_conf->he_support) { ret = ath12k_set_he_mu_sounding_mode(ar, arvif); if (ret) { ath12k_warn(ar->ab, "failed to set he mode vdev %i\n", diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 50ed7e72f178..402ae477da61 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6854,6 +6854,7 @@ ath12k_wmi_process_csa_switch_count_event(struct ath12k_base *ab, const u32 *vdev_ids) { int i; + struct ieee80211_bss_conf *conf; struct ath12k_link_vif 
*arvif; struct ath12k_vif *ahvif; @@ -6872,7 +6873,20 @@ ath12k_wmi_process_csa_switch_count_event(struct ath12k_base *ab, } ahvif = arvif->ahvif; - if (arvif->is_up && ahvif->vif->bss_conf.csa_active) + if (arvif->link_id > IEEE80211_MLD_MAX_NUM_LINKS) { + ath12k_warn(ab, "Invalid CSA switch count even link id: %d\n", + arvif->link_id); + continue; + } + + conf = rcu_dereference(ahvif->vif->link_conf[arvif->link_id]); + if (!conf) { + ath12k_warn(ab, "unable to access bss link conf in process csa for vif %pM link %u\n", + ahvif->vif->addr, arvif->link_id); + continue; + } + + if (arvif->is_up && conf->csa_active) ieee80211_csa_finish(ahvif->vif, 0); } rcu_read_unlock(); From 63fdb90642eec9439dd13c93c4b5c184b60a50cd Mon Sep 17 00:00:00 2001 From: Sriram R Date: Thu, 21 Nov 2024 17:58:06 +0200 Subject: [PATCH 0055/1386] wifi: ath12k: Use mac80211 sta's link_sta instead of deflink Currently, mac80211's struct ieee80211_sta deflink is used to fetch any sta-related configuration in the driver. With MLO, multiple link stas (struct ieee80211_link_sta) are affiliated to an ML sta, and the corresponding link configs are present in sta->link[]. Fetch the link sta of the corresponding link from the ML sta and use it for configuration. Add the ath12k_mac_get_link_sta() helper to fetch the ieee80211_link_sta from arsta. However, as ath12k_mac_op_sta_rc_update() is called in atomic context, the helper cannot be used there; rcu_dereference() has to be called directly instead.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Co-developed-by: Rameshkumar Sundaram Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241121155806.1862733-9-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 292 ++++++++++++++++++-------- 1 file changed, 209 insertions(+), 83 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 827a1700e8ba..60702bf07141 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -519,6 +519,23 @@ ath12k_mac_get_link_bss_conf(struct ath12k_link_vif *arvif) return link_conf; } +static struct ieee80211_link_sta *ath12k_mac_get_link_sta(struct ath12k_link_sta *arsta) +{ + struct ath12k_sta *ahsta = arsta->ahsta; + struct ieee80211_sta *sta = ath12k_ahsta_to_sta(ahsta); + struct ieee80211_link_sta *link_sta; + + lockdep_assert_wiphy(ahsta->ahvif->ah->hw->wiphy); + + if (arsta->link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + link_sta = wiphy_dereference(ahsta->ahvif->ah->hw->wiphy, + sta->link[arsta->link_id]); + + return link_sta; +} + static bool ath12k_mac_bitrate_is_cck(int bitrate) { switch (bitrate) { @@ -1902,6 +1919,7 @@ static void ath12k_peer_assoc_h_rates(struct ath12k *ar, struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct wmi_rate_set_arg *rateset = &arg->peer_legacy_rates; + struct ieee80211_link_sta *link_sta; struct cfg80211_chan_def def; const struct ieee80211_supported_band *sband; const struct ieee80211_rate *rates; @@ -1916,9 +1934,16 @@ static void ath12k_peer_assoc_h_rates(struct ath12k *ar, if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) return; + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) 
{ + ath12k_warn(ar->ab, "unable to access link sta in peer assoc rates for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + band = def.chan->band; sband = hw->wiphy->bands[band]; - ratemask = sta->deflink.supp_rates[band]; + ratemask = link_sta->supp_rates[band]; ratemask &= arvif->bitrate_mask.control[band].legacy; rates = sband->bitrates; @@ -1965,7 +1990,8 @@ static void ath12k_peer_assoc_h_ht(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; + const struct ieee80211_sta_ht_cap *ht_cap; + struct ieee80211_link_sta *link_sta; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; @@ -1978,6 +2004,14 @@ static void ath12k_peer_assoc_h_ht(struct ath12k *ar, if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) return; + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc ht for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + ht_cap = &link_sta->ht_cap; if (!ht_cap->ht_supported) return; @@ -2001,7 +2035,7 @@ static void ath12k_peer_assoc_h_ht(struct ath12k *ar, if (ht_cap->cap & IEEE80211_HT_CAP_LDPC_CODING) arg->ldpc_flag = true; - if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) { + if (link_sta->bandwidth >= IEEE80211_STA_RX_BW_40) { arg->bw_40 = true; arg->peer_rate_caps |= WMI_HOST_RC_CW40_FLAG; } @@ -2051,7 +2085,7 @@ static void ath12k_peer_assoc_h_ht(struct ath12k *ar, arg->peer_ht_rates.rates[i] = i; } else { arg->peer_ht_rates.num_rates = n; - arg->peer_nss = min(sta->deflink.rx_nss, max_nss); + arg->peer_nss = min(link_sta->rx_nss, max_nss); } ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac ht peer %pM mcs cnt %d nss %d\n", @@ -2127,7 +2161,8 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, { struct ieee80211_vif *vif = 
ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_sta_vht_cap *vht_cap = &sta->deflink.vht_cap; + const struct ieee80211_sta_vht_cap *vht_cap; + struct ieee80211_link_sta *link_sta; struct cfg80211_chan_def def; enum nl80211_band band; const u16 *vht_mcs_mask; @@ -2141,6 +2176,14 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, if (WARN_ON(ath12k_mac_vif_link_chan(vif, arvif->link_id, &def))) return; + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc vht for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + vht_cap = &link_sta->vht_cap; if (!vht_cap->vht_supported) return; @@ -2173,10 +2216,10 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, (1U << (IEEE80211_HT_MAX_AMPDU_FACTOR + ampdu_factor)) - 1); - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) arg->bw_80 = true; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_160) arg->bw_160 = true; /* Calculate peer NSS capability from VHT capabilities if STA @@ -2190,7 +2233,7 @@ static void ath12k_peer_assoc_h_vht(struct ath12k *ar, vht_mcs_mask[i]) max_nss = i + 1; } - arg->peer_nss = min(sta->deflink.rx_nss, max_nss); + arg->peer_nss = min(link_sta->rx_nss, max_nss); arg->rx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.rx_highest); arg->rx_mcs_set = __le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); arg->tx_max_rate = __le16_to_cpu(vht_cap->vht_mcs.tx_highest); @@ -2225,8 +2268,9 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_sta_he_cap *he_cap = &sta->deflink.he_cap; + const struct ieee80211_sta_he_cap *he_cap; struct ieee80211_bss_conf *link_conf; + struct 
ieee80211_link_sta *link_sta; int i; u8 ampdu_factor, max_nss; u8 rx_mcs_80 = IEEE80211_HE_MCS_NOT_SUPPORTED; @@ -2242,6 +2286,14 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, return; } + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc he for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + he_cap = &link_sta->he_cap; if (!he_cap->has_he) return; @@ -2279,7 +2331,7 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, else max_nss = rx_mcs_80; - arg->peer_nss = min(sta->deflink.rx_nss, max_nss); + arg->peer_nss = min(link_sta->rx_nss, max_nss); memcpy(&arg->peer_he_cap_macinfo, he_cap->he_cap_elem.mac_cap_info, sizeof(he_cap->he_cap_elem.mac_cap_info)); @@ -2306,10 +2358,10 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK); if (ampdu_factor) { - if (sta->deflink.vht_cap.vht_supported) + if (link_sta->vht_cap.vht_supported) arg->peer_max_mpdu = (1 << (IEEE80211_HE_VHT_MAX_AMPDU_FACTOR + ampdu_factor)) - 1; - else if (sta->deflink.ht_cap.ht_supported) + else if (link_sta->ht_cap.ht_supported) arg->peer_max_mpdu = (1 << (IEEE80211_HE_HT_MAX_AMPDU_FACTOR + ampdu_factor)) - 1; } @@ -2350,7 +2402,7 @@ static void ath12k_peer_assoc_h_he(struct ath12k *ar, if (he_cap->he_cap_elem.mac_cap_info[0] & IEEE80211_HE_MAC_CAP0_TWT_REQ) arg->twt_requester = true; - switch (sta->deflink.bandwidth) { + switch (link_sta->bandwidth) { case IEEE80211_STA_RX_BW_160: if (he_cap->he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) { @@ -2390,7 +2442,8 @@ static void ath12k_peer_assoc_h_he_6ghz(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_sta_he_cap *he_cap = &sta->deflink.he_cap; + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_link_sta *link_sta; 
struct cfg80211_chan_def def; enum nl80211_band band; u8 ampdu_factor, mpdu_density; @@ -2400,22 +2453,31 @@ static void ath12k_peer_assoc_h_he_6ghz(struct ath12k *ar, band = def.chan->band; - if (!arg->he_flag || band != NL80211_BAND_6GHZ || !sta->deflink.he_6ghz_capa.capa) + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc he 6ghz for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + he_cap = &link_sta->he_cap; + + if (!arg->he_flag || band != NL80211_BAND_6GHZ || !link_sta->he_6ghz_capa.capa) return; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) arg->bw_40 = true; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) arg->bw_80 = true; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_160) arg->bw_160 = true; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_320) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_320) arg->bw_320 = true; - arg->peer_he_caps_6ghz = le16_to_cpu(sta->deflink.he_6ghz_capa.capa); + arg->peer_he_caps_6ghz = le16_to_cpu(link_sta->he_6ghz_capa.capa); mpdu_density = u32_get_bits(arg->peer_he_caps_6ghz, IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START); @@ -2459,10 +2521,23 @@ static void ath12k_peer_assoc_h_smps(struct ath12k_link_sta *arsta, struct ath12k_wmi_peer_assoc_arg *arg) { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_he_6ghz_capa *he_6ghz_capa = &sta->deflink.he_6ghz_capa; - const struct ieee80211_sta_ht_cap *ht_cap = &sta->deflink.ht_cap; + const struct ieee80211_he_6ghz_capa *he_6ghz_capa; + struct ath12k_link_vif *arvif = arsta->arvif; + const struct ieee80211_sta_ht_cap *ht_cap; + struct ieee80211_link_sta *link_sta; + struct ath12k *ar = arvif->ar; int smps; + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + 
ath12k_warn(ar->ab, "unable to access link sta in peer assoc he for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + he_6ghz_capa = &link_sta->he_6ghz_capa; + ht_cap = &link_sta->ht_cap; + if (!ht_cap->ht_supported && !he_6ghz_capa->capa) return; @@ -2588,17 +2663,17 @@ err: return ret; } -static bool ath12k_mac_sta_has_ofdm_only(struct ieee80211_sta *sta) +static bool ath12k_mac_sta_has_ofdm_only(struct ieee80211_link_sta *sta) { - return sta->deflink.supp_rates[NL80211_BAND_2GHZ] >> + return sta->supp_rates[NL80211_BAND_2GHZ] >> ATH12K_MAC_FIRST_OFDM_RATE_IDX; } static enum wmi_phy_mode ath12k_mac_get_phymode_vht(struct ath12k *ar, - struct ieee80211_sta *sta) + struct ieee80211_link_sta *link_sta) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) { - switch (sta->deflink.vht_cap.cap & + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_160) { + switch (link_sta->vht_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: return MODE_11AC_VHT160; @@ -2610,74 +2685,74 @@ static enum wmi_phy_mode ath12k_mac_get_phymode_vht(struct ath12k *ar, } } - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) return MODE_11AC_VHT80; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) return MODE_11AC_VHT40; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_20) return MODE_11AC_VHT20; return MODE_UNKNOWN; } static enum wmi_phy_mode ath12k_mac_get_phymode_he(struct ath12k *ar, - struct ieee80211_sta *sta) + struct ieee80211_link_sta *link_sta) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) { - if (sta->deflink.he_cap.he_cap_elem.phy_cap_info[0] & + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_160) { + if (link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) return MODE_11AX_HE160; - 
else if (sta->deflink.he_cap.he_cap_elem.phy_cap_info[0] & + else if (link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) return MODE_11AX_HE80_80; /* not sure if this is a valid case? */ return MODE_11AX_HE160; } - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) return MODE_11AX_HE80; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) return MODE_11AX_HE40; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_20) return MODE_11AX_HE20; return MODE_UNKNOWN; } static enum wmi_phy_mode ath12k_mac_get_phymode_eht(struct ath12k *ar, - struct ieee80211_sta *sta) + struct ieee80211_link_sta *link_sta) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_320) - if (sta->deflink.eht_cap.eht_cap_elem.phy_cap_info[0] & + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_320) + if (link_sta->eht_cap.eht_cap_elem.phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) return MODE_11BE_EHT320; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_160) { - if (sta->deflink.he_cap.he_cap_elem.phy_cap_info[0] & + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_160) { + if (link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) return MODE_11BE_EHT160; - if (sta->deflink.he_cap.he_cap_elem.phy_cap_info[0] & + if (link_sta->he_cap.he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) return MODE_11BE_EHT80_80; ath12k_warn(ar->ab, "invalid EHT PHY capability info for 160 Mhz: %d\n", - sta->deflink.he_cap.he_cap_elem.phy_cap_info[0]); + link_sta->he_cap.he_cap_elem.phy_cap_info[0]); return MODE_11BE_EHT160; } - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) return MODE_11BE_EHT80; - if (sta->deflink.bandwidth == 
IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) return MODE_11BE_EHT40; - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_20) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_20) return MODE_11BE_EHT20; return MODE_UNKNOWN; @@ -2688,6 +2763,7 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, struct ath12k_link_sta *arsta, struct ath12k_wmi_peer_assoc_arg *arg) { + struct ieee80211_link_sta *link_sta; struct cfg80211_chan_def def; enum nl80211_band band; const u8 *ht_mcs_mask; @@ -2706,33 +2782,40 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, ht_mcs_mask = arvif->bitrate_mask.control[band].ht_mcs; vht_mcs_mask = arvif->bitrate_mask.control[band].vht_mcs; + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc he for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + switch (band) { case NL80211_BAND_2GHZ: - if (sta->deflink.eht_cap.has_eht) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->eht_cap.has_eht) { + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11BE_EHT40_2G; else phymode = MODE_11BE_EHT20_2G; - } else if (sta->deflink.he_cap.has_he) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_80) + } else if (link_sta->he_cap.has_he) { + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_80) phymode = MODE_11AX_HE80_2G; - else if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + else if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11AX_HE40_2G; else phymode = MODE_11AX_HE20_2G; - } else if (sta->deflink.vht_cap.vht_supported && + } else if (link_sta->vht_cap.vht_supported && !ath12k_peer_assoc_h_vht_masked(vht_mcs_mask)) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11AC_VHT40; else phymode = MODE_11AC_VHT20; - } else if (sta->deflink.ht_cap.ht_supported && + } else if 
(link_sta->ht_cap.ht_supported && !ath12k_peer_assoc_h_ht_masked(ht_mcs_mask)) { - if (sta->deflink.bandwidth == IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth == IEEE80211_STA_RX_BW_40) phymode = MODE_11NG_HT40; else phymode = MODE_11NG_HT20; - } else if (ath12k_mac_sta_has_ofdm_only(sta)) { + } else if (ath12k_mac_sta_has_ofdm_only(link_sta)) { phymode = MODE_11G; } else { phymode = MODE_11B; @@ -2741,16 +2824,16 @@ static void ath12k_peer_assoc_h_phymode(struct ath12k *ar, case NL80211_BAND_5GHZ: case NL80211_BAND_6GHZ: /* Check EHT first */ - if (sta->deflink.eht_cap.has_eht) { - phymode = ath12k_mac_get_phymode_eht(ar, sta); - } else if (sta->deflink.he_cap.has_he) { - phymode = ath12k_mac_get_phymode_he(ar, sta); - } else if (sta->deflink.vht_cap.vht_supported && + if (link_sta->eht_cap.has_eht) { + phymode = ath12k_mac_get_phymode_eht(ar, link_sta); + } else if (link_sta->he_cap.has_he) { + phymode = ath12k_mac_get_phymode_he(ar, link_sta); + } else if (link_sta->vht_cap.vht_supported && !ath12k_peer_assoc_h_vht_masked(vht_mcs_mask)) { - phymode = ath12k_mac_get_phymode_vht(ar, sta); - } else if (sta->deflink.ht_cap.ht_supported && + phymode = ath12k_mac_get_phymode_vht(ar, link_sta); + } else if (link_sta->ht_cap.ht_supported && !ath12k_peer_assoc_h_ht_masked(ht_mcs_mask)) { - if (sta->deflink.bandwidth >= IEEE80211_STA_RX_BW_40) + if (link_sta->bandwidth >= IEEE80211_STA_RX_BW_40) phymode = MODE_11NA_HT40; else phymode = MODE_11NA_HT20; @@ -2838,15 +2921,25 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, struct ath12k_wmi_peer_assoc_arg *arg) { struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - const struct ieee80211_sta_eht_cap *eht_cap = &sta->deflink.eht_cap; - const struct ieee80211_sta_he_cap *he_cap = &sta->deflink.he_cap; const struct ieee80211_eht_mcs_nss_supp_20mhz_only *bw_20; const struct ieee80211_eht_mcs_nss_supp_bw *bw; + const struct ieee80211_sta_eht_cap *eht_cap; + const struct ieee80211_sta_he_cap *he_cap; + 
struct ieee80211_link_sta *link_sta; u32 *rx_mcs, *tx_mcs; lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - if (!sta->deflink.he_cap.has_he || !eht_cap->has_eht) + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc eht for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + eht_cap = &link_sta->eht_cap; + he_cap = &link_sta->he_cap; + if (!he_cap->has_he || !eht_cap->has_eht) return; arg->eht_flag = true; @@ -2865,7 +2958,7 @@ static void ath12k_peer_assoc_h_eht(struct ath12k *ar, rx_mcs = arg->peer_eht_rx_mcs_set; tx_mcs = arg->peer_eht_tx_mcs_set; - switch (sta->deflink.bandwidth) { + switch (link_sta->bandwidth) { case IEEE80211_STA_RX_BW_320: bw = &eht_cap->eht_mcs_nss_supp.bw._320; ath12k_mac_set_eht_mcs(bw->rx_tx_mcs9_max_nss, @@ -4662,6 +4755,7 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); struct ath12k_wmi_peer_assoc_arg peer_arg; + struct ieee80211_link_sta *link_sta; int ret; struct cfg80211_chan_def def; enum nl80211_band band; @@ -4707,7 +4801,13 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, * fixed param. * Note that all other rates and NSS will be disabled for this peer. 
*/ - if (sta->deflink.vht_cap.vht_supported && num_vht_rates == 1) { + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in station assoc\n"); + return -EINVAL; + } + + if (link_sta->vht_cap.vht_supported && num_vht_rates == 1) { ret = ath12k_mac_set_peer_vht_fixed_rate(arvif, arsta, mask, band); if (ret) @@ -4721,8 +4821,7 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, return 0; ret = ath12k_setup_peer_smps(ar, arvif, arsta->addr, - &sta->deflink.ht_cap, - &sta->deflink.he_6ghz_capa); + &link_sta->ht_cap, &link_sta->he_6ghz_capa); if (ret) { ath12k_warn(ar->ab, "failed to setup peer SMPS for vdev %d: %d\n", arvif->vdev_id, ret); @@ -4766,6 +4865,7 @@ static int ath12k_mac_station_disassoc(struct ath12k *ar, static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) { + struct ieee80211_link_sta *link_sta; struct ath12k *ar; struct ath12k_link_vif *arvif; struct ieee80211_sta *sta; @@ -4900,7 +5000,14 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) * TODO: Check RATEMASK_CMDID to support auto rates selection * across HT/VHT and for multiple VHT MCS support. 
*/ - if (sta->deflink.vht_cap.vht_supported && num_vht_rates == 1) { + link_sta = ath12k_mac_get_link_sta(arsta); + if (!link_sta) { + ath12k_warn(ar->ab, "unable to access link sta in peer assoc he for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + + if (link_sta->vht_cap.vht_supported && num_vht_rates == 1) { ath12k_mac_set_peer_vht_fixed_rate(arvif, arsta, mask, band); } else { @@ -5603,10 +5710,23 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, spin_unlock_bh(&ar->ab->base_lock); + if (arsta->link_id >= IEEE80211_MLD_MAX_NUM_LINKS) { + rcu_read_unlock(); + return; + } + + link_sta = rcu_dereference(sta->link[arsta->link_id]); + if (!link_sta) { + rcu_read_unlock(); + ath12k_warn(ar->ab, "unable to access link sta in rc update for sta %pM link %u\n", + sta->addr, arsta->link_id); + return; + } + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac sta rc update for %pM changed %08x bw %d nss %d smps %d\n", - arsta->addr, changed, sta->deflink.bandwidth, sta->deflink.rx_nss, - sta->deflink.smps_mode); + arsta->addr, changed, link_sta->bandwidth, link_sta->rx_nss, + link_sta->smps_mode); spin_lock_bh(&ar->data_lock); @@ -5617,12 +5737,12 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, } if (changed & IEEE80211_RC_NSS_CHANGED) - arsta->nss = sta->deflink.rx_nss; + arsta->nss = link_sta->rx_nss; if (changed & IEEE80211_RC_SMPS_CHANGED) { smps = WMI_PEER_SMPS_PS_NONE; - switch (sta->deflink.smps_mode) { + switch (link_sta->smps_mode) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_OFF: smps = WMI_PEER_SMPS_PS_NONE; @@ -5634,8 +5754,8 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, smps = WMI_PEER_SMPS_DYNAMIC; break; default: - ath12k_warn(ar->ab, "Invalid smps %d in sta rc update for %pM\n", - sta->deflink.smps_mode, arsta->addr); + ath12k_warn(ar->ab, "Invalid smps %d in sta rc update for %pM link %u\n", + link_sta->smps_mode, arsta->addr, link_sta->link_id); smps = WMI_PEER_SMPS_PS_NONE; break; 
} @@ -9241,10 +9361,11 @@ static void ath12k_mac_set_bitrate_mask_iter(void *data, { struct ath12k_link_vif *arvif = data; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); - struct ath12k_link_sta *arsta = &ahsta->deflink; + struct ath12k_link_sta *arsta; struct ath12k *ar = arvif->ar; - if (arsta->arvif != arvif) + arsta = rcu_dereference(ahsta->link[arvif->link_id]); + if (!arsta || arsta->arvif != arvif) return; spin_lock_bh(&ar->data_lock); @@ -9259,11 +9380,16 @@ static void ath12k_mac_disable_peer_fixed_rate(void *data, { struct ath12k_link_vif *arvif = data; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); - struct ath12k_link_sta *arsta = &ahsta->deflink; + struct ath12k_link_sta *arsta; struct ath12k *ar = arvif->ar; int ret; - if (arsta->arvif != arvif) + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahsta->link[arvif->link_id]); + + if (!arsta || arsta->arvif != arvif) return; ret = ath12k_wmi_set_peer_param(ar, arsta->addr, From 144c6cd24b3556e6e7a14271cee57a42ebf97450 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:49 +0800 Subject: [PATCH 0056/1386] wifi: rtw89: 8922a: configure AP_LINK_PS if FW supports After FW v0.35.46.0, for AP mode, RTL8922A FW supports a new FW feature, called NOTIFY_AP_INFO, to notify driver information related to AP mode. And, one function of it is to monitor PS states of remote stations. Once one of them changes, FW will send a C2H event to tell driver. With this FW feature, we can declare AP_LINK_PS. For now, driver still needs to determine if a frame is ps-poll or U-APSD trigger. So, add the corresponding RX handling in driver, which activates only when at least one AP is running. 
Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 42 ++++++++++++ drivers/net/wireless/realtek/rtw89/core.h | 37 +++++++++++ drivers/net/wireless/realtek/rtw89/fw.c | 66 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/fw.h | 19 ++++++ drivers/net/wireless/realtek/rtw89/mac.c | 50 ++++++++++++++ drivers/net/wireless/realtek/rtw89/mac.h | 7 ++ drivers/net/wireless/realtek/rtw89/mac80211.c | 13 +++- drivers/net/wireless/realtek/rtw89/ser.c | 1 + 8 files changed, 234 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 34034f44c050..c99111df90a7 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2749,6 +2749,41 @@ static void rtw89_core_flush_ppdu_rx_queue(struct rtw89_dev *rtwdev, } } +static +void rtw89_core_rx_pkt_hdl(struct rtw89_dev *rtwdev, const struct sk_buff *skb, + const struct rtw89_rx_desc_info *desc) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct rtw89_sta_link *rtwsta_link; + struct ieee80211_sta *sta; + struct rtw89_sta *rtwsta; + u8 macid = desc->mac_id; + + if (!refcount_read(&rtwdev->refcount_ap_info)) + return; + + rcu_read_lock(); + + rtwsta_link = rtw89_assoc_link_rcu_dereference(rtwdev, macid); + if (!rtwsta_link) + goto out; + + rtwsta = rtwsta_link->rtwsta; + if (!test_bit(RTW89_REMOTE_STA_IN_PS, rtwsta->flags)) + goto out; + + sta = rtwsta_to_sta(rtwsta); + if (ieee80211_is_pspoll(hdr->frame_control)) + ieee80211_sta_pspoll(sta); + else if (ieee80211_has_pm(hdr->frame_control) && + (ieee80211_is_data_qos(hdr->frame_control) || + ieee80211_is_qos_nullfunc(hdr->frame_control))) + ieee80211_sta_uapsd_trigger(sta, ieee80211_get_tid(hdr)); + +out: + rcu_read_unlock(); +} + void rtw89_core_rx(struct rtw89_dev *rtwdev, struct rtw89_rx_desc_info 
*desc_info, struct sk_buff *skb) @@ -2771,6 +2806,7 @@ void rtw89_core_rx(struct rtw89_dev *rtwdev, rx_status = IEEE80211_SKB_RXCB(skb); memset(rx_status, 0, sizeof(*rx_status)); rtw89_core_update_rx_status(rtwdev, desc_info, rx_status); + rtw89_core_rx_pkt_hdl(rtwdev, skb, desc_info); if (desc_info->long_rxdesc && BIT(desc_info->frame_type) & PPDU_FILTER_BITMAP) skb_queue_tail(&ppdu_sts->rx_queue[band], skb); @@ -3748,6 +3784,8 @@ int rtw89_core_sta_link_disassoc(struct rtw89_dev *rtwdev, { const struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); + rtw89_assoc_link_clr(rtwsta_link); + if (vif->type == NL80211_IFTYPE_STATION) rtw89_fw_h2c_set_bcn_fltr_cfg(rtwdev, rtwvif_link, false); @@ -3883,6 +3921,7 @@ int rtw89_core_sta_link_assoc(struct rtw89_dev *rtwdev, rtw89_fw_h2c_set_bcn_fltr_cfg(rtwdev, rtwvif_link, true); } + rtw89_assoc_link_set(rtwsta_link); return ret; } @@ -5150,6 +5189,9 @@ static int rtw89_core_register_hw(struct rtw89_dev *rtwdev) if (RTW89_CHK_FW_FEATURE(BEACON_FILTER, &rtwdev->fw)) ieee80211_hw_set(hw, CONNECTION_MONITOR); + if (RTW89_CHK_FW_FEATURE(NOTIFY_AP_INFO, &rtwdev->fw)) + ieee80211_hw_set(hw, AP_LINK_PS); + hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | BIT(NL80211_IFTYPE_P2P_CLIENT) | diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index ecccb51184be..3e93b53fd67b 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4452,6 +4452,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_RFK_PRE_NOTIFY_V0, RTW89_FW_FEATURE_RFK_RXDCK_V0, RTW89_FW_FEATURE_NO_WOW_CPU_IO_RX, + RTW89_FW_FEATURE_NOTIFY_AP_INFO, }; struct rtw89_fw_suit { @@ -5596,6 +5597,9 @@ struct rtw89_dev { struct rtw89_rfe_data *rfe_data; enum rtw89_custid custid; + struct rtw89_sta_link __rcu *assoc_link_on_macid[RTW89_MAX_MAC_ID_NUM]; + refcount_t refcount_ap_info; + /* ensures exclusive access from mac80211 callbacks */ struct 
mutex mutex; struct list_head rtwvifs_list; @@ -5730,10 +5734,18 @@ static inline bool rtw89_vif_assign_link_is_valid(struct rtw89_vif_link **rtwvif for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) \ if (rtw89_vif_assign_link_is_valid(&(rtwvif_link), rtwvif, link_id)) +enum rtw89_sta_flags { + RTW89_REMOTE_STA_IN_PS, + + NUM_OF_RTW89_STA_FLAGS, +}; + struct rtw89_sta { struct rtw89_dev *rtwdev; struct rtw89_vif *rtwvif; + DECLARE_BITMAP(flags, NUM_OF_RTW89_STA_FLAGS); + bool disassoc; struct sk_buff_head roc_queue; @@ -5811,6 +5823,31 @@ u8 rtw89_sta_link_inst_get_index(struct rtw89_sta_link *rtwsta_link) return rtwsta_link - rtwsta->links_inst; } +static inline void rtw89_assoc_link_set(struct rtw89_sta_link *rtwsta_link) +{ + struct rtw89_sta *rtwsta = rtwsta_link->rtwsta; + struct rtw89_dev *rtwdev = rtwsta->rtwdev; + + rcu_assign_pointer(rtwdev->assoc_link_on_macid[rtwsta_link->mac_id], + rtwsta_link); +} + +static inline void rtw89_assoc_link_clr(struct rtw89_sta_link *rtwsta_link) +{ + struct rtw89_sta *rtwsta = rtwsta_link->rtwsta; + struct rtw89_dev *rtwdev = rtwsta->rtwdev; + + rcu_assign_pointer(rtwdev->assoc_link_on_macid[rtwsta_link->mac_id], + NULL); + synchronize_rcu(); +} + +static inline struct rtw89_sta_link * +rtw89_assoc_link_rcu_dereference(struct rtw89_dev *rtwdev, u8 macid) +{ + return rcu_dereference(rtwdev->assoc_link_on_macid[macid]); +} + static inline int rtw89_hci_tx_write(struct rtw89_dev *rtwdev, struct rtw89_core_tx_request *tx_req) { diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 2191c037d72e..7bda9aab382c 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -728,6 +728,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 22, 0, WOW_REASON_V1), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 31, 0, RFK_PRE_NOTIFY_V0), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 42, 0, RFK_RXDCK_V0), + 
__CFG_FW_FEAT(RTL8922A, ge, 0, 35, 46, 0, NOTIFY_AP_INFO), }; static void rtw89_fw_iterate_feature_cfg(struct rtw89_fw_info *fw, @@ -8164,6 +8165,71 @@ int rtw89_fw_h2c_mrc_upd_duration(struct rtw89_dev *rtwdev, return 0; } +static int rtw89_fw_h2c_ap_info(struct rtw89_dev *rtwdev, bool en) +{ + struct rtw89_h2c_ap_info *h2c; + u32 len = sizeof(*h2c); + struct sk_buff *skb; + int ret; + + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len); + if (!skb) { + rtw89_err(rtwdev, "failed to alloc skb for ap info\n"); + return -ENOMEM; + } + + skb_put(skb, len); + h2c = (struct rtw89_h2c_ap_info *)skb->data; + + h2c->w0 = le32_encode_bits(en, RTW89_H2C_AP_INFO_W0_PWR_INT_EN); + + rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, + H2C_CAT_MAC, + H2C_CL_AP, + H2C_FUNC_AP_INFO, 0, 0, + len); + + ret = rtw89_h2c_tx(rtwdev, skb, false); + if (ret) { + rtw89_err(rtwdev, "failed to send h2c\n"); + dev_kfree_skb_any(skb); + return -EBUSY; + } + + return 0; +} + +int rtw89_fw_h2c_ap_info_refcount(struct rtw89_dev *rtwdev, bool en) +{ + int ret; + + if (en) { + if (refcount_inc_not_zero(&rtwdev->refcount_ap_info)) + return 0; + } else { + if (!refcount_dec_and_test(&rtwdev->refcount_ap_info)) + return 0; + } + + ret = rtw89_fw_h2c_ap_info(rtwdev, en); + if (ret) { + if (!test_bit(RTW89_FLAG_SER_HANDLING, rtwdev->flags)) + return ret; + + /* During recovery, neither driver nor stack has full error + * handling, so show a warning, but return 0 with refcount + * increased normally. It can avoid underflow when calling + * with @en == false later. 
+ */ + rtw89_warn(rtwdev, "h2c ap_info failed during SER\n"); + } + + if (en) + refcount_set(&rtwdev->refcount_ap_info, 1); + + return 0; +} + static bool __fw_txpwr_entry_zero_ext(const void *ext_ptr, u8 ext_len) { static const u8 zeros[U8_MAX] = {}; diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index efa63d444821..9106bcce1851 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -3466,6 +3466,12 @@ struct rtw89_h2c_wow_aoac { __le32 w0; } __packed; +struct rtw89_h2c_ap_info { + __le32 w0; +} __packed; + +#define RTW89_H2C_AP_INFO_W0_PWR_INT_EN BIT(0) + #define RTW89_C2H_HEADER_LEN 8 struct rtw89_c2h_hdr { @@ -3725,6 +3731,14 @@ struct rtw89_c2h_wow_aoac_report { #define RTW89_C2H_WOW_AOAC_RPT_REKEY_IDX BIT(0) +struct rtw89_c2h_pwr_int_notify { + struct rtw89_c2h_hdr hdr; + __le32 w2; +} __packed; + +#define RTW89_C2H_PWR_INT_NOTIFY_W2_MACID GENMASK(15, 0) +#define RTW89_C2H_PWR_INT_NOTIFY_W2_PWR_STATUS BIT(16) + struct rtw89_h2c_tx_duty { __le32 w0; __le32 w1; @@ -4168,6 +4182,10 @@ enum rtw89_mrc_h2c_func { #define RTW89_MRC_WAIT_COND_REQ_TSF \ RTW89_MRC_WAIT_COND(0 /* don't care */, H2C_FUNC_MRC_REQ_TSF) +/* CLASS 36 - AP */ +#define H2C_CL_AP 0x24 +#define H2C_FUNC_AP_INFO 0x0 + #define H2C_CAT_OUTSRC 0x2 #define H2C_CL_OUTSRC_RA 0x1 @@ -4697,6 +4715,7 @@ int rtw89_fw_h2c_mrc_sync(struct rtw89_dev *rtwdev, const struct rtw89_fw_mrc_sync_arg *arg); int rtw89_fw_h2c_mrc_upd_duration(struct rtw89_dev *rtwdev, const struct rtw89_fw_mrc_upd_duration_arg *arg); +int rtw89_fw_h2c_ap_info_refcount(struct rtw89_dev *rtwdev, bool en); static inline void rtw89_fw_h2c_init_ba_cam(struct rtw89_dev *rtwdev) { diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 7907b84d204b..03fc21440271 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -5364,6 +5364,39 @@ 
rtw89_mac_c2h_mrc_status_rpt(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 rtw89_complete_cond(wait, cond, &data); } +static void +rtw89_mac_c2h_pwr_int_notify(struct rtw89_dev *rtwdev, struct sk_buff *skb, u32 len) +{ + const struct rtw89_c2h_pwr_int_notify *c2h; + struct rtw89_sta_link *rtwsta_link; + struct ieee80211_sta *sta; + struct rtw89_sta *rtwsta; + u16 macid; + bool ps; + + c2h = (const struct rtw89_c2h_pwr_int_notify *)skb->data; + macid = le32_get_bits(c2h->w2, RTW89_C2H_PWR_INT_NOTIFY_W2_MACID); + ps = le32_get_bits(c2h->w2, RTW89_C2H_PWR_INT_NOTIFY_W2_PWR_STATUS); + + rcu_read_lock(); + + rtwsta_link = rtw89_assoc_link_rcu_dereference(rtwdev, macid); + if (unlikely(!rtwsta_link)) + goto out; + + rtwsta = rtwsta_link->rtwsta; + if (ps) + set_bit(RTW89_REMOTE_STA_IN_PS, rtwsta->flags); + else + clear_bit(RTW89_REMOTE_STA_IN_PS, rtwsta->flags); + + sta = rtwsta_to_sta(rtwsta); + ieee80211_sta_ps_transition(sta, ps); + +out: + rcu_read_unlock(); +} + static void (* const rtw89_mac_c2h_ofld_handler[])(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len) = { @@ -5409,6 +5442,12 @@ void (* const rtw89_mac_c2h_wow_handler[])(struct rtw89_dev *rtwdev, [RTW89_MAC_C2H_FUNC_AOAC_REPORT] = rtw89_mac_c2h_wow_aoac_rpt, }; +static +void (* const rtw89_mac_c2h_ap_handler[])(struct rtw89_dev *rtwdev, + struct sk_buff *c2h, u32 len) = { + [RTW89_MAC_C2H_FUNC_PWR_INT_NOTIFY] = rtw89_mac_c2h_pwr_int_notify, +}; + static void rtw89_mac_c2h_scanofld_rsp_atomic(struct rtw89_dev *rtwdev, struct sk_buff *skb) { @@ -5463,6 +5502,13 @@ bool rtw89_mac_c2h_chk_atomic(struct rtw89_dev *rtwdev, struct sk_buff *c2h, return true; case RTW89_MAC_C2H_CLASS_WOW: return true; + case RTW89_MAC_C2H_CLASS_AP: + switch (func) { + default: + return false; + case RTW89_MAC_C2H_FUNC_PWR_INT_NOTIFY: + return true; + } } } @@ -5493,6 +5539,10 @@ void rtw89_mac_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb, if (func < NUM_OF_RTW89_MAC_C2H_FUNC_WOW) handler = 
rtw89_mac_c2h_wow_handler[func]; break; + case RTW89_MAC_C2H_CLASS_AP: + if (func < NUM_OF_RTW89_MAC_C2H_FUNC_AP) + handler = rtw89_mac_c2h_ap_handler[func]; + break; case RTW89_MAC_C2H_CLASS_FWDBG: return; default: diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index 18579c020548..81507274a97e 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -426,6 +426,12 @@ enum rtw89_mac_c2h_wow_func { NUM_OF_RTW89_MAC_C2H_FUNC_WOW, }; +enum rtw89_mac_c2h_ap_func { + RTW89_MAC_C2H_FUNC_PWR_INT_NOTIFY = 0, + + NUM_OF_RTW89_MAC_C2H_FUNC_AP, +}; + enum rtw89_mac_c2h_class { RTW89_MAC_C2H_CLASS_INFO = 0x0, RTW89_MAC_C2H_CLASS_OFLD = 0x1, @@ -434,6 +440,7 @@ enum rtw89_mac_c2h_class { RTW89_MAC_C2H_CLASS_MCC = 0x4, RTW89_MAC_C2H_CLASS_FWDBG = 0x5, RTW89_MAC_C2H_CLASS_MRC = 0xe, + RTW89_MAC_C2H_CLASS_AP = 0x18, RTW89_MAC_C2H_CLASS_MAX, }; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 619d2d3771d5..8dc475f6fdf7 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -775,6 +775,7 @@ static int rtw89_ops_start_ap(struct ieee80211_hw *hw, struct rtw89_vif *rtwvif = vif_to_rtwvif(vif); struct rtw89_vif_link *rtwvif_link; const struct rtw89_chan *chan; + int ret = 0; mutex_lock(&rtwdev->mutex); @@ -783,6 +784,7 @@ static int rtw89_ops_start_ap(struct ieee80211_hw *hw, rtw89_err(rtwdev, "%s: rtwvif link (link_id %u) is not active\n", __func__, link_conf->link_id); + ret = -ENOLINK; goto out; } @@ -804,12 +806,18 @@ static int rtw89_ops_start_ap(struct ieee80211_hw *hw, rtw89_fw_h2c_cam(rtwdev, rtwvif_link, NULL, NULL); rtw89_chip_rfk_channel(rtwdev, rtwvif_link); + if (RTW89_CHK_FW_FEATURE(NOTIFY_AP_INFO, &rtwdev->fw)) { + ret = rtw89_fw_h2c_ap_info_refcount(rtwdev, true); + if (ret) + goto out; + } + rtw89_queue_chanctx_work(rtwdev); out: 
mutex_unlock(&rtwdev->mutex); - return 0; + return ret; } static @@ -830,6 +838,9 @@ void rtw89_ops_stop_ap(struct ieee80211_hw *hw, struct ieee80211_vif *vif, goto out; } + if (RTW89_CHK_FW_FEATURE(NOTIFY_AP_INFO, &rtwdev->fw)) + rtw89_fw_h2c_ap_info_refcount(rtwdev, false); + rtw89_mac_stop_ap(rtwdev, rtwvif_link); rtw89_chip_h2c_assoc_cmac_tbl(rtwdev, rtwvif_link, NULL); rtw89_fw_h2c_join_info(rtwdev, rtwvif_link, NULL, true); diff --git a/drivers/net/wireless/realtek/rtw89/ser.c b/drivers/net/wireless/realtek/rtw89/ser.c index 7b203bb7f151..26a944d3b672 100644 --- a/drivers/net/wireless/realtek/rtw89/ser.c +++ b/drivers/net/wireless/realtek/rtw89/ser.c @@ -365,6 +365,7 @@ static void ser_reset_mac_binding(struct rtw89_dev *rtwdev) ser_reset_vif(rtwdev, rtwvif); rtwdev->total_sta_assoc = 0; + refcount_set(&rtwdev->refcount_ap_info, 0); } /* hal function */ From c821a8af435ca1b029ebdf0520cfb5c35b5e3d77 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:50 +0800 Subject: [PATCH 0057/1386] wifi: rtw89: register ops of can_activate_links Register mac80211 ops of can_activate_links which is required when we are ready to enable multiple active links. 
Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/mac80211.c | 25 +++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 3e93b53fd67b..dcd2ccadac96 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -830,6 +830,7 @@ enum rtw89_phy_idx { }; #define __RTW89_MLD_MAX_LINK_NUM 2 +#define RTW89_MLD_NON_STA_LINK_NUM 1 enum rtw89_chanctx_idx { RTW89_CHANCTX_0 = 0, diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 8dc475f6fdf7..40ff5aa041fa 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -1484,6 +1484,30 @@ static int rtw89_ops_set_tid_config(struct ieee80211_hw *hw, return 0; } +static bool rtw89_can_work_on_links(struct rtw89_dev *rtwdev, + struct ieee80211_vif *vif, u16 links) +{ + struct rtw89_vif *rtwvif = vif_to_rtwvif(vif); + u8 w = hweight16(links); + + if (vif->type != NL80211_IFTYPE_STATION && + w > RTW89_MLD_NON_STA_LINK_NUM) + return false; + + return w <= rtwvif->links_inst_valid_num; +} + +static bool rtw89_ops_can_activate_links(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 active_links) +{ + struct rtw89_dev *rtwdev = hw->priv; + + guard(mutex)(&rtwdev->mutex); + + return rtw89_can_work_on_links(rtwdev, vif, active_links); +} + #ifdef CONFIG_PM static int rtw89_ops_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) @@ -1611,6 +1635,7 @@ const struct ieee80211_ops rtw89_ops = { .set_sar_specs = rtw89_ops_set_sar_specs, .link_sta_rc_update = rtw89_ops_sta_rc_update, .set_tid_config = rtw89_ops_set_tid_config, + .can_activate_links = rtw89_ops_can_activate_links, #ifdef CONFIG_PM 
.suspend = rtw89_ops_suspend, .resume = rtw89_ops_resume, From d2b387bdca4684052d58f81667fe6fd6c746faca Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:51 +0800 Subject: [PATCH 0058/1386] wifi: rtw89: implement ops of change vif/sta links To support MLO, implement change_vif_links() and change_sta_links() ops. Basically, we follow arguments to set/clear links. One special thing is that when vif is idle, i.e. no connection, link id 0 is set up by us for default uses. So, when bitmap of vif links change from 0x0 to non-zero, we clear the default one first. And when bitmap of vif links change from non-zero to 0x0, we set up a default one at the end. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 17 ++ drivers/net/wireless/realtek/rtw89/mac80211.c | 226 +++++++++++++++++- 2 files changed, 238 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index dcd2ccadac96..bcfefd7fa01e 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -5700,10 +5700,17 @@ struct rtw89_dev { u8 priv[] __aligned(sizeof(void *)); }; +struct rtw89_link_conf_container { + struct ieee80211_bss_conf *link_conf[IEEE80211_MLD_MAX_NUM_LINKS]; +}; + +#define RTW89_VIF_IDLE_LINK_ID 0 + struct rtw89_vif { struct rtw89_dev *rtwdev; struct list_head list; struct list_head mgnt_entry; + struct rtw89_link_conf_container __rcu *snap_link_confs; u8 mac_addr[ETH_ALEN]; __be32 ip_addr; @@ -6273,9 +6280,19 @@ static inline struct ieee80211_bss_conf * __rtw89_vif_rcu_dereference_link(struct rtw89_vif_link *rtwvif_link, bool *nolink) { struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); + struct rtw89_vif *rtwvif = rtwvif_link->rtwvif; + struct rtw89_link_conf_container *snap; struct ieee80211_bss_conf *bss_conf; + snap = 
rcu_dereference(rtwvif->snap_link_confs); + if (snap) { + bss_conf = snap->link_conf[rtwvif_link->link_id]; + goto out; + } + bss_conf = rcu_dereference(vif->link_conf[rtwvif_link->link_id]); + +out: if (unlikely(!bss_conf)) { *nolink = true; return &vif->bss_conf; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index 40ff5aa041fa..a4e47ef22b9b 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -202,7 +202,7 @@ static int rtw89_ops_add_interface(struct ieee80211_hw *hw, rtw89_traffic_stats_init(rtwdev, &rtwvif->stats); - rtwvif_link = rtw89_vif_set_link(rtwvif, 0); + rtwvif_link = rtw89_vif_set_link(rtwvif, RTW89_VIF_IDLE_LINK_ID); if (!rtwvif_link) { ret = -EINVAL; goto release_port; @@ -218,7 +218,7 @@ static int rtw89_ops_add_interface(struct ieee80211_hw *hw, return 0; unset_link: - rtw89_vif_unset_link(rtwvif, 0); + rtw89_vif_unset_link(rtwvif, RTW89_VIF_IDLE_LINK_ID); release_port: list_del_init(&rtwvif->list); rtw89_core_release_bit_map(rtwdev->hw_port, port); @@ -246,17 +246,17 @@ static void rtw89_ops_remove_interface(struct ieee80211_hw *hw, mutex_lock(&rtwdev->mutex); - rtwvif_link = rtwvif->links[0]; + rtwvif_link = rtwvif->links[RTW89_VIF_IDLE_LINK_ID]; if (unlikely(!rtwvif_link)) { rtw89_err(rtwdev, "%s: rtwvif link (link_id %u) is not active\n", - __func__, 0); + __func__, RTW89_VIF_IDLE_LINK_ID); goto bottom; } __rtw89_ops_remove_iface_link(rtwdev, rtwvif_link); - rtw89_vif_unset_link(rtwvif, 0); + rtw89_vif_unset_link(rtwvif, RTW89_VIF_IDLE_LINK_ID); bottom: list_del_init(&rtwvif->list); @@ -1508,6 +1508,220 @@ static bool rtw89_ops_can_activate_links(struct ieee80211_hw *hw, return rtw89_can_work_on_links(rtwdev, vif, active_links); } +static void __rtw89_ops_clr_vif_links(struct rtw89_dev *rtwdev, + struct rtw89_vif *rtwvif, + unsigned long clr_links) +{ + struct rtw89_vif_link *rtwvif_link; + unsigned int link_id; + + 
for_each_set_bit(link_id, &clr_links, IEEE80211_MLD_MAX_NUM_LINKS) { + rtwvif_link = rtwvif->links[link_id]; + if (unlikely(!rtwvif_link)) + continue; + + __rtw89_ops_remove_iface_link(rtwdev, rtwvif_link); + + rtw89_vif_unset_link(rtwvif, link_id); + } +} + +static int __rtw89_ops_set_vif_links(struct rtw89_dev *rtwdev, + struct rtw89_vif *rtwvif, + unsigned long set_links) +{ + struct rtw89_vif_link *rtwvif_link; + unsigned int link_id; + int ret; + + for_each_set_bit(link_id, &set_links, IEEE80211_MLD_MAX_NUM_LINKS) { + rtwvif_link = rtw89_vif_set_link(rtwvif, link_id); + if (!rtwvif_link) + return -EINVAL; + + ret = __rtw89_ops_add_iface_link(rtwdev, rtwvif_link); + if (ret) { + rtw89_err(rtwdev, "%s: failed to add iface (link id %u)\n", + __func__, link_id); + return ret; + } + } + + return 0; +} + +static +int rtw89_ops_change_vif_links(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) +{ + struct rtw89_dev *rtwdev = hw->priv; + struct rtw89_vif *rtwvif = vif_to_rtwvif(vif); + unsigned long clr_links = old_links & ~new_links; + unsigned long set_links = new_links & ~old_links; + bool removing_links = !old_links || clr_links; + struct rtw89_link_conf_container *snap; + int ret = 0; + int i; + + guard(mutex)(&rtwdev->mutex); + + rtw89_debug(rtwdev, RTW89_DBG_STATE, + "%s: old_links (0x%08x) -> new_links (0x%08x)\n", + __func__, old_links, new_links); + + if (!rtw89_can_work_on_links(rtwdev, vif, new_links)) + return -EOPNOTSUPP; + + if (removing_links) { + snap = kzalloc(sizeof(*snap), GFP_KERNEL); + if (!snap) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(snap->link_conf); i++) + snap->link_conf[i] = old[i]; + + rcu_assign_pointer(rtwvif->snap_link_confs, snap); + } + + /* might depend on @snap; don't change order */ + rtw89_leave_ips_by_hwflags(rtwdev); + + if (rtwdev->scanning) + rtw89_hw_scan_abort(rtwdev, rtwdev->scan_info.scanning_vif); + + if 
(!old_links) + __rtw89_ops_clr_vif_links(rtwdev, rtwvif, + BIT(RTW89_VIF_IDLE_LINK_ID)); + else if (clr_links) + __rtw89_ops_clr_vif_links(rtwdev, rtwvif, clr_links); + + if (removing_links) { + /* @snap is required if and only if during removing links. + * However, it's done here. So, cleanup @snap immediately. + */ + rcu_assign_pointer(rtwvif->snap_link_confs, NULL); + + /* The pointers in @old will free after this function return, + * so synchronously wait for all readers of snap to be done. + */ + synchronize_rcu(); + kfree(snap); + } + + if (set_links) { + ret = __rtw89_ops_set_vif_links(rtwdev, rtwvif, set_links); + if (ret) + __rtw89_ops_clr_vif_links(rtwdev, rtwvif, set_links); + } else if (!new_links) { + ret = __rtw89_ops_set_vif_links(rtwdev, rtwvif, + BIT(RTW89_VIF_IDLE_LINK_ID)); + if (ret) + __rtw89_ops_clr_vif_links(rtwdev, rtwvif, + BIT(RTW89_VIF_IDLE_LINK_ID)); + } + + rtw89_enter_ips_by_hwflags(rtwdev); + return ret; +} + +static void __rtw89_ops_clr_sta_links(struct rtw89_dev *rtwdev, + struct rtw89_sta *rtwsta, + unsigned long clr_links) +{ + struct rtw89_vif_link *rtwvif_link; + struct rtw89_sta_link *rtwsta_link; + unsigned int link_id; + + for_each_set_bit(link_id, &clr_links, IEEE80211_MLD_MAX_NUM_LINKS) { + rtwsta_link = rtwsta->links[link_id]; + if (unlikely(!rtwsta_link)) + continue; + + rtwvif_link = rtwsta_link->rtwvif_link; + + rtw89_core_sta_link_disassoc(rtwdev, rtwvif_link, rtwsta_link); + rtw89_core_sta_link_disconnect(rtwdev, rtwvif_link, rtwsta_link); + rtw89_core_sta_link_remove(rtwdev, rtwvif_link, rtwsta_link); + + rtw89_sta_unset_link(rtwsta, link_id); + } +} + +static int __rtw89_ops_set_sta_links(struct rtw89_dev *rtwdev, + struct rtw89_sta *rtwsta, + unsigned long set_links) +{ + struct rtw89_vif_link *rtwvif_link; + struct rtw89_sta_link *rtwsta_link; + unsigned int link_id; + int ret; + + for_each_set_bit(link_id, &set_links, IEEE80211_MLD_MAX_NUM_LINKS) { + rtwsta_link = rtw89_sta_set_link(rtwsta, link_id); + if 
(!rtwsta_link) + return -EINVAL; + + rtwvif_link = rtwsta_link->rtwvif_link; + + ret = rtw89_core_sta_link_add(rtwdev, rtwvif_link, rtwsta_link); + if (ret) { + rtw89_err(rtwdev, "%s: failed to add sta (link id %u)\n", + __func__, link_id); + return ret; + } + + rtw89_vif_type_mapping(rtwvif_link, true); + + ret = rtw89_core_sta_link_assoc(rtwdev, rtwvif_link, rtwsta_link); + if (ret) { + rtw89_err(rtwdev, "%s: failed to assoc sta (link id %u)\n", + __func__, link_id); + return ret; + } + + __rtw89_ops_bss_link_assoc(rtwdev, rtwvif_link); + } + + return 0; +} + +static +int rtw89_ops_change_sta_links(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links) +{ + struct rtw89_dev *rtwdev = hw->priv; + struct rtw89_sta *rtwsta = sta_to_rtwsta(sta); + unsigned long clr_links = old_links & ~new_links; + unsigned long set_links = new_links & ~old_links; + int ret = 0; + + guard(mutex)(&rtwdev->mutex); + + rtw89_debug(rtwdev, RTW89_DBG_STATE, + "%s: old_links (0x%08x) -> new_links (0x%08x)\n", + __func__, old_links, new_links); + + if (!rtw89_can_work_on_links(rtwdev, vif, new_links)) + return -EOPNOTSUPP; + + rtw89_leave_ps_mode(rtwdev); + + if (clr_links) + __rtw89_ops_clr_sta_links(rtwdev, rtwsta, clr_links); + + if (set_links) { + ret = __rtw89_ops_set_sta_links(rtwdev, rtwsta, set_links); + if (ret) + __rtw89_ops_clr_sta_links(rtwdev, rtwsta, set_links); + } + + return ret; +} + #ifdef CONFIG_PM static int rtw89_ops_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) @@ -1636,6 +1850,8 @@ const struct ieee80211_ops rtw89_ops = { .link_sta_rc_update = rtw89_ops_sta_rc_update, .set_tid_config = rtw89_ops_set_tid_config, .can_activate_links = rtw89_ops_can_activate_links, + .change_vif_links = rtw89_ops_change_vif_links, + .change_sta_links = rtw89_ops_change_sta_links, #ifdef CONFIG_PM .suspend = rtw89_ops_suspend, .resume = rtw89_ops_resume, From a6db83bef0f587494fcc9cc5a9ec6e66ea13236e Mon Sep 17 
00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:52 +0800 Subject: [PATCH 0059/1386] wifi: rtw89: apply MLD pairwise key to dynamically active links In MLD connection, a pairwise key should work on all active links. And, we take just one entry in security CAM for one pairwise key. (It means we will reuse one single entry for all links.) Originally, we already applied the security CAM entry of pairwise key to deflink's address CAM. However, links can be activated dynamically. So now for pairwise keys, each rtw89_sta records the IDs of the security CAM entries. Then, when driver is notified that some links are active via change_sta_links(), we apply target pairwise keys to them according to the record. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/cam.c | 32 ++++++++++++++++--- drivers/net/wireless/realtek/rtw89/cam.h | 5 +++ drivers/net/wireless/realtek/rtw89/core.h | 4 +++ drivers/net/wireless/realtek/rtw89/mac80211.c | 16 ++++++++++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/cam.c b/drivers/net/wireless/realtek/rtw89/cam.c index 8ef59994c0db..8fa1e6c1ce13 100644 --- a/drivers/net/wireless/realtek/rtw89/cam.c +++ b/drivers/net/wireless/realtek/rtw89/cam.c @@ -135,8 +135,8 @@ again: } static int rtw89_cam_get_addr_cam_key_idx(struct rtw89_addr_cam_entry *addr_cam, - struct rtw89_sec_cam_entry *sec_cam, - struct ieee80211_key_conf *key, + const struct rtw89_sec_cam_entry *sec_cam, + const struct ieee80211_key_conf *key, u8 *key_idx) { u8 idx; @@ -246,8 +246,8 @@ static int __rtw89_cam_detach_sec_cam(struct rtw89_dev *rtwdev, static int __rtw89_cam_attach_sec_cam(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, struct rtw89_sta_link *rtwsta_link, - struct ieee80211_key_conf *key, - struct rtw89_sec_cam_entry *sec_cam) + const struct ieee80211_key_conf *key, + const 
struct rtw89_sec_cam_entry *sec_cam) { struct rtw89_addr_cam_entry *addr_cam; u8 key_idx = 0; @@ -286,6 +286,22 @@ static int __rtw89_cam_attach_sec_cam(struct rtw89_dev *rtwdev, return 0; } +int rtw89_cam_attach_link_sec_cam(struct rtw89_dev *rtwdev, + struct rtw89_vif_link *rtwvif_link, + struct rtw89_sta_link *rtwsta_link, + u8 sec_cam_idx) +{ + struct rtw89_cam_info *cam_info = &rtwdev->cam_info; + const struct rtw89_sec_cam_entry *sec_cam; + + sec_cam = cam_info->sec_entries[sec_cam_idx]; + if (!sec_cam) + return -ENOENT; + + return __rtw89_cam_attach_sec_cam(rtwdev, rtwvif_link, rtwsta_link, + sec_cam->key_conf, sec_cam); +} + static int rtw89_cam_detach_sec_cam(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, struct ieee80211_sta *sta, @@ -306,6 +322,9 @@ static int rtw89_cam_detach_sec_cam(struct rtw89_dev *rtwdev, rtwvif = vif_to_rtwvif(vif); + if (rtwsta) + clear_bit(sec_cam->sec_cam_idx, rtwsta->pairwise_sec_cam_map); + rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) { rtwsta_link = rtwsta ? 
rtwsta->links[link_id] : NULL; if (rtwsta && !rtwsta_link) @@ -369,6 +388,8 @@ static int rtw89_cam_attach_sec_cam(struct rtw89_dev *rtwdev, return ret; } + set_bit(sec_cam->sec_cam_idx, rtwsta->pairwise_sec_cam_map); + return 0; } @@ -410,6 +431,9 @@ static int rtw89_cam_sec_key_install(struct rtw89_dev *rtwdev, sec_cam->len = RTW89_SEC_CAM_LEN; sec_cam->ext_key = ext_key; memcpy(sec_cam->key, key->key, key->keylen); + + sec_cam->key_conf = key; + ret = rtw89_cam_send_sec_key_cmd(rtwdev, sec_cam); if (ret) { rtw89_err(rtwdev, "failed to send sec key cmd: %d\n", ret); diff --git a/drivers/net/wireless/realtek/rtw89/cam.h b/drivers/net/wireless/realtek/rtw89/cam.h index 3134ebf08825..8fd2d776408e 100644 --- a/drivers/net/wireless/realtek/rtw89/cam.h +++ b/drivers/net/wireless/realtek/rtw89/cam.h @@ -578,4 +578,9 @@ int rtw89_cam_sec_key_del(struct rtw89_dev *rtwdev, void rtw89_cam_bssid_changed(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link); void rtw89_cam_reset_keys(struct rtw89_dev *rtwdev); +int rtw89_cam_attach_link_sec_cam(struct rtw89_dev *rtwdev, + struct rtw89_vif_link *rtwvif_link, + struct rtw89_sta_link *rtwsta_link, + u8 sec_cam_idx); + #endif diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index bcfefd7fa01e..409cbdc6b92a 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -3359,6 +3359,8 @@ struct rtw89_sec_cam_entry { u8 spp_mode : 1; /* 256 bits */ u8 key[32]; + + struct ieee80211_key_conf *key_conf; }; struct rtw89_sta_link { @@ -5761,6 +5763,8 @@ struct rtw89_sta { struct rtw89_ampdu_params ampdu_params[IEEE80211_NUM_TIDS]; DECLARE_BITMAP(ampdu_map, IEEE80211_NUM_TIDS); + DECLARE_BITMAP(pairwise_sec_cam_map, RTW89_MAX_SEC_CAM_NUM); + u8 links_inst_valid_num; DECLARE_BITMAP(links_inst_map, __RTW89_MLD_MAX_LINK_NUM); struct rtw89_sta_link *links[IEEE80211_MLD_MAX_NUM_LINKS]; diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c 
b/drivers/net/wireless/realtek/rtw89/mac80211.c index a4e47ef22b9b..bf7a674bce28 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -509,6 +509,7 @@ static int __rtw89_ops_sta_add(struct rtw89_dev *rtwdev, rtw89_core_txq_init(rtwdev, sta->txq[i]); skb_queue_head_init(&rtwsta->roc_queue); + bitmap_zero(rtwsta->pairwise_sec_cam_map, RTW89_MAX_SEC_CAM_NUM); rtwsta_link = rtw89_sta_set_link(rtwsta, sta->deflink.link_id); if (!rtwsta_link) { @@ -1656,6 +1657,7 @@ static int __rtw89_ops_set_sta_links(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link; struct rtw89_sta_link *rtwsta_link; unsigned int link_id; + u8 sec_cam_idx; int ret; for_each_set_bit(link_id, &set_links, IEEE80211_MLD_MAX_NUM_LINKS) { @@ -1682,6 +1684,20 @@ static int __rtw89_ops_set_sta_links(struct rtw89_dev *rtwdev, } __rtw89_ops_bss_link_assoc(rtwdev, rtwvif_link); + + for_each_set_bit(sec_cam_idx, rtwsta->pairwise_sec_cam_map, + RTW89_MAX_SEC_CAM_NUM) { + ret = rtw89_cam_attach_link_sec_cam(rtwdev, + rtwvif_link, + rtwsta_link, + sec_cam_idx); + if (ret) { + rtw89_err(rtwdev, + "%s: failed to apply pairwise key (link id %u)\n", + __func__, link_id); + return ret; + } + } } return 0; From 55709b195464e59bd5c51abf25fa243d7a8b7a3e Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:53 +0800 Subject: [PATCH 0060/1386] wifi: rtw89: pass target link_id to ieee80211_gtk_rekey_add() When calling ieee80211_gtk_rekey_add(), pass the target link_id instead of always -1. 
Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/wow.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/wow.c b/drivers/net/wireless/realtek/rtw89/wow.c index 3e81fd974ec1..1e1dbb20d47a 100644 --- a/drivers/net/wireless/realtek/rtw89/wow.c +++ b/drivers/net/wireless/realtek/rtw89/wow.c @@ -620,7 +620,10 @@ static struct ieee80211_key_conf *rtw89_wow_gtk_rekey(struct rtw89_dev *rtwdev, * need to unlock mutex */ mutex_unlock(&rtwdev->mutex); - key = ieee80211_gtk_rekey_add(wow_vif, rekey_conf, -1); + if (ieee80211_vif_is_mld(wow_vif)) + key = ieee80211_gtk_rekey_add(wow_vif, rekey_conf, rtwvif_link->link_id); + else + key = ieee80211_gtk_rekey_add(wow_vif, rekey_conf, -1); mutex_lock(&rtwdev->mutex); kfree(rekey_conf); From f79257f5b97199a08d5c2c039bf4908323f9dd92 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Wed, 20 Nov 2024 11:40:54 +0800 Subject: [PATCH 0061/1386] wifi: rtw89: pass target link_id to ieee80211_nullfunc_get() When calling ieee80211_nullfunc_get(), pass the target link_id instead of always -1. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241120034054.13575-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 3 ++- drivers/net/wireless/realtek/rtw89/fw.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index c99111df90a7..6f9b4f0b2748 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -3216,6 +3216,7 @@ static int rtw89_core_send_nullfunc(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, bool qos, bool ps) { struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); + int link_id = ieee80211_vif_is_mld(vif) ? 
rtwvif_link->link_id : -1; struct ieee80211_sta *sta; struct ieee80211_hdr *hdr; struct sk_buff *skb; @@ -3231,7 +3232,7 @@ static int rtw89_core_send_nullfunc(struct rtw89_dev *rtwdev, goto out; } - skb = ieee80211_nullfunc_get(rtwdev->hw, vif, -1, qos); + skb = ieee80211_nullfunc_get(rtwdev->hw, vif, link_id, qos); if (!skb) { ret = -ENOMEM; goto out; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 7bda9aab382c..cbd759c844e5 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2415,6 +2415,7 @@ static int rtw89_fw_h2c_add_general_pkt(struct rtw89_dev *rtwdev, u8 *id) { struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); + int link_id = ieee80211_vif_is_mld(vif) ? rtwvif_link->link_id : -1; struct rtw89_pktofld_info *info; struct sk_buff *skb; int ret; @@ -2431,10 +2432,10 @@ static int rtw89_fw_h2c_add_general_pkt(struct rtw89_dev *rtwdev, skb = ieee80211_proberesp_get(rtwdev->hw, vif); break; case RTW89_PKT_OFLD_TYPE_NULL_DATA: - skb = ieee80211_nullfunc_get(rtwdev->hw, vif, -1, false); + skb = ieee80211_nullfunc_get(rtwdev->hw, vif, link_id, false); break; case RTW89_PKT_OFLD_TYPE_QOS_NULL: - skb = ieee80211_nullfunc_get(rtwdev->hw, vif, -1, true); + skb = ieee80211_nullfunc_get(rtwdev->hw, vif, link_id, true); break; case RTW89_PKT_OFLD_TYPE_EAPOL_KEY: skb = rtw89_eapol_get(rtwdev, rtwvif_link); From 56dcbf0b520796e26b2bbe5686bdd305ad924954 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 26 Nov 2024 19:11:30 +0200 Subject: [PATCH 0062/1386] wifi: ath12k: convert struct ath12k::wmi_mgmt_tx_work to struct wiphy_work To simplify locking for the next patches convert struct ath12k::wmi_mgmt_tx_work to use wiphy_work. After this ath12k_mgmt_over_wmi_tx_work() is called with wiphy_lock() taken. In ath12k_core_suspend() we need to take wiphy_lock() because ath12k_mac_wait_tx_complete() requires it. 
Also add lockdep_assert_wiphy() to document when wiphy_lock() is held. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-2-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.c | 6 ++++++ drivers/net/wireless/ath/ath12k/core.h | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 20 ++++++++++++++++---- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index c57322221e1d..263a7c789122 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -79,11 +79,17 @@ int ath12k_core_suspend(struct ath12k_base *ab) ar = ab->pdevs[i].ar; if (!ar) continue; + + wiphy_lock(ath12k_ar_to_hw(ar)->wiphy); + ret = ath12k_mac_wait_tx_complete(ar); if (ret) { + wiphy_unlock(ath12k_ar_to_hw(ar)->wiphy); ath12k_warn(ab, "failed to wait tx complete: %d\n", ret); return ret; } + + wiphy_unlock(ath12k_ar_to_hw(ar)->wiphy); } /* PM framework skips suspend_late/resume_early callbacks diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index c1d5e93b679a..5be977008319 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -679,7 +679,7 @@ struct ath12k { struct work_struct regd_update_work; - struct work_struct wmi_mgmt_tx_work; + struct wiphy_work wmi_mgmt_tx_work; struct sk_buff_head wmi_mgmt_tx_queue; struct ath12k_wow wow; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 60702bf07141..a6fe998c177e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6726,6 +6726,8 @@ static void ath12k_mgmt_over_wmi_tx_drop(struct ath12k *ar, struct sk_buff *skb) { int num_mgmt; + 
lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + ieee80211_free_txskb(ath12k_ar_to_hw(ar), skb); num_mgmt = atomic_dec_if_positive(&ar->num_pending_mgmt_tx); @@ -6787,6 +6789,8 @@ static int ath12k_mac_mgmt_tx_wmi(struct ath12k *ar, struct ath12k_link_vif *arv int buf_id; int ret; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + ATH12K_SKB_CB(skb)->ar = ar; spin_lock_bh(&ar->txmgmt_idr_lock); buf_id = idr_alloc(&ar->txmgmt_idr, skb, 0, @@ -6841,7 +6845,7 @@ static void ath12k_mgmt_over_wmi_tx_purge(struct ath12k *ar) ath12k_mgmt_over_wmi_tx_drop(ar, skb); } -static void ath12k_mgmt_over_wmi_tx_work(struct work_struct *work) +static void ath12k_mgmt_over_wmi_tx_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ath12k *ar = container_of(work, struct ath12k, wmi_mgmt_tx_work); struct ath12k_skb_cb *skb_cb; @@ -6850,6 +6854,8 @@ static void ath12k_mgmt_over_wmi_tx_work(struct work_struct *work) struct sk_buff *skb; int ret; + lockdep_assert_wiphy(wiphy); + while ((skb = skb_dequeue(&ar->wmi_mgmt_tx_queue)) != NULL) { skb_cb = ATH12K_SKB_CB(skb); if (!skb_cb->vif) { @@ -6904,7 +6910,7 @@ static int ath12k_mac_mgmt_tx(struct ath12k *ar, struct sk_buff *skb, skb_queue_tail(q, skb); atomic_inc(&ar->num_pending_mgmt_tx); - ieee80211_queue_work(ath12k_ar_to_hw(ar), &ar->wmi_mgmt_tx_work); + wiphy_work_queue(ath12k_ar_to_hw(ar)->wiphy, &ar->wmi_mgmt_tx_work); return 0; } @@ -6981,10 +6987,12 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, void ath12k_mac_drain_tx(struct ath12k *ar) { + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + /* make sure rcu-protected mac80211 tx path itself is drained */ synchronize_net(); - cancel_work_sync(&ar->wmi_mgmt_tx_work); + wiphy_work_cancel(ath12k_ar_to_hw(ar)->wiphy, &ar->wmi_mgmt_tx_work); ath12k_mgmt_over_wmi_tx_purge(ar); } @@ -7101,6 +7109,8 @@ static void ath12k_drain_tx(struct ath12k_hw *ah) struct ath12k *ar; int i; + lockdep_assert_wiphy(ah->hw->wiphy); + for_each_ar(ah, ar, i) 
ath12k_mac_drain_tx(ar); } @@ -9134,6 +9144,8 @@ static int ath12k_mac_flush(struct ath12k *ar) int ath12k_mac_wait_tx_complete(struct ath12k *ar) { + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + ath12k_mac_drain_tx(ar); return ath12k_mac_flush(ar); } @@ -10604,7 +10616,7 @@ static void ath12k_mac_setup(struct ath12k *ar) INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work); INIT_WORK(&ar->regd_update_work, ath12k_regd_update_work); - INIT_WORK(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work); + wiphy_work_init(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work); skb_queue_head_init(&ar->wmi_mgmt_tx_queue); } From 648a121bafa3f4487254ab8e9e298f12540f0603 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 26 Nov 2024 19:11:31 +0200 Subject: [PATCH 0063/1386] wifi: ath12k: ath12k_mac_op_tx(): MLO support For a frame transmission for an ML vif, mac80211 mentions transmit link id in the tx control info. Use it to convert the RA/TA to the corresponding link sta and link vif address before enqueueing the frame for transmission. For 802.3 data frames, always enqueue the frame on the primary (assoc) link id. Firmware does the link selection, builds 802.11 header and therefore the address translation too. Also ensure right link vif is used for WMI based management transmission and add comments to document when RCU read lock is held. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Co-developed-by: Rameshkumar Sundaram Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-3-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/mac.c | 139 ++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 5be977008319..e246e3d3c162 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -122,6 +122,7 @@ struct ath12k_skb_cb { dma_addr_t paddr_ext_desc; u32 cipher; u8 flags; + u8 link_id; }; struct ath12k_skb_rxcb { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index a6fe998c177e..5ca96cb86d17 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -6848,6 +6848,7 @@ static void ath12k_mgmt_over_wmi_tx_purge(struct ath12k *ar) static void ath12k_mgmt_over_wmi_tx_work(struct wiphy *wiphy, struct wiphy_work *work) { struct ath12k *ar = container_of(work, struct ath12k, wmi_mgmt_tx_work); + struct ath12k_hw *ah = ar->ah; struct ath12k_skb_cb *skb_cb; struct ath12k_vif *ahvif; struct ath12k_link_vif *arvif; @@ -6865,7 +6866,15 @@ static void ath12k_mgmt_over_wmi_tx_work(struct wiphy *wiphy, struct wiphy_work } ahvif = ath12k_vif_to_ahvif(skb_cb->vif); - arvif = &ahvif->deflink; + if (!(ahvif->links_map & BIT(skb_cb->link_id))) { + ath12k_warn(ar->ab, + "invalid linkid %u in mgmt over wmi tx with linkmap 0x%x\n", + skb_cb->link_id, ahvif->links_map); + ath12k_mgmt_over_wmi_tx_drop(ar, skb); + continue; + } + + arvif = wiphy_dereference(ah->hw->wiphy, ahvif->link[skb_cb->link_id]); if (ar->allocated_vdev_map & (1LL << arvif->vdev_id)) { 
ret = ath12k_mac_mgmt_tx_wmi(ar, arvif, skb); if (ret) { @@ -6875,8 +6884,9 @@ static void ath12k_mgmt_over_wmi_tx_work(struct wiphy *wiphy, struct wiphy_work } } else { ath12k_warn(ar->ab, - "dropping mgmt frame for vdev %d, is_started %d\n", + "dropping mgmt frame for vdev %d link %u is_started %d\n", arvif->vdev_id, + skb_cb->link_id, arvif->is_started); ath12k_mgmt_over_wmi_tx_drop(ar, skb); } @@ -6936,6 +6946,105 @@ static void ath12k_mac_add_p2p_noa_ie(struct ath12k *ar, spin_unlock_bh(&ar->data_lock); } +/* Note: called under rcu_read_lock() */ +static u8 ath12k_mac_get_tx_link(struct ieee80211_sta *sta, struct ieee80211_vif *vif, + u8 link, struct sk_buff *skb, u32 info_flags) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + struct ieee80211_link_sta *link_sta; + struct ieee80211_bss_conf *bss_conf; + struct ath12k_sta *ahsta; + + /* Use the link id passed or the default vif link */ + if (!sta) { + if (link != IEEE80211_LINK_UNSPECIFIED) + return link; + + return ahvif->deflink.link_id; + } + + ahsta = ath12k_sta_to_ahsta(sta); + + /* Below translation ensures we pass proper A2 & A3 for non ML clients. + * Also it assumes for now support only for MLO AP in this path + */ + if (!sta->mlo) { + link = ahsta->deflink.link_id; + + if (info_flags & IEEE80211_TX_CTL_HW_80211_ENCAP) + return link; + + bss_conf = rcu_dereference(vif->link_conf[link]); + if (bss_conf) { + ether_addr_copy(hdr->addr2, bss_conf->addr); + if (!ieee80211_has_tods(hdr->frame_control) && + !ieee80211_has_fromds(hdr->frame_control)) + ether_addr_copy(hdr->addr3, bss_conf->addr); + } + + return link; + } + + /* enqueue eth enacap & data frames on primary link, FW does link + * selection and address translation. 
+ */ + if (info_flags & IEEE80211_TX_CTL_HW_80211_ENCAP || + ieee80211_is_data(hdr->frame_control)) + return ahsta->assoc_link_id; + + /* 802.11 frame cases */ + if (link == IEEE80211_LINK_UNSPECIFIED) + link = ahsta->deflink.link_id; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return link; + + /* Perform address conversion for ML STA Tx */ + bss_conf = rcu_dereference(vif->link_conf[link]); + link_sta = rcu_dereference(sta->link[link]); + + if (bss_conf && link_sta) { + ether_addr_copy(hdr->addr1, link_sta->addr); + ether_addr_copy(hdr->addr2, bss_conf->addr); + + if (vif->type == NL80211_IFTYPE_STATION && bss_conf->bssid) + ether_addr_copy(hdr->addr3, bss_conf->bssid); + else if (vif->type == NL80211_IFTYPE_AP) + ether_addr_copy(hdr->addr3, bss_conf->addr); + + return link; + } + + if (bss_conf) { + /* In certain cases where a ML sta associated and added subset of + * links on which the ML AP is active, but now sends some frame + * (ex. Probe request) on a different link which is active in our + * MLD but was not added during previous association, we can + * still honor the Tx to that ML STA via the requested link. + * The control would reach here in such case only when that link + * address is same as the MLD address or in worst case clients + * used MLD address at TA wrongly which would have helped + * identify the ML sta object and pass it here. + * If the link address of that STA is different from MLD address, + * then the sta object would be NULL and control won't reach + * here but return at the start of the function itself with !sta + * check. Also this would not need any translation at hdr->addr1 + * from MLD to link address since the RA is the MLD address + * (same as that link address ideally) already. 
+ */ + ether_addr_copy(hdr->addr2, bss_conf->addr); + + if (vif->type == NL80211_IFTYPE_STATION && bss_conf->bssid) + ether_addr_copy(hdr->addr3, bss_conf->bssid); + else if (vif->type == NL80211_IFTYPE_AP) + ether_addr_copy(hdr->addr3, bss_conf->addr); + } + + return link; +} + +/* Note: called under rcu_read_lock() */ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -6945,13 +7054,16 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif = info->control.vif; struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_link_vif *arvif = &ahvif->deflink; - struct ath12k *ar = arvif->ar; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct ieee80211_key_conf *key = info->control.hw_key; + struct ieee80211_sta *sta = control->sta; u32 info_flags = info->flags; + struct ath12k *ar; bool is_prb_rsp; + u8 link_id; int ret; + link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK); memset(skb_cb, 0, sizeof(*skb_cb)); skb_cb->vif = vif; @@ -6960,6 +7072,27 @@ static void ath12k_mac_op_tx(struct ieee80211_hw *hw, skb_cb->flags |= ATH12K_SKB_CIPHER_SET; } + /* handle only for MLO case, use deflink for non MLO case */ + if (ieee80211_vif_is_mld(vif)) { + link_id = ath12k_mac_get_tx_link(sta, vif, link_id, skb, info_flags); + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) { + ieee80211_free_txskb(hw, skb); + return; + } + } else { + link_id = 0; + } + + arvif = rcu_dereference(ahvif->link[link_id]); + if (!arvif || !arvif->ar) { + ath12k_warn(ahvif->ah, "failed to find arvif link id %u for frame transmission", + link_id); + ieee80211_free_txskb(hw, skb); + return; + } + + ar = arvif->ar; + skb_cb->link_id = link_id; is_prb_rsp = ieee80211_is_probe_resp(hdr->frame_control); if (info_flags & IEEE80211_TX_CTL_HW_80211_ENCAP) { From 2197feb0249d308bbb0ba0443bd45511cdec190a Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 26 Nov 2024 19:11:32 +0200 
Subject: [PATCH 0064/1386] wifi: ath12k: ath12k_mac_op_flush(): MLO support Currently, when a tx flush is requested for a vif, only packets corresponding to deflink are flushed. With MLO, multiple link arvifs could be affiliated to the ML vif, and packets corresponding to all of them should be flushed. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Co-developed-by: Maharaja Kennadyrajan Signed-off-by: Maharaja Kennadyrajan Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-4-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 5ca96cb86d17..595e8110ab86 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9287,7 +9287,11 @@ static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *v u32 queues, bool drop) { struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct ath12k_link_vif *arvif; + struct ath12k_vif *ahvif; + unsigned long links; struct ath12k *ar; + u8 link_id; int i; lockdep_assert_wiphy(hw->wiphy); @@ -9302,12 +9306,18 @@ static void ath12k_mac_op_flush(struct ieee80211_hw *hw, struct ieee80211_vif *v return; } - ar = ath12k_get_ar_by_vif(hw, vif); + for_each_ar(ah, ar, i) + wiphy_work_flush(hw->wiphy, &ar->wmi_mgmt_tx_work); - if (!ar) - return; + ahvif = ath12k_vif_to_ahvif(vif); + links = ahvif->links_map; + for_each_set_bit(link_id, &links, IEEE80211_MLD_MAX_NUM_LINKS) { + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + if (!(arvif && arvif->ar)) + continue; - ath12k_mac_flush(ar); + ath12k_mac_flush(arvif->ar); + } } static int From 5419ef950da4a76c54c91129f16c292fc65da56b Mon Sep 17 00:00:00 2001 From: Sriram R 
Date: Tue, 26 Nov 2024 19:11:33 +0200 Subject: [PATCH 0065/1386] wifi: ath12k: ath12k_mac_op_ampdu_action(): MLO support Apply tid queue setup based on all link stations on receiving ampdu action params for an ML Station. Modify ath12k_get_ar_by_vif() to fetch ar based on link arvif inside ahvif. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-5-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/dp_rx.c | 36 +++++++++--- drivers/net/wireless/ath/ath12k/dp_rx.h | 6 +- drivers/net/wireless/ath/ath12k/mac.c | 76 ++++++++++++++----------- 3 files changed, 76 insertions(+), 42 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 70680f2124e5..b24d1de4aabb 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1065,15 +1065,25 @@ err_mem_free: } int ath12k_dp_rx_ampdu_start(struct ath12k *ar, - struct ieee80211_ampdu_params *params) + struct ieee80211_ampdu_params *params, + u8 link_id) { struct ath12k_base *ab = ar->ab; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta); - struct ath12k_link_sta *arsta = &ahsta->deflink; - int vdev_id = arsta->arvif->vdev_id; + struct ath12k_link_sta *arsta; + int vdev_id; int ret; - ret = ath12k_dp_rx_peer_tid_setup(ar, params->sta->addr, vdev_id, + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahsta->link[link_id]); + if (!arsta) + return -ENOLINK; + + vdev_id = arsta->arvif->vdev_id; + + ret = ath12k_dp_rx_peer_tid_setup(ar, arsta->addr, vdev_id, params->tid, params->buf_size, params->ssn, arsta->ahsta->pn_type); if (ret) @@ -1083,19 +1093,29 @@ int ath12k_dp_rx_ampdu_start(struct ath12k *ar, } int 
ath12k_dp_rx_ampdu_stop(struct ath12k *ar, - struct ieee80211_ampdu_params *params) + struct ieee80211_ampdu_params *params, + u8 link_id) { struct ath12k_base *ab = ar->ab; struct ath12k_peer *peer; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(params->sta); - struct ath12k_link_sta *arsta = &ahsta->deflink; - int vdev_id = arsta->arvif->vdev_id; + struct ath12k_link_sta *arsta; + int vdev_id; bool active; int ret; + lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + + arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahsta->link[link_id]); + if (!arsta) + return -ENOLINK; + + vdev_id = arsta->arvif->vdev_id; + spin_lock_bh(&ab->base_lock); - peer = ath12k_peer_find(ab, vdev_id, params->sta->addr); + peer = ath12k_peer_find(ab, vdev_id, arsta->addr); if (!peer) { spin_unlock_bh(&ab->base_lock); ath12k_warn(ab, "failed to find the peer to stop rx aggregation\n"); diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.h b/drivers/net/wireless/ath/ath12k/dp_rx.h index bfd4f814553e..1ce82088c954 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.h +++ b/drivers/net/wireless/ath/ath12k/dp_rx.h @@ -85,9 +85,11 @@ static inline u32 ath12k_he_gi_to_nl80211_he_gi(u8 sgi) } int ath12k_dp_rx_ampdu_start(struct ath12k *ar, - struct ieee80211_ampdu_params *params); + struct ieee80211_ampdu_params *params, + u8 link_id); int ath12k_dp_rx_ampdu_stop(struct ath12k *ar, - struct ieee80211_ampdu_params *params); + struct ieee80211_ampdu_params *params, + u8 link_id); int ath12k_dp_rx_peer_pn_replay_config(struct ath12k_link_vif *arvif, const u8 *peer_addr, enum set_key_cmd key_cmd, diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 595e8110ab86..ec8209f17261 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -725,11 +725,14 @@ static struct ath12k *ath12k_get_ar_by_ctx(struct ieee80211_hw *hw, } static struct ath12k *ath12k_get_ar_by_vif(struct ieee80211_hw *hw, - struct ieee80211_vif 
*vif) + struct ieee80211_vif *vif, + u8 link_id) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); - struct ath12k_link_vif *arvif = &ahvif->deflink; struct ath12k_hw *ah = ath12k_hw_to_ah(hw); + struct ath12k_link_vif *arvif; + + lockdep_assert_wiphy(hw->wiphy); /* If there is one pdev within ah, then we return * ar directly. @@ -737,7 +740,11 @@ static struct ath12k *ath12k_get_ar_by_vif(struct ieee80211_hw *hw, if (ah->num_radio == 1) return ah->radio; - if (arvif->is_created) + if (!(ahvif->links_map & BIT(link_id))) + return NULL; + + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + if (arvif && arvif->is_created) return arvif->ar; return NULL; @@ -5667,6 +5674,7 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, struct ath12k *ar; struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + struct ath12k_hw *ah = ath12k_hw_to_ah(hw); struct ath12k_link_sta *arsta; struct ath12k_link_vif *arvif; struct ath12k_peer *peer; @@ -5676,20 +5684,17 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, */ u8 link_id = ATH12K_DEFAULT_LINK_ID; - ar = ath12k_get_ar_by_vif(hw, vif); - if (!ar) { - WARN_ON_ONCE(1); - return; - } - rcu_read_lock(); arvif = rcu_dereference(ahvif->link[link_id]); if (!arvif) { - ath12k_warn(ar->ab, "mac sta rc update failed to fetch link vif on link id %u for peer %pM\n", - link_id, sta->addr); + ath12k_hw_warn(ah, "mac sta rc update failed to fetch link vif on link id %u for peer %pM\n", + link_id, sta->addr); rcu_read_unlock(); return; } + + ar = arvif->ar; + arsta = rcu_dereference(ahsta->link[link_id]); if (!arsta) { rcu_read_unlock(); @@ -8288,20 +8293,26 @@ static int ath12k_mac_op_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx return ret; } -static int ath12k_mac_ampdu_action(struct ath12k_link_vif *arvif, - struct ieee80211_ampdu_params *params) +static int ath12k_mac_ampdu_action(struct ieee80211_hw *hw, + struct ieee80211_vif 
*vif, + struct ieee80211_ampdu_params *params, + u8 link_id) { - struct ath12k *ar = arvif->ar; + struct ath12k *ar; int ret = -EINVAL; - lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + lockdep_assert_wiphy(hw->wiphy); + + ar = ath12k_get_ar_by_vif(hw, vif, link_id); + if (!ar) + return -EINVAL; switch (params->action) { case IEEE80211_AMPDU_RX_START: - ret = ath12k_dp_rx_ampdu_start(ar, params); + ret = ath12k_dp_rx_ampdu_start(ar, params, link_id); break; case IEEE80211_AMPDU_RX_STOP: - ret = ath12k_dp_rx_ampdu_stop(ar, params); + ret = ath12k_dp_rx_ampdu_stop(ar, params, link_id); break; case IEEE80211_AMPDU_TX_START: case IEEE80211_AMPDU_TX_STOP_CONT: @@ -8315,6 +8326,10 @@ static int ath12k_mac_ampdu_action(struct ath12k_link_vif *arvif, break; } + if (ret) + ath12k_warn(ar->ab, "unable to perform ampdu action %d for vif %pM link %u ret %d\n", + params->action, vif->addr, link_id, ret); + return ret; } @@ -8322,27 +8337,24 @@ static int ath12k_mac_op_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_ampdu_params *params) { - struct ath12k_hw *ah = ath12k_hw_to_ah(hw); - struct ath12k *ar; - struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); - struct ath12k_link_vif *arvif; + struct ieee80211_sta *sta = params->sta; + struct ath12k_sta *ahsta = ath12k_sta_to_ahsta(sta); + unsigned long links_map = ahsta->links_map; int ret = -EINVAL; + u8 link_id; lockdep_assert_wiphy(hw->wiphy); - ar = ath12k_get_ar_by_vif(hw, vif); - if (!ar) - return -EINVAL; + if (WARN_ON(!links_map)) + return ret; - ar = ath12k_ah_to_ar(ah, 0); - arvif = &ahvif->deflink; + for_each_set_bit(link_id, &links_map, IEEE80211_MLD_MAX_NUM_LINKS) { + ret = ath12k_mac_ampdu_action(hw, vif, params, link_id); + if (ret) + return ret; + } - ret = ath12k_mac_ampdu_action(arvif, params); - if (ret) - ath12k_warn(ar->ab, "pdev idx %d unable to perform ampdu action %d ret %d\n", - ar->pdev_idx, params->action, ret); - - return ret; + return 0; } static int 
ath12k_mac_op_add_chanctx(struct ieee80211_hw *hw, From 85edf16384d12db938a09458d89662cdff87068e Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Tue, 26 Nov 2024 19:11:34 +0200 Subject: [PATCH 0066/1386] wifi: ath12k: ath12k_mac_station_add(): fix potential rx_stats leak If peer creation fails ar->rx_stats needs to be freed in error handling. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-6-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ec8209f17261..0b9dd50959e6 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5308,6 +5308,8 @@ static int ath12k_mac_station_add(struct ath12k *ar, free_peer: ath12k_peer_delete(ar, arvif->vdev_id, arsta->addr); + kfree(arsta->rx_stats); + arsta->rx_stats = NULL; dec_num_station: ath12k_mac_dec_num_stations(arvif, arsta); exit: From 90570ba4610bdb1db39ef45f2b271a9f89680a9d Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 26 Nov 2024 19:11:35 +0200 Subject: [PATCH 0067/1386] wifi: ath12k: do not return invalid link id for scan link When a scan request is received, driver selects a link id for which the arvif can be mapped. Same link is also used for getting the link conf address. Currently, we return 0 as link id for a non ML vif, which is correct since that is the default link id. Also when any of the link vif is active and the scan request is for a channel in the active link we return its link id. But, when we don't hit both of the above cases (i.e not a ML vif or no active link vif for the channel is present) we currently return 0 as the link id. 
But the problem is that this might not always work, e.g., when only one link (e.g. linkid = 1) is added to the vif, we won't find any link conf for link id 0 in the vif, resulting in scan failure. During AP bringup, such scan failure causes bringup issues. Hence avoid sending link id 0 as default. Rather use a default link for scan and default link address for the same. This scan vdev will either be deleted if another scan is requested on same vif or when AP is brought up on same link or during interface cleanup. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Sriram R Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-7-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.h | 3 +- drivers/net/wireless/ath/ath12k/mac.c | 65 +++++++++++++++++++------- drivers/net/wireless/ath/ath12k/mac.h | 6 +++ 3 files changed, 56 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index e246e3d3c162..f4a710d49584 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -322,10 +322,11 @@ struct ath12k_vif { bool ps; struct ath12k_link_vif deflink; - struct ath12k_link_vif __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ath12k_link_vif __rcu *link[ATH12K_NUM_MAX_LINKS]; struct ath12k_vif_cache *cache[IEEE80211_MLD_MAX_NUM_LINKS]; /* indicates bitmap of link vif created in FW */ u16 links_map; + u8 last_scan_link; /* Must be last - ends in a flexible-array member. 
* diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 0b9dd50959e6..5aff5ba7e3b4 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3792,6 +3792,9 @@ static void ath12k_ahvif_put_link_key_cache(struct ath12k_vif_cache *cache) static void ath12k_ahvif_put_link_cache(struct ath12k_vif *ahvif, u8 link_id) { + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return; + ath12k_ahvif_put_link_key_cache(ahvif->cache[link_id]); kfree(ahvif->cache[link_id]); ahvif->cache[link_id] = NULL; @@ -3852,9 +3855,9 @@ static struct ath12k_link_vif *ath12k_mac_assign_link_vif(struct ath12k_hw *ah, arvif = &ahvif->deflink; } else { /* If this is the first link arvif being created for an ML VIF - * use the preallocated deflink memory + * use the preallocated deflink memory except for scan arvifs */ - if (!ahvif->links_map) { + if (!ahvif->links_map && link_id != ATH12K_DEFAULT_SCAN_LINK) { arvif = &ahvif->deflink; } else { arvif = (struct ath12k_link_vif *) @@ -4154,10 +4157,10 @@ ath12k_mac_find_link_id_by_ar(struct ath12k_vif *ahvif, struct ath12k *ar) return link_id; } - /* input ar is not assigned to any of the links, use link id - * 0 for scan vdev creation. + /* input ar is not assigned to any of the links of ML VIF, use scan + * link (15) for scan vdev creation. */ - return 0; + return ATH12K_DEFAULT_SCAN_LINK; } static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, @@ -4188,7 +4191,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, /* check if any of the links of ML VIF is already started on * radio(ar) correpsondig to given scan frequency and use it, - * if not use deflink(link 0) for scan purpose. + * if not use scan link (link 15) for scan purpose. 
*/ link_id = ath12k_mac_find_link_id_by_ar(ahvif, ar); arvif = ath12k_mac_assign_link_vif(ah, vif, link_id); @@ -4298,6 +4301,13 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, spin_unlock_bh(&ar->data_lock); } + /* As per cfg80211/mac80211 scan design, it allows only one + * scan at a time. Hence last_scan link id is used for + * tracking the link id on which the scan is been done on + * this vif. + */ + ahvif->last_scan_link = arvif->link_id; + /* Add a margin to account for event/command processing */ ieee80211_queue_delayed_work(ath12k_ar_to_hw(ar), &ar->scan.timeout, msecs_to_jiffies(arg->max_scan_time + @@ -4317,14 +4327,14 @@ static void ath12k_mac_op_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); + u16 link_id = ahvif->last_scan_link; struct ath12k_link_vif *arvif; struct ath12k *ar; lockdep_assert_wiphy(hw->wiphy); - arvif = &ahvif->deflink; - - if (!arvif->is_created) + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + if (!arvif || !arvif->is_created) return; ar = arvif->ar; @@ -7688,10 +7698,19 @@ int ath12k_mac_vdev_create(struct ath12k *ar, struct ath12k_link_vif *arvif) u16 nss; int i; int ret, vdev_id; + u8 link_id; lockdep_assert_wiphy(hw->wiphy); - link_conf = wiphy_dereference(hw->wiphy, vif->link_conf[arvif->link_id]); + /* If no link is active and scan vdev is requested + * use a default link conf for scan address purpose. 
+ */ + if (arvif->link_id == ATH12K_DEFAULT_SCAN_LINK && vif->valid_links) + link_id = ffs(vif->valid_links) - 1; + else + link_id = arvif->link_id; + + link_conf = wiphy_dereference(hw->wiphy, vif->link_conf[link_id]); if (!link_conf) { ath12k_warn(ar->ab, "unable to access bss link conf in vdev create for vif %pM link %u\n", vif->addr, arvif->link_id); @@ -7971,7 +7990,9 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, struct ath12k_link_vif *arvif, struct ieee80211_chanctx_conf *ctx) { - struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); + struct ath12k_vif *ahvif = arvif->ahvif; + struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); + struct ath12k_link_vif *scan_arvif; struct ath12k_hw *ah = hw->priv; struct ath12k *ar; struct ath12k_base *ab; @@ -7990,6 +8011,19 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, if (!ar) return NULL; + /* cleanup the scan vdev if we are done scan on that ar + * and now we want to create for actual usage. + */ + if (ieee80211_vif_is_mld(vif)) { + scan_arvif = wiphy_dereference(hw->wiphy, + ahvif->link[ATH12K_DEFAULT_SCAN_LINK]); + if (scan_arvif && scan_arvif->ar == ar) { + ar->scan.vdev_id = -1; + ath12k_mac_remove_link_interface(hw, scan_arvif); + ath12k_mac_unassign_link_vif(scan_arvif); + } + } + if (arvif->ar) { /* This is not expected really */ if (WARN_ON(!arvif->is_created)) { @@ -8194,7 +8228,7 @@ static void ath12k_mac_op_remove_interface(struct ieee80211_hw *hw, lockdep_assert_wiphy(hw->wiphy); - for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { + for (link_id = 0; link_id < ATH12K_NUM_MAX_LINKS; link_id++) { /* if we cached some config but never received assign chanctx, * free the allocated cache. 
*/ @@ -9042,11 +9076,8 @@ ath12k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, return -ENOMEM; } - if (!arvif->is_started) { - ar = ath12k_mac_assign_vif_to_vdev(hw, arvif, ctx); - if (!ar) - return -EINVAL; - } else { + ar = ath12k_mac_assign_vif_to_vdev(hw, arvif, ctx); + if (!ar) { ath12k_warn(arvif->ar->ab, "failed to assign chanctx for vif %pM link id %u link vif is already started", vif->addr, link_id); return -EINVAL; diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index c13630ee479a..abdc9a6c0740 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -44,6 +44,12 @@ struct ath12k_generic_iter { #define ATH12K_DEFAULT_LINK_ID 0 #define ATH12K_INVALID_LINK_ID 255 +/* Default link after the IEEE802.11 defined Max link id limit + * for driver usage purpose. + */ +#define ATH12K_DEFAULT_SCAN_LINK IEEE80211_MLD_MAX_NUM_LINKS +#define ATH12K_NUM_MAX_LINKS (IEEE80211_MLD_MAX_NUM_LINKS + 1) + enum ath12k_supported_bw { ATH12K_BW_20 = 0, ATH12K_BW_40 = 1, From 1833a2ce5d7df2b064e491d3e912da9fa0b85eb9 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 26 Nov 2024 19:11:36 +0200 Subject: [PATCH 0068/1386] wifi: ath12k: ath12k_bss_assoc(): MLO support Currently, the ath12k_bss_assoc() function handles only deflink station connections. To support multi-link station connections, make the necessary changes to retrieve the required information from the link-level members. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-8-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 5aff5ba7e3b4..2bb5d79c66b1 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3133,7 +3133,9 @@ static void ath12k_bss_assoc(struct ath12k *ar, struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); struct ath12k_wmi_vdev_up_params params = {}; - struct ath12k_wmi_peer_assoc_arg peer_arg; + struct ath12k_wmi_peer_assoc_arg peer_arg = {}; + struct ieee80211_link_sta *link_sta; + u8 link_id = bss_conf->link_id; struct ath12k_link_sta *arsta; struct ieee80211_sta *ap_sta; struct ath12k_sta *ahsta; @@ -3143,27 +3145,38 @@ static void ath12k_bss_assoc(struct ath12k *ar, lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); - ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %i assoc bssid %pM aid %d\n", - arvif->vdev_id, arvif->bssid, ahvif->aid); + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, + "mac vdev %i link id %u assoc bssid %pM aid %d\n", + arvif->vdev_id, link_id, arvif->bssid, ahvif->aid); rcu_read_lock(); - ap_sta = ieee80211_find_sta(vif, bss_conf->bssid); + /* During ML connection, cfg.ap_addr has the MLD address. For + * non-ML connection, it has the BSSID. 
+ */ + ap_sta = ieee80211_find_sta(vif, vif->cfg.ap_addr); if (!ap_sta) { ath12k_warn(ar->ab, "failed to find station entry for bss %pM vdev %i\n", - bss_conf->bssid, arvif->vdev_id); + vif->cfg.ap_addr, arvif->vdev_id); rcu_read_unlock(); return; } ahsta = ath12k_sta_to_ahsta(ap_sta); - arsta = &ahsta->deflink; + arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, + ahsta->link[link_id]); if (WARN_ON(!arsta)) { rcu_read_unlock(); return; } + link_sta = ath12k_mac_get_link_sta(arsta); + if (WARN_ON(!link_sta)) { + rcu_read_unlock(); + return; + } + ath12k_peer_assoc_prepare(ar, arvif, arsta, &peer_arg, false); rcu_read_unlock(); @@ -3182,8 +3195,7 @@ static void ath12k_bss_assoc(struct ath12k *ar, } ret = ath12k_setup_peer_smps(ar, arvif, bss_conf->bssid, - &ap_sta->deflink.ht_cap, - &ap_sta->deflink.he_6ghz_capa); + &link_sta->ht_cap, &link_sta->he_6ghz_capa); if (ret) { ath12k_warn(ar->ab, "failed to setup peer SMPS for vdev %d: %d\n", arvif->vdev_id, ret); From aa80f12f3bedc2d73e4cc43554aee44c277cc938 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Tue, 26 Nov 2024 19:11:37 +0200 Subject: [PATCH 0069/1386] wifi: ath12k: defer vdev creation for MLO Currently for single radio devices (ah->num_radio == 1) ath12k_mac_op_add_interface() creates vdev and later hw scan and assign_vif_chanctx uses the same. For MLO, vdev create request should carry ML address which will not be known during ath12k_mac_op_add_interface() as vif will be marked as ML only after links are added to it. If hw scan is requested, the vdev will be deleted post hw scan and subsequent assign_vif_chanctx call will create new vdev with ML address. But in certain cases assign_vif_chanctx could be called without any prior hw scan request and reusing the previously created vdev causes a non-ML vdev to be used for an ML vif and firmware operates the vdev in non-ML mode. Fix this by deferring vdev creation for interface until hw scan or assign_vif_chanctx request is received from mac80211. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-9-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 2bb5d79c66b1..ee804d4a3fd8 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8131,14 +8131,9 @@ static int ath12k_mac_op_add_interface(struct ieee80211_hw *hw, vif->hw_queue[i] = ATH12K_HW_DEFAULT_QUEUE; vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; - /* For non-ml vifs, vif->addr is the actual vdev address but for - * ML vif link(link BSSID) address is the vdev address and it can be a - * different one from vif->addr (i.e ML address). - * Defer vdev creation until assign_chanctx or hw_scan is initiated as driver + /* Defer vdev creation until assign_chanctx or hw_scan is initiated as driver * will not know if this interface is an ML vif at this point. */ - ath12k_mac_assign_vif_to_vdev(hw, arvif, NULL); - return 0; } From ad969bc9ee73fa9eda6223be2a7c0c6caf937d71 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 26 Nov 2024 19:11:38 +0200 Subject: [PATCH 0070/1386] wifi: ath12k: ath12k_mac_op_set_key(): fix uninitialized symbol 'ret' Dan reported that in some cases the ret variable could be uninitialized. Fix that by removing the out label entirely and returning zero explicitly on successful cases. Also remove the unnecessary else branches to follow the style used in ath12k more closely; this also makes the error handling easier to see. No functional changes. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/all/7e7afd00-ad84-4744-8d94-416bab7e7dd9@stanley.mountain/ Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-10-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 59 +++++++++++++++------------ 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ee804d4a3fd8..ae8a253c466c 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4657,6 +4657,7 @@ static int ath12k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (sta) { ahsta = ath12k_sta_to_ahsta(sta); + /* For an ML STA Pairwise key is same for all associated link Stations, * hence do set key for all link STAs which are active. 
*/ @@ -4679,41 +4680,47 @@ static int ath12k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (ret) break; } - } else { - arsta = &ahsta->deflink; - arvif = arsta->arvif; - if (WARN_ON(!arvif)) { - ret = -EINVAL; - goto out; - } - ret = ath12k_mac_set_key(arvif->ar, cmd, arvif, arsta, key); - } - } else { - if (key->link_id >= 0 && key->link_id < IEEE80211_MLD_MAX_NUM_LINKS) { - link_id = key->link_id; - arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - } else { - link_id = 0; - arvif = &ahvif->deflink; + return 0; } - if (!arvif || !arvif->is_created) { - cache = ath12k_ahvif_get_link_cache(ahvif, link_id); - if (!cache) - return -ENOSPC; - - ret = ath12k_mac_update_key_cache(cache, cmd, sta, key); + arsta = &ahsta->deflink; + arvif = arsta->arvif; + if (WARN_ON(!arvif)) + return -EINVAL; + ret = ath12k_mac_set_key(arvif->ar, cmd, arvif, arsta, key); + if (ret) return ret; - } - ret = ath12k_mac_set_key(arvif->ar, cmd, arvif, NULL, key); + return 0; } -out: + if (key->link_id >= 0 && key->link_id < IEEE80211_MLD_MAX_NUM_LINKS) { + link_id = key->link_id; + arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); + } else { + link_id = 0; + arvif = &ahvif->deflink; + } - return ret; + if (!arvif || !arvif->is_created) { + cache = ath12k_ahvif_get_link_cache(ahvif, link_id); + if (!cache) + return -ENOSPC; + + ret = ath12k_mac_update_key_cache(cache, cmd, sta, key); + if (ret) + return ret; + + return 0; + } + + ret = ath12k_mac_set_key(arvif->ar, cmd, arvif, NULL, key); + if (ret) + return ret; + + return 0; } static int From 8c2143702d0719a0357600bca0236900781ffc78 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 26 Nov 2024 19:11:39 +0200 Subject: [PATCH 0071/1386] wifi: ath12k: ath12k_mac_op_sta_rc_update(): use mac80211 provided link id There's a todo comment to use mac80211 provided link id. As mac80211 now provides it use it in ath12k and remove the comment. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241126171139.2350704-11-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/mac.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index ae8a253c466c..8d4207707867 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5696,10 +5696,10 @@ out: return ret; } -static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_link_sta *link_sta, - u32 changed) +static void ath12k_mac_op_link_sta_rc_update(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + u32 changed) { struct ieee80211_sta *sta = link_sta->sta; struct ath12k *ar; @@ -5710,27 +5710,23 @@ static void ath12k_mac_op_sta_rc_update(struct ieee80211_hw *hw, struct ath12k_link_vif *arvif; struct ath12k_peer *peer; u32 bw, smps; - /* TODO: use proper link id once link sta specific rc update support is - * available in mac80211. 
- */ - u8 link_id = ATH12K_DEFAULT_LINK_ID; rcu_read_lock(); - arvif = rcu_dereference(ahvif->link[link_id]); + arvif = rcu_dereference(ahvif->link[link_sta->link_id]); if (!arvif) { ath12k_hw_warn(ah, "mac sta rc update failed to fetch link vif on link id %u for peer %pM\n", - link_id, sta->addr); + link_sta->link_id, sta->addr); rcu_read_unlock(); return; } ar = arvif->ar; - arsta = rcu_dereference(ahsta->link[link_id]); + arsta = rcu_dereference(ahsta->link[link_sta->link_id]); if (!arsta) { rcu_read_unlock(); ath12k_warn(ar->ab, "mac sta rc update failed to fetch link sta on link id %u for peer %pM\n", - link_id, sta->addr); + link_sta->link_id, sta->addr); return; } spin_lock_bh(&ar->ab->base_lock); @@ -10165,7 +10161,7 @@ static const struct ieee80211_ops ath12k_ops = { .set_rekey_data = ath12k_mac_op_set_rekey_data, .sta_state = ath12k_mac_op_sta_state, .sta_set_txpwr = ath12k_mac_op_sta_set_txpwr, - .link_sta_rc_update = ath12k_mac_op_sta_rc_update, + .link_sta_rc_update = ath12k_mac_op_link_sta_rc_update, .conf_tx = ath12k_mac_op_conf_tx, .set_antenna = ath12k_mac_op_set_antenna, .get_antenna = ath12k_mac_op_get_antenna, From 130727c37b7e2495db10535f6ef00095783ad5a9 Mon Sep 17 00:00:00 2001 From: Pierre-Henry Moussay Date: Mon, 30 Sep 2024 10:54:30 +0100 Subject: [PATCH 0072/1386] dt-bindings: can: mpfs: add PIC64GX CAN compatibility PIC64GX CAN is compatible with the MPFS CAN, only add a fallback Signed-off-by: Pierre-Henry Moussay Acked-by: Conor Dooley Reviewed-by: Marc Kleine-Budde Link: https://patch.msgid.link/20240930095449.1813195-2-pierre-henry.moussay@microchip.com Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/microchip,mpfs-can.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml index 01e4d4a54df6..1219c5cb601f 100644 --- 
a/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml +++ b/Documentation/devicetree/bindings/net/can/microchip,mpfs-can.yaml @@ -15,7 +15,11 @@ allOf: properties: compatible: - const: microchip,mpfs-can + oneOf: + - items: + - const: microchip,pic64gx-can + - const: microchip,mpfs-can + - const: microchip,mpfs-can reg: maxItems: 1 From 79195755cdebffb085bd2b6c0767272ef39f53bb Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Thu, 28 Nov 2024 09:29:21 +0100 Subject: [PATCH 0073/1386] dt-bindings: can: convert tcan4x5x.txt to DT schema Convert binding doc tcan4x5x.txt to yaml. Added during conversion, required clock-names cclk. Signed-off-by: Sean Nyekjaer Acked-by: Conor Dooley Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241128-convert-tcan-v3-1-bf2d8005bab5@geanix.com Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/tcan4x5x.txt | 48 ----- .../bindings/net/can/ti,tcan4x5x.yaml | 191 ++++++++++++++++++ 2 files changed, 191 insertions(+), 48 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/can/tcan4x5x.txt create mode 100644 Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt deleted file mode 100644 index 20c0572c9853..000000000000 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ /dev/null @@ -1,48 +0,0 @@ -Texas Instruments TCAN4x5x CAN Controller -================================================ - -This file provides device node information for the TCAN4x5x interface contains. - -Required properties: - - compatible: - "ti,tcan4552", "ti,tcan4x5x" - "ti,tcan4553", "ti,tcan4x5x" or - "ti,tcan4x5x" - - reg: 0 - - #address-cells: 1 - - #size-cells: 0 - - spi-max-frequency: Maximum frequency of the SPI bus the chip can - operate at should be less than or equal to 18 MHz. 
- - interrupt-parent: the phandle to the interrupt controller which provides - the interrupt. - - interrupts: interrupt specification for data-ready. - -See Documentation/devicetree/bindings/net/can/bosch,m_can.yaml for additional -required property details. - -Optional properties: - - reset-gpios: Hardwired output GPIO. If not defined then software - reset. - - device-state-gpios: Input GPIO that indicates if the device is in - a sleep state or if the device is active. Not - available with tcan4552/4553. - - device-wake-gpios: Wake up GPIO to wake up the TCAN device. Not - available with tcan4552/4553. - - wakeup-source: Leave the chip running when suspended, and configure - the RX interrupt to wake up the device. - -Example: -tcan4x5x: tcan4x5x@0 { - compatible = "ti,tcan4x5x"; - reg = <0>; - #address-cells = <1>; - #size-cells = <1>; - spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; - interrupt-parent = <&gpio1>; - interrupts = <14 IRQ_TYPE_LEVEL_LOW>; - device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; - device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; - wakeup-source; -}; diff --git a/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml new file mode 100644 index 000000000000..afd9d315dea2 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml @@ -0,0 +1,191 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/ti,tcan4x5x.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Texas Instruments TCAN4x5x CAN Controller + +maintainers: + - Marc Kleine-Budde + +properties: + compatible: + oneOf: + - items: + - enum: + - ti,tcan4552 + - ti,tcan4553 + - const: ti,tcan4x5x + - const: ti,tcan4x5x + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + description: The GPIO parent interrupt. 
+ + clocks: + maxItems: 1 + + clock-names: + items: + - const: cclk + + reset-gpios: + description: Hardwired output GPIO. If not defined then software reset. + maxItems: 1 + + device-state-gpios: + description: + Input GPIO that indicates if the device is in a sleep state or if the + device is active. Not available with tcan4552/4553. + maxItems: 1 + + device-wake-gpios: + description: + Wake up GPIO to wake up the TCAN device. + Not available with tcan4552/4553. + maxItems: 1 + + bosch,mram-cfg: + description: | + Message RAM configuration data. + Multiple M_CAN instances can share the same Message RAM + and each element(e.g Rx FIFO or Tx Buffer and etc) number + in Message RAM is also configurable, so this property is + telling driver how the shared or private Message RAM are + used by this M_CAN controller. + + The format should be as follows: + + The 'offset' is an address offset of the Message RAM where + the following elements start from. This is usually set to + 0x0 if you're using a private Message RAM. The remain cells + are used to specify how many elements are used for each FIFO/Buffer. + + M_CAN includes the following elements according to user manual: + 11-bit Filter 0-128 elements / 0-128 words + 29-bit Filter 0-64 elements / 0-128 words + Rx FIFO 0 0-64 elements / 0-1152 words + Rx FIFO 1 0-64 elements / 0-1152 words + Rx Buffers 0-64 elements / 0-1152 words + Tx Event FIFO 0-32 elements / 0-64 words + Tx Buffers 0-32 elements / 0-576 words + + Please refer to 2.4.1 Message RAM Configuration in Bosch + M_CAN user manual for details. + $ref: /schemas/types.yaml#/definitions/int32-array + items: + - description: The 'offset' is an address offset of the Message RAM where + the following elements start from. This is usually set to 0x0 if + you're using a private Message RAM. 
+ default: 0 + - description: 11-bit Filter 0-128 elements / 0-128 words + minimum: 0 + maximum: 128 + - description: 29-bit Filter 0-64 elements / 0-128 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 0 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 1 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx Buffers 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Tx Event FIFO 0-32 elements / 0-64 words + minimum: 0 + maximum: 32 + - description: Tx Buffers 0-32 elements / 0-576 words + minimum: 0 + maximum: 32 + minItems: 1 + + spi-max-frequency: + description: + Must be half or less of "clocks" frequency. + maximum: 18000000 + + wakeup-source: + $ref: /schemas/types.yaml#/definitions/flag + description: + Enable CAN remote wakeup. + +allOf: + - $ref: can-controller.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# + - if: + properties: + compatible: + contains: + enum: + - ti,tcan4552 + - ti,tcan4553 + then: + properties: + device-state-gpios: false + device-wake-gpios: false + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - bosch,mram-cfg + +unevaluatedProperties: false + +examples: + - | + #include + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + can@0 { + compatible = "ti,tcan4x5x"; + reg = <0>; + clocks = <&can0_osc>; + clock-names = "cclk"; + pinctrl-names = "default"; + pinctrl-0 = <&can0_pins>; + spi-max-frequency = <10000000>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; + interrupt-parent = <&gpio1>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; + device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; + device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; + wakeup-source; + }; + }; + - | + #include + #include + + spi { + #address-cells = <1>; + #size-cells = <0>; + + can@0 { + compatible = "ti,tcan4552", "ti,tcan4x5x"; + reg = <0>; + clocks = <&can0_osc>; + clock-names = "cclk"; + 
pinctrl-names = "default"; + pinctrl-0 = <&can0_pins>; + spi-max-frequency = <10000000>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; + interrupt-parent = <&gpio1>; + interrupts = <14 IRQ_TYPE_LEVEL_LOW>; + reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; + wakeup-source; + }; + }; From 6495567981be6f91eccb48e058ca88dd7acad181 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Thu, 14 Nov 2024 10:14:49 +0100 Subject: [PATCH 0074/1386] dt-bindings: can: tcan4x5x: Document the ti,nwkrq-voltage-vio option The nWKRQ pin supports an output voltage of either the internal reference voltage (3.6V) or the reference voltage of the digital interface 0-6V (VIO). Add the devicetree option ti,nwkrq-voltage-vio to set it to VIO. If this property is omitted the reset default, the internal reference voltage, is used. Signed-off-by: Sean Nyekjaer Reviewed-by: Rob Herring (Arm) Reviewed-by: Marc Kleine-Budde Link: https://patch.msgid.link/20241114-tcan-wkrqv-v5-1-a2d50833ed71@geanix.com Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/ti,tcan4x5x.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml index afd9d315dea2..384e15da2713 100644 --- a/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml +++ b/Documentation/devicetree/bindings/net/can/ti,tcan4x5x.yaml @@ -110,6 +110,13 @@ properties: Must be half or less of "clocks" frequency. maximum: 18000000 + ti,nwkrq-voltage-vio: + type: boolean + description: + nWKRQ Pin GPO buffer voltage configuration. + Set nWKRQ to use VIO voltage rail. + When not set nWKRQ will use internal voltage rail. 
+ wakeup-source: $ref: /schemas/types.yaml#/definitions/flag description: @@ -163,6 +170,7 @@ examples: device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>; reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>; + ti,nwkrq-voltage-vio; wakeup-source; }; }; From fc38e9339c47d704934bc74e55c331f0d2d88583 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Fri, 29 Nov 2024 13:20:33 +0200 Subject: [PATCH 0075/1386] wifi: ath12k: Refactor core startup In the upcoming hardware device group abstraction radios across different devices can be grouped together to support multi-link operation and register as a device group to mac80211. Currently, ath12k_mac_allocate() and ath12k_mac_register() are part of ath12k_core_start() and ath12k_core_pdev_create() respectively and are based on per device (struct ath12k_base). These APIs can be decoupled and moved out to ath12k_core_qmi_firmware_ready() itself. This refactor is helpful for device group abstraction when mac80211 allocate and register will be changed from per device (struct ath12k_base) to per device group (struct ath12k_hw_group). 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241128165026.2618331-2-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.c | 63 ++++++++++++-------------- drivers/net/wireless/ath/ath12k/dp.c | 7 +++ drivers/net/wireless/ath/ath12k/pci.c | 9 ++++ drivers/net/wireless/ath/ath12k/qmi.c | 4 ++ 4 files changed, 50 insertions(+), 33 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 263a7c789122..5313b0267307 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -604,9 +604,10 @@ static void ath12k_core_stop(struct ath12k_base *ab) ath12k_acpi_stop(ab); + ath12k_dp_rx_pdev_reo_cleanup(ab); ath12k_hif_stop(ab); ath12k_wmi_detach(ab); - ath12k_dp_rx_pdev_reo_cleanup(ab); + ath12k_dp_free(ab); /* De-Init of components as needed */ } @@ -708,7 +709,7 @@ err_qmi_deinit: static void ath12k_core_soc_destroy(struct ath12k_base *ab) { - ath12k_dp_free(ab); + ath12k_hif_power_down(ab, false); ath12k_reg_free(ab); ath12k_debugfs_soc_destroy(ab); ath12k_qmi_deinit_service(ab); @@ -718,30 +719,17 @@ static int ath12k_core_pdev_create(struct ath12k_base *ab) { int ret; - ret = ath12k_mac_register(ab); - if (ret) { - ath12k_err(ab, "failed register the radio with mac80211: %d\n", ret); - return ret; - } - ret = ath12k_dp_pdev_alloc(ab); if (ret) { ath12k_err(ab, "failed to attach DP pdev: %d\n", ret); - goto err_mac_unregister; + return ret; } return 0; - -err_mac_unregister: - ath12k_mac_unregister(ab); - - return ret; } static void ath12k_core_pdev_destroy(struct ath12k_base *ab) { - ath12k_mac_unregister(ab); - ath12k_hif_irq_disable(ab); ath12k_dp_pdev_free(ab); } @@ -799,19 +787,12 @@ static int ath12k_core_start(struct ath12k_base *ab, 
goto err_hif_stop; } - ret = ath12k_mac_allocate(ab); - if (ret) { - ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n", - ret); - goto err_hif_stop; - } - ath12k_dp_cc_config(ab); ret = ath12k_dp_rx_pdev_reo_setup(ab); if (ret) { ath12k_err(ab, "failed to initialize reo destination rings: %d\n", ret); - goto err_mac_destroy; + goto err_hif_stop; } ath12k_dp_hal_rx_desc_init(ab); @@ -854,8 +835,6 @@ static int ath12k_core_start(struct ath12k_base *ab, err_reo_cleanup: ath12k_dp_rx_pdev_reo_cleanup(ab); -err_mac_destroy: - ath12k_mac_destroy(ab); err_hif_stop: ath12k_hif_stop(ab); err_wmi_detach: @@ -909,28 +888,46 @@ int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab) goto err_dp_free; } + ret = ath12k_mac_allocate(ab); + if (ret) { + ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n", + ret); + goto err_core_stop; + } + + ret = ath12k_mac_register(ab); + if (ret) { + ath12k_err(ab, "failed register the radio with mac80211: %d\n", ret); + goto err_mac_destroy; + } + ret = ath12k_core_pdev_create(ab); if (ret) { ath12k_err(ab, "failed to create pdev core: %d\n", ret); - goto err_core_stop; + goto err_mac_unregister; } + ath12k_hif_irq_enable(ab); ret = ath12k_core_rfkill_config(ab); if (ret && ret != -EOPNOTSUPP) { ath12k_err(ab, "failed to config rfkill: %d\n", ret); - goto err_core_pdev_destroy; + goto err_hif_irq_disable; } mutex_unlock(&ab->core_lock); return 0; -err_core_pdev_destroy: +err_hif_irq_disable: + ath12k_hif_irq_disable(ab); ath12k_core_pdev_destroy(ab); +err_mac_unregister: + ath12k_mac_unregister(ab); +err_mac_destroy: + ath12k_mac_destroy(ab); err_core_stop: ath12k_core_stop(ab); - ath12k_mac_destroy(ab); err_dp_free: ath12k_dp_free(ab); mutex_unlock(&ab->core_lock); @@ -1270,15 +1267,15 @@ void ath12k_core_deinit(struct ath12k_base *ab) mutex_lock(&ab->core_lock); + ath12k_hif_irq_disable(ab); ath12k_core_pdev_destroy(ab); + ath12k_mac_unregister(ab); + ath12k_mac_destroy(ab); ath12k_core_stop(ab); 
mutex_unlock(&ab->core_lock); - ath12k_hif_power_down(ab, false); - ath12k_mac_destroy(ab); ath12k_core_soc_destroy(ab); - ath12k_fw_unmap(ab); } void ath12k_core_free(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 328be2c635d6..ce823b1c175f 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -982,6 +982,9 @@ void ath12k_dp_pdev_free(struct ath12k_base *ab) { int i; + if (!ab->mon_reap_timer.function) + return; + del_timer_sync(&ab->mon_reap_timer); for (i = 0; i < ab->num_radios; i++) @@ -1289,6 +1292,9 @@ void ath12k_dp_free(struct ath12k_base *ab) struct ath12k_dp *dp = &ab->dp; int i; + if (!dp->ab) + return; + ath12k_dp_link_desc_cleanup(ab, dp->link_desc_banks, HAL_WBM_IDLE_LINK, &dp->wbm_idle_ring); @@ -1306,6 +1312,7 @@ void ath12k_dp_free(struct ath12k_base *ab) ath12k_dp_rx_free(ab); /* Deinit any SOC level resource */ + dp->ab = NULL; } void ath12k_dp_cc_config(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index cf907550e6a4..8dbc7377ae7c 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1123,6 +1123,9 @@ void ath12k_pci_ext_irq_enable(struct ath12k_base *ab) void ath12k_pci_ext_irq_disable(struct ath12k_base *ab) { + if (!test_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags)) + return; + __ath12k_pci_ext_irq_disable(ab); ath12k_pci_sync_ext_irqs(ab); } @@ -1147,6 +1150,11 @@ int ath12k_pci_hif_resume(struct ath12k_base *ab) void ath12k_pci_stop(struct ath12k_base *ab) { + struct ath12k_pci *ab_pci = ath12k_pci_priv(ab); + + if (!test_bit(ATH12K_PCI_FLAG_INIT_DONE, &ab_pci->flags)) + return; + ath12k_pci_ce_irq_disable_sync(ab); ath12k_ce_cleanup_pipes(ab); } @@ -1725,6 +1733,7 @@ static void ath12k_pci_remove(struct pci_dev *pdev) cancel_work_sync(&ab->reset_work); cancel_work_sync(&ab->dump_work); ath12k_core_deinit(ab); + 
ath12k_fw_unmap(ab); qmi_fail: ath12k_mhi_unregister(ab_pci); diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index d2d9d03c7a28..f5388eae01dc 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3402,11 +3402,15 @@ int ath12k_qmi_init_service(struct ath12k_base *ab) void ath12k_qmi_deinit_service(struct ath12k_base *ab) { + if (!ab->qmi.ab) + return; + qmi_handle_release(&ab->qmi.handle); cancel_work_sync(&ab->qmi.event_work); destroy_workqueue(ab->qmi.event_wq); ath12k_qmi_m3_free(ab); ath12k_qmi_free_target_mem_chunk(ab); + ab->qmi.ab = NULL; } void ath12k_qmi_free_resource(struct ath12k_base *ab) From 016abac20b832ce2e2b8afbe2c9ef8158ad1cfe8 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Fri, 29 Nov 2024 13:20:33 +0200 Subject: [PATCH 0076/1386] wifi: ath12k: add ath12k_ab_to_ah() and ath12k_ab_set_ah() Currently, one or more ath12k_hw is part of a device (struct ath12k_base) but in future, it would be part of device group abstraction (struct ath12k_hw_group), i.e., when multiple radios (ar) across different devices can be combined together in a device group (struct ath12k_hw_group). In order to facilitate the above transition, introduce helpers ath12k_ab_to_ah() and ath12k_ab_set_ah() to get and set values of ath12k_hw respectively. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241128165026.2618331-3-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.c | 8 ++++---- drivers/net/wireless/ath/ath12k/core.h | 11 +++++++++++ drivers/net/wireless/ath/ath12k/mac.c | 23 +++++++++++++---------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 5313b0267307..14d0aa26d850 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -986,7 +986,7 @@ static void ath12k_rfkill_work(struct work_struct *work) spin_unlock_bh(&ab->base_lock); for (i = 0; i < ab->num_hw; i++) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -1038,7 +1038,7 @@ static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab) set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); for (i = 0; i < ab->num_hw; i++) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); if (!ah || ah->state == ATH12K_HW_STATE_OFF) continue; @@ -1077,7 +1077,7 @@ static void ath12k_core_post_reconfigure_recovery(struct ath12k_base *ab) int i, j; for (i = 0; i < ab->num_hw; i++) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); if (!ah || ah->state == ATH12K_HW_STATE_OFF) continue; @@ -1131,7 +1131,7 @@ static void ath12k_core_restart(struct work_struct *work) if (ab->is_reset) { for (i = 0; i < ab->num_hw; i++) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); ieee80211_restart_hw(ah->hw); } } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index f4a710d49584..ba52be1cfd0f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -1160,4 +1160,15 @@ static inline struct 
ieee80211_hw *ath12k_ar_to_hw(struct ath12k *ar) #define for_each_ar(ah, ar, index) \ for ((index) = 0; ((index) < (ah)->num_radio && \ ((ar) = &(ah)->radio[(index)])); (index)++) + +static inline struct ath12k_hw *ath12k_ab_to_ah(struct ath12k_base *ab, int idx) +{ + return ab->ah[idx]; +} + +static inline void ath12k_ab_set_ah(struct ath12k_base *ab, int idx, + struct ath12k_hw *ah) +{ + ab->ah[idx] = ah; +} #endif /* _CORE_H_ */ diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8d4207707867..8cafb67523c9 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10832,7 +10832,7 @@ int ath12k_mac_register(struct ath12k_base *ab) ab->free_vdev_map = (1LL << (ab->num_radios * TARGET_NUM_VDEVS)) - 1; for (i = 0; i < ab->num_hw; i++) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); ret = ath12k_mac_hw_register(ah); if (ret) @@ -10843,7 +10843,7 @@ int ath12k_mac_register(struct ath12k_base *ab) err: for (i = i - 1; i >= 0; i--) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -10859,7 +10859,7 @@ void ath12k_mac_unregister(struct ath12k_base *ab) int i; for (i = ab->num_hw - 1; i >= 0; i--) { - ah = ab->ah[i]; + ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -10917,6 +10917,7 @@ static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_base *ab, void ath12k_mac_destroy(struct ath12k_base *ab) { struct ath12k_pdev *pdev; + struct ath12k_hw *ah; int i; for (i = 0; i < ab->num_radios; i++) { @@ -10928,11 +10929,12 @@ void ath12k_mac_destroy(struct ath12k_base *ab) } for (i = 0; i < ab->num_hw; i++) { - if (!ab->ah[i]) + ah = ath12k_ab_to_ah(ab, i); + if (!ah) continue; - ath12k_mac_hw_destroy(ab->ah[i]); - ab->ah[i] = NULL; + ath12k_mac_hw_destroy(ah); + ath12k_ab_set_ah(ab, i, NULL); } } @@ -10965,7 +10967,7 @@ int ath12k_mac_allocate(struct ath12k_base *ab) ah->dev = ab->dev; - ab->ah[i] = ah; + ath12k_ab_set_ah(ab, i, ah); } 
ath12k_dp_pdev_pre_alloc(ab); @@ -10974,11 +10976,12 @@ int ath12k_mac_allocate(struct ath12k_base *ab) err: for (i = i - 1; i >= 0; i--) { - if (!ab->ah[i]) + ah = ath12k_ab_to_ah(ab, i); + if (!ah) continue; - ath12k_mac_hw_destroy(ab->ah[i]); - ab->ah[i] = NULL; + ath12k_mac_hw_destroy(ah); + ath12k_ab_set_ah(ab, i, NULL); } return ret; From 17dd22aff52716eda49541bfec71a8f10bd7e514 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Fri, 29 Nov 2024 13:20:33 +0200 Subject: [PATCH 0077/1386] wifi: ath12k: add ath12k_get_num_hw() Currently, one or more struct ath12k_hw is part of device (struct ath12k_base) but in future, ath12k_hw would be part of device group (struct ath12k_hw_group). Hence, num_hw under device would be moved to device group. To facilitate above transition, add helper ath12k_get_num_hw() to get the number of radios per device. In future, this helper will return the number of radios in a device group. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241128165026.2618331-4-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.c | 8 ++++---- drivers/net/wireless/ath/ath12k/core.h | 5 +++++ drivers/net/wireless/ath/ath12k/mac.c | 8 ++++---- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 14d0aa26d850..386d42db29ac 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -985,7 +985,7 @@ static void ath12k_rfkill_work(struct work_struct *work) rfkill_radio_on = ab->rfkill_radio_on; spin_unlock_bh(&ab->base_lock); - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -1037,7 +1037,7 @@ 
static void ath12k_core_pre_reconfigure_recovery(struct ath12k_base *ab) if (ab->is_reset) set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); if (!ah || ah->state == ATH12K_HW_STATE_OFF) continue; @@ -1076,7 +1076,7 @@ static void ath12k_core_post_reconfigure_recovery(struct ath12k_base *ab) struct ath12k *ar; int i, j; - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); if (!ah || ah->state == ATH12K_HW_STATE_OFF) continue; @@ -1130,7 +1130,7 @@ static void ath12k_core_restart(struct work_struct *work) } if (ab->is_reset) { - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); ieee80211_restart_hw(ah->hw); } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index ba52be1cfd0f..4bfc7a7cc894 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -1171,4 +1171,9 @@ static inline void ath12k_ab_set_ah(struct ath12k_base *ab, int idx, { ab->ah[idx] = ah; } + +static inline int ath12k_get_num_hw(struct ath12k_base *ab) +{ + return ab->num_hw; +} #endif /* _CORE_H_ */ diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8cafb67523c9..129607ac6c1a 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10831,7 +10831,7 @@ int ath12k_mac_register(struct ath12k_base *ab) ab->cc_freq_hz = 320000; ab->free_vdev_map = (1LL << (ab->num_radios * TARGET_NUM_VDEVS)) - 1; - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); ret = ath12k_mac_hw_register(ah); @@ -10858,7 +10858,7 @@ void ath12k_mac_unregister(struct ath12k_base *ab) struct ath12k_hw *ah; int i; - for (i = ab->num_hw - 1; i >= 0; i--) { + for (i = 
ath12k_get_num_hw(ab) - 1; i >= 0; i--) { ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -10928,7 +10928,7 @@ void ath12k_mac_destroy(struct ath12k_base *ab) pdev->ar = NULL; } - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); if (!ah) continue; @@ -10951,7 +10951,7 @@ int ath12k_mac_allocate(struct ath12k_base *ab) ab->num_hw = ab->num_radios; radio_per_hw = 1; - for (i = 0; i < ab->num_hw; i++) { + for (i = 0; i < ath12k_get_num_hw(ab); i++) { for (j = 0; j < radio_per_hw; j++) { pdev_map[j].ab = ab; pdev_map[j].pdev_idx = (i * radio_per_hw) + j; From 45e72c306c08d59d0dc42238a8571bbbf04823f5 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Fri, 29 Nov 2024 13:20:33 +0200 Subject: [PATCH 0078/1386] wifi: ath12k: introduce QMI firmware ready flag When hardware device group abstraction is introduced, the QMI firmware ready event of different devices in a group can be received simultaneously. To indicate the firmware ready event is completed for a particular device in a group set a flag (ATH12K_FLAG_QMI_FW_READY_COMPLETE). This helps the upcoming hardware recovery implementation for hardware device group abstraction. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241128165026.2618331-5-kvalo@kernel.org --- drivers/net/wireless/ath/ath12k/core.c | 2 +- drivers/net/wireless/ath/ath12k/core.h | 1 + drivers/net/wireless/ath/ath12k/qmi.c | 12 +++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 386d42db29ac..4da147f7bfac 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1145,7 +1145,7 @@ static void ath12k_core_reset(struct work_struct *work) int reset_count, fail_cont_count; long time_left; - if (!(test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags))) { + if (!(test_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags))) { ath12k_warn(ab, "ignore reset dev flags 0x%lx\n", ab->dev_flags); return; } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 4bfc7a7cc894..9ddced140056 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -228,6 +228,7 @@ enum ath12k_dev_flags { ATH12K_FLAG_HTC_SUSPEND_COMPLETE, ATH12K_FLAG_CE_IRQ_ENABLED, ATH12K_FLAG_EXT_IRQ_ENABLED, + ATH12K_FLAG_QMI_FW_READY_COMPLETE, }; struct ath12k_tx_conf { diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index f5388eae01dc..77d8ee14bf33 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3033,6 +3033,8 @@ void ath12k_qmi_firmware_stop(struct ath12k_base *ab) { int ret; + clear_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags); + ret = ath12k_qmi_wlanfw_mode_send(ab, ATH12K_FIRMWARE_MODE_OFF); if (ret < 0) { ath12k_warn(ab, "qmi failed to send wlan 
mode off\n"); @@ -3336,7 +3338,7 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) break; case ATH12K_QMI_EVENT_FW_READY: clear_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags); - if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) { + if (test_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags)) { if (ab->is_reset) ath12k_hal_dump_srng_stats(ab); queue_work(ab->workqueue, &ab->restart_work); @@ -3346,8 +3348,12 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) clear_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); clear_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); - ath12k_core_qmi_firmware_ready(ab); - set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + ret = ath12k_core_qmi_firmware_ready(ab); + if (!ret) { + set_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, + &ab->dev_flags); + set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + } break; default: From b32913a5609a36c230e9b091da26d38f8e80a056 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Sat, 30 Nov 2024 15:53:49 +0100 Subject: [PATCH 0079/1386] ptp: Switch back to struct platform_driver::remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 0edb555a65d1 ("platform: Make platform_driver::remove() return void") .remove() is (again) the right callback to implement for platform drivers. Convert all platform drivers below drivers/ptp to use .remove(), with the eventual goal to drop struct platform_driver::remove_new(). As .remove() and .remove_new() have the same prototypes, conversion is done by just changing the structure member name in the driver initializer. While touching these drivers, make the alignment of the touched initializers consistent. 
Signed-off-by: Uwe Kleine-König Acked-by: Richard Cochran Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clockmatrix.c | 2 +- drivers/ptp/ptp_dte.c | 4 ++-- drivers/ptp/ptp_fc3.c | 2 +- drivers/ptp/ptp_idt82p33.c | 2 +- drivers/ptp/ptp_ines.c | 4 ++-- drivers/ptp/ptp_qoriq.c | 2 +- drivers/ptp/ptp_vmclock.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index b6f1941308b1..fbb3fa8fc60b 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -2471,7 +2471,7 @@ static struct platform_driver idtcm_driver = { .name = "8a3400x-phc", }, .probe = idtcm_probe, - .remove_new = idtcm_remove, + .remove = idtcm_remove, }; module_platform_driver(idtcm_driver); diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c index 449ff90927be..372168578a30 100644 --- a/drivers/ptp/ptp_dte.c +++ b/drivers/ptp/ptp_dte.c @@ -326,8 +326,8 @@ static struct platform_driver ptp_dte_driver = { .pm = PTP_DTE_PM_OPS, .of_match_table = ptp_dte_of_match, }, - .probe = ptp_dte_probe, - .remove_new = ptp_dte_remove, + .probe = ptp_dte_probe, + .remove = ptp_dte_remove, }; module_platform_driver(ptp_dte_driver); diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index 879b82f03535..cfced36c70bc 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -1003,7 +1003,7 @@ static struct platform_driver idtfc3_driver = { .name = "rc38xxx-phc", }, .probe = idtfc3_probe, - .remove_new = idtfc3_remove, + .remove = idtfc3_remove, }; module_platform_driver(idtfc3_driver); diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c index d5732490ed9d..b2fd94d4f863 100644 --- a/drivers/ptp/ptp_idt82p33.c +++ b/drivers/ptp/ptp_idt82p33.c @@ -1461,7 +1461,7 @@ static struct platform_driver idt82p33_driver = { .name = "82p33x1x-phc", }, .probe = idt82p33_probe, - .remove_new = idt82p33_remove, + .remove = idt82p33_remove, }; module_platform_driver(idt82p33_driver); diff --git 
a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 14a23d3a27f2..3d723a2aa6bb 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -781,8 +781,8 @@ static const struct of_device_id ines_ptp_ctrl_of_match[] = { MODULE_DEVICE_TABLE(of, ines_ptp_ctrl_of_match); static struct platform_driver ines_ptp_ctrl_driver = { - .probe = ines_ptp_ctrl_probe, - .remove_new = ines_ptp_ctrl_remove, + .probe = ines_ptp_ctrl_probe, + .remove = ines_ptp_ctrl_remove, .driver = { .name = "ines_ptp_ctrl", .of_match_table = ines_ptp_ctrl_of_match, diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 879cfc1537ac..4d488c1f1941 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -670,7 +670,7 @@ static struct platform_driver ptp_qoriq_driver = { .of_match_table = match_table, }, .probe = ptp_qoriq_probe, - .remove_new = ptp_qoriq_remove, + .remove = ptp_qoriq_remove, }; module_platform_driver(ptp_qoriq_driver); diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index cdca8a3ad1aa..0a2cfc8ad3c5 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -601,7 +601,7 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove_new = vmclock_remove, + .remove = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, From e8e7be7d212dc2bc83b8151e51088666a6c42092 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 2 Dec 2024 09:27:13 +0100 Subject: [PATCH 0080/1386] mctp i2c: drop check because i2c_unregister_device() is NULL safe No need to check the argument of i2c_unregister_device() because the function itself does it. 
Signed-off-by: Wolfram Sang Link: https://patch.msgid.link/20241202082713.9719-1-wsa+renesas@sang-engineering.com Signed-off-by: Paolo Abeni --- drivers/net/mctp/mctp-i2c.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c index d2b3f5a59141..e3dcdeacc12c 100644 --- a/drivers/net/mctp/mctp-i2c.c +++ b/drivers/net/mctp/mctp-i2c.c @@ -177,8 +177,7 @@ static struct mctp_i2c_client *mctp_i2c_new_client(struct i2c_client *client) return mcli; err: if (mcli) { - if (mcli->client) - i2c_unregister_device(mcli->client); + i2c_unregister_device(mcli->client); kfree(mcli); } return ERR_PTR(rc); From ebf7f7d616818f2841c8aece14084e87d59bd8c7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 3 Dec 2024 17:16:43 -0800 Subject: [PATCH 0081/1386] Revert "ptp: Switch back to struct platform_driver::remove()" This reverts commit b32913a5609a36c230e9b091da26d38f8e80a056. Linus applied directly commit e70140ba0d2b ("Get rid of 'remove_new' relic from platform driver struct"), drop our local change to avoid conflicts. 
Link: https://lore.kernel.org/CAMuHMdV3J=o2x9G=1t_y97iv9eLsPfiej108vU6JHnn=AR-Nvw@mail.gmail.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clockmatrix.c | 2 +- drivers/ptp/ptp_dte.c | 4 ++-- drivers/ptp/ptp_fc3.c | 2 +- drivers/ptp/ptp_idt82p33.c | 2 +- drivers/ptp/ptp_ines.c | 4 ++-- drivers/ptp/ptp_qoriq.c | 2 +- drivers/ptp/ptp_vmclock.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index fbb3fa8fc60b..b6f1941308b1 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -2471,7 +2471,7 @@ static struct platform_driver idtcm_driver = { .name = "8a3400x-phc", }, .probe = idtcm_probe, - .remove = idtcm_remove, + .remove_new = idtcm_remove, }; module_platform_driver(idtcm_driver); diff --git a/drivers/ptp/ptp_dte.c b/drivers/ptp/ptp_dte.c index 372168578a30..449ff90927be 100644 --- a/drivers/ptp/ptp_dte.c +++ b/drivers/ptp/ptp_dte.c @@ -326,8 +326,8 @@ static struct platform_driver ptp_dte_driver = { .pm = PTP_DTE_PM_OPS, .of_match_table = ptp_dte_of_match, }, - .probe = ptp_dte_probe, - .remove = ptp_dte_remove, + .probe = ptp_dte_probe, + .remove_new = ptp_dte_remove, }; module_platform_driver(ptp_dte_driver); diff --git a/drivers/ptp/ptp_fc3.c b/drivers/ptp/ptp_fc3.c index cfced36c70bc..879b82f03535 100644 --- a/drivers/ptp/ptp_fc3.c +++ b/drivers/ptp/ptp_fc3.c @@ -1003,7 +1003,7 @@ static struct platform_driver idtfc3_driver = { .name = "rc38xxx-phc", }, .probe = idtfc3_probe, - .remove = idtfc3_remove, + .remove_new = idtfc3_remove, }; module_platform_driver(idtfc3_driver); diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c index b2fd94d4f863..d5732490ed9d 100644 --- a/drivers/ptp/ptp_idt82p33.c +++ b/drivers/ptp/ptp_idt82p33.c @@ -1461,7 +1461,7 @@ static struct platform_driver idt82p33_driver = { .name = "82p33x1x-phc", }, .probe = idt82p33_probe, - .remove = idt82p33_remove, + .remove_new = idt82p33_remove, }; 
module_platform_driver(idt82p33_driver); diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c index 3d723a2aa6bb..14a23d3a27f2 100644 --- a/drivers/ptp/ptp_ines.c +++ b/drivers/ptp/ptp_ines.c @@ -781,8 +781,8 @@ static const struct of_device_id ines_ptp_ctrl_of_match[] = { MODULE_DEVICE_TABLE(of, ines_ptp_ctrl_of_match); static struct platform_driver ines_ptp_ctrl_driver = { - .probe = ines_ptp_ctrl_probe, - .remove = ines_ptp_ctrl_remove, + .probe = ines_ptp_ctrl_probe, + .remove_new = ines_ptp_ctrl_remove, .driver = { .name = "ines_ptp_ctrl", .of_match_table = ines_ptp_ctrl_of_match, diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c index 4d488c1f1941..879cfc1537ac 100644 --- a/drivers/ptp/ptp_qoriq.c +++ b/drivers/ptp/ptp_qoriq.c @@ -670,7 +670,7 @@ static struct platform_driver ptp_qoriq_driver = { .of_match_table = match_table, }, .probe = ptp_qoriq_probe, - .remove = ptp_qoriq_remove, + .remove_new = ptp_qoriq_remove, }; module_platform_driver(ptp_qoriq_driver); diff --git a/drivers/ptp/ptp_vmclock.c b/drivers/ptp/ptp_vmclock.c index 0a2cfc8ad3c5..cdca8a3ad1aa 100644 --- a/drivers/ptp/ptp_vmclock.c +++ b/drivers/ptp/ptp_vmclock.c @@ -601,7 +601,7 @@ MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids); static struct platform_driver vmclock_platform_driver = { .probe = vmclock_probe, - .remove = vmclock_remove, + .remove_new = vmclock_remove, .driver = { .name = "vmclock", .acpi_match_table = vmclock_acpi_ids, From 2e20bf8cc05766dcd0357cdfcada49e1bc45512b Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 2 Dec 2024 21:14:35 +0100 Subject: [PATCH 0082/1386] r8169: remove unused flag RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE After 854d71c555dfc3 ("r8169: remove original workaround for RTL8125 broken rx issue") this flag isn't used any longer. So remove it. 
Signed-off-by: Heiner Kallweit Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/d9dd214b-3027-4f60-b0e8-6f34a0c76582@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 739707a7b40f..4b96b4ad81b9 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -623,7 +623,6 @@ struct rtl8169_tc_offsets { enum rtl_flag { RTL_FLAG_TASK_RESET_PENDING, - RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, RTL_FLAG_TASK_TX_TIMEOUT, RTL_FLAG_MAX }; @@ -4723,8 +4722,6 @@ static void rtl_task(struct work_struct *work) reset: rtl_reset_work(tp); netif_wake_queue(tp->dev); - } else if (test_and_clear_bit(RTL_FLAG_TASK_RESET_NO_QUEUE_WAKE, tp->wk.flags)) { - rtl_reset_work(tp); } } From bb18265c3aba92b91a1355609769f3e967b65dee Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 2 Dec 2024 21:20:02 +0100 Subject: [PATCH 0083/1386] r8169: remove support for chip version 11 This is a follow-up to 982300c115d2 ("r8169: remove detection of chip version 11 (early RTL8168b)"). Nobody complained yet, so remove support for this chip version. 
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/b689ab6d-20b5-4b64-bd7e-531a0a972ba3@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169.h | 2 +- drivers/net/ethernet/realtek/r8169_main.c | 14 +------------- drivers/net/ethernet/realtek/r8169_phy_config.c | 10 ---------- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index be4c9622618d..8904aae41aca 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -23,7 +23,7 @@ enum mac_version { RTL_GIGA_MAC_VER_08, RTL_GIGA_MAC_VER_09, RTL_GIGA_MAC_VER_10, - RTL_GIGA_MAC_VER_11, + /* support for RTL_GIGA_MAC_VER_11 has been removed */ /* RTL_GIGA_MAC_VER_12 was handled the same as VER_17 */ /* RTL_GIGA_MAC_VER_13 was merged with VER_10 */ RTL_GIGA_MAC_VER_14, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 4b96b4ad81b9..cc14cd540f74 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -104,7 +104,6 @@ static const struct { [RTL_GIGA_MAC_VER_08] = {"RTL8102e" }, [RTL_GIGA_MAC_VER_09] = {"RTL8102e/RTL8103e" }, [RTL_GIGA_MAC_VER_10] = {"RTL8101e/RTL8100e" }, - [RTL_GIGA_MAC_VER_11] = {"RTL8168b/8111b" }, [RTL_GIGA_MAC_VER_14] = {"RTL8401" }, [RTL_GIGA_MAC_VER_17] = {"RTL8168b/8111b" }, [RTL_GIGA_MAC_VER_18] = {"RTL8168cp/8111cp" }, @@ -2335,7 +2334,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) /* 8168B family. */ { 0x7c8, 0x380, RTL_GIGA_MAC_VER_17 }, - /* This one is very old and rare, let's see if anybody complains. + /* This one is very old and rare, support has been removed. 
* { 0x7c8, 0x300, RTL_GIGA_MAC_VER_11 }, */ @@ -3803,7 +3802,6 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_08] = rtl_hw_start_8102e_3, [RTL_GIGA_MAC_VER_09] = rtl_hw_start_8102e_2, [RTL_GIGA_MAC_VER_10] = NULL, - [RTL_GIGA_MAC_VER_11] = rtl_hw_start_8168b, [RTL_GIGA_MAC_VER_14] = rtl_hw_start_8401, [RTL_GIGA_MAC_VER_17] = rtl_hw_start_8168b, [RTL_GIGA_MAC_VER_18] = rtl_hw_start_8168cp_1, @@ -4679,12 +4677,6 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) if (status & LinkChg) phy_mac_interrupt(tp->phydev); - if (unlikely(status & RxFIFOOver && - tp->mac_version == RTL_GIGA_MAC_VER_11)) { - netif_stop_queue(tp->dev); - rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING); - } - rtl_irq_disable(tp); napi_schedule(&tp->napi); out: @@ -5100,9 +5092,6 @@ static void rtl_set_irq_mask(struct rtl8169_private *tp) if (tp->mac_version <= RTL_GIGA_MAC_VER_06) tp->irq_mask |= SYSErr | RxFIFOOver; - else if (tp->mac_version == RTL_GIGA_MAC_VER_11) - /* special workaround needed */ - tp->irq_mask |= RxFIFOOver; } static int rtl_alloc_irq(struct rtl8169_private *tp) @@ -5297,7 +5286,6 @@ static int rtl_jumbo_max(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_02 ... 
RTL_GIGA_MAC_VER_06: return JUMBO_7K; /* RTL8168b */ - case RTL_GIGA_MAC_VER_11: case RTL_GIGA_MAC_VER_17: return JUMBO_4K; /* RTL8168c */ diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index 5307c6ff4e25..b28b30390e84 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -276,15 +276,6 @@ static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp, rtl_writephy_batch(phydev, phy_reg_init); } -static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp, - struct phy_device *phydev) -{ - phy_write(phydev, 0x1f, 0x0001); - phy_set_bits(phydev, 0x16, BIT(0)); - phy_write(phydev, 0x10, 0xf41b); - phy_write(phydev, 0x1f, 0x0000); -} - static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev) { @@ -1136,7 +1127,6 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config, [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config, [RTL_GIGA_MAC_VER_10] = NULL, - [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config, [RTL_GIGA_MAC_VER_14] = rtl8401_hw_phy_config, [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config, [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config, From 1b5b7f3d29dc705bdeb3d2663df1b4617276491a Mon Sep 17 00:00:00 2001 From: Aloka Dixit Date: Wed, 27 Nov 2024 10:02:55 -0800 Subject: [PATCH 0084/1386] wifi: mac80211: fix variable used in for_each_sdata_link() Macro for_each_sdata_link() accepts input '_local' but uses 'local' in its processing. This currently works because all the functions calling this macro have declared 'local' as a variable themselves. But this results in compilation error when a new caller uses 'sdata->local' instead of declaring 'local' variable. Use '_local' instead of 'local' in for_each_sdata_link(). 
Signed-off-by: Aloka Dixit Link: https://patch.msgid.link/20241127180255.1460553-1-quic_alokad@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a00096dd787b..534a20054151 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1206,7 +1206,7 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) for (int ___link_id = 0; \ ___link_id < ARRAY_SIZE(___sdata->link); \ ___link_id++) \ - if ((_link = wiphy_dereference((local)->hw.wiphy, \ + if ((_link = wiphy_dereference((_local)->hw.wiphy, \ ___sdata->link[___link_id]))) static inline int From bee404e14477917c2e15f78b2ad1ea443939720c Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Tue, 3 Dec 2024 22:28:50 +0200 Subject: [PATCH 0085/1386] wifi: mac80211: Accept authentication frames on P2P device This is needed for PASN based P2P pairing. Signed-off-by: Andrei Otcheretianski Reviewed-by: Miriam Rachel Korenblit Link: https://patch.msgid.link/20241203222744.6ee3ef9f1374.Ib3074ffbe7b296e0f162b2543e84346b190dfbeb@changeid Signed-off-by: Johannes Berg --- net/mac80211/main.c | 9 +++++++-- net/mac80211/rx.c | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ee1211a213d7..f13c14fa82e8 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -5,7 +5,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation */ #include @@ -726,8 +726,13 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { }, [NL80211_IFTYPE_P2P_DEVICE] = { .tx = 0xffff, + /* + * To support P2P PASN pairing let user space register to rx + * also AUTH frames on P2P device interface. 
+ */ .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | - BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4), }, }; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2bec18fc1b03..58c1b9a4e8b5 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4562,7 +4562,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) return ieee80211_is_public_action(hdr, skb->len) || ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || - ieee80211_is_beacon(hdr->frame_control); + ieee80211_is_beacon(hdr->frame_control) || + (ieee80211_is_auth(hdr->frame_control) && + ether_addr_equal(sdata->vif.addr, hdr->addr1)); case NL80211_IFTYPE_NAN: /* Currently no frames on NAN interface are allowed */ return false; From f42d22d3f79639c1b4e41daf28dad2505d6a5a8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Nov 2024 09:42:25 +0100 Subject: [PATCH 0086/1386] wifi: cfg80211: define and use wiphy guard Define a guard for the wiphy mutex, and use it in most code in cfg80211, though not all due to some interaction with RTNL and/or indentation. 
Suggested-by: Jeff Johnson Reviewed-by: Jeff Johnson Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241122094225.88765cbaab65.I610c9b14f36902e75e1d13f0db29f8bef2298804@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 + net/wireless/chan.c | 4 +- net/wireless/core.c | 42 +++-- net/wireless/mlme.c | 8 +- net/wireless/nl80211.c | 190 +++++++++------------- net/wireless/pmsr.c | 4 +- net/wireless/reg.c | 53 +++---- net/wireless/scan.c | 40 +++-- net/wireless/sme.c | 12 +- net/wireless/util.c | 7 +- net/wireless/wext-compat.c | 317 +++++++++++++------------------------ net/wireless/wext-sme.c | 43 ++--- 12 files changed, 277 insertions(+), 447 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 27acf1292a5c..63e79a22a214 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6031,6 +6031,10 @@ static inline void wiphy_unlock(struct wiphy *wiphy) mutex_unlock(&wiphy->mtx); } +DEFINE_GUARD(wiphy, struct wiphy *, + mutex_lock(&_T->mtx), + mutex_unlock(&_T->mtx)) + struct wiphy_work; typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 40b6375a5de4..833ea73053a4 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1039,10 +1039,10 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || cfg80211_offchan_chain_is_active(rdev, chan); - wiphy_unlock(&rdev->wiphy); if (found) return true; diff --git a/net/wireless/core.c b/net/wireless/core.c index afbdc549fb4a..70857018f020 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -191,7 +191,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, return err; } - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, 
list) { if (!wdev->netdev) continue; @@ -212,7 +213,6 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } - wiphy_unlock(&rdev->wiphy); return 0; } @@ -221,9 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + rdev_rfkill_poll(rdev); - wiphy_unlock(&rdev->wiphy); } void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, @@ -283,7 +283,7 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) /* otherwise, check iftype */ - wiphy_lock(wiphy); + guard(wiphy)(wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: @@ -295,8 +295,6 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) default: break; } - - wiphy_unlock(wiphy); } } EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); @@ -331,9 +329,9 @@ static void cfg80211_event_work(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, event_work); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_process_rdev_events(rdev); - wiphy_unlock(&rdev->wiphy); } void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) @@ -347,10 +345,10 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) if (wdev->netdev) dev_close(wdev->netdev); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_leave(rdev, wdev); cfg80211_remove_virtual_intf(rdev, wdev); - wiphy_unlock(&rdev->wiphy); } } } @@ -423,9 +421,9 @@ static void cfg80211_wiphy_work(struct work_struct *work) trace_wiphy_work_worker_start(&rdev->wiphy); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); if (rdev->suspended) - goto out; + return; spin_lock_irq(&rdev->wiphy_work_lock); wk = list_first_entry_or_null(&rdev->wiphy_work_list, @@ -441,8 +439,6 @@ static void cfg80211_wiphy_work(struct work_struct *work) } 
else { spin_unlock_irq(&rdev->wiphy_work_lock); } -out: - wiphy_unlock(&rdev->wiphy); } /* exported functions */ @@ -1526,9 +1522,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_REGISTER: if (!wdev->registered) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_register_wdev(rdev, wdev); - wiphy_unlock(&rdev->wiphy); } break; case NETDEV_UNREGISTER: @@ -1537,16 +1533,16 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * so check wdev->registered. */ if (wdev->registered && !wdev->registering) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + _cfg80211_unregister_wdev(wdev, false); - wiphy_unlock(&rdev->wiphy); } break; case NETDEV_GOING_DOWN: - wiphy_lock(&rdev->wiphy); - cfg80211_leave(rdev, wdev); - cfg80211_remove_links(wdev); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_leave(rdev, wdev); + cfg80211_remove_links(wdev); + } /* since we just did cfg80211_leave() nothing to do there */ cancel_work_sync(&wdev->disconnect_wk); cancel_work_sync(&wdev->pmsr_free_wk); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index a5eb92d93074..9d577523462d 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -627,10 +627,10 @@ void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, mgmt_registrations_update_wk); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_mgmt_registrations_update(wdev); - wiphy_unlock(&rdev->wiphy); } int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, @@ -1193,10 +1193,10 @@ cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, 
chandef, event); - wiphy_unlock(&rdev->wiphy); } void cfg80211_background_cac_done_wk(struct work_struct *work) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9d2edb71f981..9590f9bd2ec0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3626,7 +3626,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } else wdev = netdev->ieee80211_ptr; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); /* * end workaround code, by now the rdev is available @@ -3639,32 +3639,24 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); if (result) - goto out; + return result; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; - if (!rdev->ops->set_txq_params) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_txq_params) + return -EOPNOTSUPP; - if (!netdev) { - result = -EINVAL; - goto out; - } + if (!netdev) + return -EINVAL; if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { - result = -EINVAL; - goto out; - } + netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; - if (!netif_running(netdev)) { - result = -ENETDOWN; - goto out; - } + if (!netif_running(netdev)) + return -ENETDOWN; nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], @@ -3675,10 +3667,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params_policy, info->extack); if (result) - goto out; + return result; + result = parse_txq_params(tb, &txq_params); if (result) - goto out; + return result; txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -3694,7 +3687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) - goto out; + return result; } } @@ -3711,7 +3704,7 @@ 
static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (result) - goto out; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { @@ -3722,19 +3715,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; - if (!rdev->ops->set_tx_power) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_tx_power) + return -EOPNOTSUPP; idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && - (type != NL80211_TX_POWER_AUTOMATIC)) { - result = -EINVAL; - goto out; - } + (type != NL80211_TX_POWER_AUTOMATIC)) + return -EINVAL; if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; @@ -3743,7 +3732,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) - goto out; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && @@ -3752,10 +3741,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || - !rdev->ops->set_antenna) { - result = -EOPNOTSUPP; - goto out; - } + !rdev->ops->set_antenna) + return -EOPNOTSUPP; tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); @@ -3763,17 +3750,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || - (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { - result = -EINVAL; - goto out; - } + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) + return -EINVAL; tx_ant = tx_ant & 
rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) - goto out; + return result; } changed = 0; @@ -3795,10 +3780,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); - if (frag_threshold < 256) { - result = -EINVAL; - goto out; - } + if (frag_threshold < 256) + return -EINVAL; if (frag_threshold != (u32) -1) { /* @@ -3819,10 +3802,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { - if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { - result = -EINVAL; - goto out; - } + if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) + return -EINVAL; coverage_class = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); @@ -3830,20 +3811,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) { - result = -EOPNOTSUPP; - goto out; - } + if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) + return -EOPNOTSUPP; changed |= WIPHY_PARAM_DYN_ACK; } if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_LIMIT]); changed |= WIPHY_PARAM_TXQ_LIMIT; @@ -3851,10 +3829,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_memory_limit = nla_get_u32( 
info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]); changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT; @@ -3862,10 +3839,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_quantum = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_QUANTUM]); changed |= WIPHY_PARAM_TXQ_QUANTUM; @@ -3877,10 +3853,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u8 old_coverage_class; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; - if (!rdev->ops->set_wiphy_params) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; @@ -3918,15 +3892,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; - goto out; + return result; } } - result = 0; - -out: - wiphy_unlock(&rdev->wiphy); - return result; + return 0; } int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef) @@ -4144,22 +4114,22 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; } + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, - NL80211_CMD_NEW_INTERFACE) < 0) { - wiphy_unlock(&rdev->wiphy); + NL80211_CMD_NEW_INTERFACE) < 0) goto out; - } + if_idx++; } - wiphy_unlock(&rdev->wiphy); if_start = 0; wp_idx++; @@ -4517,16 +4487,13 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info 
*info) static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int ret; /* to avoid failing a new interface creation due to pending removal */ cfg80211_destroy_ifaces(rdev); - wiphy_lock(&rdev->wiphy); - ret = _nl80211_new_interface(skb, info); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return _nl80211_new_interface(skb, info); } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) @@ -10098,7 +10065,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; - int err = -EINVAL; + int err; flush_delayed_work(&rdev->dfs_update_channels_wk); @@ -10113,35 +10080,29 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, return -EINVAL; } - wiphy_lock(wiphy); + guard(wiphy)(wiphy); dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) - goto unlock; + return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) - goto unlock; + return err; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) - goto unlock; + return err; - if (err == 0) { - err = -EINVAL; - goto unlock; - } + if (err == 0) + return -EINVAL; - if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) { - err = -EINVAL; - goto unlock; - } + if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) + return -EINVAL; - if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { - err = cfg80211_start_background_radar_detection(rdev, wdev, - &chandef); - goto unlock; - } + if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) + return cfg80211_start_background_radar_detection(rdev, wdev, + &chandef); if (cfg80211_beaconing_iface_active(wdev)) { /* During MLO other link(s) can beacon, only the current link @@ -10151,26 +10112,19 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, 
!wdev->links[link_id].ap.beacon_interval) { /* nothing */ } else { - err = -EBUSY; - goto unlock; + return -EBUSY; } } - if (wdev->links[link_id].cac_started) { - err = -EBUSY; - goto unlock; - } + if (wdev->links[link_id].cac_started) + return -EBUSY; /* CAC start is offloaded to HW and can't be started manually */ - if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) { - err = -EOPNOTSUPP; - goto unlock; - } + if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) + return -EOPNOTSUPP; - if (!rdev->ops->start_radar_detection) { - err = -EOPNOTSUPP; - goto unlock; - } + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) @@ -10197,10 +10151,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->links[link_id].cac_start_time = jiffies; wdev->links[link_id].cac_time_ms = cac_time_ms; } -unlock: - wiphy_unlock(wiphy); - return err; + return 0; } static int nl80211_notify_radar_detection(struct sk_buff *skb, diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 0396fa19bdf1..a117f5093ca2 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -630,9 +630,9 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) struct wireless_dev *wdev = container_of(work, struct wireless_dev, pmsr_free_wk); - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); + cfg80211_pmsr_process_abort(wdev); - wiphy_unlock(wdev->wiphy); } void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1df65a5a44f7..2dd0533e7660 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2465,11 +2465,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - wiphy_lock(wiphy); + guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) if 
(!reg_wdev_chan_valid(wiphy, wdev)) cfg80211_leave(rdev, wdev); - wiphy_unlock(wiphy); } static void reg_check_chans_work(struct work_struct *work) @@ -2649,13 +2649,11 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, return; rtnl_lock(); - wiphy_lock(wiphy); - - tmp = get_wiphy_regdom(wiphy); - rcu_assign_pointer(wiphy->regd, new_regd); - rcu_free_regdom(tmp); - - wiphy_unlock(wiphy); + scoped_guard(wiphy, wiphy) { + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, new_regd); + rcu_free_regdom(tmp); + } rtnl_unlock(); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); @@ -2825,9 +2823,9 @@ reg_process_hint_driver(struct wiphy *wiphy, tmp = get_wiphy_regdom(wiphy); ASSERT_RTNL(); - wiphy_lock(wiphy); - rcu_assign_pointer(wiphy->regd, regd); - wiphy_unlock(wiphy); + scoped_guard(wiphy, wiphy) { + rcu_assign_pointer(wiphy->regd, regd); + } rcu_free_regdom(tmp); } @@ -3205,9 +3203,9 @@ static void reg_process_self_managed_hints(void) ASSERT_RTNL(); for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + reg_process_self_managed_hint(&rdev->wiphy); - wiphy_unlock(&rdev->wiphy); } reg_check_channels(); @@ -3600,14 +3598,12 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) struct wireless_dev *wdev; for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - if (!(wdev->wiphy->regulatory_flags & flag)) { - wiphy_unlock(&rdev->wiphy); + if (!(wdev->wiphy->regulatory_flags & flag)) return false; - } } - wiphy_unlock(&rdev->wiphy); } return true; @@ -3883,19 +3879,18 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, if (!driver_request->intersect) { ASSERT_RTNL(); - wiphy_lock(request_wiphy); - if (request_wiphy->regd) - tmp = get_wiphy_regdom(request_wiphy); + scoped_guard(wiphy, request_wiphy) { + if (request_wiphy->regd) + tmp = get_wiphy_regdom(request_wiphy); - regd = reg_copy_regd(rd); - if 
(IS_ERR(regd)) { - wiphy_unlock(request_wiphy); - return PTR_ERR(regd); + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rcu_assign_pointer(request_wiphy->regd, regd); + rcu_free_regdom(tmp); } - rcu_assign_pointer(request_wiphy->regd, regd); - rcu_free_regdom(tmp); - wiphy_unlock(request_wiphy); reset_regdomains(false, rd); return 0; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1c6fd45aa809..d056248c43d2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1238,7 +1238,8 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_res_wk); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->report_results) { req->report_results = false; @@ -1253,7 +1254,6 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) NL80211_CMD_SCHED_SCAN_RESULTS); } } - wiphy_unlock(&rdev->wiphy); } void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) @@ -1288,9 +1288,9 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped_locked); void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid) { - wiphy_lock(wiphy); + guard(wiphy)(wiphy); + cfg80211_sched_scan_stopped_locked(wiphy, reqid); - wiphy_unlock(wiphy); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -3565,10 +3565,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { - if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out; - } + if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } @@ -3584,20 +3582,20 @@ int cfg80211_wext_siwscan(struct net_device *dev, eth_broadcast_addr(creq->bssid); - wiphy_lock(&rdev->wiphy); - - rdev->scan_req = creq; - err = rdev_scan(rdev, 
creq); - if (err) { - rdev->scan_req = NULL; - /* creq will be freed below */ - } else { - nl80211_send_scan_start(rdev, dev->ieee80211_ptr); - /* creq now owned by driver */ - creq = NULL; - dev_hold(dev); + scoped_guard(wiphy, &rdev->wiphy) { + rdev->scan_req = creq; + err = rdev_scan(rdev, creq); + if (err) { + rdev->scan_req = NULL; + /* creq will be freed below */ + } else { + nl80211_send_scan_start(rdev, dev->ieee80211_ptr); + /* creq now owned by driver */ + creq = NULL; + dev_hold(dev); + } } - wiphy_unlock(&rdev->wiphy); + out: kfree(creq); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 431da30817a6..7a734c8085af 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -251,7 +251,7 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; enum nl80211_timeout_reason treason; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) @@ -279,8 +279,6 @@ void cfg80211_conn_work(struct work_struct *work) __cfg80211_connect_result(wdev->netdev, &cr, false); } } - - wiphy_unlock(&rdev->wiphy); } static void cfg80211_step_auth_next(struct cfg80211_conn *conn, @@ -692,13 +690,13 @@ static bool cfg80211_is_all_idle(void) * as chan dfs state, etc. 
*/ for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; } - wiphy_unlock(&rdev->wiphy); } return is_all_idle; @@ -1582,7 +1580,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) container_of(work, struct wireless_dev, disconnect_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { @@ -1618,6 +1616,4 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) break; } } - - wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 040d62051eb9..c7c6261c9146 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2572,7 +2572,6 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - int ret; wdev = dev->ieee80211_ptr; if (!wdev) @@ -2584,11 +2583,9 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, memset(sinfo, 0, sizeof(*sinfo)); - wiphy_lock(&rdev->wiphy); - ret = rdev_get_station(rdev, dev, mac_addr, sinfo); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return rdev_get_station(rdev, dev, mac_addr, sinfo); } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 90d5c0592667..687f93664d1f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -39,7 +39,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_rdev(wdev->wiphy); @@ -62,11 +61,9 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, 
memset(&vifparams, 0, sizeof(vifparams)); - wiphy_lock(wdev->wiphy); - ret = cfg80211_change_iface(rdev, dev, type, &vifparams); - wiphy_unlock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); - return ret; + return cfg80211_change_iface(rdev, dev, type, &vifparams); } int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, @@ -258,23 +255,17 @@ int cfg80211_wext_siwrts(struct net_device *dev, u32 orts = wdev->wiphy->rts_threshold; int err; - wiphy_lock(&rdev->wiphy); - if (rts->disabled || !rts->fixed) { + guard(wiphy)(&rdev->wiphy); + if (rts->disabled || !rts->fixed) wdev->wiphy->rts_threshold = (u32) -1; - } else if (rts->value < 0) { - err = -EINVAL; - goto out; - } else { + else if (rts->value < 0) + return -EINVAL; + else wdev->wiphy->rts_threshold = rts->value; - } err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD); - if (err) wdev->wiphy->rts_threshold = orts; - -out: - wiphy_unlock(&rdev->wiphy); return err; } @@ -302,12 +293,12 @@ int cfg80211_wext_siwfrag(struct net_device *dev, u32 ofrag = wdev->wiphy->frag_threshold; int err; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + if (frag->disabled || !frag->fixed) { wdev->wiphy->frag_threshold = (u32) -1; } else if (frag->value < 256) { - err = -EINVAL; - goto out; + return -EINVAL; } else { /* Fragment length must be even, so strip LSB. 
*/ wdev->wiphy->frag_threshold = frag->value & ~0x1; @@ -316,9 +307,6 @@ int cfg80211_wext_siwfrag(struct net_device *dev, err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; -out: - wiphy_unlock(&rdev->wiphy); - return err; } @@ -352,7 +340,8 @@ static int cfg80211_wext_siwretry(struct net_device *dev, (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + if (retry->flags & IW_RETRY_LONG) { wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; @@ -371,7 +360,6 @@ static int cfg80211_wext_siwretry(struct net_device *dev, wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; } - wiphy_unlock(&rdev->wiphy); return err; } @@ -578,9 +566,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int idx, err; - bool remove = false; struct key_params params; + bool remove = false; + int idx; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) @@ -592,11 +580,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, !rdev->ops->set_default_key) return -EOPNOTSUPP; - wiphy_lock(&rdev->wiphy); - if (wdev->valid_links) { - err = -EOPNOTSUPP; - goto out; - } + guard(wiphy)(&rdev->wiphy); + if (wdev->valid_links) + return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { @@ -604,8 +590,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev, if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) { - err = -EINVAL; - goto out; + return -EINVAL; } else { idx--; } @@ -614,7 +599,8 @@ static int cfg80211_wext_siwencode(struct net_device *dev, remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - err = 0; + int err = 0; + if (wdev->connected || 
(wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) @@ -622,28 +608,22 @@ static int cfg80211_wext_siwencode(struct net_device *dev, true); if (!err) wdev->wext.default_key = idx; - goto out; + return err; } memset(¶ms, 0, sizeof(params)); params.key = keybuf; params.key_len = erq->length; - if (erq->length == 5) { + if (erq->length == 5) params.cipher = WLAN_CIPHER_SUITE_WEP40; - } else if (erq->length == 13) { + else if (erq->length == 13) params.cipher = WLAN_CIPHER_SUITE_WEP104; - } else if (!remove) { - err = -EINVAL; - goto out; - } + else if (!remove) + return -EINVAL; - err = cfg80211_set_encryption(rdev, dev, false, NULL, remove, - wdev->wext.default_key == -1, - idx, ¶ms); -out: - wiphy_unlock(&rdev->wiphy); - - return err; + return cfg80211_set_encryption(rdev, dev, false, NULL, remove, + wdev->wext.default_key == -1, + idx, ¶ms); } static int cfg80211_wext_siwencodeext(struct net_device *dev, @@ -659,7 +639,6 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, bool remove = false; struct key_params params; u32 cipher; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) @@ -734,16 +713,13 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, params.seq_len = 6; } - wiphy_lock(wdev->wiphy); - ret = cfg80211_set_encryption( - rdev, dev, - !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), - addr, remove, - ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, - idx, ¶ms); - wiphy_unlock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); - return ret; + return cfg80211_set_encryption(rdev, dev, + !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), + addr, remove, + ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, + idx, ¶ms); } static int cfg80211_wext_giwencode(struct net_device *dev, @@ -794,61 +770,41 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, ret; + int freq; - wiphy_lock(&rdev->wiphy); + 
guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); - break; + return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); - break; + return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) { - ret = freq; - break; - } - if (freq == 0) { - ret = -EINVAL; - break; - } + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) { - ret = -EINVAL; - break; - } - ret = cfg80211_set_monitor_channel(rdev, dev, &chandef); - break; + if (!chandef.chan) + return -EINVAL; + return cfg80211_set_monitor_channel(rdev, dev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) { - ret = freq; - break; - } - if (freq == 0) { - ret = -EINVAL; - break; - } + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) { - ret = -EINVAL; - break; - } - ret = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - break; + if (!chandef.chan) + return -EINVAL; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwfreq(struct net_device *dev, @@ -861,35 +817,26 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = {}; int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); - break; + return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); case 
NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); - break; + return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->ops->get_channel) { - ret = -EINVAL; - break; - } + if (!rdev->ops->get_channel) + return -EINVAL; ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) - break; + return ret; freq->m = chandef.chan->center_freq; freq->e = 6; - ret = 0; - break; + return ret; default: - ret = -EINVAL; - break; + return -EINVAL; } - - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwtxpower(struct net_device *dev, @@ -900,7 +847,6 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; - int ret; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; @@ -942,11 +888,9 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - wiphy_lock(&rdev->wiphy); - ret = rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -965,9 +909,9 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; - wiphy_lock(&rdev->wiphy); - err = rdev_get_tx_power(rdev, wdev, &val); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + err = rdev_get_tx_power(rdev, wdev, &val); + } if (err) return err; @@ -1209,9 +1153,9 @@ static int cfg80211_wext_siwpower(struct net_device *dev, timeout = wrq->value / 1000; } - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + err = rdev_set_power_mgmt(rdev, dev, ps, timeout); - wiphy_unlock(&rdev->wiphy); if (err) return err; @@ -1244,8 +1188,8 @@ static int cfg80211_wext_siwrate(struct net_device *dev, struct 
cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; - int band, ridx, ret; bool match = false; + int band, ridx; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; @@ -1283,14 +1227,12 @@ static int cfg80211_wext_siwrate(struct net_device *dev, if (!match) return -EINVAL; - wiphy_lock(&rdev->wiphy); - if (dev->ieee80211_ptr->valid_links) - ret = -EOPNOTSUPP; - else - ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + if (dev->ieee80211_ptr->valid_links) + return -EOPNOTSUPP; + + return rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); } static int cfg80211_wext_giwrate(struct net_device *dev, @@ -1319,9 +1261,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - wiphy_lock(&rdev->wiphy); - err = rdev_get_station(rdev, dev, addr, &sinfo); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + err = rdev_get_station(rdev, dev, addr, &sinfo); + } if (err) return err; @@ -1420,23 +1362,17 @@ static int cfg80211_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); - break; + return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - break; + return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwap(struct net_device *dev, @@ -1446,23 +1382,17 @@ static int cfg80211_wext_giwap(struct net_device *dev, struct sockaddr *ap_addr = &wrqu->ap_addr; struct 
wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); - break; + return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - break; + return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwessid(struct net_device *dev, @@ -1472,23 +1402,17 @@ static int cfg80211_wext_siwessid(struct net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwessid(dev, info, data, ssid); - break; + return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwessid(dev, info, data, ssid); - break; + return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwessid(struct net_device *dev, @@ -1498,26 +1422,20 @@ static int cfg80211_wext_giwessid(struct net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; data->flags = 0; data->length = 0; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwessid(dev, info, data, ssid); - break; + return 
cfg80211_ibss_wext_giwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwessid(dev, info, data, ssid); - break; + return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwpmksa(struct net_device *dev, @@ -1528,7 +1446,6 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; - int ret; memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); @@ -1538,39 +1455,27 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, cfg_pmksa.bssid = pmksa->bssid.sa_data; cfg_pmksa.pmkid = pmksa->pmkid; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (pmksa->cmd) { case IW_PMKSA_ADD: - if (!rdev->ops->set_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->set_pmksa) + return -EOPNOTSUPP; - ret = rdev_set_pmksa(rdev, dev, &cfg_pmksa); - break; + return rdev_set_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_REMOVE: - if (!rdev->ops->del_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->del_pmksa) + return -EOPNOTSUPP; - ret = rdev_del_pmksa(rdev, dev, &cfg_pmksa); - break; + return rdev_del_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_FLUSH: - if (!rdev->ops->flush_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->flush_pmksa) + return -EOPNOTSUPP; - ret = rdev_flush_pmksa(rdev, dev); - break; + return rdev_flush_pmksa(rdev, dev); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static const iw_handler cfg80211_handlers[] = { diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 8edd9ada69d0..573b6b15a446 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -302,8 +302,8 @@ int cfg80211_wext_siwgenie(struct 
net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ie_len = data->length; u8 *ie = extra; - int ie_len = data->length, err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; @@ -311,39 +311,31 @@ int cfg80211_wext_siwgenie(struct net_device *dev, if (!ie_len) ie = NULL; - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); /* no change */ - err = 0; if (wdev->wext.ie_len == ie_len && memcmp(wdev->wext.ie, ie, ie_len) == 0) - goto out; + return 0; if (ie_len) { ie = kmemdup(extra, ie_len, GFP_KERNEL); - if (!ie) { - err = -ENOMEM; - goto out; - } - } else + if (!ie) + return -ENOMEM; + } else { ie = NULL; + } kfree(wdev->wext.ie); wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; - if (wdev->conn) { - err = cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); - if (err) - goto out; - } + if (wdev->conn) + return cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); /* userspace better not think we'll reconnect */ - err = 0; - out: - wiphy_unlock(wdev->wiphy); - return err; + return 0; } int cfg80211_wext_siwmlme(struct net_device *dev, @@ -353,7 +345,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_mlme *mlme = (struct iw_mlme *)extra; struct cfg80211_registered_device *rdev; - int err; if (!wdev) return -EOPNOTSUPP; @@ -366,17 +357,13 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (mlme->addr.sa_family != ARPHRD_ETHER) return -EINVAL; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); - break; + return cfg80211_disconnect(rdev, dev, mlme->reason_code, true); default: - err = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return err; } From 
8e66f6c6738e5b458345cd5f75ef6da035d95599 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Nov 2024 09:42:26 +0100 Subject: [PATCH 0087/1386] wifi: mac80211: use wiphy guard The wiphy guard simplifies some code here, so use it. Reviewed-by: Jeff Johnson Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241122094225.cea65b2d2fd4.Icc168c4bbeddec98ea096aee9077211a7b88b69e@changeid Signed-off-by: Johannes Berg --- net/mac80211/debugfs.c | 4 ++-- net/mac80211/ethtool.c | 18 ++++++------------ net/mac80211/iface.c | 25 +++++++++---------------- 3 files changed, 17 insertions(+), 30 deletions(-) diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index be2e486907f9..bf0a2902d93c 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -284,7 +284,8 @@ static ssize_t aql_txq_limit_write(struct file *file, q_limit_low_old = local->aql_txq_limit_low[ac]; q_limit_high_old = local->aql_txq_limit_high[ac]; - wiphy_lock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); + local->aql_txq_limit_low[ac] = q_limit_low; local->aql_txq_limit_high[ac] = q_limit_high; @@ -296,7 +297,6 @@ static ssize_t aql_txq_limit_write(struct file *file, sta->airtime[ac].aql_limit_high = q_limit_high; } } - wiphy_unlock(local->hw.wiphy); return count; } diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 99f6174a9d69..069aa05139cd 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -19,16 +19,13 @@ static int ieee80211_set_ringparam(struct net_device *dev, struct netlink_ext_ack *extack) { struct ieee80211_local *local = wiphy_priv(dev->ieee80211_ptr->wiphy); - int ret; if (rp->rx_mini_pending != 0 || rp->rx_jumbo_pending != 0) return -EINVAL; - wiphy_lock(local->hw.wiphy); - ret = drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); - wiphy_unlock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); - return ret; + return drv_set_ringparam(local, rp->tx_pending, rp->rx_pending); } static void ieee80211_get_ringparam(struct 
net_device *dev, @@ -40,10 +37,10 @@ static void ieee80211_get_ringparam(struct net_device *dev, memset(rp, 0, sizeof(*rp)); - wiphy_lock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); + drv_get_ringparam(local, &rp->tx_pending, &rp->tx_max_pending, &rp->rx_pending, &rp->rx_max_pending); - wiphy_unlock(local->hw.wiphy); } static const char ieee80211_gstrings_sta_stats[][ETH_GSTRING_LEN] = { @@ -109,7 +106,7 @@ static void ieee80211_get_stats(struct net_device *dev, * network device. */ - wiphy_lock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sta = sta_info_get_bss(sdata, sdata->deflink.u.mgd.bssid); @@ -205,13 +202,10 @@ do_survey: else data[i++] = -1LL; - if (WARN_ON(i != STA_STATS_LEN)) { - wiphy_unlock(local->hw.wiphy); + if (WARN_ON(i != STA_STATS_LEN)) return; - } drv_get_et_stats(sdata, stats, &(data[STA_STATS_LEN])); - wiphy_unlock(local->hw.wiphy); } static void ieee80211_get_strings(struct net_device *dev, u32 sset, u8 *data) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a8fbedd530f4..32aaf3856ccf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -300,7 +300,6 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; - int ret; /* * This happens during unregistration if there's a bond device @@ -310,11 +309,9 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) if (!dev->ieee80211_ptr->registered) return 0; - wiphy_lock(local->hw.wiphy); - ret = _ieee80211_change_mac(sdata, addr); - wiphy_unlock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); - return ret; + return _ieee80211_change_mac(sdata, addr); } static inline int identical_mac_addr_allowed(int type1, int type2) @@ -450,16 +447,13 @@ static int ieee80211_open(struct net_device *dev) if (!is_valid_ether_addr(dev->dev_addr)) return -EADDRNOTAVAIL; - 
wiphy_lock(sdata->local->hw.wiphy); + guard(wiphy)(sdata->local->hw.wiphy); + err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); if (err) - goto out; + return err; - err = ieee80211_do_open(&sdata->wdev, true); -out: - wiphy_unlock(sdata->local->hw.wiphy); - - return err; + return ieee80211_do_open(&sdata->wdev, true); } static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) @@ -780,11 +774,11 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_stop_mbssid(sdata); } - wiphy_lock(sdata->local->hw.wiphy); + guard(wiphy)(sdata->local->hw.wiphy); + wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->activate_links_work); ieee80211_do_stop(sdata, true); - wiphy_unlock(sdata->local->hw.wiphy); return 0; } @@ -2282,7 +2276,7 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) */ cfg80211_shutdown_all_interfaces(local->hw.wiphy); - wiphy_lock(local->hw.wiphy); + guard(wiphy)(local->hw.wiphy); WARN(local->open_count, "%s: open count remains %d\n", wiphy_name(local->hw.wiphy), local->open_count); @@ -2312,7 +2306,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) if (!netdev) kfree(sdata); } - wiphy_unlock(local->hw.wiphy); } static int netdev_notify(struct notifier_block *nb, From 13c4f7714c6a1ecf748a2f22099447c14fe6ed8c Mon Sep 17 00:00:00 2001 From: Zichen Xie Date: Fri, 15 Nov 2024 00:38:36 -0600 Subject: [PATCH 0088/1386] wifi: cfg80211: tests: Fix potential NULL dereference in test_cfg80211_parse_colocated_ap() kunit_kzalloc() may return NULL, dereferencing it without NULL check may lead to NULL dereference. Add a NULL check for ies. 
Fixes: 45d43937a44c ("wifi: cfg80211: add a kunit test for 6 GHz colocated AP parsing") Signed-off-by: Zichen Xie Link: https://patch.msgid.link/20241115063835.5888-1-zichenxie0106@gmail.com Signed-off-by: Johannes Berg --- net/wireless/tests/scan.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c index 9f458be71659..79a99cf5e892 100644 --- a/net/wireless/tests/scan.c +++ b/net/wireless/tests/scan.c @@ -810,6 +810,8 @@ static void test_cfg80211_parse_colocated_ap(struct kunit *test) skb_put_data(input, "123", 3); ies = kunit_kzalloc(test, struct_size(ies, data, input->len), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, ies); + ies->len = input->len; memcpy(ies->data, input->data, input->len); From 4e3a841c47bbf985782a9f761d57f2f999e1d31b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Jos=C3=A9=20Arboleda?= Date: Mon, 19 Aug 2024 16:45:20 -0500 Subject: [PATCH 0089/1386] wifi: iwlwifi: mvm: Replace spaces for tabs in iwl_mvm_vendor_events_idx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch replaces spaces with tabs in the affected source files to adhere to the Linux kernel coding style guidelines. This change is purely stylistic and does not affect the functionality of the code.
Signed-off-by: Juan José Arboleda Link: https://patch.msgid.link/715c74c74b336bed81e92e1336bd1a6ddb7b90ff.1724103043.git.soyjuanarbol@gmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c index 080a1587caa5..0f7fa6032c66 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/vendor-cmd.c @@ -104,9 +104,9 @@ static const struct wiphy_vendor_command iwl_mvm_vendor_commands[] = { }; enum iwl_mvm_vendor_events_idx { - /* 0x0 - 0x3 are deprecated */ - IWL_MVM_VENDOR_EVENT_IDX_ROAMING_FORBIDDEN = 4, - NUM_IWL_MVM_VENDOR_EVENT_IDX + /* 0x0 - 0x3 are deprecated */ + IWL_MVM_VENDOR_EVENT_IDX_ROAMING_FORBIDDEN = 4, + NUM_IWL_MVM_VENDOR_EVENT_IDX }; static const struct nl80211_vendor_cmd_info From 7a53af85d3bbdbe06cd47b81a6d99a04dc0a3963 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 25 Nov 2024 14:02:16 +0530 Subject: [PATCH 0090/1386] wifi: cfg80211: send MLO links tx power info in GET_INTERFACE Currently, TX power is reported on interface/wdev level as part of NL80211_CMD_GET_INTERFACE. With MLO, multiple links can be part of an interface/wdev, and hence it is necessary to report the TX power of each link. Add support to send tx power for all valid links of an MLD as part of NL80211_CMD_GET_INTERFACE request. As far as userspace is concerned, there is no behavioral change for non-ML interfaces. For ML interfaces, userspace should fetch TX power that is nested inside NL80211_ATTR_MLO_LINKS, similar to how channel info (NL80211_ATTR_WIPHY_FREQ) is fetched.
Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20241125083217.216095-2-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 1 + .../broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- .../net/wireless/marvell/mwifiex/cfg80211.c | 2 +- .../wireless/microchip/wilc1000/cfg80211.c | 2 +- .../net/wireless/quantenna/qtnfmac/cfg80211.c | 2 +- .../staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 3 +- include/net/cfg80211.h | 2 +- net/mac80211/cfg.c | 1 + net/wireless/nl80211.c | 13 +++++- net/wireless/rdev-ops.h | 7 +-- net/wireless/trace.h | 44 +++++++++---------- net/wireless/wext-compat.c | 2 +- 12 files changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 61b2e3f15f0e..72ce321f2a77 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1441,6 +1441,7 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 297a7c738c01..689e779fe00f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2676,7 +2676,7 @@ done: static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - s32 *dbm) + unsigned int link_id, s32 *dbm) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_cfg80211_vif *vif = wdev_to_vif(wdev); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index fca3eea7ee84..a099fdaafa45 100644 
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -410,7 +410,7 @@ mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, static int mwifiex_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { struct mwifiex_adapter *adapter = mwifiex_cfg80211_get_adapter(wiphy); struct mwifiex_private *priv = mwifiex_get_priv(adapter, diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index e96736cc7259..e7aa0f991923 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1669,7 +1669,7 @@ static int set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, } static int get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { int ret; struct wilc_vif *vif = netdev_priv(wdev->netdev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 8b97accf6638..0b2282528342 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -881,7 +881,7 @@ static int qtnf_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, } static int qtnf_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index c053ee9c1361..7fcc46a0bb48 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -1802,7 +1802,8 @@ static int cfg80211_rtw_set_txpower(struct wiphy *wiphy, } static int cfg80211_rtw_get_txpower(struct wiphy *wiphy, - struct wireless_dev *wdev, int *dbm) + 
struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { *dbm = (12); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63e79a22a214..0a48f47a77dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4733,7 +4733,7 @@ struct cfg80211_ops { int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm); + unsigned int link_id, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 61a824ec33da..b2410a913556 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3190,6 +3190,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9590f9bd2ec0..793d910347e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3980,10 +3980,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - if (rdev->ops->get_tx_power) { + if (rdev->ops->get_tx_power && !wdev->valid_links) { int dbm, ret; - ret = rdev_get_tx_power(rdev, wdev, &dbm); + ret = rdev_get_tx_power(rdev, wdev, 0, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) @@ -4052,6 +4052,15 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; + if (rdev->ops->get_tx_power) { + int dbm, ret; + + ret = rdev_get_tx_power(rdev, wdev, link_id, &dbm); + if (ret == 0 && + nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + DBM_TO_MBM(dbm))) + goto nla_put_failure; + } nla_nest_end(msg, link); } diff --git a/net/wireless/rdev-ops.h 
b/net/wireless/rdev-ops.h index adb6105bbb7d..8f2aa7e76c0a 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -600,11 +600,12 @@ static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int *dbm) + struct wireless_dev *wdev, unsigned int link_id, + int *dbm) { int ret; - trace_rdev_get_tx_power(&rdev->wiphy, wdev); - ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, dbm); + trace_rdev_get_tx_power(&rdev->wiphy, wdev, link_id); + ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, link_id, dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d5c9bb614fa6..a57210c8087c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1690,9 +1690,28 @@ TRACE_EVENT(rdev_set_wiphy_params, WIPHY_PR_ARG, __entry->changed) ); -DEFINE_EVENT(wiphy_wdev_evt, rdev_get_tx_power, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), - TP_ARGS(wiphy, wdev) +DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) +); + +DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_tx_power, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id) ); TRACE_EVENT(rdev_set_tx_power, @@ -2192,25 +2211,6 @@ TRACE_EVENT(rdev_set_noack_map, TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", noack_map: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); - -DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev 
*wdev, - unsigned int link_id), - TP_ARGS(wiphy, wdev, link_id), - TP_STRUCT__entry( - WIPHY_ENTRY - WDEV_ENTRY - __field(unsigned int, link_id) - ), - TP_fast_assign( - WIPHY_ASSIGN; - WDEV_ASSIGN; - __entry->link_id = link_id; - ), - TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", - WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) -); - DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 687f93664d1f..a74b1afc594e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -910,7 +910,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, return -EOPNOTSUPP; scoped_guard(wiphy, &rdev->wiphy) { - err = rdev_get_tx_power(rdev, wdev, &val); + err = rdev_get_tx_power(rdev, wdev, 0, &val); } if (err) return err; From 24dab555ad5951824e3fb6b665aaca84ac69dd12 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 25 Nov 2024 14:02:17 +0530 Subject: [PATCH 0091/1386] wifi: mac80211: get tx power per link ML interfaces can have multiple links affiliated to them, and hence there is a need to report the tx power of the specified link rather than deflink. Add changes to report the tx power of the requested link from mac80211; also pass the link id as an argument in the get_tx_power op so that supported drivers can use it to report the link's tx power.
Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20241125083217.216095-3-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 1 + drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76.h | 2 +- include/net/mac80211.h | 2 +- net/mac80211/cfg.c | 15 +++++++++++---- net/mac80211/driver-ops.h | 7 ++++--- net/mac80211/trace.h | 10 ++++++---- 8 files changed, 26 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e6acbff06749..7e75a9b13ef9 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9356,6 +9356,7 @@ static int ath11k_fw_stats_request(struct ath11k *ar, static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + unsigned int link_id, int *dbm) { struct ath11k *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b92c89dad8de..2f137856a823 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2767,7 +2767,7 @@ void ath9k_fill_chanctx_ops(void) #endif static int ath9k_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm) + unsigned int link_id, int *dbm) { struct ath_softc *sc = hw->priv; struct ath_vif *avp = (void *)vif->drv_priv; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 9d5561f44134..7fbce5e757df 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1596,7 +1596,7 @@ void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) EXPORT_SYMBOL_GPL(mt76_wcid_cleanup); int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int 
*dbm) + unsigned int link_id, int *dbm) { struct mt76_phy *phy = hw->priv; int n_chains = hweight16(phy->chainmask); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 0b75a45ad2e8..ca2dba3ac65d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1431,7 +1431,7 @@ void mt76_sta_pre_rcu_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int mt76_get_min_avg_rssi(struct mt76_dev *dev, bool ext_phy); int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm); + unsigned int link_id, int *dbm); int mt76_init_sar_power(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar); int mt76_get_sar_power(struct mt76_phy *phy, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a97c9f85ae9a..5ce4dfa3fba5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4759,7 +4759,7 @@ struct ieee80211_ops { u32 (*get_expected_throughput)(struct ieee80211_hw *hw, struct ieee80211_sta *sta); int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm); + unsigned int link_id, int *dbm); int (*tdls_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b2410a913556..2fa594fb6c1a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3195,15 +3195,22 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_link_data *link_data; if (local->ops->get_txpower && (sdata->flags & IEEE80211_SDATA_IN_DRIVER)) - return drv_get_txpower(local, sdata, dbm); + return drv_get_txpower(local, sdata, link_id, dbm); - if (local->emulate_chanctx) + if (local->emulate_chanctx) { *dbm = local->hw.conf.power_level; - else - *dbm = sdata->vif.bss_conf.txpower; + } else { + link_data = 
wiphy_dereference(wiphy, sdata->link[link_id]); + + if (link_data) + *dbm = link_data->conf->txpower; + else + return -ENOLINK; + } /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index edd1e4d4ad9d..c64531e0a60e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1273,7 +1273,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, } static inline int drv_get_txpower(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, int *dbm) + struct ieee80211_sub_if_data *sdata, + unsigned int link_id, int *dbm) { int ret; @@ -1283,8 +1284,8 @@ static inline int drv_get_txpower(struct ieee80211_local *local, if (!local->ops->get_txpower) return -EOPNOTSUPP; - ret = local->ops->get_txpower(&local->hw, &sdata->vif, dbm); - trace_drv_get_txpower(local, sdata, *dbm, ret); + ret = local->ops->get_txpower(&local->hw, &sdata->vif, link_id, dbm); + trace_drv_get_txpower(local, sdata, link_id, *dbm, ret); return ret; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 7a4985fc2b16..dc35fed7e9b0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2173,13 +2173,14 @@ DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon, TRACE_EVENT(drv_get_txpower, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - int dbm, int ret), + unsigned int link_id, int dbm, int ret), - TP_ARGS(local, sdata, dbm, ret), + TP_ARGS(local, sdata, link_id, dbm, ret), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY + __field(unsigned int, link_id) __field(int, dbm) __field(int, ret) ), @@ -2187,13 +2188,14 @@ TRACE_EVENT(drv_get_txpower, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; + __entry->link_id = link_id; __entry->dbm = dbm; __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " dbm:%d ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->dbm, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " link_id:%d dbm:%d ret:%d", + 
LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->dbm, __entry->ret ) ); From 4f85a3b370e6f1a353cfbbfb5c398590dfeca9d7 Mon Sep 17 00:00:00 2001 From: Dylan Eskew Date: Wed, 13 Nov 2024 06:46:08 -0800 Subject: [PATCH 0092/1386] wifi: mac80211: ethtool: add monitor channel reporting When running ethtool on a monitor interface, the channel wasn't reporting properly. This adds logic to properly report the channel for monitor interfaces in ethtool. Signed-off-by: Dylan Eskew Link: https://patch.msgid.link/20241113144608.334060-1-dylan.eskew@candelatech.com Signed-off-by: Johannes Berg --- net/mac80211/ethtool.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 069aa05139cd..42f7ee142ce3 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -157,6 +157,10 @@ do_survey: chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) channel = chanctx_conf->def.chan; + else if (local->open_count > 0 && + local->open_count == local->monitors && + sdata->vif.type == NL80211_IFTYPE_MONITOR) + channel = local->monitor_chanreq.oper.chan; else channel = NULL; rcu_read_unlock(); From b63a95d35f7ff59329257cfd31f990b3b295a242 Mon Sep 17 00:00:00 2001 From: Sathishkumar Muruganandam Date: Tue, 19 Nov 2024 09:57:59 +0530 Subject: [PATCH 0093/1386] wifi: mac80211: add EHT 320 MHz support for mesh Currently, ieee80211_ie_build_he_oper() lacks support for 320 MHz handling (already noted as a TODO). This is because 320 MHz is not included in IEEE 802.11-ax. However, IEEE 802.11-be introduces 320 MHz support and if the chandef indicates a 320 MHz bandwidth and is used directly as it is, it will result in an incorrect HE Operation Information Element. In order to support EHT 320 MHz, HE Operation Element should indicate bandwidth as 160 MHz only. In EHT Operation IE, the correct bandwidth will be present. 
Devices capable of EHT can parse EHT Information Element and connect in 320 MHz and other HE capable devices can parse HE and can connect in 160 MHz. Add support to downgrade the bandwidth in ieee80211_ie_build_he_oper() during 320 MHz operation and advertise it. Signed-off-by: Sathishkumar Muruganandam Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20241119-mesh_320mhz_support-v1-1-f9463338d584@quicinc.com Signed-off-by: Johannes Berg --- net/mac80211/util.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a4e1301cc999..c88ce537aaa7 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2762,6 +2762,7 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef) { struct ieee80211_he_operation *he_oper; struct ieee80211_he_6ghz_oper *he_6ghz_op; + struct cfg80211_chan_def he_chandef; u32 he_oper_params; u8 ie_len = 1 + sizeof(struct ieee80211_he_operation); @@ -2793,27 +2794,33 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef) if (chandef->chan->band != NL80211_BAND_6GHZ) goto out; + cfg80211_chandef_create(&he_chandef, chandef->chan, NL80211_CHAN_NO_HT); + he_chandef.center_freq1 = chandef->center_freq1; + he_chandef.center_freq2 = chandef->center_freq2; + he_chandef.width = chandef->width; + /* TODO add VHT operational */ he_6ghz_op = (struct ieee80211_he_6ghz_oper *)pos; he_6ghz_op->minrate = 6; /* 6 Mbps */ he_6ghz_op->primary = - ieee80211_frequency_to_channel(chandef->chan->center_freq); + ieee80211_frequency_to_channel(he_chandef.chan->center_freq); he_6ghz_op->ccfs0 = - ieee80211_frequency_to_channel(chandef->center_freq1); - if (chandef->center_freq2) + ieee80211_frequency_to_channel(he_chandef.center_freq1); + if (he_chandef.center_freq2) he_6ghz_op->ccfs1 = - ieee80211_frequency_to_channel(chandef->center_freq2); + ieee80211_frequency_to_channel(he_chandef.center_freq2); else 
he_6ghz_op->ccfs1 = 0; - switch (chandef->width) { + switch (he_chandef.width) { case NL80211_CHAN_WIDTH_320: - /* - * TODO: mesh operation is not defined over 6GHz 320 MHz - * channels. + /* Downgrade EHT 320 MHz BW to 160 MHz for HE and set new + * center_freq1 */ - WARN_ON(1); - break; + ieee80211_chandef_downgrade(&he_chandef, NULL); + he_6ghz_op->ccfs0 = + ieee80211_frequency_to_channel(he_chandef.center_freq1); + fallthrough; case NL80211_CHAN_WIDTH_160: /* Convert 160 MHz channel width to new style as interop * workaround. @@ -2821,7 +2828,7 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos, const struct cfg80211_chan_def *chandef) he_6ghz_op->control = IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ; he_6ghz_op->ccfs1 = he_6ghz_op->ccfs0; - if (chandef->chan->center_freq < chandef->center_freq1) + if (he_chandef.chan->center_freq < he_chandef.center_freq1) he_6ghz_op->ccfs0 -= 8; else he_6ghz_op->ccfs0 += 8; From b81e0211e9c70be9eb70924e4e29698bfbbbc03a Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 19 Nov 2024 09:58:00 +0530 Subject: [PATCH 0094/1386] wifi: mac80211_hwsim: add 6 GHz EHT Mesh capabilities To facilitate testing of mesh EHT 320 MHz, add support for advertising this capability. 
Signed-off-by: Aditya Kumar Singh Link: https://patch.msgid.link/20241119-mesh_320mhz_support-v1-2-f9463338d584@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/virtual/mac80211_hwsim.c | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/drivers/net/wireless/virtual/mac80211_hwsim.c b/drivers/net/wireless/virtual/mac80211_hwsim.c index 347a15544afe..cf6a331d4042 100644 --- a/drivers/net/wireless/virtual/mac80211_hwsim.c +++ b/drivers/net/wireless/virtual/mac80211_hwsim.c @@ -5048,6 +5048,45 @@ static const struct ieee80211_sband_iftype_data sband_capa_6ghz[] = { .tx_mcs_80p80 = cpu_to_le16(0xfffa), }, }, + .eht_cap = { + .has_eht = true, + .eht_cap_elem = { + .mac_cap_info[0] = IEEE80211_EHT_MAC_CAP0_OM_CONTROL | + IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1, + .phy_cap_info[0] = IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ, + /* Leave all the other PHY capability bytes + * unset, as DCM, beam forming, RU and PPE + * threshold information are not supported + */ + }, + /* For all MCS and bandwidth, set 8 NSS for both Tx and + * Rx + */ + .eht_mcs_nss_supp = { + /* As B1 and B2 are set in the supported + * channel width set field in the HE PHY + * capabilities information field and 320MHz in + * 6GHz is supported include all the following + * MCS/NSS. 
+ */ + .bw._80 = { + .rx_tx_mcs9_max_nss = 0x88, + .rx_tx_mcs11_max_nss = 0x88, + .rx_tx_mcs13_max_nss = 0x88, + }, + .bw._160 = { + .rx_tx_mcs9_max_nss = 0x88, + .rx_tx_mcs11_max_nss = 0x88, + .rx_tx_mcs13_max_nss = 0x88, + }, + .bw._320 = { + .rx_tx_mcs9_max_nss = 0x88, + .rx_tx_mcs11_max_nss = 0x88, + .rx_tx_mcs13_max_nss = 0x88, + }, + }, + /* PPE threshold information is not supported */ + }, }, #endif }; From a9ab02ed97c654a9ea09afb3e6294cea3768c388 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Mon, 2 Dec 2024 16:22:05 -0800 Subject: [PATCH 0095/1386] netpoll: Use rtnl_dereference() for npinfo pointer access In the __netpoll_setup() function, when accessing the device's npinfo pointer, replace rcu_access_pointer() with rtnl_dereference(). This change is more appropriate, as suggested by Herbert Xu[1]. The function is called with the RTNL mutex held, and the pointer is being dereferenced later, so, dereference earlier and just reuse the pointer for the if/else. The replacement ensures correct pointer access while maintaining the existing locking and RCU semantics of the netpoll subsystem. 
Link: https://lore.kernel.org/lkml/Zz1cKZYt1e7elibV@gondor.apana.org.au/ [1] Suggested-by: Herbert Xu Signed-off-by: Breno Leitao Reviewed-by: Jacob Keller Acked-by: Herbert Xu Link: https://patch.msgid.link/20241202-netpoll_rcu_herbet_fix-v2-1-2b9d58edc76a@debian.org Signed-off-by: Jakub Kicinski --- net/core/netpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2e459b9d88eb..99e5aa9cc992 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -634,7 +634,8 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) goto out; } - if (!rcu_access_pointer(ndev->npinfo)) { + npinfo = rtnl_dereference(ndev->npinfo); + if (!npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; @@ -654,7 +655,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev) goto free_npinfo; } } else { - npinfo = rtnl_dereference(ndev->npinfo); refcount_inc(&npinfo->refcnt); } From a61b19f4a6586590a9ae6baf2ac4a25a852e547f Mon Sep 17 00:00:00 2001 From: Maksym Kutsevol Date: Mon, 2 Dec 2024 11:55:07 -0800 Subject: [PATCH 0096/1386] netpoll: Make netpoll_send_udp return status instead of void netpoll_send_udp can now return whether the send was successful. This allows client code to be aware of the send status. Possible return values are the result of __netpoll_send_skb (cast to int) and -ENOMEM. This doesn't cover the case when TX was not successful instantaneously and was scheduled for later; __netpoll_send_skb returns success in that case.
Signed-off-by: Maksym Kutsevol Link: https://patch.msgid.link/20241202-netcons-add-udp-send-fail-statistics-to-netconsole-v5-1-70e82239f922@kutsevol.com Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 2 +- net/core/netpoll.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b34301650c47..f91e50a76efd 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -57,7 +57,7 @@ static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif -void netpoll_send_udp(struct netpoll *np, const char *msg, int len); +int netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 99e5aa9cc992..6f2647b000b8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -390,7 +390,7 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } EXPORT_SYMBOL(netpoll_send_skb); -void netpoll_send_udp(struct netpoll *np, const char *msg, int len) +int netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; @@ -414,7 +414,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb = find_skb(np, total_len + np->dev->needed_tailroom, total_len - len); if (!skb) - return; + return -ENOMEM; skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); @@ -490,7 +490,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb->dev = np->dev; - netpoll_send_skb(np, skb); + return (int)netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); From 36de47bfd013f2152860339ca763fcb64c76f9b2 Mon Sep 17 00:00:00 2001 From: Maksym Kutsevol Date: Mon, 2 Dec 2024 
11:55:08 -0800 Subject: [PATCH 0097/1386] netcons: Add udp send fail statistics to netconsole Enhance observability of netconsole. Packet sends can fail. Start tracking at least two failure possibilities: ENOMEM and NET_XMIT_DROP for every target. Stats are exposed via an additional attribute in CONFIGFS. The exposed statistics allow easier debugging of cases when netconsole messages were not seen by receivers, eliminating the guesswork about whether the sender thinks that the messages in question were sent out. Stats are not reset on enable/disable/change remote ip/etc; they belong to the netcons target itself. Reported-by: Breno Leitao Closes: https://lore.kernel.org/all/ZsWoUzyK5du9Ffl+@gmail.com/ Signed-off-by: Maksym Kutsevol Link: https://patch.msgid.link/20241202-netcons-add-udp-send-fail-statistics-to-netconsole-v5-2-70e82239f922@kutsevol.com Signed-off-by: Jakub Kicinski --- Documentation/networking/netconsole.rst | 5 ++- drivers/net/netconsole.c | 60 +++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 5 deletions(-) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index d55c2a22ec7a..94c4680fdf3e 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -124,7 +124,7 @@ To remove a target:: The interface exposes these parameters of a netconsole target to userspace: - ============== ================================= ============ + =============== ================================= ============ enabled Is this target currently enabled?
(read-write) extended Extended mode enabled (read-write) release Prepend kernel release to message (read-write) @@ -135,7 +135,8 @@ The interface exposes these parameters of a netconsole target to userspace: remote_ip Remote agent's IP address (read-write) local_mac Local interface's MAC address (read-only) remote_mac Remote agent's MAC address (read-write) - ============== ================================= ============ + transmit_errors Number of packet send errors (read-only) + =============== ================================= ============ The "enabled" attribute is also used to control whether the parameters of a target can be updated or not -- you can modify the parameters of only diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 4ea44a2f48f7..f422a2f666ef 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,12 @@ static DEFINE_MUTEX(target_cleanup_list_lock); */ static struct console netconsole_ext; +struct netconsole_target_stats { + u64_stats_t xmit_drop_count; + u64_stats_t enomem_count; + struct u64_stats_sync syncp; +}; + /** * struct netconsole_target - Represents a configured netconsole target. * @list: Links this target into the target_list. @@ -97,6 +104,7 @@ static struct console netconsole_ext; * @userdata_group: Links to the userdata configfs hierarchy * @userdata_complete: Cached, formatted string of append * @userdata_length: String length of userdata_complete + * @stats: Packet send stats for the target. Used for debugging. * @enabled: On / off knob to enable / disable target. * Visible from userspace (read-write). 
* We maintain a strict 1:1 correspondence between this and @@ -124,6 +132,7 @@ struct netconsole_target { char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS]; size_t userdata_length; #endif + struct netconsole_target_stats stats; bool enabled; bool extended; bool release; @@ -262,6 +271,7 @@ static void netconsole_process_cleanups_core(void) * | remote_ip * | local_mac * | remote_mac + * | transmit_errors * | userdata/ * | / * | value @@ -371,6 +381,21 @@ static ssize_t remote_mac_show(struct config_item *item, char *buf) return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac); } +static ssize_t transmit_errors_show(struct config_item *item, char *buf) +{ + struct netconsole_target *nt = to_target(item); + u64 xmit_drop_count, enomem_count; + unsigned int start; + + do { + start = u64_stats_fetch_begin(&nt->stats.syncp); + xmit_drop_count = u64_stats_read(&nt->stats.xmit_drop_count); + enomem_count = u64_stats_read(&nt->stats.enomem_count); + } while (u64_stats_fetch_retry(&nt->stats.syncp, start)); + + return sysfs_emit(buf, "%llu\n", xmit_drop_count + enomem_count); +} + /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. 
@@ -842,6 +867,7 @@ CONFIGFS_ATTR(, remote_ip); CONFIGFS_ATTR_RO(, local_mac); CONFIGFS_ATTR(, remote_mac); CONFIGFS_ATTR(, release); +CONFIGFS_ATTR_RO(, transmit_errors); static struct configfs_attribute *netconsole_target_attrs[] = { &attr_enabled, @@ -854,6 +880,7 @@ static struct configfs_attribute *netconsole_target_attrs[] = { &attr_remote_ip, &attr_local_mac, &attr_remote_mac, + &attr_transmit_errors, NULL, }; @@ -1058,6 +1085,33 @@ static struct notifier_block netconsole_netdev_notifier = { .notifier_call = netconsole_netdev_event, }; +/** + * send_udp - Wrapper for netpoll_send_udp that counts errors + * @nt: target to send message to + * @msg: message to send + * @len: length of message + * + * Calls netpoll_send_udp and classifies the return value. If an error + * occurred it increments statistics in nt->stats accordingly. + * Only calls netpoll_send_udp if CONFIG_NETCONSOLE_DYNAMIC is disabled. + */ +static void send_udp(struct netconsole_target *nt, const char *msg, int len) +{ + int result = netpoll_send_udp(&nt->np, msg, len); + + if (IS_ENABLED(CONFIG_NETCONSOLE_DYNAMIC)) { + if (result == NET_XMIT_DROP) { + u64_stats_update_begin(&nt->stats.syncp); + u64_stats_inc(&nt->stats.xmit_drop_count); + u64_stats_update_end(&nt->stats.syncp); + } else if (result == -ENOMEM) { + u64_stats_update_begin(&nt->stats.syncp); + u64_stats_inc(&nt->stats.enomem_count); + u64_stats_update_end(&nt->stats.syncp); + } + } +} + static void send_msg_no_fragmentation(struct netconsole_target *nt, const char *msg, int msg_len, @@ -1085,7 +1139,7 @@ static void send_msg_no_fragmentation(struct netconsole_target *nt, MAX_PRINT_CHUNK - msg_len, "%s", userdata); - netpoll_send_udp(&nt->np, buf, msg_len); + send_udp(nt, buf, msg_len); } static void append_release(char *buf) @@ -1178,7 +1232,7 @@ static void send_fragmented_body(struct netconsole_target *nt, char *buf, this_offset += this_chunk; } - netpoll_send_udp(&nt->np, buf, this_header + this_offset); + send_udp(nt, buf, 
this_header + this_offset); offset += this_offset; } } @@ -1288,7 +1342,7 @@ static void write_msg(struct console *con, const char *msg, unsigned int len) tmp = msg; for (left = len; left;) { frag = min(left, MAX_PRINT_CHUNK); - netpoll_send_udp(&nt->np, tmp, frag); + send_udp(nt, tmp, frag); tmp += frag; left -= frag; } From 4485043a9bf83d1a03a62b460ef9d6eec643b0ad Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Tue, 3 Dec 2024 18:31:46 +0800 Subject: [PATCH 0098/1386] rtase: Add support for RTL907XD-VA PCIe port 1. Add RTL907XD-VA hardware version id. 2. Add the reported speed for RTL907XD-VA. Signed-off-by: Justin Lai Link: https://patch.msgid.link/20241203103146.734516-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase.h | 1 + drivers/net/ethernet/realtek/rtase/rtase_main.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/realtek/rtase/rtase.h b/drivers/net/ethernet/realtek/rtase/rtase.h index dbc3f92eebc4..2bbfcad613ab 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase.h +++ b/drivers/net/ethernet/realtek/rtase/rtase.h @@ -13,6 +13,7 @@ #define RTASE_HW_VER_906X_7XA 0x00800000 #define RTASE_HW_VER_906X_7XC 0x04000000 #define RTASE_HW_VER_907XD_V1 0x04800000 +#define RTASE_HW_VER_907XD_VA 0x08000000 #define RTASE_RX_DMA_BURST_256 4 #define RTASE_TX_DMA_BURST_UNLIMITED 7 diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index de7f11232593..6106aa5333bc 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -1725,6 +1725,7 @@ static int rtase_get_settings(struct net_device *dev, cmd->base.speed = SPEED_5000; break; case RTASE_HW_VER_907XD_V1: + case RTASE_HW_VER_907XD_VA: cmd->base.speed = SPEED_10000; break; } @@ -1993,6 +1994,7 @@ static int rtase_check_mac_version_valid(struct rtase_private *tp) case RTASE_HW_VER_906X_7XA: case RTASE_HW_VER_906X_7XC: case 
RTASE_HW_VER_907XD_V1: + case RTASE_HW_VER_907XD_VA: ret = 0; break; } From 17ed1911f9c8d4f9af8e13b2c95103ee06dadc0f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:30:47 +0000 Subject: [PATCH 0099/1386] net: phylink: pass phylink and pcs into phylink_pcs_neg_mode() Move the call to phylink_pcs_neg_mode() in phylink_major_config() after we have selected the appropriate PCS to allow the PCS to be passed in. Add struct phylink and struct phylink_pcs pointers to phylink_pcs_neg_mode() and pass in the appropriate structures. Set pl->pcs_neg_mode before returning, and remove the return value. This will allow the capabilities of the PCS and any PHY to be used when deciding which pcs_neg_mode should be used. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUrP-006ITh-6u@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 30a654e98352..daee679f33b3 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -1085,7 +1085,8 @@ static void phylink_pcs_an_restart(struct phylink *pl) /** * phylink_pcs_neg_mode() - helper to determine PCS inband mode - * @mode: one of %MLO_AN_FIXED, %MLO_AN_PHY, %MLO_AN_INBAND. + * @pl: a pointer to a &struct phylink returned from phylink_create() + * @pcs: a pointer to &struct phylink_pcs * @interface: interface mode to be used * @advertising: adertisement ethtool link mode mask * @@ -1102,11 +1103,13 @@ static void phylink_pcs_an_restart(struct phylink *pl) * Note: this is for cases where the PCS itself is involved in negotiation * (e.g. Clause 37, SGMII and similar) not Clause 73. 
*/ -static unsigned int phylink_pcs_neg_mode(unsigned int mode, - phy_interface_t interface, - const unsigned long *advertising) +static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, + phy_interface_t interface, + const unsigned long *advertising) { - unsigned int neg_mode; + unsigned int neg_mode, mode; + + mode = pl->cur_link_an_mode; switch (interface) { case PHY_INTERFACE_MODE_SGMII: @@ -1147,7 +1150,7 @@ static unsigned int phylink_pcs_neg_mode(unsigned int mode, break; } - return neg_mode; + pl->pcs_neg_mode = neg_mode; } static void phylink_major_config(struct phylink *pl, bool restart, @@ -1161,10 +1164,6 @@ static void phylink_major_config(struct phylink *pl, bool restart, phylink_dbg(pl, "major config %s\n", phy_modes(state->interface)); - pl->pcs_neg_mode = phylink_pcs_neg_mode(pl->cur_link_an_mode, - state->interface, - state->advertising); - if (pl->mac_ops->mac_select_pcs) { pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface); if (IS_ERR(pcs)) { @@ -1177,6 +1176,8 @@ static void phylink_major_config(struct phylink *pl, bool restart, pcs_changed = pl->pcs != pcs; } + phylink_pcs_neg_mode(pl, pcs, state->interface, state->advertising); + phylink_pcs_poll_stop(pl); if (pl->mac_ops->mac_prepare) { @@ -1267,9 +1268,8 @@ static int phylink_change_inband_advert(struct phylink *pl) pl->link_config.pause); /* Recompute the PCS neg mode */ - pl->pcs_neg_mode = phylink_pcs_neg_mode(pl->cur_link_an_mode, - pl->link_config.interface, - pl->link_config.advertising); + phylink_pcs_neg_mode(pl, pl->pcs, pl->link_config.interface, + pl->link_config.advertising); neg_mode = pl->cur_link_an_mode; if (pl->pcs->neg_mode) From 1f92ead7e15003f632b5f138e8138095e0997d3d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:30:52 +0000 Subject: [PATCH 0100/1386] net: phylink: split cur_link_an_mode into requested and active There is an interdependence between the current link_an_mode and pcs_neg_mode that some 
drivers rely upon to know whether inband or PHY mode will be used. In order to support detection of PCS and PHY inband capabilities resulting in automatic selection of inband or PHY mode, we need to cater for this, and support changing the MAC link_an_mode. However, we end up with an inter-dependency between the current link_an_mode and pcs_neg_mode. To solve this, split the current link_an_mode into the requested link_an_mode and active link_an_mode. The requested link_an_mode will always be passed to phylink_pcs_neg_mode(), and the active link_an_mode will be used for everything else, and only updated during phylink_major_config(). This will ensure that phylink_pcs_neg_mode()'s link_an_mode will not depend on the active link_an_mode that will, in a future patch, depend on pcs_neg_mode. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUrU-006ITn-Ai@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 60 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index daee679f33b3..098021f1ab49 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -56,7 +56,8 @@ struct phylink { struct phy_device *phydev; phy_interface_t link_interface; /* PHY_INTERFACE_xxx */ u8 cfg_link_an_mode; /* MLO_AN_xxx */ - u8 cur_link_an_mode; + u8 req_link_an_mode; /* Requested MLO_AN_xxx mode */ + u8 act_link_an_mode; /* Active MLO_AN_xxx mode */ u8 link_port; /* The current non-phy ethtool port */ __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); @@ -1065,13 +1066,13 @@ static void phylink_mac_config(struct phylink *pl, phylink_dbg(pl, "%s: mode=%s/%s/%s adv=%*pb pause=%02x\n", - __func__, phylink_an_mode_str(pl->cur_link_an_mode), + __func__, phylink_an_mode_str(pl->act_link_an_mode), phy_modes(st.interface), phy_rate_matching_to_str(st.rate_matching), __ETHTOOL_LINK_MODE_MASK_NBITS, 
st.advertising, st.pause); - pl->mac_ops->mac_config(pl->config, pl->cur_link_an_mode, &st); + pl->mac_ops->mac_config(pl->config, pl->act_link_an_mode, &st); } static void phylink_pcs_an_restart(struct phylink *pl) @@ -1079,7 +1080,7 @@ static void phylink_pcs_an_restart(struct phylink *pl) if (pl->pcs && linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, pl->link_config.advertising) && phy_interface_mode_is_8023z(pl->link_config.interface) && - phylink_autoneg_inband(pl->cur_link_an_mode)) + phylink_autoneg_inband(pl->act_link_an_mode)) pl->pcs->ops->pcs_an_restart(pl->pcs); } @@ -1109,7 +1110,7 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, { unsigned int neg_mode, mode; - mode = pl->cur_link_an_mode; + mode = pl->req_link_an_mode; switch (interface) { case PHY_INTERFACE_MODE_SGMII: @@ -1151,6 +1152,7 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, } pl->pcs_neg_mode = neg_mode; + pl->act_link_an_mode = mode; } static void phylink_major_config(struct phylink *pl, bool restart, @@ -1181,7 +1183,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, phylink_pcs_poll_stop(pl); if (pl->mac_ops->mac_prepare) { - err = pl->mac_ops->mac_prepare(pl->config, pl->cur_link_an_mode, + err = pl->mac_ops->mac_prepare(pl->config, pl->act_link_an_mode, state->interface); if (err < 0) { phylink_err(pl, "mac_prepare failed: %pe\n", @@ -1215,7 +1217,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, if (pl->pcs_state == PCS_STATE_STARTING || pcs_changed) phylink_pcs_enable(pl->pcs); - neg_mode = pl->cur_link_an_mode; + neg_mode = pl->act_link_an_mode; if (pl->pcs && pl->pcs->neg_mode) neg_mode = pl->pcs_neg_mode; @@ -1231,7 +1233,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, phylink_pcs_an_restart(pl); if (pl->mac_ops->mac_finish) { - err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode, + err = pl->mac_ops->mac_finish(pl->config, 
pl->act_link_an_mode, state->interface); if (err < 0) phylink_err(pl, "mac_finish failed: %pe\n", @@ -1262,7 +1264,7 @@ static int phylink_change_inband_advert(struct phylink *pl) return 0; phylink_dbg(pl, "%s: mode=%s/%s adv=%*pb pause=%02x\n", __func__, - phylink_an_mode_str(pl->cur_link_an_mode), + phylink_an_mode_str(pl->req_link_an_mode), phy_modes(pl->link_config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, pl->link_config.advertising, pl->link_config.pause); @@ -1271,7 +1273,7 @@ static int phylink_change_inband_advert(struct phylink *pl) phylink_pcs_neg_mode(pl, pl->pcs, pl->link_config.interface, pl->link_config.advertising); - neg_mode = pl->cur_link_an_mode; + neg_mode = pl->act_link_an_mode; if (pl->pcs->neg_mode) neg_mode = pl->pcs_neg_mode; @@ -1336,7 +1338,7 @@ static void phylink_mac_initial_config(struct phylink *pl, bool force_restart) { struct phylink_link_state link_state; - switch (pl->cur_link_an_mode) { + switch (pl->req_link_an_mode) { case MLO_AN_PHY: link_state = pl->phy_state; break; @@ -1410,14 +1412,14 @@ static void phylink_link_up(struct phylink *pl, pl->cur_interface = link_state.interface; - neg_mode = pl->cur_link_an_mode; + neg_mode = pl->act_link_an_mode; if (pl->pcs && pl->pcs->neg_mode) neg_mode = pl->pcs_neg_mode; phylink_pcs_link_up(pl->pcs, neg_mode, pl->cur_interface, speed, duplex); - pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->cur_link_an_mode, + pl->mac_ops->mac_link_up(pl->config, pl->phydev, pl->act_link_an_mode, pl->cur_interface, speed, duplex, !!(link_state.pause & MLO_PAUSE_TX), rx_pause); @@ -1437,7 +1439,7 @@ static void phylink_link_down(struct phylink *pl) if (ndev) netif_carrier_off(ndev); - pl->mac_ops->mac_link_down(pl->config, pl->cur_link_an_mode, + pl->mac_ops->mac_link_down(pl->config, pl->act_link_an_mode, pl->cur_interface); phylink_info(pl, "Link is Down\n"); } @@ -1463,10 +1465,10 @@ static void phylink_resolve(struct work_struct *w) } else if (pl->link_failed) { link_state.link = false; 
retrigger = true; - } else if (pl->cur_link_an_mode == MLO_AN_FIXED) { + } else if (pl->act_link_an_mode == MLO_AN_FIXED) { phylink_get_fixed_state(pl, &link_state); mac_config = link_state.link; - } else if (pl->cur_link_an_mode == MLO_AN_PHY) { + } else if (pl->act_link_an_mode == MLO_AN_PHY) { link_state = pl->phy_state; mac_config = link_state.link; } else { @@ -1520,7 +1522,7 @@ static void phylink_resolve(struct work_struct *w) } } - if (pl->cur_link_an_mode != MLO_AN_FIXED) + if (pl->act_link_an_mode != MLO_AN_FIXED) phylink_apply_manual_flow(pl, &link_state); if (mac_config) { @@ -1644,7 +1646,7 @@ int phylink_set_fixed_link(struct phylink *pl, pl->link_config.an_complete = 1; pl->cfg_link_an_mode = MLO_AN_FIXED; - pl->cur_link_an_mode = pl->cfg_link_an_mode; + pl->req_link_an_mode = pl->cfg_link_an_mode; return 0; } @@ -1732,7 +1734,7 @@ struct phylink *phylink_create(struct phylink_config *config, } } - pl->cur_link_an_mode = pl->cfg_link_an_mode; + pl->req_link_an_mode = pl->cfg_link_an_mode; ret = phylink_register_sfp(pl, fwnode); if (ret < 0) { @@ -2189,7 +2191,7 @@ void phylink_start(struct phylink *pl) ASSERT_RTNL(); phylink_info(pl, "configuring for %s/%s link mode\n", - phylink_an_mode_str(pl->cur_link_an_mode), + phylink_an_mode_str(pl->req_link_an_mode), phy_modes(pl->link_config.interface)); /* Always set the carrier off */ @@ -2474,7 +2476,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, linkmode_copy(kset->link_modes.supported, pl->supported); - switch (pl->cur_link_an_mode) { + switch (pl->act_link_an_mode) { case MLO_AN_FIXED: /* We are using fixed settings. Report these as the * current link settings - and note that these also @@ -2566,7 +2568,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, /* If we have a fixed link, refuse to change link parameters. * If the link parameters match, accept them but do nothing. 
*/ - if (pl->cur_link_an_mode == MLO_AN_FIXED) { + if (pl->req_link_an_mode == MLO_AN_FIXED) { if (s->speed != pl->link_config.speed || s->duplex != pl->link_config.duplex) return -EINVAL; @@ -2582,7 +2584,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, * is our default case) but do not allow the advertisement to * be changed. If the advertisement matches, simply return. */ - if (pl->cur_link_an_mode == MLO_AN_FIXED) { + if (pl->req_link_an_mode == MLO_AN_FIXED) { if (!linkmode_equal(config.advertising, pl->link_config.advertising)) return -EINVAL; @@ -2617,7 +2619,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, linkmode_copy(support, pl->supported); if (phylink_validate(pl, support, &config)) { phylink_err(pl, "validation of %s/%s with support %*pb failed\n", - phylink_an_mode_str(pl->cur_link_an_mode), + phylink_an_mode_str(pl->req_link_an_mode), phy_modes(config.interface), __ETHTOOL_LINK_MODE_MASK_NBITS, support); return -EINVAL; @@ -2717,7 +2719,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, ASSERT_RTNL(); - if (pl->cur_link_an_mode == MLO_AN_FIXED) + if (pl->req_link_an_mode == MLO_AN_FIXED) return -EOPNOTSUPP; if (!phylink_test(pl->supported, Pause) && @@ -2981,7 +2983,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, struct phylink_link_state state; int val = 0xffff; - switch (pl->cur_link_an_mode) { + switch (pl->act_link_an_mode) { case MLO_AN_FIXED: if (phy_id == 0) { phylink_get_fixed_state(pl, &state); @@ -3006,7 +3008,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id, static int phylink_mii_write(struct phylink *pl, unsigned int phy_id, unsigned int reg, unsigned int val) { - switch (pl->cur_link_an_mode) { + switch (pl->act_link_an_mode) { case MLO_AN_FIXED: break; @@ -3196,9 +3198,9 @@ static void phylink_sfp_set_config(struct phylink *pl, u8 mode, changed = true; } - if (pl->cur_link_an_mode != mode || + if (pl->req_link_an_mode != mode || pl->link_config.interface != 
state->interface) { - pl->cur_link_an_mode = mode; + pl->req_link_an_mode = mode; pl->link_config.interface = state->interface; changed = true; From 4e7d000286fe8e12f2d88032711ffab3ab658b12 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:30:57 +0000 Subject: [PATCH 0101/1386] net: phylink: add debug for phylink_major_config() Now that we have more complexity in phylink_major_config(), augment the debugging so we can see what's going on there. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUrZ-006ITt-Fa@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 098021f1ab49..fda53dd58285 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -175,6 +175,24 @@ static const char *phylink_an_mode_str(unsigned int mode) return mode < ARRAY_SIZE(modestr) ?
modestr[mode] : "unknown"; } +static const char *phylink_pcs_mode_str(unsigned int mode) +{ + if (!mode) + return "none"; + + if (mode & PHYLINK_PCS_NEG_OUTBAND) + return "outband"; + + if (mode & PHYLINK_PCS_NEG_INBAND) { + if (mode & PHYLINK_PCS_NEG_ENABLED) + return "inband,an-enabled"; + else + return "inband,an-disabled"; + } + + return "unknown"; +} + static unsigned int phylink_interface_signal_rate(phy_interface_t interface) { switch (interface) { @@ -1164,7 +1182,9 @@ static void phylink_major_config(struct phylink *pl, bool restart, unsigned int neg_mode; int err; - phylink_dbg(pl, "major config %s\n", phy_modes(state->interface)); + phylink_dbg(pl, "major config, requested %s/%s\n", + phylink_an_mode_str(pl->req_link_an_mode), + phy_modes(state->interface)); if (pl->mac_ops->mac_select_pcs) { pcs = pl->mac_ops->mac_select_pcs(pl->config, state->interface); @@ -1180,6 +1200,11 @@ static void phylink_major_config(struct phylink *pl, bool restart, phylink_pcs_neg_mode(pl, pcs, state->interface, state->advertising); + phylink_dbg(pl, "major config, active %s/%s/%s\n", + phylink_an_mode_str(pl->act_link_an_mode), + phylink_pcs_mode_str(pl->pcs_neg_mode), + phy_modes(state->interface)); + phylink_pcs_poll_stop(pl); if (pl->mac_ops->mac_prepare) { From b4c7698dd95f253c6958d8c6ac219098009bf28a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:02 +0000 Subject: [PATCH 0102/1386] net: phy: add phy_inband_caps() Add a method to query the PHY's in-band capabilities for a PHY interface mode. Where the interface mode does not have in-band capability, or the PHY driver has not been updated to return this information, then phy_inband_caps() should return zero. Otherwise, PHY drivers will return a value consisting of the following flags: LINK_INBAND_DISABLE indicates that the hardware does not support in-band signalling, or can have in-band signalling configured via software to be disabled. 
LINK_INBAND_ENABLE indicates that the hardware will use in-band signalling, or can have in-band signalling configured via software to be enabled. LINK_INBAND_BYPASS indicates that the hardware has the ability to bypass in-band signalling when enabled after a timeout if the link partner does not respond to its in-band signalling. This reports the PHY capabilities for the particular interface mode, not the current configuration. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUre-006ITz-KF@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 21 +++++++++++++++++++++ include/linux/phy.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0d20b534122b..f42cd6584841 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1005,6 +1005,27 @@ static int phy_check_link_status(struct phy_device *phydev) return 0; } +/** + * phy_inband_caps - query which in-band signalling modes are supported + * @phydev: a pointer to a &struct phy_device + * @interface: the interface mode for the PHY + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PHY (e.g. because the PHY driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * by the PHY for this interface mode. 
+ */ +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + if (phydev->drv && phydev->drv->inband_caps) + return phydev->drv->inband_caps(phydev, interface); + + return 0; +} +EXPORT_SYMBOL_GPL(phy_inband_caps); + /** * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 563c46205685..ccb93d892da9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -817,6 +817,24 @@ struct phy_tdr_config { }; #define PHY_PAIR_ALL -1 +/** + * enum link_inband_signalling - in-band signalling modes that are supported + * + * @LINK_INBAND_DISABLE: in-band signalling can be disabled + * @LINK_INBAND_ENABLE: in-band signalling can be enabled without bypass + * @LINK_INBAND_BYPASS: in-band signalling can be enabled with bypass + * + * The possible and required bits can only be used if the valid bit is set. + * If possible is clear, that means inband signalling can not be used. + * Required is only valid when possible is set, and means that inband + * signalling must be used. + */ +enum link_inband_signalling { + LINK_INBAND_DISABLE = BIT(0), + LINK_INBAND_ENABLE = BIT(1), + LINK_INBAND_BYPASS = BIT(2), +}; + /** * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision * Avoidance) Reconciliation Sublayer. @@ -956,6 +974,14 @@ struct phy_driver { */ int (*get_features)(struct phy_device *phydev); + /** + * @inband_caps: query whether in-band is supported for the given PHY + * interface mode. Returns a bitmask of bits defined by enum + * link_inband_signalling. + */ + unsigned int (*inband_caps)(struct phy_device *phydev, + phy_interface_t interface); + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. 
This is used by phy consumers to determine @@ -1818,6 +1844,8 @@ int phy_config_aneg(struct phy_device *phydev); int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); From c64c7fa0a774d9da72071a8517e359992baac982 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:07 +0000 Subject: [PATCH 0103/1386] net: phy: bcm84881: implement phy_inband_caps() method BCM84881 has no support for inband signalling, so this is a trivial implementation that returns no support for inband. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Acked-by: Florian Fainelli Link: https://patch.msgid.link/E1tIUrj-006IU6-ON@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm84881.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c index 97da3aee4942..47405bded677 100644 --- a/drivers/net/phy/bcm84881.c +++ b/drivers/net/phy/bcm84881.c @@ -235,11 +235,21 @@ static int bcm84881_read_status(struct phy_device *phydev) return genphy_c45_read_mdix(phydev); } +/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII + * or 802.3z control word, so inband will not work. 
+ */ +static unsigned int bcm84881_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + return LINK_INBAND_DISABLE; +} + static struct phy_driver bcm84881_drivers[] = { { .phy_id = 0xae025150, .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM84881", + .inband_caps = bcm84881_inband_caps, .config_init = bcm84881_config_init, .probe = bcm84881_probe, .get_features = bcm84881_get_features, From 1c86828dff88e28b8ade6bddeee0163a023faf91 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:12 +0000 Subject: [PATCH 0104/1386] net: phy: marvell: implement phy_inband_caps() method Provide an implementation for phy_inband_caps() for Marvell PHYs used on SFP modules, so that phylink knows the PHYs capabilities. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUro-006IUC-Rq@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/marvell.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index cd50cd6a7f75..3075ebc3f964 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -717,6 +717,20 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev) return genphy_check_and_restart_aneg(phydev, changed); } +static unsigned int m88e1111_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + /* In 1000base-X and SGMII modes, the inband mode can be changed + * through the Fibre page BMCR ANENABLE bit. 
+ */ + if (interface == PHY_INTERFACE_MODE_1000BASEX || + interface == PHY_INTERFACE_MODE_SGMII) + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE | + LINK_INBAND_BYPASS; + + return 0; +} + static int m88e1111_config_aneg(struct phy_device *phydev) { int extsr = phy_read(phydev, MII_M1111_PHY_EXT_SR); @@ -3677,6 +3691,7 @@ static struct phy_driver marvell_drivers[] = { .name = "Marvell 88E1112", /* PHY_GBIT_FEATURES */ .probe = marvell_probe, + .inband_caps = m88e1111_inband_caps, .config_init = m88e1112_config_init, .config_aneg = marvell_config_aneg, .config_intr = marvell_config_intr, @@ -3698,6 +3713,7 @@ static struct phy_driver marvell_drivers[] = { /* PHY_GBIT_FEATURES */ .flags = PHY_POLL_CABLE_TEST, .probe = marvell_probe, + .inband_caps = m88e1111_inband_caps, .config_init = m88e1111gbe_config_init, .config_aneg = m88e1111_config_aneg, .read_status = marvell_read_status, @@ -3721,6 +3737,7 @@ static struct phy_driver marvell_drivers[] = { .name = "Marvell 88E1111 (Finisar)", /* PHY_GBIT_FEATURES */ .probe = marvell_probe, + .inband_caps = m88e1111_inband_caps, .config_init = m88e1111gbe_config_init, .config_aneg = m88e1111_config_aneg, .read_status = marvell_read_status, From 5d58a890c02770ba8d790b1f3c6e8c0e20514dc2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:18 +0000 Subject: [PATCH 0105/1386] net: phy: add phy_config_inband() Add a method to configure the PHY's in-band mode. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUru-006IUI-08@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 32 ++++++++++++++++++++++++++++++++ include/linux/phy.h | 6 ++++++ 2 files changed, 38 insertions(+) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f42cd6584841..0c228aa18019 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1026,6 +1026,38 @@ unsigned int phy_inband_caps(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(phy_inband_caps); +/** + * phy_config_inband - configure the desired PHY in-band mode + * @phydev: the phy_device struct + * @modes: in-band modes to configure + * + * Description: disables, enables or enables-with-bypass in-band signalling + * between the PHY and host system. + * + * Returns: zero on success, or negative errno value. + */ +int phy_config_inband(struct phy_device *phydev, unsigned int modes) +{ + int err; + + if (!!(modes & LINK_INBAND_DISABLE) + + !!(modes & LINK_INBAND_ENABLE) + + !!(modes & LINK_INBAND_BYPASS) != 1) + return -EINVAL; + + mutex_lock(&phydev->lock); + if (!phydev->drv) + err = -EIO; + else if (!phydev->drv->config_inband) + err = -EOPNOTSUPP; + else + err = phydev->drv->config_inband(phydev, modes); + mutex_unlock(&phydev->lock); + + return err; +} +EXPORT_SYMBOL(phy_config_inband); + /** * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index ccb93d892da9..61a1bc81f597 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -982,6 +982,11 @@ struct phy_driver { unsigned int (*inband_caps)(struct phy_device *phydev, phy_interface_t interface); + /** + * @config_inband: configure in-band mode for the PHY + */ + int (*config_inband)(struct phy_device *phydev, unsigned int modes); + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. 
This is used by phy consumers to determine @@ -1846,6 +1851,7 @@ int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); unsigned int phy_inband_caps(struct phy_device *phydev, phy_interface_t interface); +int phy_config_inband(struct phy_device *phydev, unsigned int modes); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); From a219912e0fec73c346e64ef47013cb2e152f88fc Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:23 +0000 Subject: [PATCH 0106/1386] net: phy: marvell: implement config_inband() method Implement the config_inband() method for Marvell 88E1112, 88E1111, and Finisar's 88E1111 variant. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUrz-006IUO-3r@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/marvell.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 3075ebc3f964..b885bc0fe6e0 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -731,6 +731,34 @@ static unsigned int m88e1111_inband_caps(struct phy_device *phydev, return 0; } +static int m88e1111_config_inband(struct phy_device *phydev, unsigned int modes) +{ + u16 extsr, bmcr; + int err; + + if (phydev->interface != PHY_INTERFACE_MODE_1000BASEX && + phydev->interface != PHY_INTERFACE_MODE_SGMII) + return -EINVAL; + + if (modes == LINK_INBAND_BYPASS) + extsr = MII_M1111_HWCFG_SERIAL_AN_BYPASS; + else + extsr = 0; + + if (modes == LINK_INBAND_DISABLE) + bmcr = 0; + else + bmcr = BMCR_ANENABLE; + + err = phy_modify(phydev, MII_M1111_PHY_EXT_SR, + MII_M1111_HWCFG_SERIAL_AN_BYPASS, extsr); + if (err < 0) + return err; + + return phy_modify_paged(phydev, MII_MARVELL_FIBER_PAGE, MII_BMCR, + BMCR_ANENABLE, bmcr); +} + static int 
m88e1111_config_aneg(struct phy_device *phydev) { int extsr = phy_read(phydev, MII_M1111_PHY_EXT_SR); @@ -3692,6 +3720,7 @@ static struct phy_driver marvell_drivers[] = { /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .inband_caps = m88e1111_inband_caps, + .config_inband = m88e1111_config_inband, .config_init = m88e1112_config_init, .config_aneg = marvell_config_aneg, .config_intr = marvell_config_intr, @@ -3714,6 +3743,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_POLL_CABLE_TEST, .probe = marvell_probe, .inband_caps = m88e1111_inband_caps, + .config_inband = m88e1111_config_inband, .config_init = m88e1111gbe_config_init, .config_aneg = m88e1111_config_aneg, .read_status = marvell_read_status, @@ -3738,6 +3768,7 @@ static struct phy_driver marvell_drivers[] = { /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .inband_caps = m88e1111_inband_caps, + .config_inband = m88e1111_config_inband, .config_init = m88e1111gbe_config_init, .config_aneg = m88e1111_config_aneg, .read_status = marvell_read_status, From df874f9e52c340cc6f0a0014a97b778f67d46849 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:28 +0000 Subject: [PATCH 0107/1386] net: phylink: add pcs_inband_caps() method Add a pcs_inband_caps() method to query the PCS for its inband link capabilities, and use this to determine whether link modes used with optical SFPs can be supported. When a PCS does not provide a method, we allow inband negotiation to be either on or off, making this a no-op until the pcs_inband_caps() method is implemented by a PCS driver. 
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUs4-006IUU-7K@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 60 +++++++++++++++++++++++++++++++++++++++ include/linux/phylink.h | 17 +++++++++++ 2 files changed, 77 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index fda53dd58285..42f3c7ccbf38 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -990,6 +990,15 @@ static void phylink_resolve_an_pause(struct phylink_link_state *state) } } +static unsigned int phylink_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + if (pcs && pcs->ops->pcs_inband_caps) + return pcs->ops->pcs_inband_caps(pcs, interface); + + return 0; +} + static void phylink_pcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface) { @@ -1043,6 +1052,24 @@ static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, pcs->ops->pcs_link_up(pcs, neg_mode, interface, speed, duplex); } +/* Query inband for a specific interface mode, asking the MAC for the + * PCS which will be used to handle the interface mode. + */ +static unsigned int phylink_inband_caps(struct phylink *pl, + phy_interface_t interface) +{ + struct phylink_pcs *pcs; + + if (!pl->mac_ops->mac_select_pcs) + return 0; + + pcs = pl->mac_ops->mac_select_pcs(pl->config, interface); + if (!pcs) + return 0; + + return phylink_pcs_inband_caps(pcs, interface); +} + static void phylink_pcs_poll_stop(struct phylink *pl) { if (pl->cfg_link_an_mode == MLO_AN_INBAND) @@ -2532,6 +2559,26 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, } EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get); +static bool phylink_validate_pcs_inband_autoneg(struct phylink *pl, + phy_interface_t interface, + unsigned long *adv) +{ + unsigned int inband = phylink_inband_caps(pl, interface); + unsigned int mask; + + /* If the PCS doesn't implement inband support, be permissive. 
*/ + if (!inband) + return true; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, adv)) + mask = LINK_INBAND_ENABLE; + else + mask = LINK_INBAND_DISABLE; + + /* Check whether the PCS implements the required mode */ + return !!(inband & mask); +} + /** * phylink_ethtool_ksettings_set() - set the link settings * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -2662,6 +2709,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, phylink_is_empty_linkmode(config.advertising)) return -EINVAL; + /* Validate the autonegotiation state. We don't have a PHY in this + * situation, so the PCS is the media-facing entity. + */ + if (!phylink_validate_pcs_inband_autoneg(pl, config.interface, + config.advertising)) + return -EINVAL; + mutex_lock(&pl->state_mutex); pl->link_config.speed = config.speed; pl->link_config.duplex = config.duplex; @@ -3341,6 +3395,12 @@ static int phylink_sfp_config_optical(struct phylink *pl) phylink_dbg(pl, "optical SFP: chosen %s interface\n", phy_modes(interface)); + if (!phylink_validate_pcs_inband_autoneg(pl, interface, + config.advertising)) { + phylink_err(pl, "autoneg setting not compatible with PCS\n"); + return -EINVAL; + } + config.interface = interface; /* Ignore errors if we're expecting a PHY to attach later */ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5c01048860c4..5462cc6a37dc 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -419,6 +419,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_inband_caps: query inband support for interface mode. * @pcs_enable: enable the PCS. * @pcs_disable: disable the PCS.
* @pcs_pre_config: pre-mac_config method (for errata) @@ -434,6 +435,8 @@ struct phylink_pcs { struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs, + phy_interface_t interface); int (*pcs_enable)(struct phylink_pcs *pcs); void (*pcs_disable)(struct phylink_pcs *pcs); void (*pcs_pre_config)(struct phylink_pcs *pcs, @@ -470,6 +473,20 @@ struct phylink_pcs_ops { int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); +/** + * pcs_inband_caps - query PCS in-band capabilities for interface mode. + * @pcs: a pointer to a &struct phylink_pcs. + * @interface: interface mode to be queried + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PCS (e.g. because the PCS driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * for this interface mode. + */ +unsigned int pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface); + /** * pcs_enable() - enable the PCS. * @pcs: a pointer to a &struct phylink_pcs. From 513e8fb8fa32035b3325e2e14fb9598f8cb545e9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:33 +0000 Subject: [PATCH 0108/1386] net: mvneta: implement pcs_inband_caps() method Report the PCS in-band capabilities to phylink for Marvell NETA interfaces.
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUs9-006IUb-Au@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/mvneta.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 1fb285fa0bdb..fe6261b81540 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3960,20 +3960,27 @@ static struct mvneta_port *mvneta_pcs_to_port(struct phylink_pcs *pcs) return container_of(pcs, struct mvneta_port, phylink_pcs); } -static int mvneta_pcs_validate(struct phylink_pcs *pcs, - unsigned long *supported, - const struct phylink_link_state *state) +static unsigned int mvneta_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) { - /* We only support QSGMII, SGMII, 802.3z and RGMII modes. - * When in 802.3z mode, we must have AN enabled: + /* When operating in an 802.3z mode, we must have AN enabled: * "Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... * When = 1 (1000BASE-X) this field must be set to 1." + * Therefore, inband is "required". */ - if (phy_interface_mode_is_8023z(state->interface) && - !phylink_test(state->advertising, Autoneg)) - return -EINVAL; + if (phy_interface_mode_is_8023z(interface)) + return LINK_INBAND_ENABLE; - return 0; + /* QSGMII, SGMII and RGMII can be configured to use inband + * signalling of the AN result. Indicate these as "possible". + */ + if (interface == PHY_INTERFACE_MODE_SGMII || + interface == PHY_INTERFACE_MODE_QSGMII || + phy_interface_mode_is_rgmii(interface)) + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + /* For any other modes, indicate that inband is not supported. 
*/ + return LINK_INBAND_DISABLE; } static void mvneta_pcs_get_state(struct phylink_pcs *pcs, @@ -4071,7 +4078,7 @@ static void mvneta_pcs_an_restart(struct phylink_pcs *pcs) } static const struct phylink_pcs_ops mvneta_phylink_pcs_ops = { - .pcs_validate = mvneta_pcs_validate, + .pcs_inband_caps = mvneta_pcs_inband_caps, .pcs_get_state = mvneta_pcs_get_state, .pcs_config = mvneta_pcs_config, .pcs_an_restart = mvneta_pcs_an_restart, From d4169f0c7665afb8d8adb5e1b1df3db88517d0ad Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:38 +0000 Subject: [PATCH 0109/1386] net: mvpp2: implement pcs_inband_caps() method Report the PCS in-band capabilities to phylink for Marvell PP2 interfaces. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUsE-006IUh-E7@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- .../net/ethernet/marvell/mvpp2/mvpp2_main.c | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 571631a30320..f85229a30844 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -6224,19 +6224,26 @@ static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = { .pcs_config = mvpp2_xlg_pcs_config, }; -static int mvpp2_gmac_pcs_validate(struct phylink_pcs *pcs, - unsigned long *supported, - const struct phylink_link_state *state) +static unsigned int mvpp2_gmac_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) { - /* When in 802.3z mode, we must have AN enabled: + /* When operating in an 802.3z mode, we must have AN enabled: * Bit 2 Field InBandAnEn In-band Auto-Negotiation enable. ... * When = 1 (1000BASE-X) this field must be set to 1. + * Therefore, inband is "required". 
*/ - if (phy_interface_mode_is_8023z(state->interface) && - !phylink_test(state->advertising, Autoneg)) - return -EINVAL; + if (phy_interface_mode_is_8023z(interface)) + return LINK_INBAND_ENABLE; - return 0; + /* SGMII and RGMII can be configured to use inband signalling of the + * AN result. Indicate these as "possible". + */ + if (interface == PHY_INTERFACE_MODE_SGMII || + phy_interface_mode_is_rgmii(interface)) + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + /* For any other modes, indicate that inband is not supported. */ + return LINK_INBAND_DISABLE; } static void mvpp2_gmac_pcs_get_state(struct phylink_pcs *pcs, @@ -6343,7 +6350,7 @@ static void mvpp2_gmac_pcs_an_restart(struct phylink_pcs *pcs) } static const struct phylink_pcs_ops mvpp2_phylink_gmac_pcs_ops = { - .pcs_validate = mvpp2_gmac_pcs_validate, + .pcs_inband_caps = mvpp2_gmac_pcs_inband_caps, .pcs_get_state = mvpp2_gmac_pcs_get_state, .pcs_config = mvpp2_gmac_pcs_config, .pcs_an_restart = mvpp2_gmac_pcs_an_restart, From 5fd0f1a02e750e2db4038dee60edea669ce5aab1 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:43 +0000 Subject: [PATCH 0110/1386] net: phylink: add negotiation of in-band capabilities Support for in-band signalling with Serdes links is uncertain. Some PHYs do not support in-band for e.g. SGMII. Some PCS do not support in-band for 2500Base-X. Some PCS require in-band for Base-X protocols. Simply using what is in DT is insufficient when we have hot-pluggable PHYs e.g. in the form of SFP modules, which may not provide the in-band signalling. In order to address this, we have introduced phy_inband_caps() and pcs_inband_caps() functions to allow phylink to retrieve the capabilities from each end of the PCS/PHY link. This commit adds code to resolve whether in-band will be used in the various scenarios that we have: In-band not being used, PHY present using SGMII or Base-X, PHY not present. We also deal with no capabilities provided.
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUsJ-006IUn-H3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 154 +++++++++++++++++++++++++++++++++++--- 1 file changed, 144 insertions(+), 10 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 42f3c7ccbf38..b0881fa9c72e 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -75,6 +75,7 @@ struct phylink { struct mutex state_mutex; struct phylink_link_state phy_state; + unsigned int phy_ib_mode; struct work_struct resolve; unsigned int pcs_neg_mode; unsigned int pcs_state; @@ -1153,10 +1154,18 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, phy_interface_t interface, const unsigned long *advertising) { + unsigned int pcs_ib_caps = 0; + unsigned int phy_ib_caps = 0; unsigned int neg_mode, mode; + enum { + INBAND_CISCO_SGMII, + INBAND_BASEX, + } type; mode = pl->req_link_an_mode; + pl->phy_ib_mode = 0; + switch (interface) { case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: @@ -1168,10 +1177,7 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, * inband communication. Note: there exist PHYs that run * with SGMII but do not send the inband data. */ - if (!phylink_autoneg_inband(mode)) - neg_mode = PHYLINK_PCS_NEG_OUTBAND; - else - neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; + type = INBAND_CISCO_SGMII; break; case PHY_INTERFACE_MODE_1000BASEX: @@ -1182,18 +1188,139 @@ static void phylink_pcs_neg_mode(struct phylink *pl, struct phylink_pcs *pcs, * as well, but drivers may not support this, so may * need to override this. 
*/ - if (!phylink_autoneg_inband(mode)) + type = INBAND_BASEX; + break; + + default: + pl->pcs_neg_mode = PHYLINK_PCS_NEG_NONE; + pl->act_link_an_mode = mode; + return; + } + + if (pcs) + pcs_ib_caps = phylink_pcs_inband_caps(pcs, interface); + + if (pl->phydev) + phy_ib_caps = phy_inband_caps(pl->phydev, interface); + + phylink_dbg(pl, "interface %s inband modes: pcs=%02x phy=%02x\n", + phy_modes(interface), pcs_ib_caps, phy_ib_caps); + + if (!phylink_autoneg_inband(mode)) { + bool pcs_ib_only = false; + bool phy_ib_only = false; + + if (pcs_ib_caps && pcs_ib_caps != LINK_INBAND_DISABLE) { + /* PCS supports reporting in-band capabilities, and + * supports more than disable mode. + */ + if (pcs_ib_caps & LINK_INBAND_DISABLE) + neg_mode = PHYLINK_PCS_NEG_OUTBAND; + else if (pcs_ib_caps & LINK_INBAND_ENABLE) + pcs_ib_only = true; + } + + if (phy_ib_caps && phy_ib_caps != LINK_INBAND_DISABLE) { + /* PHY supports in-band capabilities, and supports + * more than disable mode. + */ + if (phy_ib_caps & LINK_INBAND_DISABLE) + pl->phy_ib_mode = LINK_INBAND_DISABLE; + else if (phy_ib_caps & LINK_INBAND_BYPASS) + pl->phy_ib_mode = LINK_INBAND_BYPASS; + else if (phy_ib_caps & LINK_INBAND_ENABLE) + phy_ib_only = true; + } + + /* If either the PCS or PHY requires inband to be enabled, + * this is an invalid configuration. Provide a diagnostic + * message for this case, but don't try to force the issue. + */ + if (pcs_ib_only || phy_ib_only) + phylink_warn(pl, + "firmware wants %s mode, but %s%s%s requires inband\n", + phylink_an_mode_str(mode), + pcs_ib_only ? "PCS" : "", + pcs_ib_only && phy_ib_only ? " and " : "", + phy_ib_only ? "PHY" : ""); + + neg_mode = PHYLINK_PCS_NEG_OUTBAND; + } else if (type == INBAND_CISCO_SGMII || pl->phydev) { + /* For SGMII modes which are designed to be used with PHYs, or + * Base-X with a PHY, we try to use in-band mode where-ever + * possible. However, there are some PHYs e.g. BCM84881 which + * do not support in-band. 
+ */ + const unsigned int inband_ok = LINK_INBAND_ENABLE | + LINK_INBAND_BYPASS; + const unsigned int outband_ok = LINK_INBAND_DISABLE | + LINK_INBAND_BYPASS; + /* PCS PHY + * D E D E + * 0 0 0 0 no information inband enabled + * 1 0 0 0 pcs doesn't support outband + * 0 1 0 0 pcs required inband enabled + * 1 1 0 0 pcs optional inband enabled + * 0 0 1 0 phy doesn't support outband + * 1 0 1 0 pcs+phy doesn't support outband + * 0 1 1 0 pcs required, phy doesn't support, invalid + * 1 1 1 0 pcs optional, phy doesn't support, outband + * 0 0 0 1 phy required inband enabled + * 1 0 0 1 pcs doesn't support, phy required, invalid + * 0 1 0 1 pcs+phy required inband enabled + * 1 1 0 1 pcs optional, phy required inband enabled + * 0 0 1 1 phy optional inband enabled + * 1 0 1 1 pcs doesn't support, phy optional, outband + * 0 1 1 1 pcs required, phy optional inband enabled + * 1 1 1 1 pcs+phy optional inband enabled + */ + if ((!pcs_ib_caps || pcs_ib_caps & inband_ok) && + (!phy_ib_caps || phy_ib_caps & inband_ok)) { + /* In-band supported or unknown at both ends. Enable + * in-band mode with or without bypass at the PHY. + */ + if (phy_ib_caps & LINK_INBAND_ENABLE) + pl->phy_ib_mode = LINK_INBAND_ENABLE; + else if (phy_ib_caps & LINK_INBAND_BYPASS) + pl->phy_ib_mode = LINK_INBAND_BYPASS; + + neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; + } else if ((!pcs_ib_caps || pcs_ib_caps & outband_ok) && + (!phy_ib_caps || phy_ib_caps & outband_ok)) { + /* Either in-band not supported at at least one end. + * In-band bypass at the other end is possible. 
+ */ + if (phy_ib_caps & LINK_INBAND_DISABLE) + pl->phy_ib_mode = LINK_INBAND_DISABLE; + else if (phy_ib_caps & LINK_INBAND_BYPASS) + pl->phy_ib_mode = LINK_INBAND_BYPASS; + neg_mode = PHYLINK_PCS_NEG_OUTBAND; + if (pl->phydev) + mode = MLO_AN_PHY; + } else { + /* invalid */ + phylink_warn(pl, "%s: incompatible in-band capabilities, trying in-band\n", + phy_modes(interface)); + neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; + } + } else { + /* For Base-X without a PHY */ + if (pcs_ib_caps == LINK_INBAND_DISABLE) + /* If the PCS doesn't support inband, then inband must + * be disabled. + */ + neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; + else if (pcs_ib_caps == LINK_INBAND_ENABLE) + /* If the PCS requires inband, then inband must always + * be enabled. + */ + neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, advertising)) neg_mode = PHYLINK_PCS_NEG_INBAND_ENABLED; else neg_mode = PHYLINK_PCS_NEG_INBAND_DISABLED; - break; - - default: - neg_mode = PHYLINK_PCS_NEG_NONE; - break; } pl->pcs_neg_mode = neg_mode; @@ -1292,6 +1419,13 @@ static void phylink_major_config(struct phylink *pl, bool restart, ERR_PTR(err)); } + if (pl->phydev && pl->phy_ib_mode) { + err = phy_config_inband(pl->phydev, pl->phy_ib_mode); + if (err < 0) + phylink_err(pl, "phy_config_inband: %pe\n", + ERR_PTR(err)); + } + if (pl->sfp_bus) { rate_kbd = phylink_interface_signal_rate(state->interface); if (rate_kbd) From 77ac9a8b2536e0eaca6c6f21070068458bf55981 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:48 +0000 Subject: [PATCH 0111/1386] net: phylink: remove phylink_phy_no_inband() Remove phylink_phy_no_inband() now that we are handling the lack of inband negotiation by querying the capabilities of the PHY and PCS, and the BCM84881 PHY driver provides us the information necessary to make the decision.
Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUsO-006IUt-KN@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index b0881fa9c72e..95fbc363f9a6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3391,10 +3391,11 @@ static phy_interface_t phylink_choose_sfp_interface(struct phylink *pl, return interface; } -static void phylink_sfp_set_config(struct phylink *pl, u8 mode, +static void phylink_sfp_set_config(struct phylink *pl, unsigned long *supported, struct phylink_link_state *state) { + u8 mode = MLO_AN_INBAND; bool changed = false; phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n", @@ -3428,8 +3429,7 @@ static void phylink_sfp_set_config(struct phylink *pl, u8 mode, phylink_mac_initial_config(pl, false); } -static int phylink_sfp_config_phy(struct phylink *pl, u8 mode, - struct phy_device *phy) +static int phylink_sfp_config_phy(struct phylink *pl, struct phy_device *phy) { __ETHTOOL_DECLARE_LINK_MODE_MASK(support); struct phylink_link_state config; @@ -3473,7 +3473,7 @@ static int phylink_sfp_config_phy(struct phylink *pl, u8 mode, pl->link_port = pl->sfp_port; - phylink_sfp_set_config(pl, mode, support, &config); + phylink_sfp_set_config(pl, support, &config); return 0; } @@ -3548,7 +3548,7 @@ static int phylink_sfp_config_optical(struct phylink *pl) pl->link_port = pl->sfp_port; - phylink_sfp_set_config(pl, MLO_AN_INBAND, pl->sfp_support, &config); + phylink_sfp_set_config(pl, pl->sfp_support, &config); return 0; } @@ -3619,19 +3619,9 @@ static void phylink_sfp_link_up(void *upstream) phylink_enable_and_run_resolve(pl, PHYLINK_DISABLE_LINK); } -/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII - * or 802.3z control word, so inband will not work. 
- */ -static bool phylink_phy_no_inband(struct phy_device *phy) -{ - return phy->is_c45 && phy_id_compare(phy->c45_ids.device_ids[1], - 0xae025150, 0xfffffff0); -} - static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { struct phylink *pl = upstream; - u8 mode; /* * This is the new way of dealing with flow control for PHYs, @@ -3642,17 +3632,12 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) */ phy_support_asym_pause(phy); - if (phylink_phy_no_inband(phy)) - mode = MLO_AN_PHY; - else - mode = MLO_AN_INBAND; - /* Set the PHY's host supported interfaces */ phy_interface_and(phy->host_interfaces, phylink_sfp_interfaces, pl->config->supported_interfaces); /* Do the initial configuration */ - return phylink_sfp_config_phy(pl, mode, phy); + return phylink_sfp_config_phy(pl, phy); } static void phylink_sfp_disconnect_phy(void *upstream, From 5204ccbfa22358f95afd031a3f337e6d9a74baea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Dec 2024 17:36:17 +0000 Subject: [PATCH 0112/1386] inet: add indirect call wrapper for getfrag() calls UDP send path suffers from one indirect call to ip_generic_getfrag() We can use INDIRECT_CALL_1() to avoid it. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Brian Vazquez Link: https://patch.msgid.link/20241203173617.2595451-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 13 +++++++++---- net/ipv6/ip6_output.c | 13 ++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0065b1996c94..a59204a8d850 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1169,7 +1169,10 @@ alloc_new_skb: /* [!] NOTE: copy will be negative if pagedlen>0 * because then the equation reduces to -fraggap. 
*/ - if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) { + if (copy > 0 && + INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; @@ -1213,8 +1216,9 @@ alloc_new_skb: unsigned int off; off = skb->len; - if (getfrag(from, skb_put(skb, copy), - offset, copy, off, skb) < 0) { + if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, skb_put(skb, copy), + offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; @@ -1252,7 +1256,8 @@ alloc_new_skb: get_page(pfrag->page); } copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (getfrag(from, + if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, page_address(pfrag->page) + pfrag->offset, offset, copy, skb->len, skb) < 0) goto error_efault; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index f7b4608bb316..3d672dea9f56 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1697,8 +1697,9 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } if (copy > 0 && - getfrag(from, data + transhdrlen, offset, - copy, fraggap, skb) < 0) { + INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, data + transhdrlen, offset, + copy, fraggap, skb) < 0) { err = -EFAULT; kfree_skb(skb); goto error; @@ -1742,8 +1743,9 @@ alloc_new_skb: unsigned int off; off = skb->len; - if (getfrag(from, skb_put(skb, copy), - offset, copy, off, skb) < 0) { + if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, skb_put(skb, copy), + offset, copy, off, skb) < 0) { __skb_trim(skb, off); err = -EFAULT; goto error; @@ -1781,7 +1783,8 @@ alloc_new_skb: get_page(pfrag->page); } copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (getfrag(from, + if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag, + from, page_address(pfrag->page) + pfrag->offset, offset, copy, skb->len, skb) < 0) goto error_efault; From ac98b3132402e1b892c16f87d766f21ef18dd344 Mon Sep 17 00:00:00 
2001 From: Kenjiro Nakayama Date: Wed, 4 Dec 2024 07:28:44 +0900 Subject: [PATCH 0113/1386] selftests/net: call sendmmsg via udpgso_bench.sh Currently, sendmmsg is implemented in udpgso_bench_tx.c, but it is not called by any test script. This patch adds a test for sendmmsg in udpgso_bench.sh. This allows for basic API testing and benchmarking comparisons with GSO. Signed-off-by: Kenjiro Nakayama Reviewed-by: Hangbin Liu Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241203222843.26983-1-nakayamakenjiro@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/udpgso_bench.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 From 152d00a913969514967ad3f962b3b1c8983eb2d7 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Tue, 3 Dec 2024 22:33:22 +0100 Subject: [PATCH 0114/1386] r8169: simplify setting hwmon attribute visibility Use new member visible to simplify setting the static visibility. 
Signed-off-by: Heiner Kallweit Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/dba77e76-be45-4a30-96c7-45e284072ad2@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169_main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index cc14cd540f74..6934bdee2a91 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5332,13 +5332,6 @@ static bool rtl_aspm_is_safe(struct rtl8169_private *tp) return false; } -static umode_t r8169_hwmon_is_visible(const void *drvdata, - enum hwmon_sensor_types type, - u32 attr, int channel) -{ - return 0444; -} - static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { @@ -5355,7 +5348,7 @@ static int r8169_hwmon_read(struct device *dev, enum hwmon_sensor_types type, } static const struct hwmon_ops r8169_hwmon_ops = { - .is_visible = r8169_hwmon_is_visible, + .visible = 0444, .read = r8169_hwmon_read, }; From 05b6555feeddc077f1eaa14c3e2c409b7ddf917b Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Thu, 28 Nov 2024 13:54:28 +0800 Subject: [PATCH 0115/1386] wifi: rtw89: 8922a: Extend channel info field length for scan Extend the bitfield for duration in channel info to 16 bits. Update the related format in H2C and C2H, then increase firmware format sequence to 3. 
Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/fw.c | 43 +++++++++++++------ drivers/net/wireless/realtek/rtw89/fw.h | 22 +++++++++- drivers/net/wireless/realtek/rtw89/mac.c | 24 ++++++++--- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 2 +- 5 files changed, 71 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 409cbdc6b92a..8c7e8b1c954f 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4456,6 +4456,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_RFK_RXDCK_V0, RTW89_FW_FEATURE_NO_WOW_CPU_IO_RX, RTW89_FW_FEATURE_NOTIFY_AP_INFO, + RTW89_FW_FEATURE_CH_INFO_BE_V0, }; struct rtw89_fw_suit { diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index cbd759c844e5..a0408fcd6799 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -729,6 +729,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 31, 0, RFK_PRE_NOTIFY_V0), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 42, 0, RFK_RXDCK_V0), __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 46, 0, NOTIFY_AP_INFO), + __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 47, 0, CH_INFO_BE_V0), }; static void rtw89_fw_iterate_feature_cfg(struct rtw89_fw_info *fw, @@ -4956,13 +4957,14 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, struct rtw89_wait_info *wait = &rtwdev->mac.fw_ofld_wait; struct rtw89_h2c_chinfo_elem_be *elem; struct rtw89_mac_chinfo_be *ch_info; - struct rtw89_h2c_chinfo *h2c; + struct rtw89_h2c_chinfo_be *h2c; struct sk_buff *skb; unsigned int cond; + u8 ver = U8_MAX; int skb_len; int ret; - static_assert(sizeof(*elem) == RTW89_MAC_CHINFO_SIZE); + static_assert(sizeof(*elem) == 
RTW89_MAC_CHINFO_SIZE_BE); skb_len = struct_size(h2c, elem, ch_num); skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, skb_len); @@ -4971,8 +4973,11 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, return -ENOMEM; } + if (RTW89_CHK_FW_FEATURE(CH_INFO_BE_V0, &rtwdev->fw)) + ver = 0; + skb_put(skb, sizeof(*h2c)); - h2c = (struct rtw89_h2c_chinfo *)skb->data; + h2c = (struct rtw89_h2c_chinfo_be *)skb->data; h2c->ch_num = ch_num; h2c->elem_size = sizeof(*elem) / 4; /* in unit of 4 bytes */ @@ -4982,8 +4987,7 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, list_for_each_entry(ch_info, chan_list, list) { elem = (struct rtw89_h2c_chinfo_elem_be *)skb_put(skb, sizeof(*elem)); - elem->w0 = le32_encode_bits(ch_info->period, RTW89_H2C_CHINFO_BE_W0_PERIOD) | - le32_encode_bits(ch_info->dwell_time, RTW89_H2C_CHINFO_BE_W0_DWELL) | + elem->w0 = le32_encode_bits(ch_info->dwell_time, RTW89_H2C_CHINFO_BE_W0_DWELL) | le32_encode_bits(ch_info->central_ch, RTW89_H2C_CHINFO_BE_W0_CENTER_CH) | le32_encode_bits(ch_info->pri_ch, RTW89_H2C_CHINFO_BE_W0_PRI_CH); @@ -5030,6 +5034,12 @@ int rtw89_fw_h2c_scan_list_offload_be(struct rtw89_dev *rtwdev, int ch_num, RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_SHORTSSIDS) | le32_encode_bits(ch_info->fw_probe0_bssids, RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_BSSIDS); + if (ver == 0) + elem->w0 |= + le32_encode_bits(ch_info->period, RTW89_H2C_CHINFO_BE_W0_PERIOD); + else + elem->w7 = le32_encode_bits(ch_info->period, + RTW89_H2C_CHINFO_BE_W7_PERIOD_V1); } rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, @@ -5173,6 +5183,7 @@ int rtw89_fw_h2c_scan_offload_be(struct rtw89_dev *rtwdev, u8 probe_id[NUM_NL80211_BANDS]; u8 cfg_len = sizeof(*h2c); unsigned int cond; + u8 ver = U8_MAX; void *ptr; int ret; u32 len; @@ -5193,6 +5204,9 @@ int rtw89_fw_h2c_scan_offload_be(struct rtw89_dev *rtwdev, memset(probe_id, RTW89_SCANOFLD_PKT_NONE, sizeof(probe_id)); + if (RTW89_CHK_FW_FEATURE(CH_INFO_BE_V0, &rtwdev->fw)) + ver = 0; 
+ if (!wowlan) { list_for_each_entry(pkt_info, &scan_info->pkt_list[NL80211_BAND_6GHZ], list) { if (pkt_info->wildcard_6ghz) { @@ -5288,9 +5302,7 @@ flex_member: le32_encode_bits(RTW89_OFF_CHAN_TIME / 10, RTW89_H2C_SCANOFLD_BE_OPCH_W0_POLICY_VAL); - opch->w1 = le32_encode_bits(RTW89_CHANNEL_TIME, - RTW89_H2C_SCANOFLD_BE_OPCH_W1_DURATION) | - le32_encode_bits(op->band_type, + opch->w1 = le32_encode_bits(op->band_type, RTW89_H2C_SCANOFLD_BE_OPCH_W1_CH_BAND) | le32_encode_bits(op->band_width, RTW89_H2C_SCANOFLD_BE_OPCH_W1_BW) | @@ -5316,6 +5328,13 @@ flex_member: RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT2) | le32_encode_bits(RTW89_SCANOFLD_PKT_NONE, RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT3); + + if (ver == 0) + opch->w1 |= le32_encode_bits(RTW89_CHANNEL_TIME, + RTW89_H2C_SCANOFLD_BE_OPCH_W1_DURATION); + else + opch->w4 = le32_encode_bits(RTW89_CHANNEL_TIME, + RTW89_H2C_SCANOFLD_BE_OPCH_W4_DURATION_V1); ptr += sizeof(*opch); } @@ -6498,7 +6517,7 @@ int rtw89_pno_scan_add_chan_list_ax(struct rtw89_dev *rtwdev, INIT_LIST_HEAD(&chan_list); for (idx = 0, list_len = 0; - idx < nd_config->n_channels && list_len < RTW89_SCAN_LIST_LIMIT; + idx < nd_config->n_channels && list_len < RTW89_SCAN_LIST_LIMIT_AX; idx++, list_len++) { channel = nd_config->channels[idx]; ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL); @@ -6549,7 +6568,7 @@ int rtw89_hw_scan_add_chan_list_ax(struct rtw89_dev *rtwdev, INIT_LIST_HEAD(&chan_list); for (idx = rtwdev->scan_info.last_chan_idx, list_len = 0; - idx < req->n_channels && list_len < RTW89_SCAN_LIST_LIMIT; + idx < req->n_channels && list_len < RTW89_SCAN_LIST_LIMIT_AX; idx++, list_len++) { channel = req->channels[idx]; ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL); @@ -6626,7 +6645,7 @@ int rtw89_pno_scan_add_chan_list_be(struct rtw89_dev *rtwdev, INIT_LIST_HEAD(&chan_list); for (idx = 0, list_len = 0; - idx < nd_config->n_channels && list_len < RTW89_SCAN_LIST_LIMIT; + idx < nd_config->n_channels && list_len < RTW89_SCAN_LIST_LIMIT_BE; idx++, list_len++) { 
channel = nd_config->channels[idx]; ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL); @@ -6681,7 +6700,7 @@ int rtw89_hw_scan_add_chan_list_be(struct rtw89_dev *rtwdev, INIT_LIST_HEAD(&chan_list); for (idx = rtwdev->scan_info.last_chan_idx, list_len = 0; - idx < req->n_channels && list_len < RTW89_SCAN_LIST_LIMIT; + idx < req->n_channels && list_len < RTW89_SCAN_LIST_LIMIT_BE; idx++, list_len++) { channel = req->channels[idx]; ch_info = kzalloc(sizeof(*ch_info), GFP_KERNEL); diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 9106bcce1851..95681c390bb8 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -310,9 +310,12 @@ struct rtw89_fw_macid_pause_sleep_grp { #define RTW89_SCANOFLD_DEBUG_MASK 0x1F #define RTW89_CHAN_INVALID 0xFF #define RTW89_MAC_CHINFO_SIZE 28 +#define RTW89_MAC_CHINFO_SIZE_BE 32 #define RTW89_SCAN_LIST_GUARD 4 -#define RTW89_SCAN_LIST_LIMIT \ - ((RTW89_H2C_MAX_SIZE / RTW89_MAC_CHINFO_SIZE) - RTW89_SCAN_LIST_GUARD) +#define RTW89_SCAN_LIST_LIMIT(size) \ + ((RTW89_H2C_MAX_SIZE / (size)) - RTW89_SCAN_LIST_GUARD) +#define RTW89_SCAN_LIST_LIMIT_AX RTW89_SCAN_LIST_LIMIT(RTW89_MAC_CHINFO_SIZE) +#define RTW89_SCAN_LIST_LIMIT_BE RTW89_SCAN_LIST_LIMIT(RTW89_MAC_CHINFO_SIZE_BE) #define RTW89_BCN_LOSS_CNT 10 @@ -2647,6 +2650,7 @@ struct rtw89_h2c_chinfo_elem_be { __le32 w4; __le32 w5; __le32 w6; + __le32 w7; } __packed; #define RTW89_H2C_CHINFO_BE_W0_PERIOD GENMASK(7, 0) @@ -2678,6 +2682,7 @@ struct rtw89_h2c_chinfo_elem_be { #define RTW89_H2C_CHINFO_BE_W5_FW_PROBE0_SSIDS GENMASK(31, 16) #define RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_SHORTSSIDS GENMASK(15, 0) #define RTW89_H2C_CHINFO_BE_W6_FW_PROBE0_BSSIDS GENMASK(31, 16) +#define RTW89_H2C_CHINFO_BE_W7_PERIOD_V1 GENMASK(15, 0) struct rtw89_h2c_chinfo { u8 ch_num; @@ -2687,6 +2692,14 @@ struct rtw89_h2c_chinfo { struct rtw89_h2c_chinfo_elem elem[] __counted_by(ch_num); } __packed; +struct rtw89_h2c_chinfo_be { + u8 
ch_num; + u8 elem_size; + u8 arg; + u8 rsvd0; + struct rtw89_h2c_chinfo_elem_be elem[] __counted_by(ch_num); +} __packed; + #define RTW89_H2C_CHINFO_ARG_MAC_IDX_MASK BIT(0) #define RTW89_H2C_CHINFO_ARG_APPEND_MASK BIT(1) @@ -2733,6 +2746,7 @@ struct rtw89_h2c_scanofld_be_opch { __le32 w1; __le32 w2; __le32 w3; + __le32 w4; } __packed; #define RTW89_H2C_SCANOFLD_BE_OPCH_W0_MACID GENMASK(15, 0) @@ -2754,6 +2768,7 @@ struct rtw89_h2c_scanofld_be_opch { #define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT1 GENMASK(15, 8) #define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT2 GENMASK(23, 16) #define RTW89_H2C_SCANOFLD_BE_OPCH_W3_PKT3 GENMASK(31, 24) +#define RTW89_H2C_SCANOFLD_BE_OPCH_W4_DURATION_V1 GENMASK(15, 0) struct rtw89_h2c_scanofld_be { __le32 w0; @@ -3596,6 +3611,7 @@ struct rtw89_c2h_scanofld { __le32 w5; __le32 w6; __le32 w7; + __le32 w8; } __packed; #define RTW89_C2H_SCANOFLD_W2_PRI_CH GENMASK(7, 0) @@ -3610,6 +3626,8 @@ struct rtw89_c2h_scanofld { #define RTW89_C2H_SCANOFLD_W6_EXPECT_PERIOD GENMASK(15, 8) #define RTW89_C2H_SCANOFLD_W6_FW_DEF GENMASK(23, 16) #define RTW89_C2H_SCANOFLD_W7_REPORT_TSF GENMASK(31, 0) +#define RTW89_C2H_SCANOFLD_W8_PERIOD_V1 GENMASK(15, 0) +#define RTW89_C2H_SCANOFLD_W8_EXPECT_PERIOD_V1 GENMASK(31, 16) #define RTW89_GET_MAC_C2H_MCC_RCV_ACK_GROUP(c2h) \ le32_get_bits(*((const __le32 *)(c2h) + 2), GENMASK(1, 0)) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 03fc21440271..95f2beb89fe6 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -4788,9 +4788,11 @@ rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *skb, struct rtw89_vif_link *rtwvif_link = rtwdev->scan_info.scanning_vif; struct rtw89_vif *rtwvif; struct rtw89_chan new; - u8 reason, status, tx_fail, band, actual_period, expect_period; u32 last_chan = rtwdev->scan_info.last_chan_idx, report_tsf; + u16 actual_period, expect_period; + u8 reason, status, tx_fail, band; u8 
mac_idx, sw_def, fw_def; + u8 ver = U8_MAX; u16 chan; int ret; @@ -4799,6 +4801,9 @@ rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *skb, rtwvif = rtwvif_link->rtwvif; + if (RTW89_CHK_FW_FEATURE(CH_INFO_BE_V0, &rtwdev->fw)) + ver = 0; + tx_fail = le32_get_bits(c2h->w5, RTW89_C2H_SCANOFLD_W5_TX_FAIL); status = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_STATUS); chan = le32_get_bits(c2h->w2, RTW89_C2H_SCANOFLD_W2_PRI_CH); @@ -4811,21 +4816,28 @@ rtw89_mac_c2h_scanofld_rsp(struct rtw89_dev *rtwdev, struct sk_buff *skb, if (!(rtwdev->chip->support_bands & BIT(NL80211_BAND_6GHZ))) band = chan > 14 ? RTW89_BAND_5G : RTW89_BAND_2G; - rtw89_debug(rtwdev, RTW89_DBG_HW_SCAN, - "mac_idx[%d] band: %d, chan: %d, reason: %d, status: %d, tx_fail: %d, actual: %d\n", - mac_idx, band, chan, reason, status, tx_fail, actual_period); - if (rtwdev->chip->chip_gen == RTW89_CHIP_BE) { sw_def = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_SW_DEF); - expect_period = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_EXPECT_PERIOD); fw_def = le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_FW_DEF); report_tsf = le32_get_bits(c2h->w7, RTW89_C2H_SCANOFLD_W7_REPORT_TSF); + if (ver == 0) { + expect_period = + le32_get_bits(c2h->w6, RTW89_C2H_SCANOFLD_W6_EXPECT_PERIOD); + } else { + actual_period = le32_get_bits(c2h->w8, RTW89_C2H_SCANOFLD_W8_PERIOD_V1); + expect_period = + le32_get_bits(c2h->w8, RTW89_C2H_SCANOFLD_W8_EXPECT_PERIOD_V1); + } rtw89_debug(rtwdev, RTW89_DBG_HW_SCAN, "sw_def: %d, fw_def: %d, tsf: %x, expect: %d\n", sw_def, fw_def, report_tsf, expect_period); } + rtw89_debug(rtwdev, RTW89_DBG_HW_SCAN, + "mac_idx[%d] band: %d, chan: %d, reason: %d, status: %d, tx_fail: %d, actual: %d\n", + mac_idx, band, chan, reason, status, tx_fail, actual_period); + switch (reason) { case RTW89_SCAN_LEAVE_OP_NOTIFY: case RTW89_SCAN_LEAVE_CH_NOTIFY: diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index a5333099668a..a96b58ce6592 
100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -14,7 +14,7 @@ #include "rtw8922a_rfk.h" #include "util.h" -#define RTW8922A_FW_FORMAT_MAX 2 +#define RTW8922A_FW_FORMAT_MAX 3 #define RTW8922A_FW_BASENAME "rtw89/rtw8922a_fw" #define RTW8922A_MODULE_FIRMWARE \ RTW8922A_FW_BASENAME "-" __stringify(RTW8922A_FW_FORMAT_MAX) ".bin" From d56c261e5214e51e2c6d22149f63555039b5601e Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Thu, 28 Nov 2024 13:54:29 +0800 Subject: [PATCH 0116/1386] wifi: rtw89: 8852b: add beacon filter and CQM support Declare beacon filter and connection monitor for 8852B. This offloads connection monitor mechanism to firmware, and this is required for the MCC feature. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index a0408fcd6799..3fba37b8013a 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -709,6 +709,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 26, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, CRASH_TRIGGER), __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 0, SCAN_OFFLOAD), + __CFG_FW_FEAT(RTL8852B, ge, 0, 29, 29, 7, BEACON_FILTER), __CFG_FW_FEAT(RTL8852B, lt, 0, 29, 30, 0, NO_WOW_CPU_IO_RX), __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 74, 0, NO_LPS_PG), __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 74, 0, TX_WAKE), From 3374c63111b0811134b773e9d4c028bd643bb9c9 Mon Sep 17 00:00:00 2001 From: Po-Hao Huang Date: Thu, 28 Nov 2024 13:54:30 +0800 Subject: [PATCH 0117/1386] wifi: rtw89: 8852bt: add beacon filter and CQM support Declare beacon filter and connection monitor for 8852BT. 
This offloads connection monitor mechanism to firmware, and this is required for the MCC feature. Signed-off-by: Po-Hao Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index 3fba37b8013a..c604ea1d39f1 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -715,6 +715,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 74, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 90, 0, CRASH_TRIGGER), __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 91, 0, SCAN_OFFLOAD), + __CFG_FW_FEAT(RTL8852BT, ge, 0, 29, 110, 0, BEACON_FILTER), __CFG_FW_FEAT(RTL8852C, le, 0, 27, 33, 0, NO_DEEP_PS), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 34, 0, TX_WAKE), __CFG_FW_FEAT(RTL8852C, ge, 0, 27, 36, 0, SCAN_OFFLOAD), From b6853ed2be27ac7f9511867faff3d6dd1139b915 Mon Sep 17 00:00:00 2001 From: Chih-Kang Chang Date: Thu, 28 Nov 2024 13:54:31 +0800 Subject: [PATCH 0118/1386] wifi: rtw89: 8922a: use RSSI from PHY report in RX descriptor The PPDU status of probe response will fail to parse the IE due to being filtered by the to_self check. Therefore, we parse RSSI from PHY report in RX descriptor. 
Signed-off-by: Chih-Kang Chang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 17 +++++++++ drivers/net/wireless/realtek/rtw89/core.h | 19 ++++++++++ drivers/net/wireless/realtek/rtw89/mac.c | 1 + drivers/net/wireless/realtek/rtw89/mac.h | 36 +++++++++++++++++++ drivers/net/wireless/realtek/rtw89/mac_be.c | 15 ++++++++ drivers/net/wireless/realtek/rtw89/reg.h | 4 +++ drivers/net/wireless/realtek/rtw89/rtw8851b.c | 4 ++- drivers/net/wireless/realtek/rtw89/rtw8852a.c | 5 ++- drivers/net/wireless/realtek/rtw89/rtw8852b.c | 1 + .../wireless/realtek/rtw89/rtw8852b_common.c | 4 ++- .../net/wireless/realtek/rtw89/rtw8852bt.c | 1 + drivers/net/wireless/realtek/rtw89/rtw8852c.c | 6 +++- drivers/net/wireless/realtek/rtw89/rtw8922a.c | 17 +++++++-- drivers/net/wireless/realtek/rtw89/txrx.h | 3 ++ 14 files changed, 127 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 6f9b4f0b2748..29d0ac502bab 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2366,6 +2366,12 @@ static void rtw89_core_update_radiotap(struct rtw89_dev *rtwdev, } } +static void rtw89_core_validate_rx_signal(struct ieee80211_rx_status *rx_status) +{ + if (!rx_status->signal) + rx_status->flag |= RX_FLAG_NO_SIGNAL_VAL; +} + static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev, struct rtw89_rx_phy_ppdu *phy_ppdu, struct rtw89_rx_desc_info *desc_info, @@ -2382,6 +2388,8 @@ static void rtw89_core_rx_to_mac80211(struct rtw89_dev *rtwdev, rtw89_core_rx_stats(rtwdev, phy_ppdu, desc_info, skb_ppdu); rtw89_core_update_rx_status_by_ppdu(rtwdev, rx_status, phy_ppdu); rtw89_core_update_radiotap(rtwdev, skb_ppdu, rx_status); + rtw89_core_validate_rx_signal(rx_status); + /* In low power mode, it does RX in thread context. 
*/ local_bh_disable(); ieee80211_rx_napi(rtwdev->hw, NULL, skb_ppdu, napi); @@ -2517,6 +2525,7 @@ void rtw89_core_query_rxdesc_v2(struct rtw89_dev *rtwdev, struct rtw89_rx_desc_info *desc_info, u8 *data, u32 data_offset) { + struct rtw89_rxdesc_phy_rpt_v2 *rxd_rpt; struct rtw89_rxdesc_short_v2 *rxd_s; struct rtw89_rxdesc_long_v2 *rxd_l; u16 shift_len, drv_info_len, phy_rtp_len, hdr_cnv_len; @@ -2564,6 +2573,12 @@ void rtw89_core_query_rxdesc_v2(struct rtw89_dev *rtwdev, desc_info->rxd_len = sizeof(struct rtw89_rxdesc_short_v2); desc_info->ready = true; + if (phy_rtp_len == sizeof(*rxd_rpt)) { + rxd_rpt = (struct rtw89_rxdesc_phy_rpt_v2 *)(data + data_offset + + desc_info->rxd_len); + desc_info->rssi = le32_get_bits(rxd_rpt->dword0, BE_RXD_PHY_RSSI); + } + if (!desc_info->long_rxdesc) return; @@ -2706,6 +2721,7 @@ static void rtw89_core_update_rx_status(struct rtw89_dev *rtwdev, rx_status->flag |= RX_FLAG_MACTIME_START; rx_status->mactime = desc_info->free_run_cnt; + rtw89_chip_phy_rpt_to_rssi(rtwdev, desc_info, rx_status); rtw89_core_stats_sta_rx_status(rtwdev, desc_info, rx_status); } @@ -4522,6 +4538,7 @@ int rtw89_core_start(struct rtw89_dev *rtwdev) rtw89_phy_dm_init(rtwdev); rtw89_mac_cfg_ppdu_status_bands(rtwdev, true); + rtw89_mac_cfg_phy_rpt_bands(rtwdev, true); rtw89_mac_update_rts_threshold(rtwdev); rtw89_tas_reset(rtwdev); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 8c7e8b1c954f..82844e470d1b 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -1084,6 +1084,7 @@ struct rtw89_rx_desc_info { u16 offset; u16 rxd_len; bool ready; + u16 rssi; }; struct rtw89_rxdesc_short { @@ -1126,6 +1127,11 @@ struct rtw89_rxdesc_long_v2 { __le32 dword9; } __packed; +struct rtw89_rxdesc_phy_rpt_v2 { + __le32 dword0; + __le32 dword1; +} __packed; + struct rtw89_tx_desc_info { u16 pkt_size; u8 wp_offset; @@ -3624,6 +3630,9 @@ struct rtw89_chip_ops { struct 
ieee80211_rx_status *status); void (*convert_rpl_to_rssi)(struct rtw89_dev *rtwdev, struct rtw89_rx_phy_ppdu *phy_ppdu); + void (*phy_rpt_to_rssi)(struct rtw89_dev *rtwdev, + struct rtw89_rx_desc_info *desc_info, + struct ieee80211_rx_status *rx_status); void (*ctrl_nbtg_bt_tx)(struct rtw89_dev *rtwdev, bool en, enum rtw89_phy_idx phy_idx); void (*cfg_txrx_path)(struct rtw89_dev *rtwdev); @@ -6706,6 +6715,16 @@ static inline void rtw89_chip_convert_rpl_to_rssi(struct rtw89_dev *rtwdev, chip->ops->convert_rpl_to_rssi(rtwdev, phy_ppdu); } +static inline void rtw89_chip_phy_rpt_to_rssi(struct rtw89_dev *rtwdev, + struct rtw89_rx_desc_info *desc_info, + struct ieee80211_rx_status *rx_status) +{ + const struct rtw89_chip_info *chip = rtwdev->chip; + + if (chip->ops->phy_rpt_to_rssi) + chip->ops->phy_rpt_to_rssi(rtwdev, desc_info, rx_status); +} + static inline void rtw89_ctrl_nbtg_bt_tx(struct rtw89_dev *rtwdev, bool en, enum rtw89_phy_idx phy_idx) { diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index 95f2beb89fe6..bb4f58118e05 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -6736,6 +6736,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_ax = { .typ_fltr_opt = rtw89_mac_typ_fltr_opt_ax, .cfg_ppdu_status = rtw89_mac_cfg_ppdu_status_ax, + .cfg_phy_rpt = NULL, .dle_mix_cfg = dle_mix_cfg_ax, .chk_dle_rdy = chk_dle_rdy_ax, diff --git a/drivers/net/wireless/realtek/rtw89/mac.h b/drivers/net/wireless/realtek/rtw89/mac.h index 81507274a97e..8edea96d037f 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.h +++ b/drivers/net/wireless/realtek/rtw89/mac.h @@ -169,6 +169,20 @@ enum rtw89_mac_ax_l0_to_l1_event { MAC_AX_L0_TO_L1_EVENT_MAX = 15, }; +enum rtw89_mac_phy_rpt_size { + MAC_AX_PHY_RPT_SIZE_0 = 0, + MAC_AX_PHY_RPT_SIZE_8 = 1, + MAC_AX_PHY_RPT_SIZE_16 = 2, + MAC_AX_PHY_RPT_SIZE_24 = 3, +}; + +enum rtw89_mac_hdr_cnv_size { + MAC_AX_HDR_CNV_SIZE_0 = 0, + 
MAC_AX_HDR_CNV_SIZE_32 = 1, + MAC_AX_HDR_CNV_SIZE_64 = 2, + MAC_AX_HDR_CNV_SIZE_96 = 3, +}; + enum rtw89_mac_wow_fw_status { WOWLAN_NOT_READY = 0x00, WOWLAN_SLEEP_READY = 0x01, @@ -968,6 +982,7 @@ struct rtw89_mac_gen_def { enum rtw89_mac_fwd_target fwd_target, u8 mac_idx); int (*cfg_ppdu_status)(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable); + void (*cfg_phy_rpt)(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable); int (*dle_mix_cfg)(struct rtw89_dev *rtwdev, const struct rtw89_dle_mem *cfg); int (*chk_dle_rdy)(struct rtw89_dev *rtwdev, bool wde_or_ple); @@ -1223,6 +1238,27 @@ int rtw89_mac_stop_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx, int rtw89_mac_resume_sch_tx(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en); int rtw89_mac_resume_sch_tx_v1(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en); int rtw89_mac_resume_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en); +void rtw89_mac_cfg_phy_rpt_be(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable); + +static inline +void rtw89_mac_cfg_phy_rpt(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable) +{ + const struct rtw89_mac_gen_def *mac = rtwdev->chip->mac_def; + + if (mac->cfg_phy_rpt) + mac->cfg_phy_rpt(rtwdev, mac_idx, enable); +} + +static inline +void rtw89_mac_cfg_phy_rpt_bands(struct rtw89_dev *rtwdev, bool enable) +{ + rtw89_mac_cfg_phy_rpt(rtwdev, RTW89_MAC_0, enable); + + if (!rtwdev->dbcc_en) + return; + + rtw89_mac_cfg_phy_rpt(rtwdev, RTW89_MAC_1, enable); +} static inline int rtw89_mac_cfg_ppdu_status(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable) diff --git a/drivers/net/wireless/realtek/rtw89/mac_be.c b/drivers/net/wireless/realtek/rtw89/mac_be.c index f7a396c8a3cd..2dbdeae904ad 100644 --- a/drivers/net/wireless/realtek/rtw89/mac_be.c +++ b/drivers/net/wireless/realtek/rtw89/mac_be.c @@ -1988,6 +1988,20 @@ int rtw89_mac_resume_sch_tx_v2(struct rtw89_dev *rtwdev, u8 mac_idx, u32 tx_en) } EXPORT_SYMBOL(rtw89_mac_resume_sch_tx_v2); +void rtw89_mac_cfg_phy_rpt_be(struct rtw89_dev *rtwdev, 
u8 mac_idx, bool enable) +{ + u32 reg, val; + + reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_RCR, mac_idx); + val = enable ? MAC_AX_PHY_RPT_SIZE_8 : MAC_AX_PHY_RPT_SIZE_0; + rtw89_write32_mask(rtwdev, reg, B_BE_PHY_RPT_SZ_MASK, val); + rtw89_write32_mask(rtwdev, reg, B_BE_HDR_CNV_SZ_MASK, MAC_AX_HDR_CNV_SIZE_0); + + reg = rtw89_mac_reg_by_idx(rtwdev, R_BE_DRV_INFO_OPTION, mac_idx); + rtw89_write32_mask(rtwdev, reg, B_BE_DRV_INFO_PHYRPT_EN, enable); +} +EXPORT_SYMBOL(rtw89_mac_cfg_phy_rpt_be); + static int rtw89_mac_cfg_ppdu_status_be(struct rtw89_dev *rtwdev, u8 mac_idx, bool enable) { @@ -2583,6 +2597,7 @@ const struct rtw89_mac_gen_def rtw89_mac_gen_be = { .typ_fltr_opt = rtw89_mac_typ_fltr_opt_be, .cfg_ppdu_status = rtw89_mac_cfg_ppdu_status_be, + .cfg_phy_rpt = rtw89_mac_cfg_phy_rpt_be, .dle_mix_cfg = dle_mix_cfg_be, .chk_dle_rdy = chk_dle_rdy_be, diff --git a/drivers/net/wireless/realtek/rtw89/reg.h b/drivers/net/wireless/realtek/rtw89/reg.h index 18ec7c0252fb..10d0efa7a58e 100644 --- a/drivers/net/wireless/realtek/rtw89/reg.h +++ b/drivers/net/wireless/realtek/rtw89/reg.h @@ -7447,6 +7447,10 @@ #define B_BE_CSIPRT_HESU_AID_EN BIT(25) #define B_BE_CSIPRT_VHTSU_AID_EN BIT(24) +#define R_BE_DRV_INFO_OPTION 0x11470 +#define R_BE_DRV_INFO_OPTION_C1 0x15470 +#define B_BE_DRV_INFO_PHYRPT_EN BIT(0) + #define R_BE_RX_ERR_ISR 0x114F4 #define R_BE_RX_ERR_ISR_C1 0x154F4 #define B_BE_RX_ERR_TRIG_ACT_TO BIT(9) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b.c b/drivers/net/wireless/realtek/rtw89/rtw8851b.c index 1ed4e64cbd2c..c56f70267882 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b.c @@ -2298,7 +2298,8 @@ static void rtw8851b_query_ppdu(struct rtw89_dev *rtwdev, u8 path; u8 *rx_power = phy_ppdu->rssi; - status->signal = RTW89_RSSI_RAW_TO_DBM(rx_power[RF_PATH_A]); + if (!status->signal) + status->signal = RTW89_RSSI_RAW_TO_DBM(rx_power[RF_PATH_A]); for (path = 0; path < rtwdev->chip->rf_path_num; path++) { 
status->chains |= BIT(path); @@ -2391,6 +2392,7 @@ static const struct rtw89_chip_ops rtw8851b_chip_ops = { .ctrl_btg_bt_rx = rtw8851b_ctrl_btg_bt_rx, .query_ppdu = rtw8851b_query_ppdu, .convert_rpl_to_rssi = NULL, + .phy_rpt_to_rssi = NULL, .ctrl_nbtg_bt_tx = rtw8851b_ctrl_nbtg_bt_tx, .cfg_txrx_path = rtw8851b_bb_cfg_txrx_path, .set_txpwr_ul_tb_offset = rtw8851b_set_txpwr_ul_tb_offset, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a.c b/drivers/net/wireless/realtek/rtw89/rtw8852a.c index a7105a288bc4..9bd2842c27d5 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a.c @@ -2068,7 +2068,9 @@ static void rtw8852a_query_ppdu(struct rtw89_dev *rtwdev, u8 path; u8 *rx_power = phy_ppdu->rssi; - status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], rx_power[RF_PATH_B])); + if (!status->signal) + status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], + rx_power[RF_PATH_B])); for (path = 0; path < rtwdev->chip->rf_path_num; path++) { status->chains |= BIT(path); status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]); @@ -2116,6 +2118,7 @@ static const struct rtw89_chip_ops rtw8852a_chip_ops = { .ctrl_btg_bt_rx = rtw8852a_ctrl_btg_bt_rx, .query_ppdu = rtw8852a_query_ppdu, .convert_rpl_to_rssi = NULL, + .phy_rpt_to_rssi = NULL, .ctrl_nbtg_bt_tx = rtw8852a_ctrl_nbtg_bt_tx, .cfg_txrx_path = NULL, .set_txpwr_ul_tb_offset = rtw8852a_set_txpwr_ul_tb_offset, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b.c b/drivers/net/wireless/realtek/rtw89/rtw8852b.c index ebc853a905dd..dfb2bf61b0b8 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b.c @@ -745,6 +745,7 @@ static const struct rtw89_chip_ops rtw8852b_chip_ops = { .ctrl_btg_bt_rx = rtw8852bx_ctrl_btg_bt_rx, .query_ppdu = rtw8852bx_query_ppdu, .convert_rpl_to_rssi = rtw8852bx_convert_rpl_to_rssi, + .phy_rpt_to_rssi = NULL, .ctrl_nbtg_bt_tx = rtw8852bx_ctrl_nbtg_bt_tx, 
.cfg_txrx_path = rtw8852bx_bb_cfg_txrx_path, .set_txpwr_ul_tb_offset = rtw8852bx_set_txpwr_ul_tb_offset, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c b/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c index 012739d97f71..0e094ce9c9b0 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852b_common.c @@ -1950,7 +1950,9 @@ static void __rtw8852bx_query_ppdu(struct rtw89_dev *rtwdev, u8 path; u8 *rx_power = phy_ppdu->rssi; - status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], rx_power[RF_PATH_B])); + if (!status->signal) + status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], + rx_power[RF_PATH_B])); for (path = 0; path < rtwdev->chip->rf_path_num; path++) { status->chains |= BIT(path); status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c index cd1385ff8003..bde3e1fb7ca6 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bt.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bt.c @@ -679,6 +679,7 @@ static const struct rtw89_chip_ops rtw8852bt_chip_ops = { .ctrl_btg_bt_rx = rtw8852bx_ctrl_btg_bt_rx, .query_ppdu = rtw8852bx_query_ppdu, .convert_rpl_to_rssi = rtw8852bx_convert_rpl_to_rssi, + .phy_rpt_to_rssi = NULL, .ctrl_nbtg_bt_tx = rtw8852bx_ctrl_nbtg_bt_tx, .cfg_txrx_path = rtw8852bx_bb_cfg_txrx_path, .set_txpwr_ul_tb_offset = rtw8852bx_set_txpwr_ul_tb_offset, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c.c b/drivers/net/wireless/realtek/rtw89/rtw8852c.c index c7d39499ca75..bc84b15e7826 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c.c @@ -2807,7 +2807,10 @@ static void rtw8852c_query_ppdu(struct rtw89_dev *rtwdev, u8 path; u8 *rx_power = phy_ppdu->rssi; - status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], rx_power[RF_PATH_B])); + if (!status->signal) + 
status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], + rx_power[RF_PATH_B])); + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { status->chains |= BIT(path); status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]); @@ -2907,6 +2910,7 @@ static const struct rtw89_chip_ops rtw8852c_chip_ops = { .ctrl_btg_bt_rx = rtw8852c_ctrl_btg_bt_rx, .query_ppdu = rtw8852c_query_ppdu, .convert_rpl_to_rssi = NULL, + .phy_rpt_to_rssi = NULL, .ctrl_nbtg_bt_tx = rtw8852c_ctrl_nbtg_bt_tx, .cfg_txrx_path = rtw8852c_bb_cfg_txrx_path, .set_txpwr_ul_tb_offset = rtw8852c_set_txpwr_ul_tb_offset, diff --git a/drivers/net/wireless/realtek/rtw89/rtw8922a.c b/drivers/net/wireless/realtek/rtw89/rtw8922a.c index a96b58ce6592..f04cb3b11372 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8922a.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8922a.c @@ -2565,8 +2565,10 @@ static void rtw8922a_query_ppdu(struct rtw89_dev *rtwdev, u8 path; u8 *rx_power = phy_ppdu->rssi; - status->signal = - RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], rx_power[RF_PATH_B])); + if (!status->signal) + status->signal = RTW89_RSSI_RAW_TO_DBM(max(rx_power[RF_PATH_A], + rx_power[RF_PATH_B])); + for (path = 0; path < rtwdev->chip->rf_path_num; path++) { status->chains |= BIT(path); status->chain_signal[path] = RTW89_RSSI_RAW_TO_DBM(rx_power[path]); @@ -2607,6 +2609,16 @@ static void rtw8922a_convert_rpl_to_rssi(struct rtw89_dev *rtwdev, phy_ppdu->rssi_avg = phy_ppdu->rpl_avg; } +static void rtw8922a_phy_rpt_to_rssi(struct rtw89_dev *rtwdev, + struct rtw89_rx_desc_info *desc_info, + struct ieee80211_rx_status *rx_status) +{ + if (desc_info->rssi <= 0x1 || (desc_info->rssi >> 2) > MAX_RSSI) + return; + + rx_status->signal = (desc_info->rssi >> 2) - MAX_RSSI; +} + static int rtw8922a_mac_enable_bb_rf(struct rtw89_dev *rtwdev) { rtw89_write8_set(rtwdev, R_BE_FEN_RST_ENABLE, @@ -2665,6 +2677,7 @@ static const struct rtw89_chip_ops rtw8922a_chip_ops = { .ctrl_btg_bt_rx = rtw8922a_ctrl_btg_bt_rx, 
.query_ppdu = rtw8922a_query_ppdu, .convert_rpl_to_rssi = rtw8922a_convert_rpl_to_rssi, + .phy_rpt_to_rssi = rtw8922a_phy_rpt_to_rssi, .ctrl_nbtg_bt_tx = rtw8922a_ctrl_nbtg_bt_tx, .cfg_txrx_path = rtw8922a_bb_cfg_txrx_path, .set_txpwr_ul_tb_offset = NULL, diff --git a/drivers/net/wireless/realtek/rtw89/txrx.h b/drivers/net/wireless/realtek/rtw89/txrx.h index b2e47829983f..70fe7cebc9d5 100644 --- a/drivers/net/wireless/realtek/rtw89/txrx.h +++ b/drivers/net/wireless/realtek/rtw89/txrx.h @@ -560,6 +560,9 @@ struct rtw89_phy_sts_iehdr { #define BE_RXD_HDR_OFFSET_MASK GENMASK(20, 16) #define BE_RXD_WL_HD_IV_LEN_MASK GENMASK(26, 21) +/* BE RXD - PHY RPT dword0 */ +#define BE_RXD_PHY_RSSI GENMASK(11, 0) + struct rtw89_phy_sts_ie00 { __le32 w0; __le32 w1; From 148cd1e184cf3f19c17f8596c8f3f1a3b447edd6 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Thu, 28 Nov 2024 13:54:32 +0800 Subject: [PATCH 0119/1386] wifi: rtw89: refine link handling for link_sta_rc_update The original handling will iterate all active links under the given sta and apply the changes to each. Now, stack tweaks ops from sta_rc_update to link_sta_rc_update, which means targeting a given link. Then, our link iteration looks redundant. So, refine it to apply the changes to the link directly. 
Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac80211.c | 9 +++++++-- drivers/net/wireless/realtek/rtw89/phy.c | 15 ++++++--------- drivers/net/wireless/realtek/rtw89/phy.h | 3 +++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac80211.c b/drivers/net/wireless/realtek/rtw89/mac80211.c index bf7a674bce28..5eac0b524060 100644 --- a/drivers/net/wireless/realtek/rtw89/mac80211.c +++ b/drivers/net/wireless/realtek/rtw89/mac80211.c @@ -1307,10 +1307,15 @@ static void rtw89_ops_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_link_sta *link_sta, u32 changed) { - struct ieee80211_sta *sta = link_sta->sta; + struct rtw89_sta *rtwsta = sta_to_rtwsta(link_sta->sta); struct rtw89_dev *rtwdev = hw->priv; + struct rtw89_sta_link *rtwsta_link; - rtw89_phy_ra_update_sta(rtwdev, sta, changed); + rtwsta_link = rtwsta->links[link_sta->link_id]; + if (unlikely(!rtwsta_link)) + return; + + rtw89_phy_ra_update_sta_link(rtwdev, rtwsta_link, changed); } static int rtw89_ops_add_chanctx(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index be2f5338c3a0..e88ed9ec57c5 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -467,11 +467,11 @@ static void rtw89_phy_ra_sta_update(struct rtw89_dev *rtwdev, ra->csi_mode = csi_mode; } -static void __rtw89_phy_ra_update_sta(struct rtw89_dev *rtwdev, - struct rtw89_vif_link *rtwvif_link, - struct rtw89_sta_link *rtwsta_link, - u32 changed) +void rtw89_phy_ra_update_sta_link(struct rtw89_dev *rtwdev, + struct rtw89_sta_link *rtwsta_link, + u32 changed) { + struct rtw89_vif_link *rtwvif_link = rtwsta_link->rtwvif_link; struct ieee80211_vif *vif = rtwvif_link_to_vif(rtwvif_link); struct rtw89_ra_info *ra = &rtwsta_link->ra; struct ieee80211_link_sta 
*link_sta; @@ -504,14 +504,11 @@ void rtw89_phy_ra_update_sta(struct rtw89_dev *rtwdev, struct ieee80211_sta *sta u32 changed) { struct rtw89_sta *rtwsta = sta_to_rtwsta(sta); - struct rtw89_vif_link *rtwvif_link; struct rtw89_sta_link *rtwsta_link; unsigned int link_id; - rtw89_sta_for_each_link(rtwsta, rtwsta_link, link_id) { - rtwvif_link = rtwsta_link->rtwvif_link; - __rtw89_phy_ra_update_sta(rtwdev, rtwvif_link, rtwsta_link, changed); - } + rtw89_sta_for_each_link(rtwsta, rtwsta_link, link_id) + rtw89_phy_ra_update_sta_link(rtwdev, rtwsta_link, changed); } static bool __check_rate_pattern(struct rtw89_phy_rate_pattern *next, diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index f4ef7f5fb081..08a026ac9d38 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -926,6 +926,9 @@ void rtw89_phy_ra_assoc(struct rtw89_dev *rtwdev, struct rtw89_sta_link *rtwsta_ void rtw89_phy_ra_update(struct rtw89_dev *rtwdev); void rtw89_phy_ra_update_sta(struct rtw89_dev *rtwdev, struct ieee80211_sta *sta, u32 changed); +void rtw89_phy_ra_update_sta_link(struct rtw89_dev *rtwdev, + struct rtw89_sta_link *rtwsta_link, + u32 changed); void rtw89_phy_rate_pattern_vif(struct rtw89_dev *rtwdev, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask); From 7b98caea39676561f22db58752551161bb36462b Mon Sep 17 00:00:00 2001 From: Chih-Kang Chang Date: Thu, 28 Nov 2024 13:54:33 +0800 Subject: [PATCH 0120/1386] wifi: rtw89: add crystal_cap check to avoid setting as overflow value In the original flow, the crystal_cap might be calculated as a negative value and set as an overflow value. Therefore, we added a check to limit the calculated crystal_cap value. Additionally, we shrank the crystal_cap adjustment according to specific CFO. 
Signed-off-by: Chih-Kang Chang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241128055433.11851-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/phy.c | 13 +++++++------ drivers/net/wireless/realtek/rtw89/phy.h | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index e88ed9ec57c5..8d36bf962732 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -4266,7 +4266,6 @@ static void rtw89_phy_cfo_set_crystal_cap(struct rtw89_dev *rtwdev, if (!force && cfo->crystal_cap == crystal_cap) return; - crystal_cap = clamp_t(u8, crystal_cap, 0, 127); if (chip->chip_id == RTL8852A || chip->chip_id == RTL8851B) { rtw89_phy_cfo_set_xcap_reg(rtwdev, true, crystal_cap); rtw89_phy_cfo_set_xcap_reg(rtwdev, false, crystal_cap); @@ -4389,7 +4388,7 @@ static void rtw89_phy_cfo_crystal_cap_adjust(struct rtw89_dev *rtwdev, s32 curr_cfo) { struct rtw89_cfo_tracking_info *cfo = &rtwdev->cfo_tracking; - s8 crystal_cap = cfo->crystal_cap; + int crystal_cap = cfo->crystal_cap; s32 cfo_abs = abs(curr_cfo); int sign; @@ -4410,15 +4409,17 @@ static void rtw89_phy_cfo_crystal_cap_adjust(struct rtw89_dev *rtwdev, } sign = curr_cfo > 0 ? 
1 : -1; if (cfo_abs > CFO_TRK_STOP_TH_4) - crystal_cap += 7 * sign; - else if (cfo_abs > CFO_TRK_STOP_TH_3) - crystal_cap += 5 * sign; - else if (cfo_abs > CFO_TRK_STOP_TH_2) crystal_cap += 3 * sign; + else if (cfo_abs > CFO_TRK_STOP_TH_3) + crystal_cap += 3 * sign; + else if (cfo_abs > CFO_TRK_STOP_TH_2) + crystal_cap += 1 * sign; else if (cfo_abs > CFO_TRK_STOP_TH_1) crystal_cap += 1 * sign; else return; + + crystal_cap = clamp(crystal_cap, 0, 127); rtw89_phy_cfo_set_crystal_cap(rtwdev, (u8)crystal_cap, false); rtw89_debug(rtwdev, RTW89_DBG_CFO, "X_cap{Curr,Default}={0x%x,0x%x}\n", diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index 08a026ac9d38..e6d06f0a6c09 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -57,7 +57,7 @@ #define CFO_TRK_STOP_TH_4 (30 << 2) #define CFO_TRK_STOP_TH_3 (20 << 2) #define CFO_TRK_STOP_TH_2 (10 << 2) -#define CFO_TRK_STOP_TH_1 (00 << 2) +#define CFO_TRK_STOP_TH_1 (03 << 2) #define CFO_TRK_STOP_TH (2 << 2) #define CFO_SW_COMP_FINE_TUNE (2 << 2) #define CFO_PERIOD_CNT 15 From ffa794846bf777a06407d94ef69b9b1c5ac5a6c6 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:06:58 -0500 Subject: [PATCH 0121/1386] xfrm: config: add CONFIG_XFRM_IPTFS Add new Kconfig option to enable IP-TFS (RFC9347) functionality. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/Kconfig | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig index d7b16f2c23e9..f0157702718f 100644 --- a/net/xfrm/Kconfig +++ b/net/xfrm/Kconfig @@ -135,6 +135,22 @@ config NET_KEY_MIGRATE If unsure, say N. +config XFRM_IPTFS + tristate "IPsec IP-TFS/AGGFRAG (RFC 9347) encapsulation support" + depends on XFRM + help + Information on the IP-TFS/AGGFRAG encapsulation can be found + in RFC 9347. 
This feature supports demand driven (i.e., + non-constant send rate) IP-TFS to take advantage of the + AGGFRAG ESP payload encapsulation. This payload type + supports aggregation and fragmentation of the inner IP + packet stream which in turn yields higher small-packet + bandwidth as well as reducing MTU/PMTU issues. Congestion + control is unimplemented as the send rate is demand driven + rather than constant. + + If unsure, say N. + config XFRM_ESPINTCP bool From 64e844505bc08cde3f346f193cbbbab0096fef54 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:06:59 -0500 Subject: [PATCH 0122/1386] include: uapi: protocol number and packet structs for AGGFRAG in ESP Add the RFC assigned IP protocol number for AGGFRAG. Add the on-wire basic and congestion-control IP-TFS packet headers. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/in.h | 2 ++ include/uapi/linux/ip.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 283dec7e3645..5bd7ce934d74 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -137,6 +137,22 @@ struct ip_beet_phdr { __u8 reserved; }; +struct ip_iptfs_hdr { + __u8 subtype; /* 0*: basic, 1: CC */ + __u8 flags; + __be16 block_offset; +}; + +struct ip_iptfs_cc_hdr { + __u8 subtype; /* 0: basic, 1*: CC */ + __u8 flags; + __be16
block_offset; + __be32 loss_rate; + __be64 rtt_adelay_xdelay; + __be32 tval; + __be32 techo; +}; + /* index values for the variables in ipv4_devconf */ enum { From f69eb4f65c58f5a081dbafb76011dad73757420c Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:00 -0500 Subject: [PATCH 0123/1386] xfrm: netlink: add config (netlink) options Add netlink options for configuring IP-TFS SAs. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 ++++++- net/xfrm/xfrm_compat.c | 10 ++++++-- net/xfrm/xfrm_user.c | 52 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d73a97e3030a..a23495c0e0a1 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -158,7 +158,8 @@ enum { #define XFRM_MODE_ROUTEOPTIMIZATION 2 #define XFRM_MODE_IN_TRIGGER 3 #define XFRM_MODE_BEET 4 -#define XFRM_MODE_MAX 5 +#define XFRM_MODE_IPTFS 5 +#define XFRM_MODE_MAX 6 /* Netlink configuration messages. 
*/ enum { @@ -323,6 +324,12 @@ enum xfrm_attr_type_t { XFRMA_SA_DIR, /* __u8 */ XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ XFRMA_SA_PCPU, /* __u32 */ + XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */ + XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */ + XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */ + XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */ + XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */ + XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 5b9ee63e30b6..b8d2e6930041 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -284,9 +284,15 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SA_DIR: case XFRMA_NAT_KEEPALIVE_INTERVAL: case XFRMA_SA_PCPU: + case XFRMA_IPTFS_DROP_TIME: + case XFRMA_IPTFS_REORDER_WINDOW: + case XFRMA_IPTFS_DONT_FRAG: + case XFRMA_IPTFS_INIT_DELAY: + case XFRMA_IPTFS_MAX_QSIZE: + case XFRMA_IPTFS_PKT_SIZE: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -441,7 +447,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b2876e09328b..749ec56101ac 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -301,6 +301,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode"); 
goto out; } + if ((attrs[XFRMA_IPTFS_DROP_TIME] || + attrs[XFRMA_IPTFS_REORDER_WINDOW] || + attrs[XFRMA_IPTFS_DONT_FRAG] || + attrs[XFRMA_IPTFS_INIT_DELAY] || + attrs[XFRMA_IPTFS_MAX_QSIZE] || + attrs[XFRMA_IPTFS_PKT_SIZE]) && + p->mode != XFRM_MODE_IPTFS) { + NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode"); + goto out; + } break; case IPPROTO_COMP: @@ -421,6 +431,18 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } + if (attrs[XFRMA_IPTFS_DROP_TIME]) { + NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) { + NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA"); + err = -EINVAL; + goto out; + } + if (attrs[XFRMA_REPLAY_VAL]) { struct xfrm_replay_state *replay; @@ -458,6 +480,30 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, } } + + if (attrs[XFRMA_IPTFS_DONT_FRAG]) { + NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_INIT_DELAY]) { + NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_MAX_QSIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_PKT_SIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA"); + err = -EINVAL; + goto out; + } } if (!sa_dir && attrs[XFRMA_SA_PCPU]) { @@ -3220,6 +3266,12 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, [XFRMA_SA_PCPU] = { .type = NLA_U32 }, + [XFRMA_IPTFS_DROP_TIME] = { .type = NLA_U32 }, + [XFRMA_IPTFS_REORDER_WINDOW] = { .type = NLA_U16 }, + [XFRMA_IPTFS_DONT_FRAG] = { .type = NLA_FLAG }, + 
[XFRMA_IPTFS_INIT_DELAY] = { .type = NLA_U32 }, + [XFRMA_IPTFS_MAX_QSIZE] = { .type = NLA_U32 }, + [XFRMA_IPTFS_PKT_SIZE] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); From 7ac64f4598b4daa3f955f82759760666e047bdf8 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:01 -0500 Subject: [PATCH 0124/1386] xfrm: add mode_cbs module functionality Add a set of callbacks xfrm_mode_cbs to xfrm_state. These callbacks enable the addition of new xfrm modes, such as IP-TFS to be defined in modules. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 43 +++++++++++++++++++++++++ net/xfrm/xfrm_device.c | 3 +- net/xfrm/xfrm_input.c | 18 +++++++++-- net/xfrm/xfrm_output.c | 2 ++ net/xfrm/xfrm_policy.c | 18 +++++++---- net/xfrm/xfrm_state.c | 72 ++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 13 ++++++++ 7 files changed, 159 insertions(+), 10 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 32c09e85a64c..1ebc09cde627 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,7 @@ struct xfrm_state { u16 family; xfrm_address_t saddr; int header_len; + int enc_hdr_len; int trailer_len; u32 extra_flags; struct xfrm_mark smark; @@ -303,6 +304,9 @@ struct xfrm_state { * interpreted by xfrm_type methods. 
*/ void *data; u8 dir; + + const struct xfrm_mode_cbs *mode_cbs; + void *mode_data; }; static inline struct net *xs_net(struct xfrm_state *x) @@ -460,6 +464,45 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); +/** + * struct xfrm_mode_cbs - XFRM mode callbacks + * @owner: module owner or NULL + * @init_state: Add/init mode specific state in `xfrm_state *x` + * @clone_state: Copy mode specific values from `orig` to new state `x` + * @destroy_state: Cleanup mode specific state from `xfrm_state *x` + * @user_init: Process mode specific netlink attributes from user + * @copy_to_user: Add netlink attributes to `attrs` based on state in `x` + * @sa_len: Return space required to store mode specific netlink attributes + * @get_inner_mtu: Return avail payload space after removing encap overhead + * @input: Process received packet from SA using mode + * @output: Output given packet using mode + * @prepare_output: Add mode specific encapsulation to packet in skb. On return + * `transport_header` should point at ESP header, `network_header` should + * point at outer IP header and `mac_header` should point at the + * protocol/nexthdr field of the outer IP. + * + * One should examine and understand the specific uses of these callbacks in + * xfrm for further detail on how and when these functions are called. RTSL.
+ */ +struct xfrm_mode_cbs { + struct module *owner; + int (*init_state)(struct xfrm_state *x); + int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig); + void (*destroy_state)(struct xfrm_state *x); + int (*user_init)(struct net *net, struct xfrm_state *x, + struct nlattr **attrs, + struct netlink_ext_ack *extack); + int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb); + unsigned int (*sa_len)(const struct xfrm_state *x); + u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu); + int (*input)(struct xfrm_state *x, struct sk_buff *skb); + int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); + int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb); +}; + +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs); +void xfrm_unregister_mode_cbs(u8 mode); + static inline int xfrm_af2proto(unsigned int family) { switch(family) { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index b33c4591e09a..1fe1b07d879d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -42,7 +42,8 @@ static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); + pskb_pull(skb, + skb->mac_len + x->props.header_len - x->props.enc_hdr_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 841a60a6fbfe..2c4ae61e7e3a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -446,6 +446,9 @@ static int xfrm_inner_mode_input(struct xfrm_state *x, WARN_ON_ONCE(1); break; default: + if (x->mode_cbs && x->mode_cbs->input) + return x->mode_cbs->input(x, skb); + WARN_ON_ONCE(1); break; } @@ -453,6 +456,10 @@ static int xfrm_inner_mode_input(struct xfrm_state *x, return -EOPNOTSUPP; } +/* NOTE: encap_type - In addition to the normal (non-negative) values for + * 
encap_type, a negative value of -1 or -2 can be used to resume/restart this + * function after a previous invocation early terminated for async operation. + */ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; @@ -489,6 +496,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) family = x->props.family; + /* An encap_type of -2 indicates reconstructed inner packet */ + if (encap_type == -2) + goto resume_decapped; + /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { async = 1; @@ -679,11 +690,14 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (xfrm_inner_mode_input(x, skb)) { + err = xfrm_inner_mode_input(x, skb); + if (err == -EINPROGRESS) + return 0; + else if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } - +resume_decapped: if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e5722c95b8bb..ef81359e4038 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -472,6 +472,8 @@ static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) WARN_ON_ONCE(1); break; default: + if (x->mode_cbs && x->mode_cbs->prepare_output) + return x->mode_cbs->prepare_output(x, skb); WARN_ON_ONCE(1); break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4408c11c0835..c04014ee623f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2748,13 +2748,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->input = dst_discard; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - dst1->output = afinfo->output; - else - dst1->output = dst_discard_out; - rcu_read_unlock(); + if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) { + dst1->output = xfrm[i]->mode_cbs->output; + } else { + rcu_read_lock(); + afinfo = 
xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock(); + } xdst_prev = xdst; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 67ca7ac955a3..cf68ba891729 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -515,6 +515,60 @@ static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) return NULL; } +static const struct xfrm_mode_cbs __rcu *xfrm_mode_cbs_map[XFRM_MODE_MAX]; +static DEFINE_SPINLOCK(xfrm_mode_cbs_map_lock); + +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs) +{ + if (mode >= XFRM_MODE_MAX) + return -EINVAL; + + spin_lock_bh(&xfrm_mode_cbs_map_lock); + rcu_assign_pointer(xfrm_mode_cbs_map[mode], mode_cbs); + spin_unlock_bh(&xfrm_mode_cbs_map_lock); + + return 0; +} +EXPORT_SYMBOL(xfrm_register_mode_cbs); + +void xfrm_unregister_mode_cbs(u8 mode) +{ + if (mode >= XFRM_MODE_MAX) + return; + + spin_lock_bh(&xfrm_mode_cbs_map_lock); + RCU_INIT_POINTER(xfrm_mode_cbs_map[mode], NULL); + spin_unlock_bh(&xfrm_mode_cbs_map_lock); + synchronize_rcu(); +} +EXPORT_SYMBOL(xfrm_unregister_mode_cbs); + +static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode) +{ + const struct xfrm_mode_cbs *cbs; + bool try_load = true; + + if (mode >= XFRM_MODE_MAX) + return NULL; + +retry: + rcu_read_lock(); + + cbs = rcu_dereference(xfrm_mode_cbs_map[mode]); + if (cbs && !try_module_get(cbs->owner)) + cbs = NULL; + + rcu_read_unlock(); + + if (mode == XFRM_MODE_IPTFS && !cbs && try_load) { + request_module("xfrm-iptfs"); + try_load = false; + goto retry; + } + + return cbs; +} + void xfrm_state_free(struct xfrm_state *x) { kmem_cache_free(xfrm_state_cache, x); @@ -523,6 +577,8 @@ EXPORT_SYMBOL(xfrm_state_free); static void ___xfrm_state_destroy(struct xfrm_state *x) { + if (x->mode_cbs && x->mode_cbs->destroy_state) + x->mode_cbs->destroy_state(x); hrtimer_cancel(&x->mtimer); 
del_timer_sync(&x->rtimer); kfree(x->aead); @@ -682,6 +738,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) x->replay_maxdiff = 0; x->pcpu_num = UINT_MAX; spin_lock_init(&x->lock); + x->mode_data = NULL; } return x; } @@ -1945,6 +2002,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->new_mapping_sport = 0; x->dir = orig->dir; + x->mode_cbs = orig->mode_cbs; + if (x->mode_cbs && x->mode_cbs->clone_state) { + if (x->mode_cbs->clone_state(x, orig)) + goto error; + } + return x; error: @@ -2986,6 +3049,9 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) case XFRM_MODE_TUNNEL: break; default: + if (x->mode_cbs && x->mode_cbs->get_inner_mtu) + return x->mode_cbs->get_inner_mtu(x, mtu); + WARN_ON_ONCE(1); break; } @@ -3086,6 +3152,12 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, } } + x->mode_cbs = xfrm_get_mode_cbs(x->props.mode); + if (x->mode_cbs) { + if (x->mode_cbs->init_state) + err = x->mode_cbs->init_state(x); + module_put(x->mode_cbs->owner); + } error: return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 749ec56101ac..71b452fff8db 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -932,6 +932,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (x->mode_cbs && x->mode_cbs->user_init) { + err = x->mode_cbs->user_init(net, x, attrs, extack); + if (err) + goto error; + } + return x; error: @@ -1347,6 +1353,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } + if (x->mode_cbs && x->mode_cbs->copy_to_user) + ret = x->mode_cbs->copy_to_user(x, skb); + if (ret) + goto out; if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); if (ret) @@ -3606,6 +3616,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->nat_keepalive_interval) l += nla_total_size(sizeof(x->nat_keepalive_interval)); + if (x->mode_cbs && x->mode_cbs->sa_len) + l += 
x->mode_cbs->sa_len(x); + return l; } From d1716d5a44c37e5743bf6ea4e5cdbdab37727f27 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:02 -0500 Subject: [PATCH 0125/1386] xfrm: add generic iptfs defines and functionality Define `XFRM_MODE_IPTFS` and `IPSEC_MODE_IPTFS` constants, and add these to switch case and conditionals adjacent with the existing TUNNEL modes. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + include/uapi/linux/ipsec.h | 3 ++- include/uapi/linux/snmp.h | 2 ++ net/ipv4/esp4.c | 3 ++- net/ipv6/esp6.c | 3 ++- net/netfilter/nft_xfrm.c | 3 ++- net/xfrm/xfrm_device.c | 1 + net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 8 ++++++-- net/xfrm/xfrm_proc.c | 2 ++ net/xfrm/xfrm_state.c | 12 ++++++++++++ net/xfrm/xfrm_user.c | 12 ++++++++++++ 12 files changed, 48 insertions(+), 6 deletions(-) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1ebc09cde627..4b0677e48190 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -38,6 +38,7 @@ #define XFRM_PROTO_COMP 108 #define XFRM_PROTO_IPIP 4 #define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG #define XFRM_PROTO_ROUTING IPPROTO_ROUTING #define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h index 50d8ee1791e2..696b790f4346 100644 --- a/include/uapi/linux/ipsec.h +++ b/include/uapi/linux/ipsec.h @@ -14,7 +14,8 @@ enum { IPSEC_MODE_ANY = 0, /* We do not support this for SA */ IPSEC_MODE_TRANSPORT = 1, IPSEC_MODE_TUNNEL = 2, - IPSEC_MODE_BEET = 3 + IPSEC_MODE_BEET = 3, + IPSEC_MODE_IPTFS = 4 }; enum { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..5a2553511190 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -339,6 +339,8 @@ enum LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */ 
LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */ + LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */ + LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */ __LINUX_MIB_XFRMMAX }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f3281312eb5e..b0fbf804bbba 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err) } skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b2400c226a32..5f3d0cc1555a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err) skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 8a07b46cc8fb..3210cfc966ab 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) return true; } - return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL; + return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL || + mode == XFRM_MODE_IPTFS; } static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1fe1b07d879d..d1fa94e52cea 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -69,6 +69,7 @@ static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { + case XFRM_MODE_IPTFS: 
case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ef81359e4038..b5025cf6136e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -677,6 +677,10 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) return; } + if (x->outer_mode.encap == XFRM_MODE_IPTFS) { + xo->inner_ipproto = IPPROTO_AGGFRAG; + return; + } /* non-Tunnel Mode */ if (!skb->encapsulation) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c04014ee623f..9e510021ee91 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2497,6 +2497,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_IPTFS || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; @@ -3294,7 +3295,8 @@ no_transform: ok: xfrm_pols_put(pols, drop_pols); if (dst && dst->xfrm && - dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || + dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; return dst; @@ -4523,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, @@ -4565,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, continue; n++; if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && - pol->xfrm_vec[i].mode != XFRM_MODE_BEET) + pol->xfrm_vec[i].mode != XFRM_MODE_BEET && + pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index eeb984be03a7..8e07dd614b0b 
100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), + SNMP_MIB_ITEM("XfrmInIptfsError", LINUX_MIB_XFRMINIPTFSERROR), + SNMP_MIB_ITEM("XfrmOutNoQueueSpace", LINUX_MIB_XFRMOUTNOQSPACE), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cf68ba891729..34067cb8a479 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -467,6 +467,11 @@ static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, }; static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { @@ -488,6 +493,11 @@ static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET6, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, }; static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) @@ -2334,6 +2344,7 @@ static int __xfrm6_state_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 4; } return 5; @@ -2360,6 +2371,7 @@ static int __xfrm6_tmpl_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 3; } return 4; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 71b452fff8db..08c6d6f0179f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -383,6 +383,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_BEET: break; + case XFRM_MODE_IPTFS: + if (p->id.proto != IPPROTO_ESP) { + 
NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP"); + goto out; + } + if (sa_dir == 0) { + NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute"); + goto out; + } + break; default: NL_SET_ERR_MSG(extack, "Unsupported mode"); @@ -2014,6 +2024,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family, return -EINVAL; } break; + case XFRM_MODE_IPTFS: + break; default: if (ut[i].family != prev_family) { NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change"); From 4b3faf610cc63bfac972711635eafbca5e7d7117 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:03 -0500 Subject: [PATCH 0126/1386] xfrm: iptfs: add new iptfs xfrm mode impl Add a new xfrm mode implementing AggFrag/IP-TFS from RFC9347. This utilizes the new xfrm_mode_cbs to implement demand-driven IP-TFS functionality. This functionality can be used to increase bandwidth utilization through small packet aggregation, as well as help solve PMTU issues through its efficient use of fragmentation. 
Link: https://www.rfc-editor.org/rfc/rfc9347.txt Multiple commits follow to build the functionality into xfrm_iptfs.c Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/Makefile | 1 + net/xfrm/xfrm_iptfs.c | 216 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 net/xfrm/xfrm_iptfs.c diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 512e0b2f8514..5a1787587cb3 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -21,5 +21,6 @@ obj-$(CONFIG_XFRM_USER) += xfrm_user.o obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o +obj-$(CONFIG_XFRM_IPTFS) += xfrm_iptfs.o obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c new file mode 100644 index 000000000000..e7cb8734fc0f --- /dev/null +++ b/net/xfrm/xfrm_iptfs.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* xfrm_iptfs: IPTFS encapsulation support + * + * April 21 2022, Christian Hopps + * + * Copyright (c) 2022, LabN Consulting, L.L.C. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "xfrm_inout.h" + +/** + * struct xfrm_iptfs_config - configuration for the IPTFS tunnel. + * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery, + * otherwise the user specified value. + */ +struct xfrm_iptfs_config { + u32 pkt_size; /* outer_packet_size or 0 */ +}; + +/** + * struct xfrm_iptfs_data - mode specific xfrm state. + * @cfg: IPTFS tunnel config. + * @x: owning SA (xfrm_state). + * @payload_mtu: max payload size. 
+ */ +struct xfrm_iptfs_data { + struct xfrm_iptfs_config cfg; + + /* Ingress User Input */ + struct xfrm_state *x; /* owning state */ + u32 payload_mtu; /* max payload size */ +}; + +/* ========================== */ +/* State Management Functions */ +/* ========================== */ + +/** + * iptfs_get_inner_mtu() - return inner MTU with no fragmentation. + * @x: xfrm state. + * @outer_mtu: the outer mtu + */ +static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu) +{ + struct crypto_aead *aead; + u32 blksize; + + aead = x->data; + blksize = ALIGN(crypto_aead_blocksize(aead), 4); + return ((outer_mtu - x->props.header_len - crypto_aead_authsize(aead)) & + ~(blksize - 1)) - 2; +} + +/** + * iptfs_user_init() - initialize the SA with IPTFS options from netlink. + * @net: the net data + * @x: xfrm state + * @attrs: netlink attributes + * @extack: extack return data + * + * Return: 0 on success or a negative error code on failure + */ +static int iptfs_user_init(struct net *net, struct xfrm_state *x, + struct nlattr **attrs, + struct netlink_ext_ack *extack) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct xfrm_iptfs_config *xc; + + xc = &xtfs->cfg; + + if (attrs[XFRMA_IPTFS_PKT_SIZE]) { + xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); + if (!xc->pkt_size) { + xtfs->payload_mtu = 0; + } else if (xc->pkt_size > x->props.header_len) { + xtfs->payload_mtu = xc->pkt_size - x->props.header_len; + } else { + NL_SET_ERR_MSG(extack, + "Packet size must be 0 or greater than IPTFS/ESP header length"); + return -EINVAL; + } + } + return 0; +} + +static unsigned int iptfs_sa_len(const struct xfrm_state *x) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct xfrm_iptfs_config *xc = &xtfs->cfg; + unsigned int l = 0; + + if (x->dir == XFRM_SA_DIR_OUT) + l += nla_total_size(sizeof(xc->pkt_size)); + + return l; +} + +static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + 
struct xfrm_iptfs_config *xc = &xtfs->cfg; + int ret = 0; + + if (x->dir == XFRM_SA_DIR_OUT) + ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); + + return ret; +} + +static void __iptfs_init_state(struct xfrm_state *x, + struct xfrm_iptfs_data *xtfs) +{ + /* Modify type (esp) adjustment values */ + + if (x->props.family == AF_INET) + x->props.header_len += sizeof(struct iphdr) + sizeof(struct ip_iptfs_hdr); + else if (x->props.family == AF_INET6) + x->props.header_len += sizeof(struct ipv6hdr) + sizeof(struct ip_iptfs_hdr); + x->props.enc_hdr_len = sizeof(struct ip_iptfs_hdr); + + /* Always keep a module reference when x->mode_data is set */ + __module_get(x->mode_cbs->owner); + + x->mode_data = xtfs; + xtfs->x = x; +} + +static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) +{ + struct xfrm_iptfs_data *xtfs; + + xtfs = kmemdup(orig->mode_data, sizeof(*xtfs), GFP_KERNEL); + if (!xtfs) + return -ENOMEM; + + x->mode_data = xtfs; + xtfs->x = x; + + return 0; +} + +static int iptfs_init_state(struct xfrm_state *x) +{ + struct xfrm_iptfs_data *xtfs; + + if (x->mode_data) { + /* We have arrived here from xfrm_state_clone() */ + xtfs = x->mode_data; + } else { + xtfs = kzalloc(sizeof(*xtfs), GFP_KERNEL); + if (!xtfs) + return -ENOMEM; + } + + __iptfs_init_state(x, xtfs); + + return 0; +} + +static void iptfs_destroy_state(struct xfrm_state *x) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + + if (!xtfs) + return; + + kfree_sensitive(xtfs); + + module_put(x->mode_cbs->owner); +} + +static const struct xfrm_mode_cbs iptfs_mode_cbs = { + .owner = THIS_MODULE, + .init_state = iptfs_init_state, + .clone_state = iptfs_clone_state, + .destroy_state = iptfs_destroy_state, + .user_init = iptfs_user_init, + .copy_to_user = iptfs_copy_to_user, + .sa_len = iptfs_sa_len, + .get_inner_mtu = iptfs_get_inner_mtu, +}; + +static int __init xfrm_iptfs_init(void) +{ + int err; + + pr_info("xfrm_iptfs: IPsec IP-TFS tunnel mode module\n"); + + err = 
xfrm_register_mode_cbs(XFRM_MODE_IPTFS, &iptfs_mode_cbs); + if (err < 0) + pr_info("%s: can't register IP-TFS\n", __func__); + + return err; +} + +static void __exit xfrm_iptfs_fini(void) +{ + xfrm_unregister_mode_cbs(XFRM_MODE_IPTFS); +} + +module_init(xfrm_iptfs_init); +module_exit(xfrm_iptfs_fini); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IP-TFS support for xfrm ipsec tunnels"); From 0e4fbf013fa566f274ce9b4ce698c75b1f998c52 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:04 -0500 Subject: [PATCH 0127/1386] xfrm: iptfs: add user packet (tunnel ingress) handling Add tunnel packet output functionality. This code handles the ingress to the tunnel. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 563 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 560 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index e7cb8734fc0f..c4cff005ea9a 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -19,29 +19,541 @@ #include "xfrm_inout.h" +/* ------------------------------------------------ */ +/* IPTFS default SA values (tunnel ingress/dir-out) */ +/* ------------------------------------------------ */ + +/** + * define IPTFS_DEFAULT_INIT_DELAY_USECS - default initial output delay + * + * The initial output delay is the amount of time prior to servicing the output + * queue after queueing the first packet on said queue. This applies anytime the + * output queue was previously empty. + * + * Default 0. + */ +#define IPTFS_DEFAULT_INIT_DELAY_USECS 0 + +/** + * define IPTFS_DEFAULT_MAX_QUEUE_SIZE - default max output queue size. + * + * The default IPTFS max output queue size in octets. The output queue is where + * received packets destined for output over an IPTFS tunnel are stored prior to + * being output in aggregated/fragmented form over the IPTFS tunnel. + * + * Default 1M. 
+ */ +#define IPTFS_DEFAULT_MAX_QUEUE_SIZE (1024 * 10240) + +#define NSECS_IN_USEC 1000 + +#define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT + /** * struct xfrm_iptfs_config - configuration for the IPTFS tunnel. * @pkt_size: size of the outer IP packet. 0 to use interface and MTU discovery, * otherwise the user specified value. + * @max_queue_size: The maximum number of octets allowed to be queued to be sent + * over the IPTFS SA. The queue size is measured as the size of all the + * packets enqueued. */ struct xfrm_iptfs_config { u32 pkt_size; /* outer_packet_size or 0 */ + u32 max_queue_size; /* octets */ }; /** * struct xfrm_iptfs_data - mode specific xfrm state. * @cfg: IPTFS tunnel config. * @x: owning SA (xfrm_state). + * @queue: queued user packets to send. + * @queue_size: number of octets on queue (sum of packet sizes). + * @ecn_queue_size: octets above with ECN mark. + * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet. + * @iptfs_timer: output timer. * @payload_mtu: max payload size. */ struct xfrm_iptfs_data { struct xfrm_iptfs_config cfg; /* Ingress User Input */ - struct xfrm_state *x; /* owning state */ + struct xfrm_state *x; /* owning state */ + struct sk_buff_head queue; /* output queue */ + + u32 queue_size; /* octets */ + u32 ecn_queue_size; /* octets above which ECN mark */ + u64 init_delay_ns; /* nanoseconds */ + struct hrtimer iptfs_timer; /* output timer */ u32 payload_mtu; /* max payload size */ }; +static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); + +/* ================================= */ +/* IPTFS Sending (ingress) Functions */ +/* ================================= */ + +/* ------------------------- */ +/* Enqueue to send functions */ +/* ------------------------- */ + +/** + * iptfs_enqueue() - enqueue packet if ok to send. + * @xtfs: xtfs state + * @skb: the packet + * + * Return: true if packet enqueued. 
+ */ +static bool iptfs_enqueue(struct xfrm_iptfs_data *xtfs, struct sk_buff *skb) +{ + u64 newsz = xtfs->queue_size + skb->len; + struct iphdr *iph; + + assert_spin_locked(&xtfs->x->lock); + + if (newsz > xtfs->cfg.max_queue_size) + return false; + + /* Set ECN CE if we are above our ECN queue threshold */ + if (newsz > xtfs->ecn_queue_size) { + iph = ip_hdr(skb); + if (iph->version == 4) + IP_ECN_set_ce(iph); + else if (iph->version == 6) + IP6_ECN_set_ce(skb, ipv6_hdr(skb)); + } + + __skb_queue_tail(&xtfs->queue, skb); + xtfs->queue_size += skb->len; + return true; +} + +static int iptfs_get_cur_pmtu(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs, + struct sk_buff *skb) +{ + struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); + u32 payload_mtu = xtfs->payload_mtu; + u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); + + if (payload_mtu && payload_mtu < pmtu) + pmtu = payload_mtu; + + return pmtu; +} + +static int iptfs_is_too_big(struct sock *sk, struct sk_buff *skb, u32 pmtu) +{ + if (skb->len <= pmtu) + return 0; + + /* We only send ICMP too big if the user has configured us as + * dont-fragment. + */ + if (skb->dev) + XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMOUTERROR); + + if (sk) + xfrm_local_error(skb, pmtu); + else if (ip_hdr(skb)->version == 4) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(pmtu)); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, pmtu); + + return 1; +} + +/* IPv4/IPv6 packet ingress to IPTFS tunnel, arrange to send in IPTFS payload + * (i.e., aggregating or fragmenting as appropriate). + * This is set in dst->output for an SA. 
+ */ +static int iptfs_output_collect(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff *segs, *nskb; + u32 pmtu = 0; + bool ok = true; + bool was_gso; + + /* We have hooked into dst_entry->output which means we have skipped the + * protocol specific netfilter (see xfrm4_output, xfrm6_output). + * when our timer runs we will end up calling xfrm_output directly on + * the encapsulated traffic. + * + * For both cases this is the NF_INET_POST_ROUTING hook which allows + * changing the skb->dst entry which then may not be xfrm based anymore + * in which case a REROUTED flag is set. and dst_output is called. + * + * For IPv6 we are also skipping fragmentation handling for local + * sockets, which may or may not be good depending on our tunnel DF + * setting. Normally with fragmentation supported we want to skip this + * fragmentation. + */ + + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); + + /* Break apart GSO skbs. If the queue is nearing full then we want the + * accounting and queuing to be based on the individual packets not on the + * aggregate GSO buffer. + */ + was_gso = skb_is_gso(skb); + if (!was_gso) { + segs = skb; + } else { + segs = skb_gso_segment(skb, 0); + if (IS_ERR_OR_NULL(segs)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + if (IS_ERR(segs)) + return PTR_ERR(segs); + return -EINVAL; + } + consume_skb(skb); + skb = NULL; + } + + /* We can be running on multiple cores and from the network softirq or + * from user context depending on where the packet is coming from. + */ + spin_lock_bh(&x->lock); + + skb_list_walk_safe(segs, skb, nskb) { + skb_mark_not_on_list(skb); + + /* Once we drop due to no queue space we continue to drop the + * rest of the packets from that GRO. 
+ */ + if (!ok) { +nospace: + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOQSPACE); + kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); + continue; + } + + /* Fragmenting handled in following commits. */ + if (iptfs_is_too_big(sk, skb, pmtu)) { + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); + continue; + } + + /* Enqueue to send in tunnel */ + ok = iptfs_enqueue(xtfs, skb); + if (!ok) + goto nospace; + } + + /* Start a delay timer if we don't have one yet */ + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) + hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, IPTFS_HRTIMER_MODE); + + spin_unlock_bh(&x->lock); + return 0; +} + +/* -------------------------- */ +/* Dequeue and send functions */ +/* -------------------------- */ + +static void iptfs_output_prepare_skb(struct sk_buff *skb, u32 blkoff) +{ + struct ip_iptfs_hdr *h; + size_t hsz = sizeof(*h); + + /* now reset values to be pointing at the rest of the packets */ + h = skb_push(skb, hsz); + memset(h, 0, hsz); + if (blkoff) + h->block_offset = htons(blkoff); + + /* network_header current points at the inner IP packet + * move it to the iptfs header + */ + skb->transport_header = skb->network_header; + skb->network_header -= hsz; + + IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; +} + +static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, struct sk_buff *child) +{ + u32 fllen = 0; + + /* It might be possible to account for a frag list in addition to page + * fragment if it's a valid state to be in. The page fragments size + * should be kept as data_len so only the frag_list size is removed, + * this must be done above as well. 
+ */ + *nextp = skb_shinfo(child)->frag_list; + while (*nextp) { + fllen += (*nextp)->len; + nextp = &(*nextp)->next; + } + skb_frag_list_init(child); + child->len -= fllen; + child->data_len -= fllen; + + return nextp; +} + +static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff *skb, *skb2, **nextp; + struct skb_shared_info *shi; + + while ((skb = __skb_dequeue(list))) { + u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb); + int remaining; + + /* protocol comes to us cleared sometimes */ + skb->protocol = x->outer_mode.family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + + if (skb->len > mtu) { + /* We handle this case before enqueueing so we are only + * here b/c MTU changed after we enqueued before we + * dequeued, just drop these. + */ + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR); + + kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); + continue; + } + + /* If we don't have a cksum in the packet we need to add one + * before encapsulation. + */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_help(skb)) { + XFRM_INC_STATS(dev_net(skb_dst(skb)->dev), LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + continue; + } + } + + /* Consider the buffer Tx'd and no longer owned */ + skb_orphan(skb); + + /* Convert first inner packet into an outer IPTFS packet */ + iptfs_output_prepare_skb(skb, 0); + + /* The space remaining to send more inner packet data is `mtu` - + * (skb->len - sizeof iptfs header). This is b/c the `mtu` value + * has the basic IPTFS header len accounted for, and we added + * that header to the skb so it is a part of skb->len, thus we + * subtract it from the skb length. + */ + remaining = mtu - (skb->len - sizeof(struct ip_iptfs_hdr)); + + /* Re-home (un-nest) nested fragment lists. We need to do this + * b/c we will simply be appending any following aggregated + * inner packets to the frag list. 
+ */ + shi = skb_shinfo(skb); + nextp = &shi->frag_list; + while (*nextp) { + if (skb_has_frag_list(*nextp)) + nextp = iptfs_rehome_fraglist(&(*nextp)->next, *nextp); + else + nextp = &(*nextp)->next; + } + + /* See if we have enough space to simply append. + * + * NOTE: Maybe do not append if we will be mis-aligned, + * SW-based endpoints will probably have to copy in this + * case. + */ + while ((skb2 = skb_peek(list))) { + if (skb2->len > remaining) + break; + + __skb_unlink(skb2, list); + + /* Consider the buffer Tx'd and no longer owned */ + skb_orphan(skb); + + /* If we don't have a cksum in the packet we need to add + * one before encapsulation. + */ + if (skb2->ip_summed == CHECKSUM_PARTIAL) { + if (skb_checksum_help(skb2)) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb2); + continue; + } + } + + /* Do accounting */ + skb->data_len += skb2->len; + skb->len += skb2->len; + remaining -= skb2->len; + + /* Append to the frag_list */ + *nextp = skb2; + nextp = &skb2->next; + if (skb_has_frag_list(skb2)) + nextp = iptfs_rehome_fraglist(nextp, skb2); + skb->truesize += skb2->truesize; + } + + xfrm_output(NULL, skb); + } +} + +static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) +{ + struct sk_buff_head list; + struct xfrm_iptfs_data *xtfs; + struct xfrm_state *x; + + xtfs = container_of(me, typeof(*xtfs), iptfs_timer); + x = xtfs->x; + + /* Process all the queued packets + * + * softirq execution order: timer > tasklet > hrtimer + * + * Network rx will have run before us giving one last chance to queue + * ingress packets for us to process and transmit. 
+ */ + + spin_lock(&x->lock); + __skb_queue_head_init(&list); + skb_queue_splice_init(&xtfs->queue, &list); + xtfs->queue_size = 0; + spin_unlock(&x->lock); + + /* After the above unlock, packets can begin queuing again, and the + * timer can be set again, from another CPU either in softirq or user + * context (not from this one since we are running at softirq level + * already). + */ + + iptfs_output_queued(x, &list); + + return HRTIMER_NORESTART; +} + +/** + * iptfs_encap_add_ipv4() - add outer encaps + * @x: xfrm state + * @skb: the packet + * + * This was originally taken from xfrm4_tunnel_encap_add. The reason for the + * copy is that IP-TFS/AGGFRAG can have different functionality for how to set + * the TOS/DSCP bits. Sets the protocol to a different value and doesn't do + * anything with inner headers as they aren't pointing into a normal IP + * singleton inner packet. + * + * Return: 0 on success or a negative error code on failure + */ +static int iptfs_encap_add_ipv4(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct iphdr *top_iph; + + skb_reset_inner_network_header(skb); + skb_reset_inner_transport_header(skb); + + skb_set_network_header(skb, -(x->props.header_len - x->props.enc_hdr_len)); + skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + top_iph = ip_hdr(skb); + top_iph->ihl = 5; + top_iph->version = 4; + top_iph->protocol = IPPROTO_AGGFRAG; + + /* As we have 0, fractional, 1 or N inner packets there's no obviously + * correct DSCP mapping to inherit. ECN should be cleared per RFC9347 + * 3.1. 
+ */ + top_iph->tos = 0; + + top_iph->frag_off = htons(IP_DF); + top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = x->props.saddr.a4; + top_iph->daddr = x->id.daddr.a4; + ip_select_ident(dev_net(dst->dev), skb, NULL); + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +/** + * iptfs_encap_add_ipv6() - add outer encaps + * @x: xfrm state + * @skb: the packet + * + * This was originally taken from xfrm6_tunnel_encap_add. The reason for the + * copy is that IP-TFS/AGGFRAG can have different functionality for how to set + * the flow label and TOS/DSCP bits. It also sets the protocol to a different + * value and doesn't do anything with inner headers as they aren't pointing into + * a normal IP singleton inner packet. + * + * Return: 0 on success or a negative error code on failure + */ +static int iptfs_encap_add_ipv6(struct xfrm_state *x, struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + struct ipv6hdr *top_iph; + int dsfield; + + skb_reset_inner_network_header(skb); + skb_reset_inner_transport_header(skb); + + skb_set_network_header(skb, -x->props.header_len + x->props.enc_hdr_len); + skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); + skb->transport_header = skb->network_header + sizeof(*top_iph); + + top_iph = ipv6_hdr(skb); + top_iph->version = 6; + top_iph->priority = 0; + memset(top_iph->flow_lbl, 0, sizeof(top_iph->flow_lbl)); + top_iph->nexthdr = IPPROTO_AGGFRAG; + + /* As we have 0, fractional, 1 or N inner packets there's no obviously + * correct DSCP mapping to inherit. ECN should be cleared per RFC9347 + * 3.1. 
+ */ + dsfield = 0; + ipv6_change_dsfield(top_iph, 0, dsfield); + + top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst)); + top_iph->saddr = *(struct in6_addr *)&x->props.saddr; + top_iph->daddr = *(struct in6_addr *)&x->id.daddr; + + return 0; +} +#endif + +/** + * iptfs_prepare_output() - prepare the skb for output + * @x: xfrm state + * @skb: the packet + * + * Return: Error value, if 0 then skb values should be as follows: + * - transport_header should point at ESP header + * - network_header should point at Outer IP header + * - mac_header should point at protocol/nexthdr of the outer IP + */ +static int iptfs_prepare_output(struct xfrm_state *x, struct sk_buff *skb) +{ + if (x->outer_mode.family == AF_INET) + return iptfs_encap_add_ipv4(x, skb); + if (x->outer_mode.family == AF_INET6) { +#if IS_ENABLED(CONFIG_IPV6) + return iptfs_encap_add_ipv6(x, skb); +#else + return -EAFNOSUPPORT; +#endif + } + return -EOPNOTSUPP; +} + /* ========================== */ /* State Management Functions */ /* ========================== */ @@ -77,8 +589,11 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, { struct xfrm_iptfs_data *xtfs = x->mode_data; struct xfrm_iptfs_config *xc; + u64 q; xc = &xtfs->cfg; + xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE; + xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_PKT_SIZE]) { xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); @@ -92,6 +607,16 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, return -EINVAL; } } + if (attrs[XFRMA_IPTFS_MAX_QSIZE]) + xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]); + if (attrs[XFRMA_IPTFS_INIT_DELAY]) + xtfs->init_delay_ns = + (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * NSECS_IN_USEC; + + q = (u64)xc->max_queue_size * 95; + do_div(q, 100); + xtfs->ecn_queue_size = (u32)q; + return 0; } @@ -101,8 +626,11 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) struct xfrm_iptfs_config 
*xc = &xtfs->cfg; unsigned int l = 0; - if (x->dir == XFRM_SA_DIR_OUT) + if (x->dir == XFRM_SA_DIR_OUT) { + l += nla_total_size(sizeof(u32)); /* init delay usec */ + l += nla_total_size(sizeof(xc->max_queue_size)); l += nla_total_size(sizeof(xc->pkt_size)); + } return l; } @@ -112,9 +640,21 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_iptfs_data *xtfs = x->mode_data; struct xfrm_iptfs_config *xc = &xtfs->cfg; int ret = 0; + u64 q; + + if (x->dir == XFRM_SA_DIR_OUT) { + q = xtfs->init_delay_ns; + do_div(q, NSECS_IN_USEC); + ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); + if (ret) + return ret; + + ret = nla_put_u32(skb, XFRMA_IPTFS_MAX_QSIZE, xc->max_queue_size); + if (ret) + return ret; - if (x->dir == XFRM_SA_DIR_OUT) ret = nla_put_u32(skb, XFRMA_IPTFS_PKT_SIZE, xc->pkt_size); + } return ret; } @@ -122,6 +662,10 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) static void __iptfs_init_state(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs) { + __skb_queue_head_init(&xtfs->queue); + hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); + xtfs->iptfs_timer.function = iptfs_delay_timer; + /* Modify type (esp) adjustment values */ if (x->props.family == AF_INET) @@ -172,10 +716,21 @@ static int iptfs_init_state(struct xfrm_state *x) static void iptfs_destroy_state(struct xfrm_state *x) { struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff_head list; + struct sk_buff *skb; if (!xtfs) return; + spin_lock_bh(&xtfs->x->lock); + hrtimer_cancel(&xtfs->iptfs_timer); + __skb_queue_head_init(&list); + skb_queue_splice_init(&xtfs->queue, &list); + spin_unlock_bh(&xtfs->x->lock); + + while ((skb = __skb_dequeue(&list))) + kfree_skb(skb); + kfree_sensitive(xtfs); module_put(x->mode_cbs->owner); @@ -190,6 +745,8 @@ static const struct xfrm_mode_cbs iptfs_mode_cbs = { .copy_to_user = iptfs_copy_to_user, .sa_len = iptfs_sa_len, .get_inner_mtu = iptfs_get_inner_mtu, + .output = 
iptfs_output_collect, + .prepare_output = iptfs_prepare_output, }; static int __init xfrm_iptfs_init(void) From b96ba312e21c9b7ac1526829b9640ddc06695c0b Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:05 -0500 Subject: [PATCH 0128/1386] xfrm: iptfs: share page fragments of inner packets When possible rather than appending secondary (aggregated) inner packets to the fragment list, share their page fragments with the outer IPTFS packet. This allows for more efficient packet transmission. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 85 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index c4cff005ea9a..7bf18f472fed 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -90,6 +91,23 @@ struct xfrm_iptfs_data { static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); +/* ======================= */ +/* IPTFS SK_BUFF Functions */ +/* ======================= */ + +/** + * iptfs_skb_head_to_frag() - initialize a skb_frag_t based on skb head data + * @skb: skb with the head data + * @frag: frag to initialize + */ +static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag) +{ + struct page *page = virt_to_head_page(skb->data); + unsigned char *addr = (unsigned char *)page_address(page); + + skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb)); +} + /* ================================= */ /* IPTFS Sending (ingress) Functions */ /* ================================= */ @@ -297,14 +315,44 @@ static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, struct sk_ return nextp; } +static void iptfs_consume_frags(struct sk_buff *to, struct sk_buff *from) +{ + struct 
skb_shared_info *fromi = skb_shinfo(from); + struct skb_shared_info *toi = skb_shinfo(to); + unsigned int new_truesize; + + /* If we have data in a head page, grab it */ + if (!skb_headlen(from)) { + new_truesize = SKB_TRUESIZE(skb_end_offset(from)); + } else { + iptfs_skb_head_to_frag(from, &toi->frags[toi->nr_frags]); + skb_frag_ref(to, toi->nr_frags++); + new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); + } + + /* Move any other page fragments rather than copy */ + memcpy(&toi->frags[toi->nr_frags], fromi->frags, + sizeof(fromi->frags[0]) * fromi->nr_frags); + toi->nr_frags += fromi->nr_frags; + fromi->nr_frags = 0; + from->data_len = 0; + from->len = 0; + to->truesize += from->truesize - new_truesize; + from->truesize = new_truesize; + + /* We are done with this SKB */ + consume_skb(from); +} + static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) { struct xfrm_iptfs_data *xtfs = x->mode_data; struct sk_buff *skb, *skb2, **nextp; - struct skb_shared_info *shi; + struct skb_shared_info *shi, *shi2; while ((skb = __skb_dequeue(list))) { u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb); + bool share_ok = true; int remaining; /* protocol comes to us cleared sometimes */ @@ -349,7 +397,7 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) /* Re-home (un-nest) nested fragment lists. We need to do this * b/c we will simply be appending any following aggregated - * inner packets to the frag list. + * inner packets using the frag list. */ shi = skb_shinfo(skb); nextp = &shi->frag_list; @@ -360,6 +408,9 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) nextp = &(*nextp)->next; } + if (shi->frag_list || skb_cloned(skb) || skb_shared(skb)) + share_ok = false; + /* See if we have enough space to simply append. 
* * NOTE: Maybe do not append if we will be mis-aligned, @@ -386,17 +437,35 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) } } + /* skb->pp_recycle is passed to __skb_flag_unref for all + * frag pages so we can only share pages with skb's who + * match ourselves. + */ + shi2 = skb_shinfo(skb2); + if (share_ok && + (shi2->frag_list || + (!skb2->head_frag && skb_headlen(skb)) || + skb->pp_recycle != skb2->pp_recycle || + skb_zcopy(skb2) || + (shi->nr_frags + shi2->nr_frags + 1 > MAX_SKB_FRAGS))) + share_ok = false; + /* Do accounting */ skb->data_len += skb2->len; skb->len += skb2->len; remaining -= skb2->len; - /* Append to the frag_list */ - *nextp = skb2; - nextp = &skb2->next; - if (skb_has_frag_list(skb2)) - nextp = iptfs_rehome_fraglist(nextp, skb2); - skb->truesize += skb2->truesize; + if (share_ok) { + iptfs_consume_frags(skb, skb2); + } else { + /* Append to the frag_list */ + *nextp = skb2; + nextp = &skb2->next; + if (skb_has_frag_list(skb2)) + nextp = iptfs_rehome_fraglist(nextp, + skb2); + skb->truesize += skb2->truesize; + } } xfrm_output(NULL, skb); From 8579d342ea2b3c1c672858de180152ccf9cb0ee1 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:06 -0500 Subject: [PATCH 0129/1386] xfrm: iptfs: add fragmenting of larger than MTU user packets Add support for tunneling user (inner) packets that are larger than the tunnel's path MTU (outer) using IP-TFS fragmentation. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 343 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 315 insertions(+), 28 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 7bf18f472fed..b7d706a006eb 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -46,6 +46,22 @@ */ #define IPTFS_DEFAULT_MAX_QUEUE_SIZE (1024 * 10240) +/* Assumed: skb->head is cache aligned. 
+ * + * L2 Header resv: Arrange for cacheline to start at skb->data - 16 to keep the + * to-be-pushed L2 header in the same cacheline as resulting `skb->data` (i.e., + * the L3 header). If cacheline size is > 64 then skb->data + pushed L2 will all + * be in a single cacheline if we simply reserve 64 bytes. + * + * L3 Header resv: For L3+L2 headers (i.e., skb->data points at the IPTFS payload) + * we want `skb->data` to be cacheline aligned and all pushed L2L3 headers will + * be in their own cacheline[s]. 128 works for cachelines up to 128 bytes, for + * any larger cacheline sizes the pushed headers will simply share the cacheline + * with the start of the IPTFS payload (skb->data). + */ +#define XFRM_IPTFS_MIN_L3HEADROOM 128 +#define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16) + #define NSECS_IN_USEC 1000 #define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT @@ -57,10 +73,12 @@ * @max_queue_size: The maximum number of octets allowed to be queued to be sent * over the IPTFS SA. The queue size is measured as the size of all the * packets enqueued. + * @dont_frag: true to inhibit fragmenting across IPTFS outer packets. */ struct xfrm_iptfs_config { u32 pkt_size; /* outer_packet_size or 0 */ u32 max_queue_size; /* octets */ + u8 dont_frag : 1; }; /** @@ -88,13 +106,72 @@ struct xfrm_iptfs_data { u32 payload_mtu; /* max payload size */ }; -static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); +static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); /* ======================= */ /* IPTFS SK_BUFF Functions */ /* ======================= */ +/** + * iptfs_alloc_skb() - Allocate a new `skb`. + * @tpl: the skb to copy required meta-data from. + * @len: the linear length of the head data, zero is fine. 
+ * @l3resv: true if skb reserve needs to support pushing L3 headers + * + * A new `skb` is allocated and required meta-data is copied from `tpl`, the + * head data is sized to `len` + reserved space set according to the @l3resv + * boolean. + * + * When @l3resv is false, resv is XFRM_IPTFS_MIN_L2HEADROOM which arranges for + * `skb->data - 16` which is a good guess for good cache alignment (placing the + * to-be-pushed L2 header at the start of a cacheline). + * + * Otherwise, @l3resv is true and resv is set to the correct reserved space for + * dst->dev plus the calculated L3 overhead for the xfrm dst or + * XFRM_IPTFS_MIN_L3HEADROOM whichever is larger. This is then cache aligned so + * that all the headers will commonly fall in a cacheline when possible. + * + * l3resv=true is used on tunnel ingress (tx), because we need to reserve for + * the new IPTFS packet (i.e., L2+L3 headers). On tunnel egress (rx) the data + * being copied into the skb includes the user L3 headers already so we only + * need to reserve for L2. + * + * Return: the new skb or NULL. 
+ */ +static struct sk_buff *iptfs_alloc_skb(struct sk_buff *tpl, u32 len, bool l3resv) +{ + struct sk_buff *skb; + u32 resv; + + if (!l3resv) { + resv = XFRM_IPTFS_MIN_L2HEADROOM; + } else { + struct dst_entry *dst = skb_dst(tpl); + + resv = LL_RESERVED_SPACE(dst->dev) + dst->header_len; + resv = max(resv, XFRM_IPTFS_MIN_L3HEADROOM); + resv = L1_CACHE_ALIGN(resv); + } + + skb = alloc_skb(len + resv, GFP_ATOMIC | __GFP_NOWARN); + if (!skb) + return NULL; + + skb_reserve(skb, resv); + + if (!l3resv) { + /* xfrm_input resume needs dev and xfrm ext from tunnel pkt */ + skb->dev = tpl->dev; + __skb_ext_copy(skb, tpl); + } + + /* dropped by xfrm_input, used by xfrm_output */ + skb_dst_copy(skb, tpl); + + return skb; +} + /** * iptfs_skb_head_to_frag() - initialize a skb_frag_t based on skb head data * @skb: skb with the head data @@ -152,7 +229,7 @@ static int iptfs_get_cur_pmtu(struct xfrm_state *x, struct xfrm_iptfs_data *xtfs { struct xfrm_dst *xdst = (struct xfrm_dst *)skb_dst(skb); u32 payload_mtu = xtfs->payload_mtu; - u32 pmtu = iptfs_get_inner_mtu(x, xdst->child_mtu_cached); + u32 pmtu = __iptfs_get_inner_mtu(x, xdst->child_mtu_cached); if (payload_mtu && payload_mtu < pmtu) pmtu = payload_mtu; @@ -210,7 +287,8 @@ static int iptfs_output_collect(struct net *net, struct sock *sk, struct sk_buff * fragmentation. */ - pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); + if (xtfs->cfg.dont_frag) + pmtu = iptfs_get_cur_pmtu(x, xtfs, skb); /* Break apart GSO skbs. If the queue is nearing full then we want the * accounting and queuing to be based on the individual packets not on the @@ -250,8 +328,10 @@ nospace: continue; } - /* Fragmenting handled in following commits. */ - if (iptfs_is_too_big(sk, skb, pmtu)) { + /* If the user indicated no iptfs fragmenting check before + * enqueue. 
+ */ + if (xtfs->cfg.dont_frag && iptfs_is_too_big(sk, skb, pmtu)) { kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); continue; } @@ -294,6 +374,181 @@ static void iptfs_output_prepare_skb(struct sk_buff *skb, u32 blkoff) IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; } +/** + * iptfs_copy_create_frag() - create an inner fragment skb. + * @st: The source packet data. + * @offset: offset in @st of the new fragment data. + * @copy_len: the amount of data to copy from @st. + * + * Create a new skb holding a single IPTFS inner packet fragment. @copy_len must + * not be greater than the max fragment size. + * + * Return: the new fragment skb or an ERR_PTR(). + */ +static struct sk_buff *iptfs_copy_create_frag(struct skb_seq_state *st, u32 offset, u32 copy_len) +{ + struct sk_buff *src = st->root_skb; + struct sk_buff *skb; + int err; + + skb = iptfs_alloc_skb(src, copy_len, true); + if (!skb) + return ERR_PTR(-ENOMEM); + + /* Now copy `copy_len` data from src */ + err = skb_copy_seq_read(st, offset, skb_put(skb, copy_len), copy_len); + if (err) { + kfree_skb(skb); + return ERR_PTR(err); + } + + return skb; +} + +/** + * iptfs_copy_create_frags() - create and send N-1 fragments of a larger skb. + * @skbp: the source packet skb (IN), skb holding the last fragment in + * the fragment stream (OUT). + * @xtfs: IPTFS SA state. + * @mtu: the max IPTFS fragment size. + * + * This function is responsible for fragmenting a larger inner packet into a + * sequence of IPTFS payload packets. The last fragment is returned rather than + * being sent so that the caller can append more inner packets (aggregation) if + * there is room. 
+ * + * Return: 0 on success or a negative error code on failure + */ +static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu) +{ + struct skb_seq_state skbseq; + struct list_head sublist; + struct sk_buff *skb = *skbp; + struct sk_buff *nskb = *skbp; + u32 copy_len, offset; + u32 to_copy = skb->len - mtu; + int err = 0; + + INIT_LIST_HEAD(&sublist); + + skb_prepare_seq_read(skb, 0, skb->len, &skbseq); + + /* A trimmed `skb` will be sent as the first fragment, later. */ + offset = mtu; + to_copy = skb->len - offset; + while (to_copy) { + /* Send all but last fragment to allow agg. append */ + list_add_tail(&nskb->list, &sublist); + + /* FUTURE: if the packet has an odd/non-aligning length we could + * send less data in the penultimate fragment so that the last + * fragment then ends on an aligned boundary. + */ + copy_len = min(to_copy, mtu); + nskb = iptfs_copy_create_frag(&skbseq, offset, copy_len); + if (IS_ERR(nskb)) { + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMOUTERROR); + skb_abort_seq_read(&skbseq); + err = PTR_ERR(nskb); + nskb = NULL; + break; + } + iptfs_output_prepare_skb(nskb, to_copy); + offset += copy_len; + to_copy -= copy_len; + } + skb_abort_seq_read(&skbseq); + + /* return last fragment that will be unsent (or NULL) */ + *skbp = nskb; + + /* trim the original skb to MTU */ + if (!err) + err = pskb_trim(skb, mtu); + + if (err) { + /* Free all frags. Don't bother sending a partial packet we will + * never complete. + */ + kfree_skb(nskb); + list_for_each_entry_safe(skb, nskb, &sublist, list) { + skb_list_del_init(skb); + kfree_skb(skb); + } + return err; + } + + /* prepare the initial fragment with an iptfs header */ + iptfs_output_prepare_skb(skb, 0); + + /* Send all but last fragment, if we fail to send a fragment then free + * the rest -- no point in sending a packet that can't be reassembled. 
+ */ + list_for_each_entry_safe(skb, nskb, &sublist, list) { + skb_list_del_init(skb); + if (!err) + err = xfrm_output(NULL, skb); + else + kfree_skb(skb); + } + if (err) + kfree_skb(*skbp); + return err; +} + +/** + * iptfs_first_skb() - handle the first dequeued inner packet for output + * @skbp: the source packet skb (IN), skb holding the last fragment in + * the fragment stream (OUT). + * @xtfs: IPTFS SA state. + * @mtu: the max IPTFS fragment size. + * + * This function is responsible for fragmenting a larger inner packet into a + * sequence of IPTFS payload packets. + * + * The last fragment is returned rather than being sent so that the caller can + * append more inner packets (aggregation) if there is room. + * + * Return: 0 on success or a negative error code on failure + */ +static int iptfs_first_skb(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, u32 mtu) +{ + struct sk_buff *skb = *skbp; + int err; + + /* Classic ESP skips the don't fragment ICMP error if DF is clear on + * the inner packet or ignore_df is set. Otherwise it will send an ICMP + * or local error if the inner packet won't fit it's MTU. + * + * With IPTFS we do not care about the inner packet DF bit. If the + * tunnel is configured to "don't fragment" we error back if things + * don't fit in our max packet size. Otherwise we iptfs-fragment as + * normal. + */ + + /* The opportunity for HW offload has ended */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + err = skb_checksum_help(skb); + if (err) + return err; + } + + /* We've split gso up before queuing */ + + /* Consider the buffer Tx'd and no longer owned */ + skb_orphan(skb); + + /* Simple case -- it fits. `mtu` accounted for all the overhead + * including the basic IPTFS header. 
+ */ + if (skb->len <= mtu) { + iptfs_output_prepare_skb(skb, 0); + return 0; + } + + return iptfs_copy_create_frags(skbp, xtfs, mtu); +} + static struct sk_buff **iptfs_rehome_fraglist(struct sk_buff **nextp, struct sk_buff *child) { u32 fllen = 0; @@ -350,6 +605,15 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) struct sk_buff *skb, *skb2, **nextp; struct skb_shared_info *shi, *shi2; + /* If we are fragmenting due to a large inner packet we will output all + * the outer IPTFS packets required to contain the fragments of the + * single large inner packet. These outer packets need to be sent + * consecutively (ESP seq-wise). Since this output function is always + * running from a timer we do not need a lock to provide this guarantee. + * We will output our packets consecutively before the timer is allowed + * to run again on some other CPU. + */ + while ((skb = __skb_dequeue(list))) { u32 mtu = iptfs_get_cur_pmtu(x, xtfs, skb); bool share_ok = true; @@ -359,7 +623,7 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) skb->protocol = x->outer_mode.family == AF_INET ? htons(ETH_P_IP) : htons(ETH_P_IPV6); - if (skb->len > mtu) { + if (skb->len > mtu && xtfs->cfg.dont_frag) { /* We handle this case before enqueueing so we are only * here b/c MTU changed after we enqueued before we * dequeued, just drop these. @@ -370,28 +634,22 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) continue; } - /* If we don't have a cksum in the packet we need to add one - * before encapsulation. + /* Convert first inner packet into an outer IPTFS packet, + * dealing with any fragmentation into multiple outer packets + * if necessary. 
*/ - if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (skb_checksum_help(skb)) { - XFRM_INC_STATS(dev_net(skb_dst(skb)->dev), LINUX_MIB_XFRMOUTERROR); - kfree_skb(skb); - continue; - } - } + if (iptfs_first_skb(&skb, xtfs, mtu)) + continue; - /* Consider the buffer Tx'd and no longer owned */ - skb_orphan(skb); - - /* Convert first inner packet into an outer IPTFS packet */ - iptfs_output_prepare_skb(skb, 0); - - /* The space remaining to send more inner packet data is `mtu` - - * (skb->len - sizeof iptfs header). This is b/c the `mtu` value - * has the basic IPTFS header len accounted for, and we added - * that header to the skb so it is a part of skb->len, thus we - * subtract it from the skb length. + /* If fragmentation was required the returned skb is the last + * IPTFS fragment in the chain, and it's IPTFS header blkoff has + * been set just past the end of the fragment data. + * + * In either case the space remaining to send more inner packet + * data is `mtu` - (skb->len - sizeof iptfs header). This is b/c + * the `mtu` value has the basic IPTFS header len accounted for, + * and we added that header to the skb so it is a part of + * skb->len, thus we subtract it from the skb length. */ remaining = mtu - (skb->len - sizeof(struct ip_iptfs_hdr)); @@ -628,11 +886,13 @@ static int iptfs_prepare_output(struct xfrm_state *x, struct sk_buff *skb) /* ========================== */ /** - * iptfs_get_inner_mtu() - return inner MTU with no fragmentation. + * __iptfs_get_inner_mtu() - return inner MTU with no fragmentation. * @x: xfrm state. * @outer_mtu: the outer mtu + * + * Return: Correct MTU taking in to account the encap overhead. 
*/ -static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu) +static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu) { struct crypto_aead *aead; u32 blksize; @@ -643,6 +903,23 @@ static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu) ~(blksize - 1)) - 2; } +/** + * iptfs_get_inner_mtu() - return the inner MTU for an IPTFS xfrm. + * @x: xfrm state. + * @outer_mtu: Outer MTU for the encapsulated packet. + * + * Return: Correct MTU taking in to account the encap overhead. + */ +static u32 iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu) +{ + struct xfrm_iptfs_data *xtfs = x->mode_data; + + /* If not dont-frag we have no MTU */ + if (!xtfs->cfg.dont_frag) + return x->outer_mode.family == AF_INET ? IP_MAX_MTU : IP6_MAX_MTU; + return __iptfs_get_inner_mtu(x, outer_mtu); +} + /** * iptfs_user_init() - initialize the SA with IPTFS options from netlink. * @net: the net data @@ -664,6 +941,8 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE; xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC; + if (attrs[XFRMA_IPTFS_DONT_FRAG]) + xc->dont_frag = true; if (attrs[XFRMA_IPTFS_PKT_SIZE]) { xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); if (!xc->pkt_size) { @@ -696,6 +975,8 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) unsigned int l = 0; if (x->dir == XFRM_SA_DIR_OUT) { + if (xc->dont_frag) + l += nla_total_size(0); /* dont-frag flag */ l += nla_total_size(sizeof(u32)); /* init delay usec */ l += nla_total_size(sizeof(xc->max_queue_size)); l += nla_total_size(sizeof(xc->pkt_size)); @@ -712,6 +993,12 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) u64 q; if (x->dir == XFRM_SA_DIR_OUT) { + if (xc->dont_frag) { + ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG); + if (ret) + return ret; + } + q = xtfs->init_delay_ns; do_div(q, NSECS_IN_USEC); ret = nla_put_u32(skb, XFRMA_IPTFS_INIT_DELAY, q); From 
6c82d2433671819a550227bf65bfb6043e3d3305 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:07 -0500 Subject: [PATCH 0130/1386] xfrm: iptfs: add basic receive packet (tunnel egress) handling Add handling of packets received from the tunnel. This implements tunnel egress functionality. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 276 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 276 insertions(+) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index b7d706a006eb..085964fe3251 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -20,6 +20,10 @@ #include "xfrm_inout.h" +/* IPTFS encap (header) values. */ +#define IPTFS_SUBTYPE_BASIC 0 +#define IPTFS_SUBTYPE_CC 1 + /* ------------------------------------------------ */ /* IPTFS default SA values (tunnel ingress/dir-out) */ /* ------------------------------------------------ */ @@ -185,6 +189,277 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag) skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb)); } +/* ================================== */ +/* IPTFS Receiving (egress) Functions */ +/* ================================== */ + +/** + * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff. + * @skblen: the total data size for `skb`. + * @st: The source for the rest of the data to copy into `skb`. + * @off: The offset into @st to copy data from. + * @len: The length of data to copy from @st into `skb`. This must be <= + * @skblen. + * + * Create a new sk_buff `skb` with @skblen of packet data space. If non-zero, + * copy @rlen bytes of @runt into `skb`. Then using seq functions copy @len + * bytes from @st into `skb` starting from @off. + * + * It is an error for @len to be greater than the amount of data left in @st. + * + * Return: The newly allocated sk_buff `skb` or NULL if an error occurs. 
+ */ +static struct sk_buff * +iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len) +{ + struct sk_buff *skb = iptfs_alloc_skb(st->root_skb, skblen, false); + + if (!skb) + return NULL; + if (skb_copy_seq_read(st, off, skb_put(skb, len), len)) { + XFRM_INC_STATS(dev_net(st->root_skb->dev), LINUX_MIB_XFRMINERROR); + kfree_skb(skb); + return NULL; + } + return skb; +} + +/** + * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv. + * @x: xfrm state + * @skb: the inner packet + * + * Finish the standard xfrm processing on the inner packet prior to sending back + * through gro_cells_receive. We do this separately b/c we are building a list + * of packets in the hopes that one day a list will be taken by + * xfrm_input. + */ +static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb) +{ + skb_reset_network_header(skb); + + /* The packet is going back through gro_cells_receive no need to + * set this. + */ + skb_reset_transport_header(skb); + + /* Packet already has checksum value set. */ + skb->ip_summed = CHECKSUM_NONE; + + /* Our skb will contain the header data copied when this outer packet + * which contained the start of this inner packet. This is true + * when we allocate a new skb as well as when we reuse the existing skb. 
+ */ + if (ip_hdr(skb)->version == 0x4) { + struct iphdr *iph = ip_hdr(skb); + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv4_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP_ECN_set_ce(iph); + + skb->protocol = htons(ETH_P_IP); + } else { + struct ipv6hdr *iph = ipv6_hdr(skb); + + if (x->props.flags & XFRM_STATE_DECAP_DSCP) + ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, iph); + if (!(x->props.flags & XFRM_STATE_NOECN)) + if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos)) + IP6_ECN_set_ce(skb, iph); + + skb->protocol = htons(ETH_P_IPV6); + } +} + +static bool __input_process_payload(struct xfrm_state *x, u32 data, + struct skb_seq_state *skbseq, + struct list_head *sublist) +{ + u8 hbytes[sizeof(struct ipv6hdr)]; + struct sk_buff *first_skb, *next, *skb; + const unsigned char *old_mac; + struct iphdr *iph; + struct net *net; + u32 remaining, iplen, iphlen, tail; + + net = xs_net(x); + skb = skbseq->root_skb; + first_skb = NULL; + + /* Save the old mac header if set */ + old_mac = skb_mac_header_was_set(skb) ? skb_mac_header(skb) : NULL; + + /* New packets */ + + tail = skb->len; + while (data < tail) { + __be16 protocol = 0; + + /* Gather information on the next data block. + * `data` points to the start of the data block. 
+ */ + remaining = tail - data; + + /* try and copy enough bytes to read length from ipv4/ipv6 */ + iphlen = min_t(u32, remaining, 6); + if (skb_copy_seq_read(skbseq, data, hbytes, iphlen)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto done; + } + + iph = (struct iphdr *)hbytes; + if (iph->version == 0x4) { + /* must have at least tot_len field present */ + if (remaining < 4) + break; + + iplen = be16_to_cpu(iph->tot_len); + iphlen = iph->ihl << 2; + protocol = cpu_to_be16(ETH_P_IP); + XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; + } else if (iph->version == 0x6) { + /* must have at least payload_len field present */ + if (remaining < 6) + break; + + iplen = be16_to_cpu(((struct ipv6hdr *)hbytes)->payload_len); + iplen += sizeof(struct ipv6hdr); + iphlen = sizeof(struct ipv6hdr); + protocol = cpu_to_be16(ETH_P_IPV6); + XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = + ipv6_get_dsfield((struct ipv6hdr *)iph); + } else if (iph->version == 0x0) { + /* pad */ + break; + } else { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto done; + } + + if (unlikely(skbseq->stepped_offset)) { + /* We need to reset our seq read, it can't backup at + * this point. + */ + struct sk_buff *save = skbseq->root_skb; + + skb_abort_seq_read(skbseq); + skb_prepare_seq_read(save, data, tail, skbseq); + } + + if (!first_skb) + first_skb = skb; + + /* Fragment handling in following commits */ + if (iplen > remaining) + break; + + skb = iptfs_pskb_extract_seq(iplen, skbseq, data, iplen); + if (!skb) { + /* skip to next packet or done */ + data += iplen; + continue; + } + + skb->protocol = protocol; + if (old_mac) { + /* rebuild the mac header */ + skb_set_mac_header(skb, -first_skb->mac_len); + memcpy(skb_mac_header(skb), old_mac, first_skb->mac_len); + eth_hdr(skb)->h_proto = skb->protocol; + } + + data += iplen; + iptfs_complete_inner_skb(x, skb); + list_add_tail(&skb->list, sublist); + } + + /* Send the packets! 
*/ + list_for_each_entry_safe(skb, next, sublist, list) { + skb_list_del_init(skb); + if (xfrm_input(skb, 0, 0, -2)) + kfree_skb(skb); + } + +done: + return false; +} + +/** + * iptfs_input() - handle receipt of iptfs payload + * @x: xfrm state + * @skb: the packet + * + * Process the IPTFS payload in `skb` and consume it afterwards. + * + * Returns 0. + */ +static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_iptfs_cc_hdr iptcch; + struct skb_seq_state skbseq; + struct list_head sublist; /* rename this it's just a list */ + struct ip_iptfs_hdr *ipth; + struct net *net; + u32 remaining, data; + bool consumed = false; + + net = xs_net(x); + + /* Large enough to hold both types of header */ + ipth = (struct ip_iptfs_hdr *)&iptcch; + + skb_prepare_seq_read(skb, 0, skb->len, &skbseq); + + /* Get the IPTFS header and validate it */ + + if (skb_copy_seq_read(&skbseq, 0, ipth, sizeof(*ipth))) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto done; + } + data = sizeof(*ipth); + + /* Set data past the basic header */ + if (ipth->subtype == IPTFS_SUBTYPE_CC) { + /* Copy the rest of the CC header */ + remaining = sizeof(iptcch) - sizeof(*ipth); + if (skb_copy_seq_read(&skbseq, data, ipth + 1, remaining)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto done; + } + data += remaining; + } else if (ipth->subtype != IPTFS_SUBTYPE_BASIC) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto done; + } + + if (ipth->flags != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto done; + } + + INIT_LIST_HEAD(&sublist); + + /* Fragment handling in following commits */ + data += ntohs(ipth->block_offset); + + /* New packets */ + consumed = __input_process_payload(x, data, &skbseq, &sublist); +done: + skb = skbseq.root_skb; + skb_abort_seq_read(&skbseq); + + if (!consumed) + kfree_skb(skb); + + /* We always have dealt with the input SKB, either we are re-using it, + * or we have freed it. 
Return EINPROGRESS so that xfrm_input stops + * processing it. + */ + return -EINPROGRESS; +} + /* ================================= */ /* IPTFS Sending (ingress) Functions */ /* ================================= */ @@ -1101,6 +1376,7 @@ static const struct xfrm_mode_cbs iptfs_mode_cbs = { .copy_to_user = iptfs_copy_to_user, .sa_len = iptfs_sa_len, .get_inner_mtu = iptfs_get_inner_mtu, + .input = iptfs_input, .output = iptfs_output_collect, .prepare_output = iptfs_prepare_output, }; From 07569476544681816335099929ff3494dfbf6b05 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:08 -0500 Subject: [PATCH 0131/1386] xfrm: iptfs: handle received fragmented inner packets Add support for handling receipt of partial inner packets that have been fragmented across multiple outer IP-TFS tunnel packets. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 480 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 461 insertions(+), 19 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 085964fe3251..4af1f7b5818e 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -24,6 +24,21 @@ #define IPTFS_SUBTYPE_BASIC 0 #define IPTFS_SUBTYPE_CC 1 +/* ----------------------------------------------- */ +/* IP-TFS default SA values (tunnel egress/dir-in) */ +/* ----------------------------------------------- */ + +/** + * define IPTFS_DEFAULT_DROP_TIME_USECS - default drop time + * + * The default IPTFS drop time in microseconds. The drop time is the amount of + * time before a missing out-of-order IPTFS tunnel packet is considered lost. + * See also the reorder window. + * + * Default 1s. 
+ */ +#define IPTFS_DEFAULT_DROP_TIME_USECS 1000000 + /* ------------------------------------------------ */ /* IPTFS default SA values (tunnel ingress/dir-out) */ /* ------------------------------------------------ */ @@ -95,6 +110,13 @@ struct xfrm_iptfs_config { * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet. * @iptfs_timer: output timer. * @payload_mtu: max payload size. + * @drop_lock: lock to protect reorder queue. + * @drop_timer: timer for considering next packet lost. + * @drop_time_ns: timer interval in nanoseconds. + * @ra_newskb: new pkt being reassembled. + * @ra_wantseq: expected next sequence for reassembly. + * @ra_runt: last pkt bytes from very end of last skb. + * @ra_runtlen: size of ra_runt. */ struct xfrm_iptfs_data { struct xfrm_iptfs_config cfg; @@ -108,10 +130,33 @@ struct xfrm_iptfs_data { u64 init_delay_ns; /* nanoseconds */ struct hrtimer iptfs_timer; /* output timer */ u32 payload_mtu; /* max payload size */ + + /* Tunnel egress */ + spinlock_t drop_lock; + struct hrtimer drop_timer; + u64 drop_time_ns; + + /* Tunnel egress reassembly */ + struct sk_buff *ra_newskb; /* new pkt being reassembled */ + u64 ra_wantseq; /* expected next sequence */ + u8 ra_runt[6]; /* last pkt bytes from last skb */ + u8 ra_runtlen; /* count of ra_runt */ }; static u32 __iptfs_get_inner_mtu(struct xfrm_state *x, int outer_mtu); static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me); +static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me); + +/* ================= */ +/* Utility Functions */ +/* ================= */ + +static u64 __esp_seq(struct sk_buff *skb) +{ + u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low); + + return seq | (u64)ntohl(XFRM_SKB_CB(skb)->seq.input.hi) << 32; +} /* ======================= */ /* IPTFS SK_BUFF Functions */ @@ -224,6 +269,63 @@ iptfs_pskb_extract_seq(u32 skblen, struct skb_seq_state *st, u32 off, int len) return skb; } +/** + * iptfs_input_save_runt() - save data in xtfs runt space. 
+ * @xtfs: xtfs state + * @seq: the current sequence + * @buf: packet data + * @len: length of packet data + * + * Save the small (`len`) start of a fragmented packet in `buf` in the xtfs data + * runt space. + */ +static void iptfs_input_save_runt(struct xfrm_iptfs_data *xtfs, u64 seq, + u8 *buf, int len) +{ + memcpy(xtfs->ra_runt, buf, len); + + xtfs->ra_runtlen = len; + xtfs->ra_wantseq = seq + 1; +} + +/** + * __iptfs_iphlen() - return the v4/v6 header length using packet data. + * @data: pointer at octet with version nibble + * + * The version data has been checked to be valid (i.e., either 4 or 6). + * + * Return: the IP header size based on the IP version. + */ +static u32 __iptfs_iphlen(u8 *data) +{ + struct iphdr *iph = (struct iphdr *)data; + + if (iph->version == 0x4) + return sizeof(*iph); + return sizeof(struct ipv6hdr); +} + +/** + * __iptfs_iplen() - return the v4/v6 length using packet data. + * @data: pointer to ip (v4/v6) packet header + * + * Grab the IPv4 or IPv6 length value in the start of the inner packet header + * pointed to by `data`. Assumes data len is enough for the length field only. + * + * The version data has been checked to be valid (i.e., either 4 or 6). + * + * Return: the length value. + */ +static u32 __iptfs_iplen(u8 *data) +{ + struct iphdr *iph = (struct iphdr *)data; + + if (iph->version == 0x4) + return ntohs(iph->tot_len); + return ntohs(((struct ipv6hdr *)iph)->payload_len) + + sizeof(struct ipv6hdr); +} + /** * iptfs_complete_inner_skb() - finish preparing the inner packet for gro recv. 
* @x: xfrm state @@ -273,6 +375,227 @@ static void iptfs_complete_inner_skb(struct xfrm_state *x, struct sk_buff *skb) } } +static void __iptfs_reassem_done(struct xfrm_iptfs_data *xtfs, bool free) +{ + assert_spin_locked(&xtfs->drop_lock); + + /* We don't care if it works locking takes care of things */ + hrtimer_try_to_cancel(&xtfs->drop_timer); + if (free) + kfree_skb(xtfs->ra_newskb); + xtfs->ra_newskb = NULL; +} + +/** + * iptfs_reassem_abort() - In-progress packet is aborted free the state. + * @xtfs: xtfs state + */ +static void iptfs_reassem_abort(struct xfrm_iptfs_data *xtfs) +{ + __iptfs_reassem_done(xtfs, true); +} + +/** + * iptfs_reassem_done() - In-progress packet is complete, clear the state. + * @xtfs: xtfs state + */ +static void iptfs_reassem_done(struct xfrm_iptfs_data *xtfs) +{ + __iptfs_reassem_done(xtfs, false); +} + +/** + * iptfs_reassem_cont() - Continue the reassembly of an inner packets. + * @xtfs: xtfs state + * @seq: sequence of current packet + * @st: seq read stat for current packet + * @skb: current packet + * @data: offset into sequential packet data + * @blkoff: packet blkoff value + * @list: list of skbs to enqueue completed packet on + * + * Process an IPTFS payload that has a non-zero `blkoff` or when we are + * expecting the continuation b/c we have a runt or in-progress packet. + * + * Return: the new data offset to continue processing from. 
+ */ +static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, + struct skb_seq_state *st, struct sk_buff *skb, + u32 data, u32 blkoff, struct list_head *list) +{ + struct sk_buff *newskb = xtfs->ra_newskb; + u32 remaining = skb->len - data; + u32 runtlen = xtfs->ra_runtlen; + u32 copylen, fraglen, ipremain, iphlen, iphremain, rrem; + + /* Handle packet fragment we aren't expecting */ + if (!runtlen && !xtfs->ra_newskb) + return data + min(blkoff, remaining); + + /* Important to remember that input to this function is an ordered + * packet stream (unless the user disabled the reorder window). Thus if + * we are waiting for, and expecting the next packet so we can continue + * assembly, a newer sequence number indicates older ones are not coming + * (or if they do should be ignored). Technically we can receive older + * ones when the reorder window is disabled; however, the user should + * have disabled fragmentation in this case, and regardless we don't + * deal with it. + * + * blkoff could be zero if the stream is messed up (or it's an all pad + * insertion) be careful to handle that case in each of the below + */ + + /* Too old case: This can happen when the reorder window is disabled so + * ordering isn't actually guaranteed. + */ + if (seq < xtfs->ra_wantseq) + return data + remaining; + + /* Too new case: We missed what we wanted cleanup. */ + if (seq > xtfs->ra_wantseq) { + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + if (blkoff == 0) { + if ((*skb->data & 0xF0) != 0) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + /* Handle all pad case, advance expected sequence number. 
+ * (RFC 9347 S2.2.3) + */ + xtfs->ra_wantseq++; + /* will end parsing */ + return data + remaining; + } + + if (runtlen) { + /* Regardless of what happens we're done with the runt */ + xtfs->ra_runtlen = 0; + + /* The start of this inner packet was at the very end of the last + * iptfs payload which didn't include enough for the ip header + * length field. We must have *at least* that now. + */ + rrem = sizeof(xtfs->ra_runt) - runtlen; + if (remaining < rrem || blkoff < rrem) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* fill in the runt data */ + if (skb_copy_seq_read(st, data, &xtfs->ra_runt[runtlen], + rrem)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + + /* We have enough data to get the ip length value now, + * allocate an in progress skb + */ + ipremain = __iptfs_iplen(xtfs->ra_runt); + if (ipremain < sizeof(xtfs->ra_runt)) { + /* length has to be at least runtsize large */ + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* For the runt case we don't attempt sharing currently. NOTE: + * Currently, this IPTFS implementation will not create runts. + */ + + newskb = iptfs_alloc_skb(skb, ipremain, false); + if (!newskb) { + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINERROR); + goto abandon; + } + xtfs->ra_newskb = newskb; + + /* Copy the runt data into the buffer, but leave data + * pointers the same as normal non-runt case. The extra `rrem` + * recopied bytes are basically cacheline free. Allows using + * same logic below to complete. 
+ */ + memcpy(skb_put(newskb, runtlen), xtfs->ra_runt, + sizeof(xtfs->ra_runt)); + } + + /* Continue reassembling the packet */ + ipremain = __iptfs_iplen(newskb->data); + iphlen = __iptfs_iphlen(newskb->data); + + ipremain -= newskb->len; + if (blkoff < ipremain) { + /* Corrupt data, we don't have enough to complete the packet */ + XFRM_INC_STATS(xs_net(xtfs->x), LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + + /* We want the IP header in linear space */ + if (newskb->len < iphlen) { + iphremain = iphlen - newskb->len; + if (blkoff < iphremain) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINIPTFSERROR); + goto abandon; + } + fraglen = min(blkoff, remaining); + copylen = min(fraglen, iphremain); + if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), + copylen)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + /* this is a silly condition that might occur anyway */ + if (copylen < iphremain) { + xtfs->ra_wantseq++; + return data + fraglen; + } + /* update data and things derived from it */ + data += copylen; + blkoff -= copylen; + remaining -= copylen; + ipremain -= copylen; + } + + fraglen = min(blkoff, remaining); + copylen = min(fraglen, ipremain); + + /* copy fragment data into newskb */ + if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { + XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } + + if (copylen < ipremain) { + xtfs->ra_wantseq++; + } else { + /* We are done with packet reassembly! 
*/ + iptfs_reassem_done(xtfs); + iptfs_complete_inner_skb(xtfs->x, newskb); + list_add_tail(&newskb->list, list); + } + + /* will continue on to new data block or end */ + return data + fraglen; + +abandon: + if (xtfs->ra_newskb) { + iptfs_reassem_abort(xtfs); + } else { + xtfs->ra_runtlen = 0; + xtfs->ra_wantseq = 0; + } + /* skip past fragment, maybe to end */ + return data + min(blkoff, remaining); +} + static bool __input_process_payload(struct xfrm_state *x, u32 data, struct skb_seq_state *skbseq, struct list_head *sublist) @@ -280,14 +603,20 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, u8 hbytes[sizeof(struct ipv6hdr)]; struct sk_buff *first_skb, *next, *skb; const unsigned char *old_mac; + struct xfrm_iptfs_data *xtfs; struct iphdr *iph; struct net *net; u32 remaining, iplen, iphlen, tail; + u32 capturelen; + u64 seq; + xtfs = x->mode_data; net = xs_net(x); skb = skbseq->root_skb; first_skb = NULL; + seq = __esp_seq(skb); + /* Save the old mac header if set */ old_mac = skb_mac_header_was_set(skb) ? 
skb_mac_header(skb) : NULL; @@ -312,8 +641,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, iph = (struct iphdr *)hbytes; if (iph->version == 0x4) { /* must have at least tot_len field present */ - if (remaining < 4) + if (remaining < 4) { + /* save the bytes we have, advance data and exit */ + iptfs_input_save_runt(xtfs, seq, hbytes, + remaining); + data += remaining; break; + } iplen = be16_to_cpu(iph->tot_len); iphlen = iph->ihl << 2; @@ -321,8 +655,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, XFRM_MODE_SKB_CB(skbseq->root_skb)->tos = iph->tos; } else if (iph->version == 0x6) { /* must have at least payload_len field present */ - if (remaining < 6) + if (remaining < 6) { + /* save the bytes we have, advance data and exit */ + iptfs_input_save_runt(xtfs, seq, hbytes, + remaining); + data += remaining; break; + } iplen = be16_to_cpu(((struct ipv6hdr *)hbytes)->payload_len); iplen += sizeof(struct ipv6hdr); @@ -332,6 +671,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, ipv6_get_dsfield((struct ipv6hdr *)iph); } else if (iph->version == 0x0) { /* pad */ + data = tail; break; } else { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); @@ -351,14 +691,11 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, if (!first_skb) first_skb = skb; - /* Fragment handling in following commits */ - if (iplen > remaining) - break; - - skb = iptfs_pskb_extract_seq(iplen, skbseq, data, iplen); + capturelen = min(iplen, remaining); + skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen); if (!skb) { /* skip to next packet or done */ - data += iplen; + data += capturelen; continue; } @@ -370,18 +707,40 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, eth_hdr(skb)->h_proto = skb->protocol; } - data += iplen; + data += capturelen; + + if (skb->len < iplen) { + /* Start reassembly */ + spin_lock(&xtfs->drop_lock); + + xtfs->ra_newskb = skb; + xtfs->ra_wantseq = 
seq + 1; + if (!hrtimer_is_queued(&xtfs->drop_timer)) { + /* softirq blocked lest the timer fire and interrupt us */ + hrtimer_start(&xtfs->drop_timer, + xtfs->drop_time_ns, + IPTFS_HRTIMER_MODE); + } + + spin_unlock(&xtfs->drop_lock); + + break; + } + iptfs_complete_inner_skb(x, skb); list_add_tail(&skb->list, sublist); } + if (data != tail) + /* this should not happen from the above code */ + XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR); + /* Send the packets! */ list_for_each_entry_safe(skb, next, sublist, list) { skb_list_del_init(skb); if (xfrm_input(skb, 0, 0, -2)) kfree_skb(skb); } - done: return false; } @@ -400,13 +759,18 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) struct ip_iptfs_cc_hdr iptcch; struct skb_seq_state skbseq; struct list_head sublist; /* rename this it's just a list */ + struct xfrm_iptfs_data *xtfs; struct ip_iptfs_hdr *ipth; struct net *net; - u32 remaining, data; + u32 blkoff, data, remaining; bool consumed = false; + u64 seq; + xtfs = x->mode_data; net = xs_net(x); + seq = __esp_seq(skb); + /* Large enough to hold both types of header */ ipth = (struct ip_iptfs_hdr *)&iptcch; @@ -441,17 +805,30 @@ static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) INIT_LIST_HEAD(&sublist); - /* Fragment handling in following commits */ - data += ntohs(ipth->block_offset); + /* Handle fragment at start of payload, and/or waiting reassembly. 
*/ + + blkoff = ntohs(ipth->block_offset); + /* check before locking i.e., maybe */ + if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) { + spin_lock(&xtfs->drop_lock); + + /* check again after lock */ + if (blkoff || xtfs->ra_runtlen || xtfs->ra_newskb) { + data = iptfs_reassem_cont(xtfs, seq, &skbseq, skb, data, + blkoff, &sublist); + } + + spin_unlock(&xtfs->drop_lock); + } /* New packets */ consumed = __input_process_payload(x, data, &skbseq, &sublist); done: - skb = skbseq.root_skb; - skb_abort_seq_read(&skbseq); - - if (!consumed) + if (!consumed) { + skb = skbseq.root_skb; + skb_abort_seq_read(&skbseq); kfree_skb(skb); + } /* We always have dealt with the input SKB, either we are re-using it, * or we have freed it. Return EINPROGRESS so that xfrm_input stops @@ -460,6 +837,47 @@ done: return -EINPROGRESS; } +/** + * iptfs_drop_timer() - Handle drop timer expiry. + * @me: the timer + * + * This is similar to our input function. + * + * The drop timer is set when we start an in progress reassembly, and also when + * we save a future packet in the window saved array. + * + * NOTE packets in the save window are always newer WRT drop times as + * they get further in the future. i.e. for: + * + * if slots (S0, S1, ... Sn) and `Dn` is the drop time for slot `Sn`, + * then D(n-1) <= D(n). + * + * So, regardless of why the timer is firing we can always discard any inprogress + * fragment; either it's the reassembly timer, or slot 0 is going to be + * dropped as S0 must have the most recent drop time, and slot 0 holds the + * continuation fragment of the in progress packet. + * + * Returns HRTIMER_NORESTART. 
+ */ +static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me) +{ + struct xfrm_iptfs_data *xtfs; + struct sk_buff *skb; + + xtfs = container_of(me, typeof(*xtfs), drop_timer); + + /* Drop any in progress packet */ + spin_lock(&xtfs->drop_lock); + skb = xtfs->ra_newskb; + xtfs->ra_newskb = NULL; + spin_unlock(&xtfs->drop_lock); + + if (skb) + kfree_skb_reason(skb, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); + + return HRTIMER_NORESTART; +} + /* ================================= */ /* IPTFS Sending (ingress) Functions */ /* ================================= */ @@ -1214,6 +1632,7 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, xc = &xtfs->cfg; xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE; + xtfs->drop_time_ns = IPTFS_DEFAULT_DROP_TIME_USECS * NSECS_IN_USEC; xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_DONT_FRAG]) @@ -1232,6 +1651,10 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, } if (attrs[XFRMA_IPTFS_MAX_QSIZE]) xc->max_queue_size = nla_get_u32(attrs[XFRMA_IPTFS_MAX_QSIZE]); + if (attrs[XFRMA_IPTFS_DROP_TIME]) + xtfs->drop_time_ns = + (u64)nla_get_u32(attrs[XFRMA_IPTFS_DROP_TIME]) * + NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_INIT_DELAY]) xtfs->init_delay_ns = (u64)nla_get_u32(attrs[XFRMA_IPTFS_INIT_DELAY]) * NSECS_IN_USEC; @@ -1249,7 +1672,9 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) struct xfrm_iptfs_config *xc = &xtfs->cfg; unsigned int l = 0; - if (x->dir == XFRM_SA_DIR_OUT) { + if (x->dir == XFRM_SA_DIR_IN) { + l += nla_total_size(sizeof(u32)); /* drop time usec */ + } else { if (xc->dont_frag) l += nla_total_size(0); /* dont-frag flag */ l += nla_total_size(sizeof(u32)); /* init delay usec */ @@ -1267,7 +1692,11 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) int ret = 0; u64 q; - if (x->dir == XFRM_SA_DIR_OUT) { + if (x->dir == XFRM_SA_DIR_IN) { + q = xtfs->drop_time_ns; + do_div(q, NSECS_IN_USEC); + ret = 
nla_put_u32(skb, XFRMA_IPTFS_DROP_TIME, q); + } else { if (xc->dont_frag) { ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG); if (ret) @@ -1297,6 +1726,10 @@ static void __iptfs_init_state(struct xfrm_state *x, hrtimer_init(&xtfs->iptfs_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); xtfs->iptfs_timer.function = iptfs_delay_timer; + spin_lock_init(&xtfs->drop_lock); + hrtimer_init(&xtfs->drop_timer, CLOCK_MONOTONIC, IPTFS_HRTIMER_MODE); + xtfs->drop_timer.function = iptfs_drop_timer; + /* Modify type (esp) adjustment values */ if (x->props.family == AF_INET) @@ -1323,6 +1756,8 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) x->mode_data = xtfs; xtfs->x = x; + xtfs->ra_newskb = NULL; + return 0; } @@ -1362,6 +1797,13 @@ static void iptfs_destroy_state(struct xfrm_state *x) while ((skb = __skb_dequeue(&list))) kfree_skb(skb); + spin_lock_bh(&xtfs->drop_lock); + hrtimer_cancel(&xtfs->drop_timer); + spin_unlock_bh(&xtfs->drop_lock); + + if (xtfs->ra_newskb) + kfree_skb(xtfs->ra_newskb); + kfree_sensitive(xtfs); module_put(x->mode_cbs->owner); From 3f3339885fb343b7b42d7c34717108ce07da24ae Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:09 -0500 Subject: [PATCH 0132/1386] xfrm: iptfs: add reusing received skb for the tunnel egress packet Add an optimization of re-using the tunnel outer skb re-transmission of the inner packet to avoid skb allocation and copy. 
Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 125 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 109 insertions(+), 16 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 4af1f7b5818e..8538fb02ae8a 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -601,12 +601,12 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, struct list_head *sublist) { u8 hbytes[sizeof(struct ipv6hdr)]; - struct sk_buff *first_skb, *next, *skb; + struct sk_buff *defer, *first_skb, *next, *skb; const unsigned char *old_mac; struct xfrm_iptfs_data *xtfs; struct iphdr *iph; struct net *net; - u32 remaining, iplen, iphlen, tail; + u32 first_iplen, iphlen, iplen, remaining, tail; u32 capturelen; u64 seq; @@ -614,6 +614,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, net = xs_net(x); skb = skbseq->root_skb; first_skb = NULL; + defer = NULL; seq = __esp_seq(skb); @@ -688,23 +689,92 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, skb_prepare_seq_read(save, data, tail, skbseq); } - if (!first_skb) + if (first_skb) { + skb = NULL; + } else { first_skb = skb; + first_iplen = iplen; - capturelen = min(iplen, remaining); - skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen); - if (!skb) { - /* skip to next packet or done */ - data += capturelen; - continue; + /* We are going to skip over `data` bytes to reach the + * start of the IP header of `iphlen` len for `iplen` + * inner packet. + */ + + if (skb_has_frag_list(skb)) { + defer = skb; + skb = NULL; + } else if (data + iphlen <= skb_headlen(skb) && + /* make sure our header is 32-bit aligned? */ + /* ((uintptr_t)(skb->data + data) & 0x3) == 0 && */ + skb_tailroom(skb) + tail - data >= iplen) { + /* Reuse the received skb. 
+ * + * We have enough headlen to pull past any + * initial fragment data, leaving at least the + * IP header in the linear buffer space. + * + * For linear buffer space we only require that + * linear buffer space is large enough to + * eventually hold the entire reassembled + * packet (by including tailroom in the check). + * + * For non-linear tailroom is 0 and so we only + * re-use if the entire packet is present + * already. + * + * NOTE: there are many more options for + * sharing, KISS for now. Also, this can produce + * skb's with the IP header unaligned to 32 + * bits. If that ends up being a problem then a + * check should be added to the conditional + * above that the header lies on a 32-bit + * boundary as well. + */ + skb_pull(skb, data); + + /* our range just changed */ + data = 0; + tail = skb->len; + remaining = skb->len; + + skb->protocol = protocol; + skb_mac_header_rebuild(skb); + if (skb->mac_len) + eth_hdr(skb)->h_proto = skb->protocol; + + /* all pointers could be changed now reset walk */ + skb_abort_seq_read(skbseq); + skb_prepare_seq_read(skb, data, tail, skbseq); + } else { + /* We couldn't reuse the input skb so allocate a + * new one. + */ + defer = skb; + skb = NULL; + } + + /* Don't trim `first_skb` until the end as we are + * walking that data now. 
+ */ } - skb->protocol = protocol; - if (old_mac) { - /* rebuild the mac header */ - skb_set_mac_header(skb, -first_skb->mac_len); - memcpy(skb_mac_header(skb), old_mac, first_skb->mac_len); - eth_hdr(skb)->h_proto = skb->protocol; + capturelen = min(iplen, remaining); + if (!skb) { + skb = iptfs_pskb_extract_seq(iplen, skbseq, data, + capturelen); + if (!skb) { + /* skip to next packet or done */ + data += capturelen; + continue; + } + + skb->protocol = protocol; + if (old_mac) { + /* rebuild the mac header */ + skb_set_mac_header(skb, -first_skb->mac_len); + memcpy(skb_mac_header(skb), old_mac, first_skb->mac_len); + eth_hdr(skb)->h_proto = skb->protocol; + } } data += capturelen; @@ -735,6 +805,16 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, /* this should not happen from the above code */ XFRM_INC_STATS(net, LINUX_MIB_XFRMINIPTFSERROR); + if (first_skb && first_iplen && !defer && first_skb != xtfs->ra_newskb) { + /* first_skb is queued b/c !defer and not partial */ + if (pskb_trim(first_skb, first_iplen)) { + /* error trimming */ + list_del(&first_skb->list); + defer = first_skb; + } + first_skb->ip_summed = CHECKSUM_NONE; + } + /* Send the packets! */ list_for_each_entry_safe(skb, next, sublist, list) { skb_list_del_init(skb); @@ -742,7 +822,20 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, kfree_skb(skb); } done: - return false; + skb = skbseq->root_skb; + skb_abort_seq_read(skbseq); + + if (defer) { + consume_skb(defer); + } else if (!first_skb) { + /* skb is the original passed in skb, but we didn't get far + * enough to process it as the first_skb, if we had it would + * either be save in ra_newskb, trimmed and sent on as an skb or + * placed in defer to be freed. 
+ */ + kfree_skb(skb); + } + return true; } /** From 5f2b6a9095743a6bf1f34c43c4fe78fa8bdf5ad7 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:10 -0500 Subject: [PATCH 0133/1386] xfrm: iptfs: add skb-fragment sharing code Avoid copying the inner packet data by sharing the skb data fragments from the output packet skb into new inner packet skb. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 296 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 290 insertions(+), 6 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 8538fb02ae8a..1258158e57ba 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -81,6 +81,9 @@ #define XFRM_IPTFS_MIN_L3HEADROOM 128 #define XFRM_IPTFS_MIN_L2HEADROOM (L1_CACHE_BYTES > 64 ? 64 : 64 + 16) +/* Min to try to share outer iptfs skb data vs copying into new skb */ +#define IPTFS_PKT_SHARE_MIN 129 + #define NSECS_IN_USEC 1000 #define IPTFS_HRTIMER_MODE HRTIMER_MODE_REL_SOFT @@ -234,10 +237,254 @@ static void iptfs_skb_head_to_frag(const struct sk_buff *skb, skb_frag_t *frag) skb_frag_fill_page_desc(frag, page, skb->data - addr, skb_headlen(skb)); } +/** + * struct iptfs_skb_frag_walk - use to track a walk through fragments + * @fragi: current fragment index + * @past: length of data in fragments before @fragi + * @total: length of data in all fragments + * @nr_frags: number of fragments present in array + * @initial_offset: the value passed in to skb_prepare_frag_walk() + * @frags: the page fragments inc. room for head page + * @pp_recycle: copy of skb->pp_recycle + */ +struct iptfs_skb_frag_walk { + u32 fragi; + u32 past; + u32 total; + u32 nr_frags; + u32 initial_offset; + skb_frag_t frags[MAX_SKB_FRAGS + 1]; + bool pp_recycle; +}; + +/** + * iptfs_skb_prepare_frag_walk() - initialize a frag walk over an skb. + * @skb: the skb to walk. + * @initial_offset: start the walk @initial_offset into the skb. 
+ * @walk: the walk to initialize + * + * Future calls to skb_add_frags() will expect the @offset value to be at + * least @initial_offset large. + */ +static void iptfs_skb_prepare_frag_walk(struct sk_buff *skb, u32 initial_offset, + struct iptfs_skb_frag_walk *walk) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + skb_frag_t *frag, *from; + u32 i; + + walk->initial_offset = initial_offset; + walk->fragi = 0; + walk->past = 0; + walk->total = 0; + walk->nr_frags = 0; + walk->pp_recycle = skb->pp_recycle; + + if (skb->head_frag) { + if (initial_offset >= skb_headlen(skb)) { + initial_offset -= skb_headlen(skb); + } else { + frag = &walk->frags[walk->nr_frags++]; + iptfs_skb_head_to_frag(skb, frag); + frag->offset += initial_offset; + frag->len -= initial_offset; + walk->total += frag->len; + initial_offset = 0; + } + } else { + initial_offset -= skb_headlen(skb); + } + + for (i = 0; i < shinfo->nr_frags; i++) { + from = &shinfo->frags[i]; + if (initial_offset >= from->len) { + initial_offset -= from->len; + continue; + } + frag = &walk->frags[walk->nr_frags++]; + *frag = *from; + if (initial_offset) { + frag->offset += initial_offset; + frag->len -= initial_offset; + initial_offset = 0; + } + walk->total += frag->len; + } +} + +static u32 iptfs_skb_reset_frag_walk(struct iptfs_skb_frag_walk *walk, + u32 offset) +{ + /* Adjust offset to refer to internal walk values */ + offset -= walk->initial_offset; + + /* Get to the correct fragment for offset */ + while (offset < walk->past) { + walk->past -= walk->frags[--walk->fragi].len; + if (offset >= walk->past) + break; + } + while (offset >= walk->past + walk->frags[walk->fragi].len) + walk->past += walk->frags[walk->fragi++].len; + + /* offset now relative to this current frag */ + offset -= walk->past; + return offset; +} + +/** + * iptfs_skb_can_add_frags() - check if ok to add frags from walk to skb + * @skb: skb to check for adding frags to + * @walk: the walk that will be used as source for frags. 
+ * @offset: offset from beginning of original skb to start from. + * @len: amount of data to add frag references to in @skb. + * + * Return: true if ok to add frags. + */ +static bool iptfs_skb_can_add_frags(const struct sk_buff *skb, + struct iptfs_skb_frag_walk *walk, + u32 offset, u32 len) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + u32 fragi, nr_frags, fraglen; + + if (skb_has_frag_list(skb) || skb->pp_recycle != walk->pp_recycle) + return false; + + /* Make offset relative to current frag after setting that */ + offset = iptfs_skb_reset_frag_walk(walk, offset); + + /* Verify we have array space for the fragments we need to add */ + fragi = walk->fragi; + nr_frags = shinfo->nr_frags; + while (len && fragi < walk->nr_frags) { + skb_frag_t *frag = &walk->frags[fragi]; + + fraglen = frag->len; + if (offset) { + fraglen -= offset; + offset = 0; + } + if (++nr_frags > MAX_SKB_FRAGS) + return false; + if (len <= fraglen) + return true; + len -= fraglen; + fragi++; + } + /* We may not copy all @len but what we have will fit. */ + return true; +} + +/** + * iptfs_skb_add_frags() - add a range of fragment references into an skb + * @skb: skb to add references into + * @walk: the walk to add referenced fragments from. + * @offset: offset from beginning of original skb to start from. + * @len: amount of data to add frag references to in @skb. + * + * iptfs_skb_can_add_frags() should be called before this function to verify + * that the destination @skb is compatible with the walk and has space in the + * array for the to be added frag references. + * + * Return: The number of bytes not added to @skb b/c we reached the end of the + * walk before adding all of @len. 
+ */ +static int iptfs_skb_add_frags(struct sk_buff *skb, + struct iptfs_skb_frag_walk *walk, u32 offset, + u32 len) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + u32 fraglen; + + if (!walk->nr_frags || offset >= walk->total + walk->initial_offset) + return len; + + /* make offset relative to current frag after setting that */ + offset = iptfs_skb_reset_frag_walk(walk, offset); + + while (len && walk->fragi < walk->nr_frags) { + skb_frag_t *frag = &walk->frags[walk->fragi]; + skb_frag_t *tofrag = &shinfo->frags[shinfo->nr_frags]; + + *tofrag = *frag; + if (offset) { + tofrag->offset += offset; + tofrag->len -= offset; + offset = 0; + } + __skb_frag_ref(tofrag); + shinfo->nr_frags++; + + /* see if we are done */ + fraglen = tofrag->len; + if (len < fraglen) { + tofrag->len = len; + skb->len += len; + skb->data_len += len; + return 0; + } + /* advance to next source fragment */ + len -= fraglen; /* careful, use dst bv_len */ + skb->len += fraglen; /* careful, " " " */ + skb->data_len += fraglen; /* careful, " " " */ + walk->past += frag->len; /* careful, use src bv_len */ + walk->fragi++; + } + return len; +} + /* ================================== */ /* IPTFS Receiving (egress) Functions */ /* ================================== */ +/** + * iptfs_pskb_add_frags() - Create and add frags into a new sk_buff. + * @tpl: template to create new skb from. + * @walk: The source for fragments to add. + * @off: The offset into @walk to add frags from, also used with @st and + * @copy_len. + * @len: The length of data to add covering frags from @walk into @skb. + * This must be <= @skblen. + * @st: The sequence state to copy from into the new head skb. + * @copy_len: Copy @copy_len bytes from @st at offset @off into the new skb + * linear space. + * + * Create a new sk_buff `skb` using the template @tpl. 
Copy @copy_len bytes from + * @st into the new skb linear space, and then add shared fragments from the + * frag walk for the remaining @len of data (i.e., @len - @copy_len bytes). + * + * Return: The newly allocated sk_buff `skb` or NULL if an error occurs. + */ +static struct sk_buff * +iptfs_pskb_add_frags(struct sk_buff *tpl, struct iptfs_skb_frag_walk *walk, + u32 off, u32 len, struct skb_seq_state *st, u32 copy_len) +{ + struct sk_buff *skb; + + skb = iptfs_alloc_skb(tpl, copy_len, false); + if (!skb) + return NULL; + + /* this should not normally be happening */ + if (!iptfs_skb_can_add_frags(skb, walk, off + copy_len, + len - copy_len)) { + kfree_skb(skb); + return NULL; + } + + if (copy_len && + skb_copy_seq_read(st, off, skb_put(skb, copy_len), copy_len)) { + XFRM_INC_STATS(dev_net(st->root_skb->dev), + LINUX_MIB_XFRMINERROR); + kfree_skb(skb); + return NULL; + } + + iptfs_skb_add_frags(skb, walk, off + copy_len, len - copy_len); + return skb; +} + /** * iptfs_pskb_extract_seq() - Create and load data into a new sk_buff. * @skblen: the total data size for `skb`. @@ -423,6 +670,8 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, struct skb_seq_state *st, struct sk_buff *skb, u32 data, u32 blkoff, struct list_head *list) { + struct iptfs_skb_frag_walk _fragwalk; + struct iptfs_skb_frag_walk *fragwalk = NULL; struct sk_buff *newskb = xtfs->ra_newskb; u32 remaining = skb->len - data; u32 runtlen = xtfs->ra_runtlen; @@ -567,10 +816,26 @@ static u32 iptfs_reassem_cont(struct xfrm_iptfs_data *xtfs, u64 seq, fraglen = min(blkoff, remaining); copylen = min(fraglen, ipremain); - /* copy fragment data into newskb */ - if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), copylen)) { - XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINBUFFERERROR); - goto abandon; + /* If we may have the opportunity to share prepare a fragwalk. 
*/ + if (!skb_has_frag_list(skb) && !skb_has_frag_list(newskb) && + (skb->head_frag || skb->len == skb->data_len) && + skb->pp_recycle == newskb->pp_recycle) { + fragwalk = &_fragwalk; + iptfs_skb_prepare_frag_walk(skb, data, fragwalk); + } + + /* Try share then copy. */ + if (fragwalk && + iptfs_skb_can_add_frags(newskb, fragwalk, data, copylen)) { + iptfs_skb_add_frags(newskb, fragwalk, data, copylen); + } else { + /* copy fragment data into newskb */ + if (skb_copy_seq_read(st, data, skb_put(newskb, copylen), + copylen)) { + XFRM_INC_STATS(xs_net(xtfs->x), + LINUX_MIB_XFRMINBUFFERERROR); + goto abandon; + } } if (copylen < ipremain) { @@ -601,6 +866,8 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, struct list_head *sublist) { u8 hbytes[sizeof(struct ipv6hdr)]; + struct iptfs_skb_frag_walk _fragwalk; + struct iptfs_skb_frag_walk *fragwalk = NULL; struct sk_buff *defer, *first_skb, *next, *skb; const unsigned char *old_mac; struct xfrm_iptfs_data *xtfs; @@ -694,6 +961,7 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, } else { first_skb = skb; first_iplen = iplen; + fragwalk = NULL; /* We are going to skip over `data` bytes to reach the * start of the IP header of `iphlen` len for `iplen` @@ -745,6 +1013,13 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, /* all pointers could be changed now reset walk */ skb_abort_seq_read(skbseq); skb_prepare_seq_read(skb, data, tail, skbseq); + } else if (skb->head_frag && + /* We have the IP header right now */ + remaining >= iphlen) { + fragwalk = &_fragwalk; + iptfs_skb_prepare_frag_walk(skb, data, fragwalk); + defer = skb; + skb = NULL; } else { /* We couldn't reuse the input skb so allocate a * new one. 
@@ -760,8 +1035,17 @@ static bool __input_process_payload(struct xfrm_state *x, u32 data, capturelen = min(iplen, remaining); if (!skb) { - skb = iptfs_pskb_extract_seq(iplen, skbseq, data, - capturelen); + if (!fragwalk || + /* Large enough to be worth sharing */ + iplen < IPTFS_PKT_SHARE_MIN || + /* Have IP header + some data to share. */ + capturelen <= iphlen || + /* Try creating skb and adding frags */ + !(skb = iptfs_pskb_add_frags(first_skb, fragwalk, + data, capturelen, + skbseq, iphlen))) { + skb = iptfs_pskb_extract_seq(iplen, skbseq, data, capturelen); + } if (!skb) { /* skip to next packet or done */ data += capturelen; From 6be02e3e4f376fea468846c8562655ca5ee18204 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:11 -0500 Subject: [PATCH 0134/1386] xfrm: iptfs: handle reordering of received packets Handle the receipt of the outer tunnel packets out-of-order. Pointers to the out-of-order packets are saved in a window (array) awaiting needed prior packets. When the required prior packets are received the now in-order packets are then passed on to the regular packet receive code. A timer is used to consider missing earlier packet as lost so the algorithm will advance. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_iptfs.c | 497 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 484 insertions(+), 13 deletions(-) diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 1258158e57ba..3ca7d2a04ea6 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -39,6 +39,17 @@ */ #define IPTFS_DEFAULT_DROP_TIME_USECS 1000000 +/** + * define IPTFS_DEFAULT_REORDER_WINDOW - default reorder window size + * + * The default IPTFS reorder window size. The reorder window size dictates the + * maximum number of IPTFS tunnel packets in a sequence that may arrive out of + * order. + * + * Default 3. 
(tcp folks suggested) + */ +#define IPTFS_DEFAULT_REORDER_WINDOW 3 + /* ------------------------------------------------ */ /* IPTFS default SA values (tunnel ingress/dir-out) */ /* ------------------------------------------------ */ @@ -95,14 +106,22 @@ * @max_queue_size: The maximum number of octets allowed to be queued to be sent * over the IPTFS SA. The queue size is measured as the size of all the * packets enqueued. + * @reorder_win_size: the number of slots in the reorder window, thus the number of + * packets that may arrive out of order. * @dont_frag: true to inhibit fragmenting across IPTFS outer packets. */ struct xfrm_iptfs_config { u32 pkt_size; /* outer_packet_size or 0 */ u32 max_queue_size; /* octets */ + u16 reorder_win_size; u8 dont_frag : 1; }; +struct skb_wseq { + struct sk_buff *skb; + u64 drop_time; +}; + /** * struct xfrm_iptfs_data - mode specific xfrm state. * @cfg: IPTFS tunnel config. @@ -113,6 +132,10 @@ struct xfrm_iptfs_config { * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet. * @iptfs_timer: output timer. * @payload_mtu: max payload size. + * @w_seq_set: true after first seq received. + * @w_wantseq: waiting for this seq number as next to process (in order). + * @w_saved: the saved buf array (reorder window). + * @w_savedlen: the saved len (not size). * @drop_lock: lock to protect reorder queue. * @drop_timer: timer for considering next packet lost. * @drop_time_ns: timer intervan in nanoseconds.
@@ -134,12 +157,16 @@ struct xfrm_iptfs_data { struct hrtimer iptfs_timer; /* output timer */ u32 payload_mtu; /* max payload size */ - /* Tunnel egress */ + /* Tunnel input reordering */ + bool w_seq_set; /* true after first seq received */ + u64 w_wantseq; /* expected next sequence */ + struct skb_wseq *w_saved; /* the saved buf array */ + u32 w_savedlen; /* the saved len (not size) */ spinlock_t drop_lock; struct hrtimer drop_timer; u64 drop_time_ns; - /* Tunnel egress reassembly */ + /* Tunnel input reassembly */ struct sk_buff *ra_newskb; /* new pkt being reassembled */ u64 ra_wantseq; /* expected next sequence */ u8 ra_runt[6]; /* last pkt bytes from last skb */ @@ -1123,15 +1150,13 @@ done: } /** - * iptfs_input() - handle receipt of iptfs payload + * iptfs_input_ordered() - handle next in order IPTFS payload. * @x: xfrm state - * @skb: the packet + * @skb: current packet * * Process the IPTFS payload in `skb` and consume it afterwards. - * - * Returns 0. */ -static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) +static void iptfs_input_ordered(struct xfrm_state *x, struct sk_buff *skb) { struct ip_iptfs_cc_hdr iptcch; struct skb_seq_state skbseq; @@ -1206,12 +1231,355 @@ done: skb_abort_seq_read(&skbseq); kfree_skb(skb); } +} - /* We always have dealt with the input SKB, either we are re-using it, - * or we have freed it. Return EINPROGRESS so that xfrm_input stops - * processing it. 
+/* ------------------------------- */ +/* Input (Egress) Re-ordering Code */ +/* ------------------------------- */ + +static void __vec_shift(struct xfrm_iptfs_data *xtfs, u32 shift) +{ + u32 savedlen = xtfs->w_savedlen; + + if (shift > savedlen) + shift = savedlen; + if (shift != savedlen) + memcpy(xtfs->w_saved, xtfs->w_saved + shift, + (savedlen - shift) * sizeof(*xtfs->w_saved)); + memset(xtfs->w_saved + savedlen - shift, 0, + shift * sizeof(*xtfs->w_saved)); + xtfs->w_savedlen -= shift; +} + +static void __reorder_past(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb, + struct list_head *freelist) +{ + list_add_tail(&inskb->list, freelist); +} + +static u32 __reorder_drop(struct xfrm_iptfs_data *xtfs, struct list_head *list) + +{ + struct skb_wseq *s, *se; + const u32 savedlen = xtfs->w_savedlen; + time64_t now = ktime_get_raw_fast_ns(); + u32 count = 0; + u32 scount = 0; + + if (xtfs->w_saved[0].drop_time > now) + goto set_timer; + + ++xtfs->w_wantseq; + + /* Keep flushing packets until we reach a drop time greater than now. */ + s = xtfs->w_saved; + se = s + savedlen; + do { + /* Walking past empty slots until we reach a packet */ + for (; s < se && !s->skb; s++) { + if (s->drop_time > now) + goto outerdone; + } + /* Sending packets until we hit another empty slot. */ + for (; s < se && s->skb; scount++, s++) + list_add_tail(&s->skb->list, list); + } while (s < se); +outerdone: + + count = s - xtfs->w_saved; + if (count) { + xtfs->w_wantseq += count; + + /* Shift handled slots plus final empty slot into slot 0. */ + __vec_shift(xtfs, count); + } + + if (xtfs->w_savedlen) { +set_timer: + /* Drifting is OK */ + hrtimer_start(&xtfs->drop_timer, + xtfs->w_saved[0].drop_time - now, + IPTFS_HRTIMER_MODE); + } + return scount; +} + +static void __reorder_this(struct xfrm_iptfs_data *xtfs, struct sk_buff *inskb, + struct list_head *list) +{ + struct skb_wseq *s, *se; + const u32 savedlen = xtfs->w_savedlen; + u32 count = 0; + + /* Got what we wanted. 
*/ + list_add_tail(&inskb->list, list); + ++xtfs->w_wantseq; + if (!savedlen) + return; + + /* Flush remaining consecutive packets. */ + + /* Keep sending until we hit another missed pkt. */ + for (s = xtfs->w_saved, se = s + savedlen; s < se && s->skb; s++) + list_add_tail(&s->skb->list, list); + count = s - xtfs->w_saved; + if (count) + xtfs->w_wantseq += count; + + /* Shift handled slots plus final empty slot into slot 0. */ + __vec_shift(xtfs, count + 1); +} + +/* Set the slot's drop time and all the empty slots below it until reaching a + * filled slot which will already be set. + */ +static void iptfs_set_window_drop_times(struct xfrm_iptfs_data *xtfs, int index) +{ + const u32 savedlen = xtfs->w_savedlen; + struct skb_wseq *s = xtfs->w_saved; + time64_t drop_time; + + assert_spin_locked(&xtfs->drop_lock); + + if (savedlen > index + 1) { + /* we are below another, our drop time and the timer are already set */ + return; + } + /* we are the most future so get a new drop time. */ + drop_time = ktime_get_raw_fast_ns(); + drop_time += xtfs->drop_time_ns; + + /* Walk back through the array setting drop times as we go */ + s[index].drop_time = drop_time; + while (index-- > 0 && !s[index].skb) + s[index].drop_time = drop_time; + + /* If we walked all the way back, schedule the drop timer if needed */ + if (index == -1 && !hrtimer_is_queued(&xtfs->drop_timer)) + hrtimer_start(&xtfs->drop_timer, xtfs->drop_time_ns, + IPTFS_HRTIMER_MODE); +} + +static void __reorder_future_fits(struct xfrm_iptfs_data *xtfs, + struct sk_buff *inskb, + struct list_head *freelist) +{ + const u64 inseq = __esp_seq(inskb); + const u64 wantseq = xtfs->w_wantseq; + const u64 distance = inseq - wantseq; + const u32 savedlen = xtfs->w_savedlen; + const u32 index = distance - 1; + + /* Handle future sequence number received which fits in the window. + * + * We know we don't have the seq we want so we won't be able to flush + * anything. 
*/ - return -EINPROGRESS; + + /* slot count is 4, saved size is 3 savedlen is 2 + * + * "window boundary" is based on the fixed window size + * distance is also slot number + * index is an array index (i.e., - 1 of slot) + * : : - implicit NULL after array len + * + * +--------- used length (savedlen == 2) + * | +----- array size (nslots - 1 == 3) + * | | + window boundary (nslots == 4) + * V V | V + * | + * 0 1 2 3 | slot number + * --- 0 1 2 | array index + * [-] [b] : :| array + * + * "2" "3" "4" *5*| seq numbers + * + * We receive seq number 5 + * distance == 3 [inseq(5) - w_wantseq(2)] + * index == 2 [distance(3) - 1] + */ + + if (xtfs->w_saved[index].skb) { + /* a dup of a future */ + list_add_tail(&inskb->list, freelist); + return; + } + + xtfs->w_saved[index].skb = inskb; + xtfs->w_savedlen = max(savedlen, index + 1); + iptfs_set_window_drop_times(xtfs, index); +} + +static void __reorder_future_shifts(struct xfrm_iptfs_data *xtfs, + struct sk_buff *inskb, + struct list_head *list) +{ + const u32 nslots = xtfs->cfg.reorder_win_size + 1; + const u64 inseq = __esp_seq(inskb); + u32 savedlen = xtfs->w_savedlen; + u64 wantseq = xtfs->w_wantseq; + struct skb_wseq *wnext; + struct sk_buff *slot0; + u32 beyond, shifting, slot; + u64 distance; + + /* Handle future sequence number received. + * + * IMPORTANT: we are at least advancing w_wantseq (i.e., wantseq) by 1 + * b/c we are beyond the window boundary. + * + * We know we don't have the wantseq so that counts as a drop. + */ + + /* example: slot count is 4, array size is 3 savedlen is 2, slot 0 is + * the missing sequence number. + * + * the final slot at savedlen (index savedlen - 1) is always occupied. + * + * beyond is "beyond array size" not savedlen.
+ * + * +--------- array length (savedlen == 2) + * | +----- array size (nslots - 1 == 3) + * | | +- window boundary (nslots == 4) + * V V | + * | + * 0 1 2 3 | slot number + * --- 0 1 2 | array index + * [b] [c] : :| array + * | + * "2" "3" "4" "5"|*6* seq numbers + * + * We receive seq number 6 + * distance == 4 [inseq(6) - w_wantseq(2)] + * newslot == distance + * index == 3 [distance(4) - 1] + * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))] + * shifting == 1 [min(savedlen(2), beyond(1)] + * slot0_skb == [b], and should match w_wantseq + * + * +--- window boundary (nslots == 4) + * 0 1 2 3 | 4 slot number + * --- 0 1 2 | 3 array index + * [b] : : : :| array + * "2" "3" "4" "5" *6* seq numbers + * + * We receive seq number 6 + * distance == 4 [inseq(6) - w_wantseq(2)] + * newslot == distance + * index == 3 [distance(4) - 1] + * beyond == 1 [newslot(4) - lastslot((nslots(4) - 1))] + * shifting == 1 [min(savedlen(1), beyond(1)] + * slot0_skb == [b] and should match w_wantseq + * + * +-- window boundary (nslots == 4) + * 0 1 2 3 | 4 5 6 slot number + * --- 0 1 2 | 3 4 5 array index + * [-] [c] : :| array + * "2" "3" "4" "5" "6" "7" *8* seq numbers + * + * savedlen = 2, beyond = 3 + * iter 1: slot0 == NULL, missed++, lastdrop = 2 (2+1-1), slot0 = [-] + * iter 2: slot0 == NULL, missed++, lastdrop = 3 (2+2-1), slot0 = [c] + * 2 < 3, extra = 1 (3-2), missed += extra, lastdrop = 4 (2+2+1-1) + * + * We receive seq number 8 + * distance == 6 [inseq(8) - w_wantseq(2)] + * newslot == distance + * index == 5 [distance(6) - 1] + * beyond == 3 [newslot(6) - lastslot((nslots(4) - 1))] + * shifting == 2 [min(savedlen(2), beyond(3)] + * + * slot0_skb == NULL changed from [b] when "savedlen < beyond" is true. + */ + + /* Now send any packets that are being shifted out of saved, and account + * for missing packets that are exiting the window as we shift it. 
+ */ + + distance = inseq - wantseq; + beyond = distance - (nslots - 1); + + /* If savedlen > beyond we are shifting some, else all. */ + shifting = min(savedlen, beyond); + + /* slot0 is the buf that just shifted out and into slot0 */ + slot0 = NULL; + wnext = xtfs->w_saved; + for (slot = 1; slot <= shifting; slot++, wnext++) { + /* handle what was in slot0 before we occupy it */ + if (slot0) + list_add_tail(&slot0->list, list); + slot0 = wnext->skb; + wnext->skb = NULL; + } + + /* slot0 is now either NULL (in which case it's what we now are waiting + * for, or a buf in which case we need to handle it like we received it; + * however, we may be advancing past that buffer as well.. + */ + + /* Handle case where we need to shift more than we had saved, slot0 will + * be NULL iff savedlen is 0, otherwise slot0 will always be + * non-NULL b/c we shifted the final element, which is always set if + * there is any saved, into slot0. + */ + if (savedlen < beyond) { + if (savedlen != 0) + list_add_tail(&slot0->list, list); + slot0 = NULL; + /* slot0 has had an empty slot pushed into it */ + } + + /* Remove the entries */ + __vec_shift(xtfs, beyond); + + /* Advance want seq */ + xtfs->w_wantseq += beyond; + + /* Process drops here when implementing congestion control */ + + /* We've shifted. plug the packet in at the end. */ + xtfs->w_savedlen = nslots - 1; + xtfs->w_saved[xtfs->w_savedlen - 1].skb = inskb; + iptfs_set_window_drop_times(xtfs, xtfs->w_savedlen - 1); + + /* if we don't have a slot0 then we must wait for it */ + if (!slot0) + return; + + /* If slot0, seq must match new want seq */ + + /* slot0 is valid, treat like we received expected. */ + __reorder_this(xtfs, slot0, list); +} + +/* Receive a new packet into the reorder window. Return a list of ordered + * packets from the window. 
+ */ +static void iptfs_input_reorder(struct xfrm_iptfs_data *xtfs, + struct sk_buff *inskb, struct list_head *list, + struct list_head *freelist) +{ + const u32 nslots = xtfs->cfg.reorder_win_size + 1; + u64 inseq = __esp_seq(inskb); + u64 wantseq; + + assert_spin_locked(&xtfs->drop_lock); + + if (unlikely(!xtfs->w_seq_set)) { + xtfs->w_seq_set = true; + xtfs->w_wantseq = inseq; + } + wantseq = xtfs->w_wantseq; + + if (likely(inseq == wantseq)) + __reorder_this(xtfs, inskb, list); + else if (inseq < wantseq) + __reorder_past(xtfs, inskb, freelist); + else if ((inseq - wantseq) < nslots) + __reorder_future_fits(xtfs, inskb, freelist); + else + __reorder_future_shifts(xtfs, inskb, list); } /** @@ -1238,23 +1606,92 @@ done: */ static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me) { + struct sk_buff *skb, *next; + struct list_head list; struct xfrm_iptfs_data *xtfs; - struct sk_buff *skb; + struct xfrm_state *x; + u32 count; xtfs = container_of(me, typeof(*xtfs), drop_timer); + x = xtfs->x; + + INIT_LIST_HEAD(&list); + + spin_lock(&xtfs->drop_lock); /* Drop any in progress packet */ - spin_lock(&xtfs->drop_lock); skb = xtfs->ra_newskb; xtfs->ra_newskb = NULL; + + /* Now drop as many packets as we should from the reordering window + * saved array + */ + count = xtfs->w_savedlen ? __reorder_drop(xtfs, &list) : 0; + spin_unlock(&xtfs->drop_lock); if (skb) kfree_skb_reason(skb, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); + if (count) { + list_for_each_entry_safe(skb, next, &list, list) { + skb_list_del_init(skb); + iptfs_input_ordered(x, skb); + } + } + return HRTIMER_NORESTART; } +/** + * iptfs_input() - handle receipt of iptfs payload + * @x: xfrm state + * @skb: the packet + * + * We have an IPTFS payload order it if needed, then process newly in order + * packets. + * + * Return: -EINPROGRESS to inform xfrm_input to stop processing the skb. 
+ */ +static int iptfs_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct list_head freelist, list; + struct xfrm_iptfs_data *xtfs = x->mode_data; + struct sk_buff *next; + + /* Fast path for no reorder window. */ + if (xtfs->cfg.reorder_win_size == 0) { + iptfs_input_ordered(x, skb); + goto done; + } + + /* Fetch list of in-order packets from the reordering window as well as + * a list of buffers we need to now free. + */ + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&freelist); + + spin_lock(&xtfs->drop_lock); + iptfs_input_reorder(xtfs, skb, &list, &freelist); + spin_unlock(&xtfs->drop_lock); + + list_for_each_entry_safe(skb, next, &list, list) { + skb_list_del_init(skb); + iptfs_input_ordered(x, skb); + } + + list_for_each_entry_safe(skb, next, &freelist, list) { + skb_list_del_init(skb); + kfree_skb(skb); + } +done: + /* We always have dealt with the input SKB, either we are re-using it, + * or we have freed it. Return EINPROGRESS so that xfrm_input stops + * processing it. + */ + return -EINPROGRESS; +} + /* ================================= */ /* IPTFS Sending (ingress) Functions */ /* ================================= */ @@ -2009,11 +2446,24 @@ static int iptfs_user_init(struct net *net, struct xfrm_state *x, xc = &xtfs->cfg; xc->max_queue_size = IPTFS_DEFAULT_MAX_QUEUE_SIZE; + xc->reorder_win_size = IPTFS_DEFAULT_REORDER_WINDOW; xtfs->drop_time_ns = IPTFS_DEFAULT_DROP_TIME_USECS * NSECS_IN_USEC; xtfs->init_delay_ns = IPTFS_DEFAULT_INIT_DELAY_USECS * NSECS_IN_USEC; if (attrs[XFRMA_IPTFS_DONT_FRAG]) xc->dont_frag = true; + if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) + xc->reorder_win_size = + nla_get_u16(attrs[XFRMA_IPTFS_REORDER_WINDOW]); + /* saved array is for saving 1..N seq nums from wantseq */ + if (xc->reorder_win_size) { + xtfs->w_saved = kcalloc(xc->reorder_win_size, + sizeof(*xtfs->w_saved), GFP_KERNEL); + if (!xtfs->w_saved) { + NL_SET_ERR_MSG(extack, "Cannot alloc reorder window"); + return -ENOMEM; + } + } if (attrs[XFRMA_IPTFS_PKT_SIZE]) { 
xc->pkt_size = nla_get_u32(attrs[XFRMA_IPTFS_PKT_SIZE]); if (!xc->pkt_size) { @@ -2051,6 +2501,7 @@ static unsigned int iptfs_sa_len(const struct xfrm_state *x) if (x->dir == XFRM_SA_DIR_IN) { l += nla_total_size(sizeof(u32)); /* drop time usec */ + l += nla_total_size(sizeof(xc->reorder_win_size)); } else { if (xc->dont_frag) l += nla_total_size(0); /* dont-frag flag */ @@ -2073,6 +2524,11 @@ static int iptfs_copy_to_user(struct xfrm_state *x, struct sk_buff *skb) q = xtfs->drop_time_ns; do_div(q, NSECS_IN_USEC); ret = nla_put_u32(skb, XFRMA_IPTFS_DROP_TIME, q); + if (ret) + return ret; + + ret = nla_put_u16(skb, XFRMA_IPTFS_REORDER_WINDOW, + xc->reorder_win_size); } else { if (xc->dont_frag) { ret = nla_put_flag(skb, XFRMA_IPTFS_DONT_FRAG); @@ -2134,6 +2590,14 @@ static int iptfs_clone_state(struct xfrm_state *x, struct xfrm_state *orig) xtfs->x = x; xtfs->ra_newskb = NULL; + if (xtfs->cfg.reorder_win_size) { + xtfs->w_saved = kcalloc(xtfs->cfg.reorder_win_size, + sizeof(*xtfs->w_saved), GFP_KERNEL); + if (!xtfs->w_saved) { + kfree_sensitive(xtfs); + return -ENOMEM; + } + } return 0; } @@ -2160,6 +2624,7 @@ static void iptfs_destroy_state(struct xfrm_state *x) { struct xfrm_iptfs_data *xtfs = x->mode_data; struct sk_buff_head list; + struct skb_wseq *s, *se; struct sk_buff *skb; if (!xtfs) @@ -2181,6 +2646,12 @@ static void iptfs_destroy_state(struct xfrm_state *x) if (xtfs->ra_newskb) kfree_skb(xtfs->ra_newskb); + for (s = xtfs->w_saved, se = s + xtfs->w_savedlen; s < se; s++) { + if (s->skb) + kfree_skb(s->skb); + } + + kfree_sensitive(xtfs->w_saved); kfree_sensitive(xtfs); module_put(x->mode_cbs->owner); From ed58b186c7737bf0db1ebf57207b30fe740e1d07 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:12 -0500 Subject: [PATCH 0135/1386] xfrm: iptfs: add tracepoint functionality Add tracepoints to the IP-TFS code. 
Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- net/xfrm/trace_iptfs.h | 218 +++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_iptfs.c | 71 +++++++++++++- 2 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 net/xfrm/trace_iptfs.h diff --git a/net/xfrm/trace_iptfs.h b/net/xfrm/trace_iptfs.h new file mode 100644 index 000000000000..74391ba24445 --- /dev/null +++ b/net/xfrm/trace_iptfs.h @@ -0,0 +1,218 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* xfrm_trace_iptfs.h + * + * August 12 2023, Christian Hopps + * + * Copyright (c) 2023, LabN Consulting, L.L.C. + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM iptfs + +#if !defined(_TRACE_IPTFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_IPTFS_H + +#include +#include +#include +#include + +struct xfrm_iptfs_data; + +TRACE_EVENT(iptfs_egress_recv, + TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u16 blkoff), + TP_ARGS(skb, xtfs, blkoff), + TP_STRUCT__entry(__field(struct sk_buff *, skb) + __field(void *, head) + __field(void *, head_pg_addr) + __field(void *, pg0addr) + __field(u32, skb_len) + __field(u32, data_len) + __field(u32, headroom) + __field(u32, tailroom) + __field(u32, tail) + __field(u32, end) + __field(u32, pg0off) + __field(u8, head_frag) + __field(u8, frag_list) + __field(u8, nr_frags) + __field(u16, blkoff)), + TP_fast_assign(__entry->skb = skb; + __entry->head = skb->head; + __entry->skb_len = skb->len; + __entry->data_len = skb->data_len; + __entry->headroom = skb_headroom(skb); + __entry->tailroom = skb_tailroom(skb); + __entry->tail = (u32)skb->tail; + __entry->end = (u32)skb->end; + __entry->head_frag = skb->head_frag; + __entry->frag_list = (bool)skb_shinfo(skb)->frag_list; + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->blkoff = blkoff; + __entry->head_pg_addr = page_address(virt_to_head_page(skb->head)); + __entry->pg0addr = (__entry->nr_frags + ? 
page_address(netmem_to_page(skb_shinfo(skb)->frags[0].netmem)) + : NULL); + __entry->pg0off = (__entry->nr_frags + ? skb_shinfo(skb)->frags[0].offset + : 0); + ), + TP_printk("EGRESS: skb=%p len=%u data_len=%u headroom=%u head_frag=%u frag_list=%u nr_frags=%u blkoff=%u\n\t\ttailroom=%u tail=%u end=%u head=%p hdpgaddr=%p pg0->addr=%p pg0->data=%p pg0->off=%u", + __entry->skb, __entry->skb_len, __entry->data_len, __entry->headroom, + __entry->head_frag, __entry->frag_list, __entry->nr_frags, __entry->blkoff, + __entry->tailroom, __entry->tail, __entry->end, __entry->head, + __entry->head_pg_addr, __entry->pg0addr, __entry->pg0addr + __entry->pg0off, + __entry->pg0off) + ) + +DECLARE_EVENT_CLASS(iptfs_ingress_preq_event, + TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, + u32 pmtu, u8 was_gso), + TP_ARGS(skb, xtfs, pmtu, was_gso), + TP_STRUCT__entry(__field(struct sk_buff *, skb) + __field(u32, skb_len) + __field(u32, data_len) + __field(u32, pmtu) + __field(u32, queue_size) + __field(u32, proto_seq) + __field(u8, proto) + __field(u8, was_gso) + ), + TP_fast_assign(__entry->skb = skb; + __entry->skb_len = skb->len; + __entry->data_len = skb->data_len; + __entry->queue_size = + xtfs->cfg.max_queue_size - xtfs->queue_size; + __entry->proto = __trace_ip_proto(ip_hdr(skb)); + __entry->proto_seq = __trace_ip_proto_seq(ip_hdr(skb)); + __entry->pmtu = pmtu; + __entry->was_gso = was_gso; + ), + TP_printk("INGRPREQ: skb=%p len=%u data_len=%u qsize=%u proto=%u proto_seq=%u pmtu=%u was_gso=%u", + __entry->skb, __entry->skb_len, __entry->data_len, + __entry->queue_size, __entry->proto, __entry->proto_seq, + __entry->pmtu, __entry->was_gso)); + +DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_enqueue, + TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso), + TP_ARGS(skb, xtfs, pmtu, was_gso)); + +DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_no_queue_space, + TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso), + 
TP_ARGS(skb, xtfs, pmtu, was_gso)); + +DEFINE_EVENT(iptfs_ingress_preq_event, iptfs_too_big, + TP_PROTO(struct sk_buff *skb, struct xfrm_iptfs_data *xtfs, u32 pmtu, u8 was_gso), + TP_ARGS(skb, xtfs, pmtu, was_gso)); + +DECLARE_EVENT_CLASS(iptfs_ingress_postq_event, + TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, struct iphdr *iph), + TP_ARGS(skb, mtu, blkoff, iph), + TP_STRUCT__entry(__field(struct sk_buff *, skb) + __field(u32, skb_len) + __field(u32, data_len) + __field(u32, mtu) + __field(u32, proto_seq) + __field(u16, blkoff) + __field(u8, proto)), + TP_fast_assign(__entry->skb = skb; + __entry->skb_len = skb->len; + __entry->data_len = skb->data_len; + __entry->mtu = mtu; + __entry->blkoff = blkoff; + __entry->proto = iph ? __trace_ip_proto(iph) : 0; + __entry->proto_seq = iph ? __trace_ip_proto_seq(iph) : 0; + ), + TP_printk("INGRPSTQ: skb=%p len=%u data_len=%u mtu=%u blkoff=%u proto=%u proto_seq=%u", + __entry->skb, __entry->skb_len, __entry->data_len, __entry->mtu, + __entry->blkoff, __entry->proto, __entry->proto_seq)); + +DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_dequeue, + TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, + struct iphdr *iph), + TP_ARGS(skb, mtu, blkoff, iph)); + +DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_fragmenting, + TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, + struct iphdr *iph), + TP_ARGS(skb, mtu, blkoff, iph)); + +DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_final_fragment, + TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, + struct iphdr *iph), + TP_ARGS(skb, mtu, blkoff, iph)); + +DEFINE_EVENT(iptfs_ingress_postq_event, iptfs_first_toobig, + TP_PROTO(struct sk_buff *skb, u32 mtu, u16 blkoff, + struct iphdr *iph), + TP_ARGS(skb, mtu, blkoff, iph)); + +TRACE_EVENT(iptfs_ingress_nth_peek, + TP_PROTO(struct sk_buff *skb, u32 remaining), + TP_ARGS(skb, remaining), + TP_STRUCT__entry(__field(struct sk_buff *, skb) + __field(u32, skb_len) + __field(u32, remaining)), + 
TP_fast_assign(__entry->skb = skb; + __entry->skb_len = skb->len; + __entry->remaining = remaining; + ), + TP_printk("INGRPSTQ: NTHPEEK: skb=%p len=%u remaining=%u", + __entry->skb, __entry->skb_len, __entry->remaining)); + +TRACE_EVENT(iptfs_ingress_nth_add, TP_PROTO(struct sk_buff *skb, u8 share_ok), + TP_ARGS(skb, share_ok), + TP_STRUCT__entry(__field(struct sk_buff *, skb) + __field(u32, skb_len) + __field(u32, data_len) + __field(u8, share_ok) + __field(u8, head_frag) + __field(u8, pp_recycle) + __field(u8, cloned) + __field(u8, shared) + __field(u8, nr_frags) + __field(u8, frag_list) + ), + TP_fast_assign(__entry->skb = skb; + __entry->skb_len = skb->len; + __entry->data_len = skb->data_len; + __entry->share_ok = share_ok; + __entry->head_frag = skb->head_frag; + __entry->pp_recycle = skb->pp_recycle; + __entry->cloned = skb_cloned(skb); + __entry->shared = skb_shared(skb); + __entry->nr_frags = skb_shinfo(skb)->nr_frags; + __entry->frag_list = (bool)skb_shinfo(skb)->frag_list; + ), + TP_printk("INGRPSTQ: NTHADD: skb=%p len=%u data_len=%u share_ok=%u head_frag=%u pp_recycle=%u cloned=%u shared=%u nr_frags=%u frag_list=%u", + __entry->skb, __entry->skb_len, __entry->data_len, __entry->share_ok, + __entry->head_frag, __entry->pp_recycle, __entry->cloned, __entry->shared, + __entry->nr_frags, __entry->frag_list)); + +DECLARE_EVENT_CLASS(iptfs_timer_event, + TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val), + TP_ARGS(xtfs, time_val), + TP_STRUCT__entry(__field(u64, time_val) + __field(u64, set_time)), + TP_fast_assign(__entry->time_val = time_val; + __entry->set_time = xtfs->iptfs_settime; + ), + TP_printk("TIMER: set_time=%llu time_val=%llu", + __entry->set_time, __entry->time_val)); + +DEFINE_EVENT(iptfs_timer_event, iptfs_timer_start, + TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val), + TP_ARGS(xtfs, time_val)); + +DEFINE_EVENT(iptfs_timer_event, iptfs_timer_expire, + TP_PROTO(struct xfrm_iptfs_data *xtfs, u64 time_val), + TP_ARGS(xtfs, time_val)); + 
+#endif /* _TRACE_IPTFS_H */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../net/xfrm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_iptfs +#include diff --git a/net/xfrm/xfrm_iptfs.c b/net/xfrm/xfrm_iptfs.c index 3ca7d2a04ea6..755f1eea8bfa 100644 --- a/net/xfrm/xfrm_iptfs.c +++ b/net/xfrm/xfrm_iptfs.c @@ -19,6 +19,7 @@ #include #include "xfrm_inout.h" +#include "trace_iptfs.h" /* IPTFS encap (header) values. */ #define IPTFS_SUBTYPE_BASIC 0 @@ -131,6 +132,7 @@ struct skb_wseq { * @ecn_queue_size: octets above with ECN mark. * @init_delay_ns: nanoseconds to wait to send initial IPTFS packet. * @iptfs_timer: output timer. + * @iptfs_settime: time the output timer was set. * @payload_mtu: max payload size. * @w_seq_set: true after first seq received. * @w_wantseq: waiting for this seq number as next to process (in order). @@ -155,6 +157,7 @@ struct xfrm_iptfs_data { u32 ecn_queue_size; /* octets above which ECN mark */ u64 init_delay_ns; /* nanoseconds */ struct hrtimer iptfs_timer; /* output timer */ + time64_t iptfs_settime; /* time timer was set */ u32 payload_mtu; /* max payload size */ /* Tunnel input reordering */ @@ -181,6 +184,41 @@ static enum hrtimer_restart iptfs_drop_timer(struct hrtimer *me); /* Utility Functions */ /* ================= */ +#ifdef TRACEPOINTS_ENABLED +static u32 __trace_ip_proto(struct iphdr *iph) +{ + if (iph->version == 4) + return iph->protocol; + return ((struct ipv6hdr *)iph)->nexthdr; +} + +static u32 __trace_ip_proto_seq(struct iphdr *iph) +{ + void *nexthdr; + u32 protocol = 0; + + if (iph->version == 4) { + nexthdr = (void *)(iph + 1); + protocol = iph->protocol; + } else if (iph->version == 6) { + nexthdr = (void *)(((struct ipv6hdr *)(iph)) + 1); + protocol = ((struct ipv6hdr *)(iph))->nexthdr; + } + switch (protocol) { + case IPPROTO_ICMP: + return ntohs(((struct icmphdr *)nexthdr)->un.echo.sequence); + case IPPROTO_ICMPV6: + return ntohs(((struct 
icmp6hdr *)nexthdr)->icmp6_sequence); + case IPPROTO_TCP: + return ntohl(((struct tcphdr *)nexthdr)->seq); + case IPPROTO_UDP: + return ntohs(((struct udphdr *)nexthdr)->source); + default: + return 0; + } +} +#endif /*TRACEPOINTS_ENABLED*/ + static u64 __esp_seq(struct sk_buff *skb) { u64 seq = ntohl(XFRM_SKB_CB(skb)->seq.input.low); @@ -461,6 +499,13 @@ static int iptfs_skb_add_frags(struct sk_buff *skb, return len; } +/* ================================== */ +/* IPTFS Trace Event Definitions */ +/* ================================== */ + +#define CREATE_TRACE_POINTS +#include "trace_iptfs.h" + /* ================================== */ /* IPTFS Receiving (egress) Functions */ /* ================================== */ @@ -1186,6 +1231,8 @@ static void iptfs_input_ordered(struct xfrm_state *x, struct sk_buff *skb) } data = sizeof(*ipth); + trace_iptfs_egress_recv(skb, xtfs, be16_to_cpu(ipth->block_offset)); + /* Set data past the basic header */ if (ipth->subtype == IPTFS_SUBTYPE_CC) { /* Copy the rest of the CC header */ @@ -1830,6 +1877,7 @@ static int iptfs_output_collect(struct net *net, struct sock *sk, struct sk_buff */ if (!ok) { nospace: + trace_iptfs_no_queue_space(skb, xtfs, pmtu, was_gso); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOQSPACE); kfree_skb_reason(skb, SKB_DROP_REASON_FULL_RING); continue; @@ -1839,6 +1887,7 @@ nospace: * enqueue. 
*/ if (xtfs->cfg.dont_frag && iptfs_is_too_big(sk, skb, pmtu)) { + trace_iptfs_too_big(skb, xtfs, pmtu, was_gso); kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); continue; } @@ -1847,11 +1896,16 @@ nospace: ok = iptfs_enqueue(xtfs, skb); if (!ok) goto nospace; + + trace_iptfs_enqueue(skb, xtfs, pmtu, was_gso); } /* Start a delay timer if we don't have one yet */ - if (!hrtimer_is_queued(&xtfs->iptfs_timer)) + if (!hrtimer_is_queued(&xtfs->iptfs_timer)) { hrtimer_start(&xtfs->iptfs_timer, xtfs->init_delay_ns, IPTFS_HRTIMER_MODE); + xtfs->iptfs_settime = ktime_get_raw_fast_ns(); + trace_iptfs_timer_start(xtfs, xtfs->init_delay_ns); + } spin_unlock_bh(&x->lock); return 0; @@ -1934,6 +1988,7 @@ static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data struct sk_buff *nskb = *skbp; u32 copy_len, offset; u32 to_copy = skb->len - mtu; + u32 blkoff = 0; int err = 0; INIT_LIST_HEAD(&sublist); @@ -1945,6 +2000,7 @@ static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data to_copy = skb->len - offset; while (to_copy) { /* Send all but last fragment to allow agg. 
append */ + trace_iptfs_first_fragmenting(nskb, mtu, to_copy, NULL); list_add_tail(&nskb->list, &sublist); /* FUTURE: if the packet has an odd/non-aligning length we could @@ -1963,11 +2019,14 @@ static int iptfs_copy_create_frags(struct sk_buff **skbp, struct xfrm_iptfs_data iptfs_output_prepare_skb(nskb, to_copy); offset += copy_len; to_copy -= copy_len; + blkoff = to_copy; } skb_abort_seq_read(&skbseq); /* return last fragment that will be unsent (or NULL) */ *skbp = nskb; + if (nskb) + trace_iptfs_first_final_fragment(nskb, mtu, blkoff, NULL); /* trim the original skb to MTU */ if (!err) @@ -2042,6 +2101,8 @@ static int iptfs_first_skb(struct sk_buff **skbp, struct xfrm_iptfs_data *xtfs, /* We've split gso up before queuing */ + trace_iptfs_first_dequeue(skb, mtu, 0, ip_hdr(skb)); + /* Consider the buffer Tx'd and no longer owned */ skb_orphan(skb); @@ -2137,6 +2198,7 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) */ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTERROR); + trace_iptfs_first_toobig(skb, mtu, 0, ip_hdr(skb)); kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); continue; } @@ -2183,6 +2245,7 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) * case. 
*/ while ((skb2 = skb_peek(list))) { + trace_iptfs_ingress_nth_peek(skb2, remaining); if (skb2->len > remaining) break; @@ -2220,6 +2283,8 @@ static void iptfs_output_queued(struct xfrm_state *x, struct sk_buff_head *list) skb->len += skb2->len; remaining -= skb2->len; + trace_iptfs_ingress_nth_add(skb2, share_ok); + if (share_ok) { iptfs_consume_frags(skb, skb2); } else { @@ -2242,6 +2307,7 @@ static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) struct sk_buff_head list; struct xfrm_iptfs_data *xtfs; struct xfrm_state *x; + time64_t settime; xtfs = container_of(me, typeof(*xtfs), iptfs_timer); x = xtfs->x; @@ -2258,6 +2324,7 @@ static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) __skb_queue_head_init(&list); skb_queue_splice_init(&xtfs->queue, &list); xtfs->queue_size = 0; + settime = xtfs->iptfs_settime; spin_unlock(&x->lock); /* After the above unlock, packets can begin queuing again, and the @@ -2266,6 +2333,8 @@ static enum hrtimer_restart iptfs_delay_timer(struct hrtimer *me) * already). */ + trace_iptfs_timer_expire(xtfs, (unsigned long long)(ktime_get_raw_fast_ns() - settime)); + iptfs_output_queued(x, &list); return HRTIMER_NORESTART; From f9a5b34f9251cf530fecf08ef039be64ead8c459 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 5 Dec 2024 00:09:21 +0200 Subject: [PATCH 0136/1386] net/mlx5: ifc: Reorganize mlx5_ifc_flow_table_context_bits The nested union at the end is not in the same style as the rest of the code, so un-nest it to make the style uniformly applied again. 
Signed-off-by: Cosmin Ratiu Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4fbbcf35498b..f3650f989e68 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6324,6 +6324,20 @@ struct mlx5_ifc_modify_other_hca_cap_in_bits { struct mlx5_ifc_other_hca_cap_bits other_capability; }; +struct mlx5_ifc_sw_owner_icm_root_params_bits { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; +}; + +struct mlx5_ifc_rtc_params_bits { + u8 rtc_id_0[0x20]; + + u8 rtc_id_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; @@ -6342,20 +6356,10 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; + union { - struct { - u8 sw_owner_icm_root_1[0x40]; - - u8 sw_owner_icm_root_0[0x40]; - } sws; - struct { - u8 rtc_id_0[0x20]; - - u8 rtc_id_1[0x20]; - - u8 reserved_at_100[0x40]; - - } hws; + struct mlx5_ifc_sw_owner_icm_root_params_bits sws; + struct mlx5_ifc_rtc_params_bits hws; }; }; From e799ac9dd3c485a7cda3586f2a12784b030b9df0 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 5 Dec 2024 00:09:22 +0200 Subject: [PATCH 0137/1386] net/mlx5: Add ConnectX-8 device to ifc In preparation for ConnectX-8 SWS support, add enum for the new device type. 
Signed-off-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-3-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3650f989e68..bd9b1833408e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1590,6 +1590,7 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, MLX5_STEERING_FORMAT_CONNECTX_7 = 2, + MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; struct mlx5_ifc_cmd_hca_cap_bits { From 03713108e0cccf325bb71941edd9ed6122142907 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 5 Dec 2024 00:09:23 +0200 Subject: [PATCH 0138/1386] net/mlx5: Add support for new scheduling elements Introduce new scheduling elements in the E-Switch QoS hierarchy to enhance traffic management capabilities. This patch adds support for: - Rate Limit scheduling elements: Enables bandwidth limitation across multiple nodes without a shared ancestor, providing a mechanism for more granular control of bandwidth allocation. - Traffic Class Transmit Scheduling Arbiter (TSAR): Introduces the infrastructure for creating Traffic Class TSARs, allowing hierarchical arbitration based on traffic classes. - Traffic Class Arbiter TSAR: Adds support for a TSAR capable of managing arbitration between multiple traffic classes, enabling improved bandwidth prioritization and traffic management. No functional changes are introduced in this patch. 
Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-4-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index e393391966e0..39a209b9b684 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -56,6 +56,8 @@ bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierar return cap & TSAR_TYPE_CAP_MASK_ROUND_ROBIN; case TSAR_ELEMENT_TSAR_TYPE_ETS: return cap & TSAR_TYPE_CAP_MASK_ETS; + case TSAR_ELEMENT_TSAR_TYPE_TC_ARB: + return cap & TSAR_TYPE_CAP_MASK_TC_ARB; } return false; @@ -87,6 +89,8 @@ bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hie return cap & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; case SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP: return cap & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT: + return cap & ELEMENT_TYPE_CAP_MASK_RATE_LIMIT; } return false; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bd9b1833408e..8b202521b774 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,8 @@ struct mlx5_ifc_qos_cap_bits { u8 packet_pacing_min_rate[0x20]; - u8 reserved_at_80[0x10]; + u8 reserved_at_80[0xb]; + u8 log_esw_max_rate_limit[0x5]; u8 packet_pacing_rate_table_size[0x10]; u8 esw_element_type[0x10]; @@ -4104,6 +4105,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT = 0x5, }; enum { @@ -4112,22 +4114,26 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT_TC 
= 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, + ELEMENT_TYPE_CAP_MASK_RATE_LIMIT = 1 << 5, }; enum { TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB = 0x3, }; enum { TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, TSAR_TYPE_CAP_MASK_ETS = 1 << 2, + TSAR_TYPE_CAP_MASK_TC_ARB = 1 << 3, }; struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; + u8 traffic_class[0x4]; + u8 reserved_at_4[0x4]; u8 tsar_type[0x8]; u8 reserved_at_10[0x10]; }; @@ -4164,7 +4170,9 @@ struct mlx5_ifc_scheduling_context_bits { u8 max_average_bw[0x20]; - u8 reserved_at_e0[0x120]; + u8 max_bw_obj_id[0x20]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_rqtc_bits { From f09ed834a946f9c77088d53af4d4806974728d7b Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 5 Dec 2024 00:09:24 +0200 Subject: [PATCH 0139/1386] net/mlx5: qos: Add ifc support for cross-esw scheduling This adds the capability bit and the vport element fields related to cross-esw scheduling. 
Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-5-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8b202521b774..5451ff1d4356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1095,7 +1095,9 @@ struct mlx5_ifc_qos_cap_bits { u8 log_esw_max_sched_depth[0x4]; u8 reserved_at_10[0x10]; - u8 reserved_at_20[0xb]; + u8 reserved_at_20[0x9]; + u8 esw_cross_esw_sched[0x1]; + u8 reserved_at_2a[0x1]; u8 log_max_qos_nic_queue_group[0x5]; u8 reserved_at_30[0x10]; @@ -4139,13 +4141,16 @@ struct mlx5_ifc_tsar_element_bits { }; struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x4]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; struct mlx5_ifc_vport_tc_element_bits { u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; From 0600cf40e9b36fe17f9c9f04d4f9cef249eaa5e7 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 3 Dec 2024 13:49:42 +0100 Subject: [PATCH 0140/1386] include: net: add static inline dst_dev_overhead() to dst.h Add static inline dst_dev_overhead() function to include/net/dst.h. This helper function is used by ioam6_iptunnel, rpl_iptunnel and seg6_iptunnel to get the dev's overhead based on a cache entry (dst_entry). If the cache is empty, the default and generic value skb->mac_len is returned. Otherwise, LL_RESERVED_SPACE() over dst's dev is returned. 
Signed-off-by: Justin Iurman Cc: Alexander Lobakin Cc: Vadim Fedorenko Signed-off-by: Paolo Abeni --- include/net/dst.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/net/dst.h b/include/net/dst.h index 0f303cc60252..08647c99d79c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -440,6 +440,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) dst->expires = expires; } +static inline unsigned int dst_dev_overhead(struct dst_entry *dst, + struct sk_buff *skb) +{ + if (likely(dst)) + return LL_RESERVED_SPACE(dst->dev); + + return skb->mac_len; +} + INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, From dce525185bc92864e5a318040285ee070563fe34 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 3 Dec 2024 13:49:43 +0100 Subject: [PATCH 0141/1386] net: ipv6: ioam6_iptunnel: mitigate 2-realloc issue This patch mitigates the two-reallocations issue with ioam6_iptunnel by providing the dst_entry (in the cache) to the first call to skb_cow_head(). As a result, the very first iteration may still trigger two reallocations (i.e., empty cache), while next iterations would only trigger a single reallocation. Performance tests before/after applying this patch, which clearly shows the improvement: - inline mode: - before: https://ibb.co/LhQ8V63 - after: https://ibb.co/x5YT2bS - encap mode: - before: https://ibb.co/3Cjm5m0 - after: https://ibb.co/TwpsxTC - encap mode with tunsrc: - before: https://ibb.co/Gpy9QPg - after: https://ibb.co/PW1bZFT This patch also fixes an incorrect behavior: after the insertion, the second call to skb_cow_head() makes sure that the dev has enough headroom in the skb for layer 2 and stuff. In that case, the "old" dst_entry was used, which is now fixed. 
After discussing with Paolo, it appears that both patches can be merged into a single one -this one- (for the sake of readability) and target net-next. Signed-off-by: Justin Iurman Signed-off-by: Paolo Abeni --- net/ipv6/ioam6_iptunnel.c | 75 ++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 37 deletions(-) diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 9d8422e350f8..28e5a89dc255 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -253,14 +253,15 @@ static int ioam6_do_fill(struct net *net, struct sk_buff *skb) } static int ioam6_do_inline(struct net *net, struct sk_buff *skb, - struct ioam6_lwt_encap *tuninfo) + struct ioam6_lwt_encap *tuninfo, + struct dst_entry *cache_dst) { struct ipv6hdr *oldhdr, *hdr; int hdrlen, err; hdrlen = (tuninfo->eh.hdrlen + 1) << 3; - err = skb_cow_head(skb, hdrlen + skb->mac_len); + err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) return err; @@ -291,7 +292,8 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo, bool has_tunsrc, struct in6_addr *tunsrc, - struct in6_addr *tundst) + struct in6_addr *tundst, + struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr, *inner_hdr; @@ -300,7 +302,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, hdrlen = (tuninfo->eh.hdrlen + 1) << 3; len = sizeof(*hdr) + hdrlen; - err = skb_cow_head(skb, len + skb->mac_len); + err = skb_cow_head(skb, len + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) return err; @@ -334,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb); + struct dst_entry *dst = skb_dst(skb), *cache_dst; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -352,6 +354,10 @@ static int ioam6_output(struct net 
*net, struct sock *sk, struct sk_buff *skb) orig_daddr = ipv6_hdr(skb)->daddr; + local_bh_disable(); + cache_dst = dst_cache_get(&ilwt->cache); + local_bh_enable(); + switch (ilwt->mode) { case IOAM6_IPTUNNEL_MODE_INLINE: do_inline: @@ -359,7 +365,7 @@ do_inline: if (ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP) goto out; - err = ioam6_do_inline(net, skb, &ilwt->tuninfo); + err = ioam6_do_inline(net, skb, &ilwt->tuninfo, cache_dst); if (unlikely(err)) goto drop; @@ -369,7 +375,7 @@ do_encap: /* Encapsulation (ip6ip6) */ err = ioam6_do_encap(net, skb, &ilwt->tuninfo, ilwt->has_tunsrc, &ilwt->tunsrc, - &ilwt->tundst); + &ilwt->tundst, cache_dst); if (unlikely(err)) goto drop; @@ -387,41 +393,36 @@ do_encap: goto drop; } - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; + if (unlikely(!cache_dst)) { + struct ipv6hdr *hdr = ipv6_hdr(skb); + struct flowi6 fl6; - if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { - local_bh_disable(); - dst = dst_cache_get(&ilwt->cache); - local_bh_enable(); + memset(&fl6, 0, sizeof(fl6)); + fl6.daddr = hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; - if (unlikely(!dst)) { - struct ipv6hdr *hdr = ipv6_hdr(skb); - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - fl6.daddr = hdr->daddr; - fl6.saddr = hdr->saddr; - fl6.flowlabel = ip6_flowinfo(hdr); - fl6.flowi6_mark = skb->mark; - fl6.flowi6_proto = hdr->nexthdr; - - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - err = dst->error; - dst_release(dst); - goto drop; - } - - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); - local_bh_enable(); + cache_dst = ip6_route_output(net, NULL, &fl6); + if (cache_dst->error) { + err = cache_dst->error; + dst_release(cache_dst); + goto drop; } - skb_dst_drop(skb); - skb_dst_set(skb, dst); + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + 
local_bh_enable(); + err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); + if (unlikely(err)) + goto drop; + } + + if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { + skb_dst_drop(skb); + skb_dst_set(skb, cache_dst); return dst_output(net, sk, skb); } out: From 40475b63761abb6f8fdef960d03228a08662c9c4 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 3 Dec 2024 13:49:44 +0100 Subject: [PATCH 0142/1386] net: ipv6: seg6_iptunnel: mitigate 2-realloc issue This patch mitigates the two-reallocations issue with seg6_iptunnel by providing the dst_entry (in the cache) to the first call to skb_cow_head(). As a result, the very first iteration would still trigger two reallocations (i.e., empty cache), while next iterations would only trigger a single reallocation. Performance tests before/after applying this patch, which clearly shows the improvement: - before: https://ibb.co/3Cg4sNH - after: https://ibb.co/8rQ350r Signed-off-by: Justin Iurman Cc: David Lebrun Signed-off-by: Paolo Abeni --- net/ipv6/seg6_iptunnel.c | 85 ++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 33 deletions(-) diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 098632adc9b5..4bf937bfc263 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -124,8 +124,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, return flowlabel; } -/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ -int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) +static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + int proto, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(dst->dev); @@ -137,7 +137,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdrlen = (osrh->hdrlen + 1) << 3; tot_len = hdrlen + sizeof(*hdr); - err = skb_cow_head(skb, tot_len + skb->mac_len); + 
err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) return err; @@ -197,11 +197,18 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) return 0; } + +/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ +int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) +{ + return __seg6_do_srh_encap(skb, osrh, proto, NULL); +} EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ static int seg6_do_srh_encap_red(struct sk_buff *skb, - struct ipv6_sr_hdr *osrh, int proto) + struct ipv6_sr_hdr *osrh, int proto, + struct dst_entry *cache_dst) { __u8 first_seg = osrh->first_segment; struct dst_entry *dst = skb_dst(skb); @@ -230,7 +237,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, tot_len = red_hdrlen + sizeof(struct ipv6hdr); - err = skb_cow_head(skb, tot_len + skb->mac_len); + err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) return err; @@ -317,8 +324,8 @@ out: return 0; } -/* insert an SRH within an IPv6 packet, just after the IPv6 header */ -int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, + struct dst_entry *cache_dst) { struct ipv6hdr *hdr, *oldhdr; struct ipv6_sr_hdr *isrh; @@ -326,7 +333,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) hdrlen = (osrh->hdrlen + 1) << 3; - err = skb_cow_head(skb, hdrlen + skb->mac_len); + err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) return err; @@ -369,9 +376,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) return 0; } -EXPORT_SYMBOL_GPL(seg6_do_srh_inline); -static int seg6_do_srh(struct sk_buff *skb) +static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct 
seg6_iptunnel_encap *tinfo; @@ -384,7 +390,7 @@ static int seg6_do_srh(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) return -EINVAL; - err = seg6_do_srh_inline(skb, tinfo->srh); + err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst); if (err) return err; break; @@ -402,9 +408,11 @@ static int seg6_do_srh(struct sk_buff *skb) return -EINVAL; if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) - err = seg6_do_srh_encap(skb, tinfo->srh, proto); + err = __seg6_do_srh_encap(skb, tinfo->srh, + proto, cache_dst); else - err = seg6_do_srh_encap_red(skb, tinfo->srh, proto); + err = seg6_do_srh_encap_red(skb, tinfo->srh, + proto, cache_dst); if (err) return err; @@ -425,11 +433,13 @@ static int seg6_do_srh(struct sk_buff *skb) skb_push(skb, skb->mac_len); if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) - err = seg6_do_srh_encap(skb, tinfo->srh, - IPPROTO_ETHERNET); + err = __seg6_do_srh_encap(skb, tinfo->srh, + IPPROTO_ETHERNET, + cache_dst); else err = seg6_do_srh_encap_red(skb, tinfo->srh, - IPPROTO_ETHERNET); + IPPROTO_ETHERNET, + cache_dst); if (err) return err; @@ -444,6 +454,13 @@ static int seg6_do_srh(struct sk_buff *skb) return 0; } +/* insert an SRH within an IPv6 packet, just after the IPv6 header */ +int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +{ + return __seg6_do_srh_inline(skb, osrh, NULL); +} +EXPORT_SYMBOL_GPL(seg6_do_srh_inline); + static int seg6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -458,31 +475,33 @@ static int seg6_input_core(struct net *net, struct sock *sk, struct seg6_lwt *slwt; int err; - err = seg6_do_srh(skb); - if (unlikely(err)) - goto drop; - slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); local_bh_disable(); dst = dst_cache_get(&slwt->cache); + local_bh_enable(); + + err = seg6_do_srh(skb, dst); + if (unlikely(err)) + goto drop; if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); if (!dst->error) { + local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); 
+ local_bh_enable(); } + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; } else { skb_dst_drop(skb); skb_dst_set(skb, dst); } - local_bh_enable(); - - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, @@ -528,16 +547,16 @@ static int seg6_output_core(struct net *net, struct sock *sk, struct seg6_lwt *slwt; int err; - err = seg6_do_srh(skb); - if (unlikely(err)) - goto drop; - slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); local_bh_disable(); dst = dst_cache_get(&slwt->cache); local_bh_enable(); + err = seg6_do_srh(skb, dst); + if (unlikely(err)) + goto drop; + if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; @@ -559,15 +578,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); local_bh_enable(); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; } skb_dst_drop(skb); skb_dst_set(skb, dst); - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; - if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); From 985ec6f5e6235242191370628acb73d7a9f0c0ea Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 3 Dec 2024 13:49:45 +0100 Subject: [PATCH 0143/1386] net: ipv6: rpl_iptunnel: mitigate 2-realloc issue This patch mitigates the two-reallocations issue with rpl_iptunnel by providing the dst_entry (in the cache) to the first call to skb_cow_head(). As a result, the very first iteration would still trigger two reallocations (i.e., empty cache), while next iterations would only trigger a single reallocation. 
Performance tests before/after applying this patch, which clearly shows there is no impact (it even shows improvement): - before: https://ibb.co/nQJhqwc - after: https://ibb.co/4ZvW6wV Signed-off-by: Justin Iurman Cc: Alexander Aring Signed-off-by: Paolo Abeni --- net/ipv6/rpl_iptunnel.c | 46 ++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index db3c19a42e1c..7ba22d2f2bfe 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -125,7 +125,8 @@ static void rpl_destroy_state(struct lwtunnel_state *lwt) } static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, - const struct ipv6_rpl_sr_hdr *srh) + const struct ipv6_rpl_sr_hdr *srh, + struct dst_entry *cache_dst) { struct ipv6_rpl_sr_hdr *isrh, *csrh; const struct ipv6hdr *oldhdr; @@ -153,7 +154,7 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, hdrlen = ((csrh->hdrlen + 1) << 3); - err = skb_cow_head(skb, hdrlen + skb->mac_len); + err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb)); if (unlikely(err)) { kfree(buf); return err; @@ -186,7 +187,8 @@ static int rpl_do_srh_inline(struct sk_buff *skb, const struct rpl_lwt *rlwt, return 0; } -static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) +static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt, + struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct rpl_iptunnel_encap *tinfo; @@ -196,7 +198,7 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt) tinfo = rpl_encap_lwtunnel(dst->lwtstate); - return rpl_do_srh_inline(skb, rlwt, tinfo->srh); + return rpl_do_srh_inline(skb, rlwt, tinfo->srh, cache_dst); } static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -208,14 +210,14 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) rlwt = 
rpl_lwt_lwtunnel(orig_dst->lwtstate); - err = rpl_do_srh(skb, rlwt); - if (unlikely(err)) - goto drop; - local_bh_disable(); dst = dst_cache_get(&rlwt->cache); local_bh_enable(); + err = rpl_do_srh(skb, rlwt, dst); + if (unlikely(err)) + goto drop; + if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); struct flowi6 fl6; @@ -237,15 +239,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) local_bh_disable(); dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); local_bh_enable(); + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; } skb_dst_drop(skb); skb_dst_set(skb, dst); - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; - return dst_output(net, sk, skb); drop: @@ -262,29 +264,31 @@ static int rpl_input(struct sk_buff *skb) rlwt = rpl_lwt_lwtunnel(orig_dst->lwtstate); - err = rpl_do_srh(skb, rlwt); - if (unlikely(err)) - goto drop; - local_bh_disable(); dst = dst_cache_get(&rlwt->cache); + local_bh_enable(); + + err = rpl_do_srh(skb, rlwt, dst); + if (unlikely(err)) + goto drop; if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); if (!dst->error) { + local_bh_disable(); dst_cache_set_ip6(&rlwt->cache, dst, &ipv6_hdr(skb)->saddr); + local_bh_enable(); } + + err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); + if (unlikely(err)) + goto drop; } else { skb_dst_drop(skb); skb_dst_set(skb, dst); } - local_bh_enable(); - - err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); - if (unlikely(err)) - goto drop; return dst_input(skb); From 523d3cc4b6d1ae18bfa516345d48332d455181e6 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:42 -0800 Subject: [PATCH 0144/1386] ynl: support enum-cnt-name attribute in legacy definitions This is similar to existing attr-cnt-name in the attributes to allow changing the name of the 'count' enum entry. 
Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-2-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/netlink/genetlink-c.yaml | 3 +++ Documentation/netlink/genetlink-legacy.yaml | 3 +++ Documentation/userspace-api/netlink/c-code-gen.rst | 4 +++- tools/net/ynl/ynl-gen-c.py | 8 ++++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 4f803eaac6d8..9660ffb1ed6a 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -106,6 +106,9 @@ properties: name-prefix: description: For enum the prefix of the values, optional. type: string + enum-cnt-name: + description: Name of the render-max counter enum entry. + type: string # End genetlink-c attribute-sets: diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 8db0e22fa72c..16380e12cabe 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -117,6 +117,9 @@ properties: name-prefix: description: For enum the prefix of the values, optional. type: string + enum-cnt-name: + description: Name of the render-max counter enum entry. + type: string # End genetlink-c # Start genetlink-legacy members: diff --git a/Documentation/userspace-api/netlink/c-code-gen.rst b/Documentation/userspace-api/netlink/c-code-gen.rst index 89de42c13350..46415e6d646d 100644 --- a/Documentation/userspace-api/netlink/c-code-gen.rst +++ b/Documentation/userspace-api/netlink/c-code-gen.rst @@ -56,7 +56,9 @@ If ``name-prefix`` is specified it replaces the ``$family-$enum`` portion of the entry name. Boolean ``render-max`` controls creation of the max values -(which are enabled by default for attribute enums). +(which are enabled by default for attribute enums). These max +values are named ``__$pfx-MAX`` and ``$pfx-MAX``. 
The name +of the first value can be overridden via ``enum-cnt-name`` property. Attributes ========== diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index d8201c4b1520..bfe95826ae3e 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -801,6 +801,7 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -2472,9 +2473,12 @@ def render_uapi(family, cw): max_val = f' = {enum.get_mask()},' cw.p(max_name + max_val) else: + cnt_name = enum.enum_cnt_name max_name = c_upper(name_pfx + 'max') - cw.p('__' + max_name + ',') - cw.p(max_name + ' = (__' + max_name + ' - 1)') + if not cnt_name: + cnt_name = '__' + name_pfx + 'max' + cw.p(c_upper(cnt_name) + ',') + cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)') cw.block_end(line=';') cw.nl() elif const['type'] == 'const': From 8c843ecde4e49e11063ad942675246ec685ea19a Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:43 -0800 Subject: [PATCH 0145/1386] ynl: skip rendering attributes with header property in uapi mode To allow omitting some of the attributes in the final generated file. 
Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-3-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index bfe95826ae3e..79829ce39139 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -801,6 +801,7 @@ class EnumSet(SpecEnumSet): self.user_type = 'int' self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-") + self.header = yaml.get('header', None) self.enum_cnt_name = yaml.get('enum-cnt-name', None) super().__init__(family, yaml) @@ -2441,6 +2442,9 @@ def render_uapi(family, cw): if const['type'] == 'enum' or const['type'] == 'flags': enum = family.consts[const['name']] + if enum.header: + continue + if enum.has_doc(): if enum.has_entry_doc(): cw.p('/**') From 56881d07f0b4cb97f3c460dc3908eee91fc51a17 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:44 -0800 Subject: [PATCH 0146/1386] ynl: support directional specs in ynl-gen-c.py The intent is to generate ethtool uapi headers. 
For now, some of the things are hard-coded: - _MSG_{USER,KERNEL}_MAX - the split between USER and KERNEL messages Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-4-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 118 +++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 79829ce39139..2bf4d992e54a 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2419,6 +2419,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): cw.block_start(line=start_line) +def render_uapi_unified(family, cw, max_by_define, separate_ntf): + max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) + cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) + max_value = f"({cnt_name} - 1)" + + uapi_enum_start(family, cw, family['operations'], 'enum-name') + val = 0 + for op in family.msgs.values(): + if separate_ntf and ('notify' in op or 'event' in op): + continue + + suffix = ',' + if op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + +def render_uapi_directional(family, cw, max_by_define): + max_name = f"{family.op_prefix}USER_MAX" + cnt_name = f"__{family.op_prefix}USER_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if 'do' in op and 'event' not in op: + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(op.enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + 
if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + max_name = f"{family.op_prefix}KERNEL_MAX" + cnt_name = f"__{family.op_prefix}KERNEL_CNT" + max_value = f"({cnt_name} - 1)" + + cw.block_start(line='enum') + cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,')) + val = 0 + for op in family.msgs.values(): + if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op: + enum_name = op.enum_name + if 'event' not in op and 'notify' not in op: + enum_name = f'{enum_name}_REPLY' + + suffix = ',' + if op.value and op.value != val: + suffix = f" = {op.value}," + val = op.value + cw.p(enum_name + suffix) + val += 1 + cw.nl() + cw.p(cnt_name + ('' if max_by_define else ',')) + if not max_by_define: + cw.p(f"{max_name} = {max_value}") + cw.block_end(line=';') + if max_by_define: + cw.p(f"#define {max_name} {max_value}") + cw.nl() + + def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" hdr_prot = hdr_prot.replace('/', '_') @@ -2523,30 +2604,12 @@ def render_uapi(family, cw): # Commands separate_ntf = 'async-prefix' in family['operations'] - max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX")) - cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX")) - max_value = f"({cnt_name} - 1)" - - uapi_enum_start(family, cw, family['operations'], 'enum-name') - val = 0 - for op in family.msgs.values(): - if separate_ntf and ('notify' in op or 'event' in op): - continue - - suffix = ',' - if op.value != val: - suffix = f" = {op.value}," - val = op.value - cw.p(op.enum_name + suffix) - val += 1 - cw.nl() - cw.p(cnt_name + ('' if max_by_define else ',')) - if not max_by_define: - cw.p(f"{max_name} = {max_value}") - cw.block_end(line=';') - if max_by_define: - cw.p(f"#define {max_name} {max_value}") - cw.nl() + if family.msg_id_model == 'unified': + render_uapi_unified(family, cw, 
max_by_define, separate_ntf) + elif family.msg_id_model == 'directional': + render_uapi_directional(family, cw, max_by_define) + else: + raise Exception(f'Unsupported message enum-model {family.msg_id_model}') if separate_ntf: uapi_enum_start(family, cw, family['operations'], enum_name='async-enum') @@ -2670,13 +2733,6 @@ def main(): os.sys.exit(1) return - supported_models = ['unified'] - if args.mode in ['user', 'kernel']: - supported_models += ['directional'] - if parsed.msg_id_model not in supported_models: - print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation') - os.sys.exit(1) - cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out)) _, spec_kernel = find_kernel_root(args.spec) From 0187e602c03c876d69cf955dd438bc7fea8c8fd3 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:45 -0800 Subject: [PATCH 0147/1386] ynl: add missing pieces to ethtool spec to better match uapi header - __ETHTOOL_UDP_TUNNEL_TYPE_CNT and render max - skip rendering stringset (empty enum) - skip rendering c33-pse-ext-state (defined in ethtool.h) - rename header flags to ethtool-flag- - add attr-cnt-name to each attribute to use XXX_CNT instead of XXX_MAX - add unspec 0 entry to each attribute - carry some doc entries from the existing header - tcp-header-split Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-5-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 358 ++++++++++++++++++++++- 1 file changed, 346 insertions(+), 12 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 93369f0eb816..c7634e957d9c 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -5,6 +5,7 @@ name: ethtool protocol: genetlink-legacy doc: Partial family for Ethtool Netlink. 
+uapi-header: linux/ethtool_netlink_generated.h definitions: - @@ -12,43 +13,99 @@ definitions: enum-name: type: enum entries: [ vxlan, geneve, vxlan-gpe ] + enum-cnt-name: __ethtool-udp-tunnel-type-cnt + render-max: true - name: stringset type: enum entries: [] + header: linux/ethtool.h # skip rendering, no actual definition - name: header-flags type: flags - entries: [ compact-bitsets, omit-reply, stats ] + name-prefix: ethtool-flag- + doc: common ethtool header flags + entries: + - + name: compact-bitsets + doc: use compact bitsets in reply + - + name: omit-reply + doc: provide optional reply for SET or ACT requests + - + name: stats + doc: request statistics, if supported by the driver - name: module-fw-flash-status type: enum - entries: [ started, in_progress, completed, error ] + doc: plug-in module firmware flashing status + header: linux/ethtool.h + entries: + - + name: started + doc: The firmware flashing process has started. + - + name: in_progress + doc: The firmware flashing process is in progress. + - + name: completed + doc: The firmware flashing process was completed successfully. + - + name: error + doc: The firmware flashing process was stopped due to an error. - name: c33-pse-ext-state - enum-name: + doc: "groups of PSE extended states functions. 
IEEE 802.3-2022 33.2.4.4 Variables" type: enum name-prefix: ethtool-c33-pse-ext-state- + header: linux/ethtool.h entries: - - none - - error-condition - - mr-mps-valid - - mr-pse-enable - - option-detect-ted - - option-vport-lim - - ovld-detected - - power-not-available - - short-detected + - + name: none + doc: none + - + name: error-condition + doc: Group of error_condition states + - + name: mr-mps-valid + doc: Group of mr_mps_valid states + - + name: mr-pse-enable + doc: Group of mr_pse_enable states + - + name: option-detect-ted + doc: Group of option_detect_ted states + - + name: option-vport-lim + doc: Group of option_vport_lim states + - + name: ovld-detected + doc: Group of ovld_detected states + - + name: power-not-available + doc: Group of power_not_available states + - + name: short-detected + doc: Group of short_detected states - name: phy-upstream-type enum-name: type: enum entries: [ mac, phy ] + - + name: tcp-data-split + type: enum + entries: [ unknown, disabled, enabled ] attribute-sets: - name: header + attr-cnt-name: __ethtool-a-header-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: dev-index type: u32 @@ -65,7 +122,12 @@ attribute-sets: - name: bitset-bit + attr-cnt-name: __ethtool-a-bitset-bit-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: index type: u32 @@ -77,7 +139,12 @@ attribute-sets: type: flag - name: bitset-bits + attr-cnt-name: __ethtool-a-bitset-bits-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: bit type: nest @@ -85,7 +152,12 @@ attribute-sets: nested-attributes: bitset-bit - name: bitset + attr-cnt-name: __ethtool-a-bitset-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: nomask type: flag @@ -104,7 +176,12 @@ attribute-sets: type: binary - name: string + attr-cnt-name: __ethtool-a-string-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: index type: u32 @@ -113,7 +190,16 @@ attribute-sets: type: string - name: strings + 
attr-cnt-name: __ethtool-a-strings-cnt attributes: + - + name: unspec + type: unused + value: 0 + - + name: unspec + type: unused + value: 0 - name: string type: nest @@ -121,7 +207,12 @@ attribute-sets: nested-attributes: string - name: stringset + attr-cnt-name: __ethtool-a-stringset-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: id type: u32 @@ -135,7 +226,12 @@ attribute-sets: nested-attributes: strings - name: stringsets + attr-cnt-name: __ethtool-a-stringsets-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: stringset type: nest @@ -143,7 +239,12 @@ attribute-sets: nested-attributes: stringset - name: strset + attr-cnt-name: __ethtool-a-strset-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -158,7 +259,12 @@ attribute-sets: - name: privflags + attr-cnt-name: __ethtool-a-privflags-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -170,7 +276,12 @@ attribute-sets: - name: rings + attr-cnt-name: __ethtool-a-rings-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -205,6 +316,7 @@ attribute-sets: - name: tcp-data-split type: u8 + enum: tcp-data-split - name: cqe-size type: u32 @@ -223,31 +335,48 @@ attribute-sets: - name: mm-stat + attr-cnt-name: __ethtool-a-mm-stat-cnt + doc: MAC Merge (802.3) attributes: + - + name: unspec + type: unused + value: 0 - name: pad type: pad - name: reassembly-errors + doc: aMACMergeFrameAssErrorCount type: u64 - name: smd-errors + doc: aMACMergeFrameSmdErrorCount type: u64 - name: reassembly-ok + doc: aMACMergeFrameAssOkCount type: u64 - name: rx-frag-count + doc: aMACMergeFragCountRx type: u64 - name: tx-frag-count + doc: aMACMergeFragCountTx type: u64 - name: hold-count + doc: aMACMergeHoldCount type: u64 - name: mm + attr-cnt-name: __ethtool-a-mm-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -285,7 +414,12 @@ attribute-sets: 
nested-attributes: mm-stat - name: linkinfo + attr-cnt-name: __ethtool-a-linkinfo-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -307,7 +441,12 @@ attribute-sets: type: u8 - name: linkmodes + attr-cnt-name: __ethtool-a-linkmodes-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -343,7 +482,12 @@ attribute-sets: type: u8 - name: linkstate + attr-cnt-name: __ethtool-a-linkstate-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -368,7 +512,12 @@ attribute-sets: type: u32 - name: debug + attr-cnt-name: __ethtool-a-debug-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -379,7 +528,12 @@ attribute-sets: nested-attributes: bitset - name: wol + attr-cnt-name: __ethtool-a-wol-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -393,7 +547,12 @@ attribute-sets: type: binary - name: features + attr-cnt-name: __ethtool-a-features-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -416,7 +575,12 @@ attribute-sets: nested-attributes: bitset - name: channels + attr-cnt-name: __ethtool-a-channels-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -448,7 +612,12 @@ attribute-sets: - name: irq-moderation + attr-cnt-name: __ethtool-a-irq-moderation-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: usec type: u32 @@ -460,7 +629,12 @@ attribute-sets: type: u32 - name: profile + attr-cnt-name: __ethtool-a-profile-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: irq-moderation type: nest @@ -468,7 +642,12 @@ attribute-sets: nested-attributes: irq-moderation - name: coalesce + attr-cnt-name: __ethtool-a-coalesce-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -565,7 +744,12 @@ attribute-sets: - name: pause-stat + attr-cnt-name: 
__ethtool-a-pause-stat-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pad type: pad @@ -577,7 +761,12 @@ attribute-sets: type: u64 - name: pause + attr-cnt-name: __ethtool-a-pause-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -600,7 +789,12 @@ attribute-sets: type: u32 - name: eee + attr-cnt-name: __ethtool-a-eee-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -627,7 +821,12 @@ attribute-sets: type: u32 - name: ts-stat + attr-cnt-name: __ethtool-a-ts-stat-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: tx-pkts type: uint @@ -639,7 +838,12 @@ attribute-sets: type: uint - name: tsinfo + attr-cnt-name: __ethtool-a-tsinfo-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -665,19 +869,32 @@ attribute-sets: nested-attributes: ts-stat - name: cable-result + attr-cnt-name: __ethtool-a-cable-result-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pair + doc: ETHTOOL_A_CABLE_PAIR type: u8 - name: code + doc: ETHTOOL_A_CABLE_RESULT_CODE type: u8 - name: src + doc: ETHTOOL_A_CABLE_INF_SRC type: u32 - name: cable-fault-length + attr-cnt-name: __ethtool-a-cable-fault-length-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pair type: u8 @@ -689,7 +906,12 @@ attribute-sets: type: u32 - name: cable-nest + attr-cnt-name: __ethtool-a-cable-nest-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: result type: nest @@ -700,20 +922,31 @@ attribute-sets: nested-attributes: cable-fault-length - name: cable-test + attr-cnt-name: __ethtool-a-cable-test-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest nested-attributes: header - name: cable-test-ntf + attr-cnt-name: __ethtool-a-cable-test-ntf-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest nested-attributes: header - name: status + doc: 
_STARTED/_COMPLETE type: u8 - name: nest @@ -721,7 +954,12 @@ attribute-sets: nested-attributes: cable-nest - name: cable-test-tdr-cfg + attr-cnt-name: __ethtool-a-cable-test-tdr-cfg-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: first type: u32 @@ -736,7 +974,12 @@ attribute-sets: type: u8 - name: cable-test-tdr-ntf + attr-cnt-name: __ethtool-a-cable-test-tdr-ntf-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -750,7 +993,12 @@ attribute-sets: nested-attributes: cable-nest - name: cable-test-tdr + attr-cnt-name: __ethtool-a-cable-test-tdr-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -761,7 +1009,12 @@ attribute-sets: nested-attributes: cable-test-tdr-cfg - name: tunnel-udp-entry + attr-cnt-name: __ethtool-a-tunnel-udp-entry-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: port type: u16 @@ -772,7 +1025,12 @@ attribute-sets: enum: udp-tunnel-type - name: tunnel-udp-table + attr-cnt-name: __ethtool-a-tunnel-udp-table-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: size type: u32 @@ -787,14 +1045,24 @@ attribute-sets: nested-attributes: tunnel-udp-entry - name: tunnel-udp + attr-cnt-name: __ethtool-a-tunnel-udp-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: table type: nest nested-attributes: tunnel-udp-table - name: tunnel-info + attr-cnt-name: __ethtool-a-tunnel-info-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -805,7 +1073,12 @@ attribute-sets: nested-attributes: tunnel-udp - name: fec-stat + attr-cnt-name: __ethtool-a-fec-stat-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pad type: pad @@ -823,7 +1096,12 @@ attribute-sets: sub-type: u64 - name: fec + attr-cnt-name: __ethtool-a-fec-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -844,7 +1122,12 @@ attribute-sets: nested-attributes: 
fec-stat - name: module-eeprom + attr-cnt-name: __ethtool-a-module-eeprom-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -869,7 +1152,12 @@ attribute-sets: type: binary - name: stats-grp + attr-cnt-name: __ethtool-a-stats-grp-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pad type: pad @@ -912,7 +1200,12 @@ attribute-sets: name: hist-val - name: stats + attr-cnt-name: __ethtool-a-stats-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: pad type: pad @@ -933,7 +1226,12 @@ attribute-sets: type: u32 - name: phc-vclocks + attr-cnt-name: __ethtool-a-phc-vclocks-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -947,7 +1245,12 @@ attribute-sets: sub-type: s32 - name: module + attr-cnt-name: __ethtool-a-module-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -960,7 +1263,13 @@ attribute-sets: type: u8 - name: c33-pse-pw-limit + attr-cnt-name: __ethtool-a-c33-pse-pw-limit-cnt + attr-max-name: __ethtool-a-c33-pse-pw-limit-max attributes: + - + name: unspec + type: unused + value: 0 - name: min type: u32 @@ -969,7 +1278,12 @@ attribute-sets: type: u32 - name: pse + attr-cnt-name: __ethtool-a-pse-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -1027,7 +1341,12 @@ attribute-sets: nested-attributes: c33-pse-pw-limit - name: rss + attr-cnt-name: __ethtool-a-rss-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -1053,7 +1372,12 @@ attribute-sets: type: u32 - name: plca + attr-cnt-name: __ethtool-a-plca-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -1084,7 +1408,12 @@ attribute-sets: type: u32 - name: module-fw-flash + attr-cnt-name: __ethtool-a-module-fw-flash-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest @@ -1110,7 +1439,12 @@ attribute-sets: type: uint 
- name: phy + attr-cnt-name: __ethtool-a-phy-cnt attributes: + - + name: unspec + type: unused + value: 0 - name: header type: nest From 001b0b59efbbdf54126c2ae512009d4a7c9f9f88 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:46 -0800 Subject: [PATCH 0148/1386] ynl: include uapi header after all dependencies Essentially reverse the order of headers for userspace generated files. Before (make -C tools/net/ynl/; cat tools/net/ynl/ethtool-user.h): #include <linux/ethtool_netlink_generated.h> #include <linux/ethtool.h> #include <linux/ethtool.h> #include <linux/ethtool.h> After: #include <linux/ethtool.h> #include <linux/ethtool_netlink_generated.h> While at it, make sure we track which headers we've already included and include the headers only once. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-6-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 2bf4d992e54a..8098bcbb6f40 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2782,12 +2782,17 @@ def main(): else: cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') - headers = [parsed.uapi_header] + headers = [] for definition in parsed['definitions']: if 'header' in definition: headers.append(definition['header']) + if args.mode == 'user': + headers.append(parsed.uapi_header) + seen_header = [] for one in headers: - cw.p(f"#include <{one}>") + if one not in seen_header: + cw.p(f"#include <{one}>") + seen_header.append(one) cw.nl() if args.mode == "user": From 49922401c2190713c5cc03902dc68c3ecd3f13e8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:47 -0800 Subject: [PATCH 0149/1386] ethtool: separate definitions that are gonna be generated Reshuffle definitions that are gonna be generated into ethtool_netlink_generated.h and match ynl spec order. This should make it easier to compare the output of the ynl-gen-c to the existing uapi header. No functional changes.
Things that are still remaining to be manually defined: - ETHTOOL_FLAG_ALL - probably no good way to add to spec? - some of the cable test bits (not sure whether it's possible to move to spec) - some of the stats definitions (no way currently to move to spec) Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- include/uapi/linux/ethtool_netlink.h | 893 +---------------- .../uapi/linux/ethtool_netlink_generated.h | 899 ++++++++++++++++++ 3 files changed, 901 insertions(+), 893 deletions(-) create mode 100644 include/uapi/linux/ethtool_netlink_generated.h diff --git a/MAINTAINERS b/MAINTAINERS index 686109008d8e..79756f2100e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16280,7 +16280,7 @@ F: include/linux/inetdevice.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h -F: include/uapi/linux/ethtool_netlink.h +F: include/uapi/linux/ethtool_netlink* F: include/uapi/linux/if_* F: include/uapi/linux/net_shaper.h F: include/uapi/linux/netdev* diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 283305f6b063..9c909ce733a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -10,545 +10,12 @@ #define _UAPI_LINUX_ETHTOOL_NETLINK_H_ #include - -/* message types - userspace to kernel */ -enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, - ETHTOOL_MSG_LINKINFO_GET, - ETHTOOL_MSG_LINKINFO_SET, - ETHTOOL_MSG_LINKMODES_GET, - ETHTOOL_MSG_LINKMODES_SET, - ETHTOOL_MSG_LINKSTATE_GET, - ETHTOOL_MSG_DEBUG_GET, - ETHTOOL_MSG_DEBUG_SET, - ETHTOOL_MSG_WOL_GET, - ETHTOOL_MSG_WOL_SET, - ETHTOOL_MSG_FEATURES_GET, - ETHTOOL_MSG_FEATURES_SET, - ETHTOOL_MSG_PRIVFLAGS_GET, - ETHTOOL_MSG_PRIVFLAGS_SET, - ETHTOOL_MSG_RINGS_GET, - ETHTOOL_MSG_RINGS_SET, - ETHTOOL_MSG_CHANNELS_GET, - ETHTOOL_MSG_CHANNELS_SET, - ETHTOOL_MSG_COALESCE_GET, - 
ETHTOOL_MSG_COALESCE_SET, - ETHTOOL_MSG_PAUSE_GET, - ETHTOOL_MSG_PAUSE_SET, - ETHTOOL_MSG_EEE_GET, - ETHTOOL_MSG_EEE_SET, - ETHTOOL_MSG_TSINFO_GET, - ETHTOOL_MSG_CABLE_TEST_ACT, - ETHTOOL_MSG_CABLE_TEST_TDR_ACT, - ETHTOOL_MSG_TUNNEL_INFO_GET, - ETHTOOL_MSG_FEC_GET, - ETHTOOL_MSG_FEC_SET, - ETHTOOL_MSG_MODULE_EEPROM_GET, - ETHTOOL_MSG_STATS_GET, - ETHTOOL_MSG_PHC_VCLOCKS_GET, - ETHTOOL_MSG_MODULE_GET, - ETHTOOL_MSG_MODULE_SET, - ETHTOOL_MSG_PSE_GET, - ETHTOOL_MSG_PSE_SET, - ETHTOOL_MSG_RSS_GET, - ETHTOOL_MSG_PLCA_GET_CFG, - ETHTOOL_MSG_PLCA_SET_CFG, - ETHTOOL_MSG_PLCA_GET_STATUS, - ETHTOOL_MSG_MM_GET, - ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_MODULE_FW_FLASH_ACT, - ETHTOOL_MSG_PHY_GET, - - /* add new constants above here */ - __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 -}; - -/* message types - kernel to userspace */ -enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, - ETHTOOL_MSG_LINKINFO_GET_REPLY, - ETHTOOL_MSG_LINKINFO_NTF, - ETHTOOL_MSG_LINKMODES_GET_REPLY, - ETHTOOL_MSG_LINKMODES_NTF, - ETHTOOL_MSG_LINKSTATE_GET_REPLY, - ETHTOOL_MSG_DEBUG_GET_REPLY, - ETHTOOL_MSG_DEBUG_NTF, - ETHTOOL_MSG_WOL_GET_REPLY, - ETHTOOL_MSG_WOL_NTF, - ETHTOOL_MSG_FEATURES_GET_REPLY, - ETHTOOL_MSG_FEATURES_SET_REPLY, - ETHTOOL_MSG_FEATURES_NTF, - ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, - ETHTOOL_MSG_PRIVFLAGS_NTF, - ETHTOOL_MSG_RINGS_GET_REPLY, - ETHTOOL_MSG_RINGS_NTF, - ETHTOOL_MSG_CHANNELS_GET_REPLY, - ETHTOOL_MSG_CHANNELS_NTF, - ETHTOOL_MSG_COALESCE_GET_REPLY, - ETHTOOL_MSG_COALESCE_NTF, - ETHTOOL_MSG_PAUSE_GET_REPLY, - ETHTOOL_MSG_PAUSE_NTF, - ETHTOOL_MSG_EEE_GET_REPLY, - ETHTOOL_MSG_EEE_NTF, - ETHTOOL_MSG_TSINFO_GET_REPLY, - ETHTOOL_MSG_CABLE_TEST_NTF, - ETHTOOL_MSG_CABLE_TEST_TDR_NTF, - ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, - ETHTOOL_MSG_FEC_GET_REPLY, - ETHTOOL_MSG_FEC_NTF, - ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, - ETHTOOL_MSG_STATS_GET_REPLY, - ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, - ETHTOOL_MSG_MODULE_GET_REPLY, - ETHTOOL_MSG_MODULE_NTF, - 
ETHTOOL_MSG_PSE_GET_REPLY, - ETHTOOL_MSG_RSS_GET_REPLY, - ETHTOOL_MSG_PLCA_GET_CFG_REPLY, - ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, - ETHTOOL_MSG_PLCA_NTF, - ETHTOOL_MSG_MM_GET_REPLY, - ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_MODULE_FW_FLASH_NTF, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_MSG_PHY_NTF, - - /* add new constants above here */ - __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 -}; - -/* request header */ - -enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ -}; +#include <linux/ethtool_netlink_generated.h> #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ ETHTOOL_FLAG_OMIT_REPLY | \ ETHTOOL_FLAG_STATS) -enum { - ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 -}; - -/* bit sets */ - -enum { - ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ - - /* add new constants above here
*/ - __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 -}; - -/* string sets */ - -enum { - ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 -}; - -/* STRSET */ - -enum { - ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 -}; - -/* LINKINFO */ - -enum { - ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 -}; - -/* LINKMODES */ - -enum { - ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - 
ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 -}; - -/* LINKSTATE */ - -enum { - ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 -}; - -/* DEBUG */ - -enum { - ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 -}; - -/* WOL */ - -enum { - ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ - - /* add new constants above here */ - __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 -}; - -/* FEATURES */ - -enum { - ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 -}; - -/* PRIVFLAGS */ - -enum { - 
ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 -}; - -/* RINGS */ - -enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, - ETHTOOL_TCP_DATA_SPLIT_DISABLED, - ETHTOOL_TCP_DATA_SPLIT_ENABLED, -}; - -enum { - ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_RINGS_CNT, - ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) -}; - -/* CHANNELS */ - -enum { - ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_CHANNELS_CNT, - ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) -}; - -/* COALESCE */ - -enum { - ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ 
- ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_TX_PROFILE, - - /* add new constants above here */ - __ETHTOOL_A_COALESCE_CNT, - ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) -}; - -enum { - ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ - ETHTOOL_A_PROFILE_IRQ_MODERATION, - __ETHTOOL_A_PROFILE_CNT, - ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) -}; - -enum { - ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ - - __ETHTOOL_A_IRQ_MODERATION_CNT, - ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) -}; - -/* PAUSE */ - -enum { - ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - 
_A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PAUSE_CNT, - ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) -}; - -enum { - ETHTOOL_A_PAUSE_STAT_UNSPEC, - ETHTOOL_A_PAUSE_STAT_PAD, - - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! - */ - __ETHTOOL_A_PAUSE_STAT_CNT, - ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) -}; - -/* EEE */ - -enum { - ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_EEE_CNT, - ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) -}; - -/* TSINFO */ - -enum { - ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ - - /* add new constants above here */ - __ETHTOOL_A_TSINFO_CNT, - ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) -}; - -enum { - ETHTOOL_A_TS_STAT_UNSPEC, - - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_TS_STAT_CNT, - ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - -}; - -/* PHC VCLOCKS */ - -enum { - ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - 
ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ - - /* add new constants above here */ - __ETHTOOL_A_PHC_VCLOCKS_CNT, - ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) -}; - -/* CABLE TEST */ - -enum { - ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 -}; - /* CABLE TEST NOTIFY */ enum { ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC, @@ -582,74 +49,12 @@ enum { ETHTOOL_A_CABLE_INF_SRC_ALCD, }; -enum { - ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_RESULT_CNT, - ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, - ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) -}; - enum { ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC, ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED, ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED }; -enum { - ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ - __ETHTOOL_A_CABLE_NEST_CNT, - ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ - - __ETHTOOL_A_CABLE_TEST_NTF_CNT, - ETHTOOL_A_CABLE_TEST_NTF_MAX = 
(__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) -}; - -/* CABLE TEST TDR */ - -enum { - ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 -}; - -enum { - ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 -}; - /* CABLE TEST TDR NOTIFY */ enum { @@ -689,132 +94,6 @@ enum { ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1) }; -enum { - ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 -}; - -/* TUNNEL INFO */ - -enum { - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, - ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - - __ETHTOOL_UDP_TUNNEL_TYPE_CNT -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, - ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new 
constants above here */ - __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, - ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_CNT, - ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_INFO_CNT, - ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) -}; - -/* FEC */ - -enum { - ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ - - __ETHTOOL_A_FEC_CNT, - ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) -}; - -enum { - ETHTOOL_A_FEC_STAT_UNSPEC, - ETHTOOL_A_FEC_STAT_PAD, - - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ - __ETHTOOL_A_FEC_STAT_CNT, - ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) -}; - -/* MODULE EEPROM */ - -enum { - ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ - - __ETHTOOL_A_MODULE_EEPROM_CNT, - ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -}; - -/* STATS */ - -enum { - ETHTOOL_A_STATS_UNSPEC, - ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ - - 
ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_CNT, - ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) -}; - enum { ETHTOOL_STATS_ETH_PHY, ETHTOOL_STATS_ETH_MAC, @@ -825,27 +104,6 @@ enum { __ETHTOOL_STATS_CNT }; -enum { - ETHTOOL_A_STATS_GRP_UNSPEC, - ETHTOOL_A_STATS_GRP_PAD, - - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_GRP_CNT, - ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) -}; - enum { /* 30.3.2.1.5 aSymbolErrorDuringCarrier */ ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR, @@ -935,155 +193,6 @@ enum { ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1) }; -/* MODULE */ - -enum { - ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_CNT, - ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) -}; - -/* Power Sourcing Equipment */ -enum { - ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ -}; - -enum { - ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - 
ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_PSE_CNT, - ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) -}; - -enum { - ETHTOOL_A_RSS_UNSPEC, - ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ - - __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -}; - -/* PLCA */ - -enum { - ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PLCA_CNT, - ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) -}; - -/* MAC Merge (802.3) */ - -enum { - ETHTOOL_A_MM_STAT_UNSPEC, - ETHTOOL_A_MM_STAT_PAD, - - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_MM_STAT_CNT, - ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) -}; - -enum { - ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - 
ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ - - /* add new constants above here */ - __ETHTOOL_A_MM_CNT, - ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) -}; - -/* MODULE_FW_FLASH */ - -enum { - ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_FW_FLASH_CNT, - ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) -}; - -enum { - ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_CNT, - ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) -}; /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h new file mode 100644 index 000000000000..4b4bf17d1a88 --- /dev/null +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H + +/* TUNNEL INFO */ + +enum { + 
ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +/* request header */ + +enum ethtool_header_flags { + ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ + ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* omit optional reply for SET or ACT requests */ + ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ +}; + +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + +enum { + ETHTOOL_A_HEADER_UNSPEC, + ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_NAME, /* string */ + ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_HEADER_CNT, + ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 +}; + +/* bit sets */ + +enum { + ETHTOOL_A_BITSET_BIT_UNSPEC, + ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ + ETHTOOL_A_BITSET_BIT_NAME, /* string */ + ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BIT_CNT, + ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_BITS_UNSPEC, + ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BITS_CNT, + ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_UNSPEC, + ETHTOOL_A_BITSET_NOMASK, /* flag */ + ETHTOOL_A_BITSET_SIZE, /* u32 */ + ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ + ETHTOOL_A_BITSET_VALUE, /* binary */ + ETHTOOL_A_BITSET_MASK, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_CNT, + ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 +}; + +/* string sets */ + +enum { + ETHTOOL_A_STRING_UNSPEC, + ETHTOOL_A_STRING_INDEX, /* u32 */ + ETHTOOL_A_STRING_VALUE, /* string */ + + /* add new constants above here */ +
__ETHTOOL_A_STRING_CNT, + ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGS_UNSPEC, + ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRING_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGS_CNT, + ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSET_UNSPEC, + ETHTOOL_A_STRINGSET_ID, /* u32 */ + ETHTOOL_A_STRINGSET_COUNT, /* u32 */ + ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSET_CNT, + ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSETS_UNSPEC, + ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSETS_CNT, + ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 +}; + +/* STRSET */ + +enum { + ETHTOOL_A_STRSET_UNSPEC, + ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ + ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_STRSET_CNT, + ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 +}; + +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8
*/ + ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + +/* MAC Merge (802.3) */ + +enum { + ETHTOOL_A_MM_STAT_UNSPEC, + ETHTOOL_A_MM_STAT_PAD, + + /* aMACMergeFrameAssErrorCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ + /* aMACMergeFrameSmdErrorCount */ + ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ + /* aMACMergeFrameAssOkCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ + /* aMACMergeFragCountRx */ + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ + /* aMACMergeFragCountTx */ + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ + /* aMACMergeHoldCount */ + ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_MM_STAT_CNT, + ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_MM_UNSPEC, + ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ + ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ + ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ + + /* add new constants above here */ + __ETHTOOL_A_MM_CNT, + ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) +}; + +/* LINKINFO */ + +enum { + ETHTOOL_A_LINKINFO_UNSPEC, + ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKINFO_PORT, /* u8 */ + ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ + ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKINFO_CNT, + ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 +}; + +/* LINKMODES */ + +enum { + 
ETHTOOL_A_LINKMODES_UNSPEC, + ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ + ETHTOOL_A_LINKMODES_OURS, /* bitset */ + ETHTOOL_A_LINKMODES_PEER, /* bitset */ + ETHTOOL_A_LINKMODES_SPEED, /* u32 */ + ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ + ETHTOOL_A_LINKMODES_LANES, /* u32 */ + ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKMODES_CNT, + ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 +}; + +/* LINKSTATE */ + +enum { + ETHTOOL_A_LINKSTATE_UNSPEC, + ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKSTATE_LINK, /* u8 */ + ETHTOOL_A_LINKSTATE_SQI, /* u32 */ + ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ + ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKSTATE_CNT, + ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 +}; + +/* DEBUG */ + +enum { + ETHTOOL_A_DEBUG_UNSPEC, + ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_DEBUG_CNT, + ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 +}; + +/* WOL */ + +enum { + ETHTOOL_A_WOL_UNSPEC, + ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_WOL_MODES, /* bitset */ + ETHTOOL_A_WOL_SOPASS, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_WOL_CNT, + ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 +}; + +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX 
= __ETHTOOL_A_FEATURES_CNT - 1 +}; + +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +/* COALESCE */ + +enum { + ETHTOOL_A_COALESCE_UNSPEC, + ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ + ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ + 
ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, + + /* add new constants above here */ + __ETHTOOL_A_COALESCE_CNT, + ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) +}; + +/* PAUSE */ + +enum { + ETHTOOL_A_PAUSE_STAT_UNSPEC, + ETHTOOL_A_PAUSE_STAT_PAD, + + ETHTOOL_A_PAUSE_STAT_TX_FRAMES, + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! 
+ */ + __ETHTOOL_A_PAUSE_STAT_CNT, + ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_PAUSE_UNSPEC, + ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ + ETHTOOL_A_PAUSE_RX, /* u8 */ + ETHTOOL_A_PAUSE_TX, /* u8 */ + ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ + ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PAUSE_CNT, + ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) +}; + +/* EEE */ + +enum { + ETHTOOL_A_EEE_UNSPEC, + ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_EEE_MODES_OURS, /* bitset */ + ETHTOOL_A_EEE_MODES_PEER, /* bitset */ + ETHTOOL_A_EEE_ACTIVE, /* u8 */ + ETHTOOL_A_EEE_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_EEE_CNT, + ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) +}; + +/* TSINFO */ + +enum { + ETHTOOL_A_TS_STAT_UNSPEC, + + ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ + ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ + ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_TS_STAT_CNT, + ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) + +}; + +enum { + ETHTOOL_A_TSINFO_UNSPEC, + ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ + ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ + ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ + ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ + ETHTOOL_A_TSINFO_STATS, /* nest - _A_TS_STAT_* */ + + /* add new constants above here */ + __ETHTOOL_A_TSINFO_CNT, + ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_RESULT_UNSPEC, + ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_RESULT_CNT, + ETHTOOL_A_CABLE_RESULT_MAX =
(__ETHTOOL_A_CABLE_RESULT_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, + ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_NEST_UNSPEC, + ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + __ETHTOOL_A_CABLE_NEST_CNT, + ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) +}; + +/* CABLE TEST */ + +enum { + ETHTOOL_A_CABLE_TEST_UNSPEC, + ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_CNT, + ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + + __ETHTOOL_A_CABLE_TEST_NTF_CNT, + ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) +}; + +/* CABLE TEST TDR */ + +enum { + ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + + /* add new constants above here */ + 
__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CNT, + ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + +/* FEC */ + +enum { + ETHTOOL_A_FEC_STAT_UNSPEC, + ETHTOOL_A_FEC_STAT_PAD, + + ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ + ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ + ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ + + /* add new constants above here */ + __ETHTOOL_A_FEC_STAT_CNT, + ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_FEC_UNSPEC, + 
ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEC_MODES, /* bitset */ + ETHTOOL_A_FEC_AUTO, /* u8 */ + ETHTOOL_A_FEC_ACTIVE, /* u32 */ + ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + + __ETHTOOL_A_FEC_CNT, + ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) +}; + +/* MODULE EEPROM */ + +enum { + ETHTOOL_A_MODULE_EEPROM_UNSPEC, + ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + + __ETHTOOL_A_MODULE_EEPROM_CNT, + ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) +}; + + +enum { + ETHTOOL_A_STATS_GRP_UNSPEC, + ETHTOOL_A_STATS_GRP_PAD, + + ETHTOOL_A_STATS_GRP_ID, /* u32 */ + ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ + + ETHTOOL_A_STATS_GRP_STAT, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ + ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_GRP_CNT, + ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) +}; + +/* STATS */ + +enum { + ETHTOOL_A_STATS_UNSPEC, + ETHTOOL_A_STATS_PAD, + ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STATS_GROUPS, /* bitset */ + + ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ + + ETHTOOL_A_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_CNT, + ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) +}; + +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = 
(__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + +/* MODULE */ + +enum { + ETHTOOL_A_MODULE_UNSPEC, + ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ + ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_CNT, + ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) +}; + +/* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + +enum { + ETHTOOL_A_PSE_UNSPEC, + ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_PSE_CNT, + ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) +}; + +enum { + ETHTOOL_A_RSS_UNSPEC, + ETHTOOL_A_RSS_HEADER, + ETHTOOL_A_RSS_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_HFUNC, /* u32 */ + ETHTOOL_A_RSS_INDIR, /* binary */ + ETHTOOL_A_RSS_HKEY, /* binary */ + ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ + ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + + __ETHTOOL_A_RSS_CNT, + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), +}; + +/* PLCA */ + +enum { + ETHTOOL_A_PLCA_UNSPEC, + ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PLCA_VERSION, /* u16 */ + ETHTOOL_A_PLCA_ENABLED, /* u8 */ + ETHTOOL_A_PLCA_STATUS, /* u8 */ + ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ + ETHTOOL_A_PLCA_NODE_ID, /* u32 */ + ETHTOOL_A_PLCA_TO_TMR, /* u32 */ + ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ + ETHTOOL_A_PLCA_BURST_TMR, 
/* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PLCA_CNT, + ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) +}; + +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + +/* message types - userspace to kernel */ +enum { + ETHTOOL_MSG_USER_NONE, + ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_LINKINFO_GET, + ETHTOOL_MSG_LINKINFO_SET, + ETHTOOL_MSG_LINKMODES_GET, + ETHTOOL_MSG_LINKMODES_SET, + ETHTOOL_MSG_LINKSTATE_GET, + ETHTOOL_MSG_DEBUG_GET, + ETHTOOL_MSG_DEBUG_SET, + ETHTOOL_MSG_WOL_GET, + ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, + ETHTOOL_MSG_COALESCE_GET, + ETHTOOL_MSG_COALESCE_SET, + ETHTOOL_MSG_PAUSE_GET, + ETHTOOL_MSG_PAUSE_SET, + ETHTOOL_MSG_EEE_GET, + ETHTOOL_MSG_EEE_SET, + ETHTOOL_MSG_TSINFO_GET, + ETHTOOL_MSG_CABLE_TEST_ACT, + ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + 
ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_FEC_GET, + ETHTOOL_MSG_FEC_SET, + ETHTOOL_MSG_MODULE_EEPROM_GET, + ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, + ETHTOOL_MSG_MODULE_GET, + ETHTOOL_MSG_MODULE_SET, + ETHTOOL_MSG_PSE_GET, + ETHTOOL_MSG_PSE_SET, + ETHTOOL_MSG_RSS_GET, + ETHTOOL_MSG_PLCA_GET_CFG, + ETHTOOL_MSG_PLCA_SET_CFG, + ETHTOOL_MSG_PLCA_GET_STATUS, + ETHTOOL_MSG_MM_GET, + ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + ETHTOOL_MSG_PHY_GET, + + /* add new constants above here */ + __ETHTOOL_MSG_USER_CNT, + ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 +}; + +/* message types - kernel to userspace */ +enum { + ETHTOOL_MSG_KERNEL_NONE, + ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_LINKINFO_GET_REPLY, + ETHTOOL_MSG_LINKINFO_NTF, + ETHTOOL_MSG_LINKMODES_GET_REPLY, + ETHTOOL_MSG_LINKMODES_NTF, + ETHTOOL_MSG_LINKSTATE_GET_REPLY, + ETHTOOL_MSG_DEBUG_GET_REPLY, + ETHTOOL_MSG_DEBUG_NTF, + ETHTOOL_MSG_WOL_GET_REPLY, + ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, + ETHTOOL_MSG_COALESCE_GET_REPLY, + ETHTOOL_MSG_COALESCE_NTF, + ETHTOOL_MSG_PAUSE_GET_REPLY, + ETHTOOL_MSG_PAUSE_NTF, + ETHTOOL_MSG_EEE_GET_REPLY, + ETHTOOL_MSG_EEE_NTF, + ETHTOOL_MSG_TSINFO_GET_REPLY, + ETHTOOL_MSG_CABLE_TEST_NTF, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, + ETHTOOL_MSG_FEC_GET_REPLY, + ETHTOOL_MSG_FEC_NTF, + ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + ETHTOOL_MSG_MODULE_GET_REPLY, + ETHTOOL_MSG_MODULE_NTF, + ETHTOOL_MSG_PSE_GET_REPLY, + ETHTOOL_MSG_RSS_GET_REPLY, + ETHTOOL_MSG_PLCA_GET_CFG_REPLY, + ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, + ETHTOOL_MSG_PLCA_NTF, + ETHTOOL_MSG_MM_GET_REPLY, + ETHTOOL_MSG_MM_NTF, + 
ETHTOOL_MSG_MODULE_FW_FLASH_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, + + /* add new constants above here */ + __ETHTOOL_MSG_KERNEL_CNT, + ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 +}; + +#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ From dd7cde36de15b071b5f9163d21d7c9142089b424 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:48 -0800 Subject: [PATCH 0150/1386] ethtool: remove the comments that are not gonna be generated Cleanup the header manually to make it easier to review the changes that ynl generator brings in. No functional changes. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- .../uapi/linux/ethtool_netlink_generated.h | 652 +++++++----------- 1 file changed, 261 insertions(+), 391 deletions(-) diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 4b4bf17d1a88..35a24d490efe 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -2,8 +2,6 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -/* TUNNEL INFO */ - enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, @@ -12,8 +10,6 @@ enum { __ETHTOOL_UDP_TUNNEL_TYPE_CNT }; -/* request header */ - enum ethtool_header_flags { ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ @@ -28,303 +24,250 @@ enum { enum { ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_INDEX, + ETHTOOL_A_HEADER_DEV_NAME, + ETHTOOL_A_HEADER_FLAGS, + ETHTOOL_A_HEADER_PHY_INDEX, - /* add new constants above here */ 
__ETHTOOL_A_HEADER_CNT, ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 }; -/* bit sets */ - enum { ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + ETHTOOL_A_BITSET_BIT_INDEX, + ETHTOOL_A_BITSET_BIT_NAME, + ETHTOOL_A_BITSET_BIT_VALUE, - /* add new constants above here */ __ETHTOOL_A_BITSET_BIT_CNT, ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 }; enum { ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + ETHTOOL_A_BITSET_BITS_BIT, - /* add new constants above here */ __ETHTOOL_A_BITSET_BITS_CNT, ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 }; enum { ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ + ETHTOOL_A_BITSET_NOMASK, + ETHTOOL_A_BITSET_SIZE, + ETHTOOL_A_BITSET_BITS, + ETHTOOL_A_BITSET_VALUE, + ETHTOOL_A_BITSET_MASK, - /* add new constants above here */ __ETHTOOL_A_BITSET_CNT, ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 }; -/* string sets */ - enum { ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ + ETHTOOL_A_STRING_INDEX, + ETHTOOL_A_STRING_VALUE, - /* add new constants above here */ __ETHTOOL_A_STRING_CNT, ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 }; enum { ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGS_STRING, - /* add new constants above here */ __ETHTOOL_A_STRINGS_CNT, ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 }; enum { ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGSET_ID, + ETHTOOL_A_STRINGSET_COUNT, + ETHTOOL_A_STRINGSET_STRINGS, - /* add new 
constants above here */ __ETHTOOL_A_STRINGSET_CNT, ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 }; enum { ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + ETHTOOL_A_STRINGSETS_STRINGSET, - /* add new constants above here */ __ETHTOOL_A_STRINGSETS_CNT, ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 }; -/* STRSET */ - enum { ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + ETHTOOL_A_STRSET_HEADER, + ETHTOOL_A_STRSET_STRINGSETS, + ETHTOOL_A_STRSET_COUNTS_ONLY, - /* add new constants above here */ __ETHTOOL_A_STRSET_CNT, ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 }; -/* PRIVFLAGS */ - enum { ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + ETHTOOL_A_PRIVFLAGS_HEADER, + ETHTOOL_A_PRIVFLAGS_FLAGS, - /* add new constants above here */ __ETHTOOL_A_PRIVFLAGS_CNT, ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 }; -/* RINGS */ - enum { ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ + ETHTOOL_A_RINGS_HEADER, + ETHTOOL_A_RINGS_RX_MAX, + ETHTOOL_A_RINGS_RX_MINI_MAX, + ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ETHTOOL_A_RINGS_TX_MAX, + ETHTOOL_A_RINGS_RX, + ETHTOOL_A_RINGS_RX_MINI, + 
ETHTOOL_A_RINGS_RX_JUMBO, + ETHTOOL_A_RINGS_TX, + ETHTOOL_A_RINGS_RX_BUF_LEN, + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, + ETHTOOL_A_RINGS_CQE_SIZE, + ETHTOOL_A_RINGS_TX_PUSH, + ETHTOOL_A_RINGS_RX_PUSH, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, - /* add new constants above here */ __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) }; -/* MAC Merge (802.3) */ - enum { ETHTOOL_A_MM_STAT_UNSPEC, ETHTOOL_A_MM_STAT_PAD, + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, + ETHTOOL_A_MM_STAT_SMD_ERRORS, + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_HOLD_COUNT, - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_MM_STAT_CNT, ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) }; enum { ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ + ETHTOOL_A_MM_HEADER, + ETHTOOL_A_MM_PMAC_ENABLED, + ETHTOOL_A_MM_TX_ENABLED, + ETHTOOL_A_MM_TX_ACTIVE, + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_VERIFY_ENABLED, + ETHTOOL_A_MM_VERIFY_STATUS, + ETHTOOL_A_MM_VERIFY_TIME, + 
ETHTOOL_A_MM_MAX_VERIFY_TIME, + ETHTOOL_A_MM_STATS, - /* add new constants above here */ __ETHTOOL_A_MM_CNT, ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -/* LINKINFO */ - enum { ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ + ETHTOOL_A_LINKINFO_HEADER, + ETHTOOL_A_LINKINFO_PORT, + ETHTOOL_A_LINKINFO_PHYADDR, + ETHTOOL_A_LINKINFO_TP_MDIX, + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, + ETHTOOL_A_LINKINFO_TRANSCEIVER, - /* add new constants above here */ __ETHTOOL_A_LINKINFO_CNT, ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 }; -/* LINKMODES */ - enum { ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ + ETHTOOL_A_LINKMODES_HEADER, + ETHTOOL_A_LINKMODES_AUTONEG, + ETHTOOL_A_LINKMODES_OURS, + ETHTOOL_A_LINKMODES_PEER, + ETHTOOL_A_LINKMODES_SPEED, + ETHTOOL_A_LINKMODES_DUPLEX, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, + ETHTOOL_A_LINKMODES_LANES, + ETHTOOL_A_LINKMODES_RATE_MATCHING, - /* add new constants above here */ __ETHTOOL_A_LINKMODES_CNT, ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 }; -/* LINKSTATE */ - enum { ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - 
ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ + ETHTOOL_A_LINKSTATE_HEADER, + ETHTOOL_A_LINKSTATE_LINK, + ETHTOOL_A_LINKSTATE_SQI, + ETHTOOL_A_LINKSTATE_SQI_MAX, + ETHTOOL_A_LINKSTATE_EXT_STATE, + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, - /* add new constants above here */ __ETHTOOL_A_LINKSTATE_CNT, ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 }; -/* DEBUG */ - enum { ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + ETHTOOL_A_DEBUG_HEADER, + ETHTOOL_A_DEBUG_MSGMASK, - /* add new constants above here */ __ETHTOOL_A_DEBUG_CNT, ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 }; -/* WOL */ - enum { ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ + ETHTOOL_A_WOL_HEADER, + ETHTOOL_A_WOL_MODES, + ETHTOOL_A_WOL_SOPASS, - /* add new constants above here */ __ETHTOOL_A_WOL_CNT, ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; -/* FEATURES */ - enum { ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + ETHTOOL_A_FEATURES_HEADER, + ETHTOOL_A_FEATURES_HW, + ETHTOOL_A_FEATURES_WANTED, + ETHTOOL_A_FEATURES_ACTIVE, + ETHTOOL_A_FEATURES_NOCHANGE, - /* add new constants above here */ __ETHTOOL_A_FEATURES_CNT, ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 }; -/* CHANNELS */ - enum { ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* 
u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_HEADER, + ETHTOOL_A_CHANNELS_RX_MAX, + ETHTOOL_A_CHANNELS_TX_MAX, + ETHTOOL_A_CHANNELS_OTHER_MAX, + ETHTOOL_A_CHANNELS_COMBINED_MAX, + ETHTOOL_A_CHANNELS_RX_COUNT, + ETHTOOL_A_CHANNELS_TX_COUNT, + ETHTOOL_A_CHANNELS_OTHER_COUNT, + ETHTOOL_A_CHANNELS_COMBINED_COUNT, - /* add new constants above here */ __ETHTOOL_A_CHANNELS_CNT, ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) }; enum { ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_USEC, + ETHTOOL_A_IRQ_MODERATION_PKTS, + ETHTOOL_A_IRQ_MODERATION_COMPS, __ETHTOOL_A_IRQ_MODERATION_CNT, ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) @@ -332,111 +275,91 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ ETHTOOL_A_PROFILE_IRQ_MODERATION, __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; -/* COALESCE */ - enum { ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - 
ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_HEADER, + ETHTOOL_A_COALESCE_RX_USECS, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, + ETHTOOL_A_COALESCE_RX_USECS_IRQ, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_TX_USECS, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_USECS_IRQ, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, + ETHTOOL_A_COALESCE_PKT_RATE_LOW, + ETHTOOL_A_COALESCE_RX_USECS_LOW, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_TX_USECS_LOW, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, + ETHTOOL_A_COALESCE_RX_USECS_HIGH, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_TX_USECS_HIGH, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ ETHTOOL_A_COALESCE_TX_PROFILE, - /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; -/* PAUSE */ - enum { ETHTOOL_A_PAUSE_STAT_UNSPEC, ETHTOOL_A_PAUSE_STAT_PAD, - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! 
- */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; enum { ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ + ETHTOOL_A_PAUSE_HEADER, + ETHTOOL_A_PAUSE_AUTONEG, + ETHTOOL_A_PAUSE_RX, + ETHTOOL_A_PAUSE_TX, + ETHTOOL_A_PAUSE_STATS, + ETHTOOL_A_PAUSE_STATS_SRC, - /* add new constants above here */ __ETHTOOL_A_PAUSE_CNT, ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) }; -/* EEE */ - enum { ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ + ETHTOOL_A_EEE_HEADER, + ETHTOOL_A_EEE_MODES_OURS, + ETHTOOL_A_EEE_MODES_PEER, + ETHTOOL_A_EEE_ACTIVE, + ETHTOOL_A_EEE_ENABLED, + ETHTOOL_A_EEE_TX_LPI_ENABLED, + ETHTOOL_A_EEE_TX_LPI_TIMER, - /* add new constants above here */ __ETHTOOL_A_EEE_CNT, ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) }; -/* TSINFO */ - enum { ETHTOOL_A_TS_STAT_UNSPEC, + ETHTOOL_A_TS_STAT_TX_PKTS, + ETHTOOL_A_TS_STAT_TX_LOST, + ETHTOOL_A_TS_STAT_TX_ERR, - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) @@ -444,23 +367,22 @@ enum { enum { ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ + ETHTOOL_A_TSINFO_HEADER, + 
ETHTOOL_A_TSINFO_TIMESTAMPING, + ETHTOOL_A_TSINFO_TX_TYPES, + ETHTOOL_A_TSINFO_RX_FILTERS, + ETHTOOL_A_TSINFO_PHC_INDEX, + ETHTOOL_A_TSINFO_STATS, - /* add new constants above here */ __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; enum { ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_RESULT_PAIR, + ETHTOOL_A_CABLE_RESULT_CODE, + ETHTOOL_A_CABLE_RESULT_SRC, __ETHTOOL_A_CABLE_RESULT_CNT, ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) @@ -468,9 +390,9 @@ enum { enum { ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) @@ -478,245 +400,204 @@ enum { enum { ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + ETHTOOL_A_CABLE_NEST_RESULT, + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, + __ETHTOOL_A_CABLE_NEST_CNT, ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) }; -/* CABLE TEST */ - enum { ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_HEADER, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_CNT, ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - 
ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_NTF_NEST, __ETHTOOL_A_CABLE_TEST_NTF_CNT, ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) }; -/* CABLE TEST TDR */ - enum { ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + ETHTOOL_A_CABLE_TEST_TDR_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_CFG, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CNT, ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 }; enum { ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = 
(__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE, - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_CNT, ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TUNNEL_INFO_HEADER, + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_INFO_CNT, ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; -/* FEC */ - enum { ETHTOOL_A_FEC_STAT_UNSPEC, ETHTOOL_A_FEC_STAT_PAD, + ETHTOOL_A_FEC_STAT_CORRECTED, + ETHTOOL_A_FEC_STAT_UNCORR, + ETHTOOL_A_FEC_STAT_CORR_BITS, - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ __ETHTOOL_A_FEC_STAT_CNT, ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) }; enum { ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + ETHTOOL_A_FEC_HEADER, + ETHTOOL_A_FEC_MODES, + ETHTOOL_A_FEC_AUTO, + ETHTOOL_A_FEC_ACTIVE, + ETHTOOL_A_FEC_STATS, __ETHTOOL_A_FEC_CNT, ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) }; -/* MODULE EEPROM */ - enum { 
ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + ETHTOOL_A_MODULE_EEPROM_HEADER, + ETHTOOL_A_MODULE_EEPROM_OFFSET, + ETHTOOL_A_MODULE_EEPROM_LENGTH, + ETHTOOL_A_MODULE_EEPROM_PAGE, + ETHTOOL_A_MODULE_EEPROM_BANK, + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, + ETHTOOL_A_MODULE_EEPROM_DATA, __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) }; - enum { ETHTOOL_A_STATS_GRP_UNSPEC, ETHTOOL_A_STATS_GRP_PAD, + ETHTOOL_A_STATS_GRP_ID, + ETHTOOL_A_STATS_GRP_SS_ID, + ETHTOOL_A_STATS_GRP_STAT, + ETHTOOL_A_STATS_GRP_HIST_RX, + ETHTOOL_A_STATS_GRP_HIST_TX, + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, + ETHTOOL_A_STATS_GRP_HIST_VAL, - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_GRP_CNT, ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) }; -/* STATS */ - enum { ETHTOOL_A_STATS_UNSPEC, ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ + ETHTOOL_A_STATS_HEADER, + ETHTOOL_A_STATS_GROUPS, + ETHTOOL_A_STATS_GRP, + ETHTOOL_A_STATS_SRC, - ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_CNT, ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) }; -/* PHC VCLOCKS */ - enum { ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - 
ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + ETHTOOL_A_PHC_VCLOCKS_HEADER, + ETHTOOL_A_PHC_VCLOCKS_NUM, + ETHTOOL_A_PHC_VCLOCKS_INDEX, - /* add new constants above here */ __ETHTOOL_A_PHC_VCLOCKS_CNT, ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) }; -/* MODULE */ - enum { ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + ETHTOOL_A_MODULE_HEADER, + ETHTOOL_A_MODULE_POWER_MODE_POLICY, + ETHTOOL_A_MODULE_POWER_MODE, - /* add new constants above here */ __ETHTOOL_A_MODULE_CNT, ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) }; -/* Power Sourcing Equipment */ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, }; enum { ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ + ETHTOOL_A_PSE_HEADER, + ETHTOOL_A_PODL_PSE_ADMIN_STATE, + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, + ETHTOOL_A_PODL_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_ADMIN_STATE, + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, + ETHTOOL_A_C33_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_PW_CLASS, + ETHTOOL_A_C33_PSE_ACTUAL_PW, + ETHTOOL_A_C33_PSE_EXT_STATE, + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, + 
ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, - /* add new constants above here */ __ETHTOOL_A_PSE_CNT, ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; @@ -724,70 +605,62 @@ enum { enum { ETHTOOL_A_RSS_UNSPEC, ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_CONTEXT, + ETHTOOL_A_RSS_HFUNC, + ETHTOOL_A_RSS_INDIR, + ETHTOOL_A_RSS_HKEY, + ETHTOOL_A_RSS_INPUT_XFRM, + ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), }; -/* PLCA */ - enum { ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ + ETHTOOL_A_PLCA_HEADER, + ETHTOOL_A_PLCA_VERSION, + ETHTOOL_A_PLCA_ENABLED, + ETHTOOL_A_PLCA_STATUS, + ETHTOOL_A_PLCA_NODE_CNT, + ETHTOOL_A_PLCA_NODE_ID, + ETHTOOL_A_PLCA_TO_TMR, + ETHTOOL_A_PLCA_BURST_CNT, + ETHTOOL_A_PLCA_BURST_TMR, - /* add new constants above here */ __ETHTOOL_A_PLCA_CNT, ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) }; -/* MODULE_FW_FLASH */ - enum { ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_HEADER, + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, + ETHTOOL_A_MODULE_FW_FLASH_STATUS, + 
ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, + ETHTOOL_A_MODULE_FW_FLASH_DONE, + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, - /* add new constants above here */ __ETHTOOL_A_MODULE_FW_FLASH_CNT, ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) }; enum { ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_HEADER, + ETHTOOL_A_PHY_INDEX, + ETHTOOL_A_PHY_DRVNAME, + ETHTOOL_A_PHY_NAME, + ETHTOOL_A_PHY_UPSTREAM_TYPE, + ETHTOOL_A_PHY_UPSTREAM_INDEX, + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, - /* add new constants above here */ __ETHTOOL_A_PHY_CNT, ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; -/* message types - userspace to kernel */ enum { ETHTOOL_MSG_USER_NONE, ETHTOOL_MSG_STRSET_GET, @@ -836,12 +709,10 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, - /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 }; -/* message types - kernel to userspace */ enum { ETHTOOL_MSG_KERNEL_NONE, ETHTOOL_MSG_STRSET_GET_REPLY, @@ -891,7 +762,6 @@ enum { ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, - /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 }; From 8d0580c6ebdd27879c83483f53bc71e2e470f6fe Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:49 -0800 Subject: [PATCH 0151/1386] ethtool: regenerate uapi header from the spec No functional changes. 
Mostly the following formatting: - extra docs - extra enums - XXX_MAX = __XXX_CNT - 1 -> XXX_MAX = (__XXX_CNT - 1) - newlines Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-9-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- .../uapi/linux/ethtool_netlink_generated.h | 89 ++++++++++++------- 1 file changed, 56 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 35a24d490efe..b58f352fe4f2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -1,23 +1,43 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ethtool.yaml */ +/* YNL-GEN uapi header */ + #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define ETHTOOL_FAMILY_NAME "ethtool" +#define ETHTOOL_FAMILY_VERSION 1 + enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - __ETHTOOL_UDP_TUNNEL_TYPE_CNT + /* private: */ + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + ETHTOOL_UDP_TUNNEL_TYPE_MAX = (__ETHTOOL_UDP_TUNNEL_TYPE_CNT - 1) }; +/** + * enum ethtool_header_flags - common ethtool header flags + * @ETHTOOL_FLAG_COMPACT_BITSETS: use compact bitsets in reply + * @ETHTOOL_FLAG_OMIT_REPLY: provide optional reply for SET or ACT requests + * @ETHTOOL_FLAG_STATS: request statistics, if supported by the driver + */ enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ + ETHTOOL_FLAG_COMPACT_BITSETS = 1, + ETHTOOL_FLAG_OMIT_REPLY = 2, + ETHTOOL_FLAG_STATS = 4, }; enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + 
ETHTOOL_PHY_UPSTREAM_TYPE_MAC, + ETHTOOL_PHY_UPSTREAM_TYPE_PHY, +}; + +enum ethtool_tcp_data_split { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, ETHTOOL_TCP_DATA_SPLIT_ENABLED, }; @@ -30,7 +50,7 @@ enum { ETHTOOL_A_HEADER_PHY_INDEX, __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 + ETHTOOL_A_HEADER_MAX = (__ETHTOOL_A_HEADER_CNT - 1) }; enum { @@ -40,7 +60,7 @@ enum { ETHTOOL_A_BITSET_BIT_VALUE, __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 + ETHTOOL_A_BITSET_BIT_MAX = (__ETHTOOL_A_BITSET_BIT_CNT - 1) }; enum { @@ -48,7 +68,7 @@ enum { ETHTOOL_A_BITSET_BITS_BIT, __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 + ETHTOOL_A_BITSET_BITS_MAX = (__ETHTOOL_A_BITSET_BITS_CNT - 1) }; enum { @@ -60,7 +80,7 @@ enum { ETHTOOL_A_BITSET_MASK, __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 + ETHTOOL_A_BITSET_MAX = (__ETHTOOL_A_BITSET_CNT - 1) }; enum { @@ -69,7 +89,7 @@ enum { ETHTOOL_A_STRING_VALUE, __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 + ETHTOOL_A_STRING_MAX = (__ETHTOOL_A_STRING_CNT - 1) }; enum { @@ -77,7 +97,7 @@ enum { ETHTOOL_A_STRINGS_STRING, __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 + ETHTOOL_A_STRINGS_MAX = (__ETHTOOL_A_STRINGS_CNT - 1) }; enum { @@ -87,7 +107,7 @@ enum { ETHTOOL_A_STRINGSET_STRINGS, __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 + ETHTOOL_A_STRINGSET_MAX = (__ETHTOOL_A_STRINGSET_CNT - 1) }; enum { @@ -95,7 +115,7 @@ enum { ETHTOOL_A_STRINGSETS_STRINGSET, __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 + ETHTOOL_A_STRINGSETS_MAX = (__ETHTOOL_A_STRINGSETS_CNT - 1) }; enum { @@ -105,7 +125,7 @@ enum { ETHTOOL_A_STRSET_COUNTS_ONLY, __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 + ETHTOOL_A_STRSET_MAX = 
(__ETHTOOL_A_STRSET_CNT - 1) }; enum { @@ -114,7 +134,7 @@ enum { ETHTOOL_A_PRIVFLAGS_FLAGS, __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 + ETHTOOL_A_PRIVFLAGS_MAX = (__ETHTOOL_A_PRIVFLAGS_CNT - 1) }; enum { @@ -182,7 +202,7 @@ enum { ETHTOOL_A_LINKINFO_TRANSCEIVER, __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 + ETHTOOL_A_LINKINFO_MAX = (__ETHTOOL_A_LINKINFO_CNT - 1) }; enum { @@ -199,7 +219,7 @@ enum { ETHTOOL_A_LINKMODES_RATE_MATCHING, __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 + ETHTOOL_A_LINKMODES_MAX = (__ETHTOOL_A_LINKMODES_CNT - 1) }; enum { @@ -213,7 +233,7 @@ enum { ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 + ETHTOOL_A_LINKSTATE_MAX = (__ETHTOOL_A_LINKSTATE_CNT - 1) }; enum { @@ -222,7 +242,7 @@ enum { ETHTOOL_A_DEBUG_MSGMASK, __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 + ETHTOOL_A_DEBUG_MAX = (__ETHTOOL_A_DEBUG_CNT - 1) }; enum { @@ -232,7 +252,7 @@ enum { ETHTOOL_A_WOL_SOPASS, __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 + ETHTOOL_A_WOL_MAX = (__ETHTOOL_A_WOL_CNT - 1) }; enum { @@ -244,7 +264,7 @@ enum { ETHTOOL_A_FEATURES_NOCHANGE, __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 + ETHTOOL_A_FEATURES_MAX = (__ETHTOOL_A_FEATURES_CNT - 1) }; enum { @@ -276,6 +296,7 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; @@ -362,7 +383,6 @@ enum { __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - }; enum { @@ -412,7 +432,7 @@ enum { ETHTOOL_A_CABLE_TEST_HEADER, __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 + ETHTOOL_A_CABLE_TEST_MAX = (__ETHTOOL_A_CABLE_TEST_CNT - 1) }; enum { @@ -433,7 +453,7 @@ enum { 
ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1) }; enum { @@ -443,7 +463,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1) }; enum { @@ -452,7 +472,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_CFG, __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CNT - 1) }; enum { @@ -580,6 +600,9 @@ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, + + __ETHTOOL_A_C33_PSE_PW_LIMIT_CNT, + __ETHTOOL_A_C33_PSE_PW_LIMIT_MAX = (__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT - 1) }; enum { @@ -613,7 +636,7 @@ enum { ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1) }; enum { @@ -662,8 +685,8 @@ enum { }; enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_USER_NONE = 0, + ETHTOOL_MSG_STRSET_GET = 1, ETHTOOL_MSG_LINKINFO_GET, ETHTOOL_MSG_LINKINFO_SET, ETHTOOL_MSG_LINKMODES_GET, @@ -710,12 +733,12 @@ enum { ETHTOOL_MSG_PHY_GET, __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 + ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) }; enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_KERNEL_NONE = 0, + ETHTOOL_MSG_STRSET_GET_REPLY = 1, ETHTOOL_MSG_LINKINFO_GET_REPLY, ETHTOOL_MSG_LINKINFO_NTF, ETHTOOL_MSG_LINKMODES_GET_REPLY, @@ -763,7 +786,7 @@ enum { ETHTOOL_MSG_PHY_NTF, __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 + ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) }; #endif 
/* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ From 77a214317a6adcbfaf0781a2d950247106c6bc1f Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 16 Oct 2024 09:16:07 +0200 Subject: [PATCH 0152/1386] batman-adv: Start new development cycle This version will contain all the (major or even only minor) changes for Linux 6.14. The version number isn't a semantic version number with major and minor information. It is just encoding the year of the expected publishing as Linux -rc1 and the number of published versions this year (starting at 0). Signed-off-by: Simon Wunderlich --- net/batman-adv/main.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 97ea71a052f8..1fbe3a4dd965 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2024.3" +#define BATADV_SOURCE_VERSION "2025.0" #endif /* B.A.T.M.A.N. parameters */ From a7d5100ed0099a9f21b796017908cc7ece5332e4 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Wed, 16 Oct 2024 09:23:06 +0200 Subject: [PATCH 0153/1386] batman-adv: Reorder includes for distributed-arp-table.c The commit 5f60d5f6bbc1 ("move asm/unaligned.h to linux/unaligned.h") changed the include without adjusting the order (to match the rest of the file). 
Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/distributed-arp-table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 801eff8a40e5..48b72c2be098 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -7,7 +7,6 @@ #include "distributed-arp-table.h" #include "main.h" -#include #include #include #include @@ -32,6 +31,7 @@ #include #include #include +#include #include #include #include From 8587e0e3f562b09bbfe0df1b0c506066095043e4 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 22 Nov 2024 16:52:51 +0100 Subject: [PATCH 0154/1386] batman-adv: Remove atomic usage for tt.local_changes The tt.local_changes atomic is either written with tt.changes_list_lock or close to it (see batadv_tt_local_event()). Thus the performance gain using an atomic was limited (or because of atomic_read() impact even negative). Using atomic also comes with the need to be wary of potential negative tt.local_changes value. Simplify the tt.local_changes usage by removing the atomic property and modifying it only with tt.changes_list_lock held. 
Signed-off-by: Remi Pommarel Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/soft-interface.c | 2 +- net/batman-adv/translation-table.c | 24 +++++++++++------------- net/batman-adv/types.h | 4 ++-- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 2758aba47a2f..5666c268cead 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -783,13 +783,13 @@ static int batadv_softif_init_late(struct net_device *dev) atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); atomic_set(&bat_priv->bcast_seqno, 1); atomic_set(&bat_priv->tt.vn, 0); - atomic_set(&bat_priv->tt.local_changes, 0); atomic_set(&bat_priv->tt.ogm_append_cnt, 0); #ifdef CONFIG_BATMAN_ADV_BLA atomic_set(&bat_priv->bla.num_requests, 0); #endif atomic_set(&bat_priv->tp_num, 0); + WRITE_ONCE(bat_priv->tt.local_changes, 0); bat_priv->tt.last_changeset = NULL; bat_priv->tt.last_changeset_len = 0; bat_priv->isolation_mark = 0; diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index b44c382226a1..6e0345b91ece 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -423,8 +423,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, struct batadv_tt_change_node *tt_change_node, *entry, *safe; struct batadv_tt_common_entry *common = &tt_local_entry->common; u8 flags = common->flags | event_flags; - bool event_removed = false; bool del_op_requested, del_op_entry; + size_t changes; tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) @@ -440,6 +440,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, /* check for ADD+DEL or DEL+ADD events */ spin_lock_bh(&bat_priv->tt.changes_list_lock); + changes = READ_ONCE(bat_priv->tt.local_changes); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if 
(!batadv_compare_eth(entry->change.addr, common->addr)) @@ -468,21 +469,18 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del: list_del(&entry->list); kmem_cache_free(batadv_tt_change_cache, entry); + changes--; kmem_cache_free(batadv_tt_change_cache, tt_change_node); - event_removed = true; - goto unlock; + goto update_changes; } /* track the change in the OGMinterval list */ list_add_tail(&tt_change_node->list, &bat_priv->tt.changes_list); + changes++; -unlock: +update_changes: + WRITE_ONCE(bat_priv->tt.local_changes, changes); spin_unlock_bh(&bat_priv->tt.changes_list_lock); - - if (event_removed) - atomic_dec(&bat_priv->tt.local_changes); - else - atomic_inc(&bat_priv->tt.local_changes); } /** @@ -950,7 +948,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) int tt_diff_entries_count = 0; u16 tvlv_len; - tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); + tt_diff_entries_num = READ_ONCE(bat_priv->tt.local_changes); tt_diff_len = batadv_tt_len(tt_diff_entries_num); /* if we have too many changes for one packet don't send any @@ -970,7 +968,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) goto container_register; spin_lock_bh(&bat_priv->tt.changes_list_lock); - atomic_set(&bat_priv->tt.local_changes, 0); + WRITE_ONCE(bat_priv->tt.local_changes, 0); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { @@ -1380,7 +1378,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) kmem_cache_free(batadv_tt_change_cache, entry); } - atomic_set(&bat_priv->tt.local_changes, 0); + WRITE_ONCE(bat_priv->tt.local_changes, 0); spin_unlock_bh(&bat_priv->tt.changes_list_lock); } @@ -3634,7 +3632,7 @@ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { lockdep_assert_held(&bat_priv->tt.commit_lock); - if (atomic_read(&bat_priv->tt.local_changes) < 1) { + if (READ_ONCE(bat_priv->tt.local_changes) == 0) { if 
(!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); return; diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 04f6398b3a40..f491bff8c51b 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1022,7 +1022,7 @@ struct batadv_priv_tt { atomic_t ogm_append_cnt; /** @local_changes: changes registered in an originator interval */ - atomic_t local_changes; + size_t local_changes; /** * @changes_list: tracks tt local changes within an originator interval @@ -1044,7 +1044,7 @@ struct batadv_priv_tt { */ struct list_head roam_list; - /** @changes_list_lock: lock protecting changes_list */ + /** @changes_list_lock: lock protecting changes_list & local_changes */ spinlock_t changes_list_lock; /** @req_list_lock: lock protecting req_list */ From fca81aa3e653d68ac696f3ee994384cecacf3845 Mon Sep 17 00:00:00 2001 From: Remi Pommarel Date: Fri, 22 Nov 2024 16:52:52 +0100 Subject: [PATCH 0155/1386] batman-adv: Don't keep redundant TT change events When adding a local TT twice within the same OGM interval (e.g. happens when flags get updated), the flags of the first TT change entry are updated with those of the second one and both change events are added to the change list. This leads to having the same ADD change entry twice. Similarly, a DEL+DEL scenario also creates the same event twice. Deduplicate ADD+ADD or DEL+DEL scenarios to reduce the TT change events that need to be sent in both OGM and TT response.
Signed-off-by: Remi Pommarel Co-developed-by: Sven Eckelmann Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/translation-table.c | 40 ++++++++++++++---------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 6e0345b91ece..76d5517bb507 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -438,7 +438,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, del_op_requested = flags & BATADV_TT_CLIENT_DEL; - /* check for ADD+DEL or DEL+ADD events */ + /* check for ADD+DEL, DEL+ADD, ADD+ADD or DEL+DEL events */ spin_lock_bh(&bat_priv->tt.changes_list_lock); changes = READ_ONCE(bat_priv->tt.local_changes); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, @@ -446,30 +446,26 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; - /* DEL+ADD in the same orig interval have no effect and can be - * removed to avoid silly behaviour on the receiver side. The - * other way around (ADD+DEL) can happen in case of roaming of - * a client still in the NEW state. Roaming of NEW clients is - * now possible due to automatically recognition of "temporary" - * clients - */ del_op_entry = entry->change.flags & BATADV_TT_CLIENT_DEL; - if (!del_op_requested && del_op_entry) - goto del; - if (del_op_requested && !del_op_entry) - goto del; - - /* this is a second add in the same originator interval. It - * means that flags have been changed: update them! - */ - if (!del_op_requested && !del_op_entry) + if (del_op_requested != del_op_entry) { + /* DEL+ADD in the same orig interval have no effect and + * can be removed to avoid silly behaviour on the + * receiver side. The other way around (ADD+DEL) can + * happen in case of roaming of a client still in the + * NEW state. 
Roaming of NEW clients is now possible due + * to automatically recognition of "temporary" clients + */ + list_del(&entry->list); + kmem_cache_free(batadv_tt_change_cache, entry); + changes--; + } else { + /* this is a second add or del in the same originator + * interval. It could mean that flags have been changed + * (e.g. double add): update them + */ entry->change.flags = flags; + } - continue; -del: - list_del(&entry->list); - kmem_cache_free(batadv_tt_change_cache, entry); - changes--; kmem_cache_free(batadv_tt_change_cache, tt_change_node); goto update_changes; } From a5686ae820fa7ab03226a3b0ff529720b7bac599 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:10 +0200 Subject: [PATCH 0156/1386] wifi: ath12k: move ATH12K_FLAG_REGISTERED handling to ath12k_mac_register() When hardware device group abstraction is introduced, a group abstraction is registered to mac80211 rather than a particular single device. So we cannot set the device registered when the QMI firmware ready event is received, only after all the devices in group have received the event. To do that set and unset ATH12K_FLAG_REGISTERED flag inside ath12k_mac_register() and ath12k_mac_unregister() respectively. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 4 ++++ drivers/net/wireless/ath/ath12k/qmi.c | 4 +--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 129607ac6c1a..1180070278da 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10839,6 +10839,8 @@ int ath12k_mac_register(struct ath12k_base *ab) goto err; } + set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + return 0; err: @@ -10858,6 +10860,8 @@ void ath12k_mac_unregister(struct ath12k_base *ab) struct ath12k_hw *ah; int i; + clear_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); + for (i = ath12k_get_num_hw(ab) - 1; i >= 0; i--) { ah = ath12k_ab_to_ah(ab, i); if (!ah) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 77d8ee14bf33..20382b751829 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3349,11 +3349,9 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) &ab->dev_flags); clear_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); ret = ath12k_core_qmi_firmware_ready(ab); - if (!ret) { + if (!ret) set_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags); - set_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags); - } break; default: From 46d16f7e1d1413ad7ff99c1334d8874623717745 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 4 Dec 2024 18:32:11 +0200 Subject: [PATCH 0157/1386] wifi: ath12k: rename mlo_capable_flags to single_chip_mlo_supp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit At present, the mlo_capable_flags in ath12k_base is used to indicate whether the chip supports inter (QCN9274) or intra (WCN7850) chip MLO. However, it’s possible that the chip supports neither, especially with older firmware versions. Additionally, if intra chip MLO is not supported, inter chip MLO will also be non-functional. Therefore, having two separate flags for this is unnecessary. Therefore, rename this flag to single_chip_mlo_supp. At the same time convert it into a bool data type. Also, get rid of the enums defined earlier. For the QCN9274 family of chipsets, this will be set only when firmware advertises the support during the QMI exchange. For the WCN7850 family of chipsets, since the event is not supported, assumption is made that single chip MLO is supported. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 2 +- drivers/net/wireless/ath/ath12k/core.h | 23 ++--------------------- drivers/net/wireless/ath/ath12k/qmi.c | 13 +++++-------- 3 files changed, 8 insertions(+), 30 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 4da147f7bfac..568c9b6e2c1c 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1325,7 +1325,7 @@ struct ath12k_base *ath12k_core_alloc(struct device *dev, size_t priv_size, ab->dev = dev; ab->hif.bus = bus; ab->qmi.num_radios = U8_MAX; - ab->mlo_capable_flags = ATH12K_INTRA_DEVICE_MLO_SUPPORT; + ab->single_chip_mlo_supp = false; /* Device index used to identify the devices in a group. 
* diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 9ddced140056..d93ba844f61d 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -816,21 +816,6 @@ struct ath12k_soc_dp_stats { struct ath12k_soc_dp_tx_err_stats tx_err; }; -/** - * enum ath12k_link_capable_flags - link capable flags - * - * Single/Multi link capability information - * - * @ATH12K_INTRA_DEVICE_MLO_SUPPORT: SLO/MLO form between the radio, where all - * the links (radios) present within a device. - * @ATH12K_INTER_DEVICE_MLO_SUPPORT: SLO/MLO form between the radio, where all - * the links (radios) present across the devices. - */ -enum ath12k_link_capable_flags { - ATH12K_INTRA_DEVICE_MLO_SUPPORT = BIT(0), - ATH12K_INTER_DEVICE_MLO_SUPPORT = BIT(1), -}; - /* Master structure to hold the hw data which may be used in core module */ struct ath12k_base { enum ath12k_hw_rev hw_rev; @@ -996,12 +981,8 @@ struct ath12k_base { const struct hal_rx_ops *hal_rx_ops; - /* mlo_capable_flags denotes the single/multi link operation - * capabilities of the Device. 
- * - * See enum ath12k_link_capable_flags - */ - u8 mlo_capable_flags; + /* Denotes whether MLO is possible within the chip */ + bool single_chip_mlo_supp; struct completion restart_completed; diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 20382b751829..efcf2dfac4ac 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2023,14 +2023,14 @@ static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, u8 hw_link_id = 0; int i; - if (!(ab->mlo_capable_flags & ATH12K_INTRA_DEVICE_MLO_SUPPORT)) { + if (!ab->single_chip_mlo_supp) { ath12k_dbg(ab, ATH12K_DBG_QMI, "intra device MLO is disabled hence skip QMI MLO cap"); return; } if (!ab->qmi.num_radios || ab->qmi.num_radios == U8_MAX) { - ab->mlo_capable_flags = 0; + ab->single_chip_mlo_supp = false; ath12k_dbg(ab, ATH12K_DBG_QMI, "skip QMI MLO cap due to invalid num_radio %d\n", @@ -2176,12 +2176,9 @@ static void ath12k_qmi_phy_cap_send(struct ath12k_base *ab) goto out; } - if (resp.single_chip_mlo_support_valid) { - if (resp.single_chip_mlo_support) - ab->mlo_capable_flags |= ATH12K_INTRA_DEVICE_MLO_SUPPORT; - else - ab->mlo_capable_flags &= ~ATH12K_INTRA_DEVICE_MLO_SUPPORT; - } + if (resp.single_chip_mlo_support_valid && + resp.single_chip_mlo_support) + ab->single_chip_mlo_supp = true; if (!resp.num_phy_valid) { ret = -ENODATA; From 6f245ea0ec6c29b90c8fa4fdf6e178c646125d7e Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:12 +0200 Subject: [PATCH 0158/1386] wifi: ath12k: introduce device group abstraction Currently, a single device is probed, and once firmware is ready, the device is registered to mac80211. For Multi-Link Operation, different bands of different devices or of the same device are part of a single wiphy and for this, a hardware device group abstraction is needed.
Hardware device group abstraction - when there are multiple devices (with single radio or dual radio) that are connected by any means of interface for communicating between them, then these devices can be combined together as a single group using a group id to form a group abstraction and register to mac80211. The grouping information of multiple devices would be based on device tree during device probe (will be implemented in future patches). If no such information is available, then a single device will be part of group abstraction and registered to mac80211, else multiple devices advertised in device tree are combined and then registered to mac80211. For device group abstraction, a base structure ath12k_hw_group (ag) and the helpers are implemented. These helpers are used during device probe and mapping the group to the devices involved. An illustration of how multiple devices might be combined together in future based on group id: +------------------------------------------------------------------------+ | +-------------------------------------+ +-------------------+ | | | +-----------+ | | +-----------+ | | +-----------+ | | | | | ar (2GHz) | | | | ar (5GHz) | | | | ar (6GHz) | | | | | +-----------+ | | +-----------+ | | +-----------+ | | | | ath12k_base (ab) | | ath12k_base (ab) | | | | (Dual band device) | | | | | +-------------------------------------+ +-------------------+ | | ath12k_hw_group (ag) based on group id | +------------------------------------------------------------------------+ In the above representation, two devices are combined into a single group based on group id. Add base code changes where a single device would be part of a group with an invalid group id forming a group abstraction. Multi device grouping will be introduced in future.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Co-developed-by: Harshitha Prem Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 237 +++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/core.h | 17 ++ drivers/net/wireless/ath/ath12k/pci.c | 1 + 3 files changed, 239 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 568c9b6e2c1c..41e3454b60f5 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -22,6 +22,11 @@ unsigned int ath12k_debug_mask; module_param_named(debug_mask, ath12k_debug_mask, uint, 0644); MODULE_PARM_DESC(debug_mask, "Debugging mask"); +/* protected with ath12k_hw_group_mutex */ +static struct list_head ath12k_hw_group_list = LIST_HEAD_INIT(ath12k_hw_group_list); + +static DEFINE_MUTEX(ath12k_hw_group_mutex); + static int ath12k_core_rfkill_config(struct ath12k_base *ab) { struct ath12k *ar; @@ -1244,27 +1249,112 @@ static void ath12k_core_panic_notifier_unregister(struct ath12k_base *ab) &ab->panic_nb); } -int ath12k_core_init(struct ath12k_base *ab) +static inline +bool ath12k_core_hw_group_create_ready(struct ath12k_hw_group *ag) { - int ret; + lockdep_assert_held(&ag->mutex); - ret = ath12k_core_soc_create(ab); - if (ret) { - ath12k_err(ab, "failed to create soc core: %d\n", ret); - return ret; - } - - ret = ath12k_core_panic_notifier_register(ab); - if (ret) - ath12k_warn(ab, "failed to register panic handler: %d\n", ret); - - return 0; + return (ag->num_probed == ag->num_devices); } -void ath12k_core_deinit(struct ath12k_base *ab) +static struct ath12k_hw_group *ath12k_core_hw_group_alloc(u8 id, u8 max_devices) { - 
ath12k_core_panic_notifier_unregister(ab); + struct ath12k_hw_group *ag; + lockdep_assert_held(&ath12k_hw_group_mutex); + + ag = kzalloc(sizeof(*ag), GFP_KERNEL); + if (!ag) + return NULL; + + ag->id = id; + ag->num_devices = max_devices; + list_add(&ag->list, &ath12k_hw_group_list); + mutex_init(&ag->mutex); + + return ag; +} + +static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag) +{ + mutex_lock(&ath12k_hw_group_mutex); + + list_del(&ag->list); + kfree(ag); + + mutex_unlock(&ath12k_hw_group_mutex); +} + +static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *ab) +{ + u32 group_id = ATH12K_INVALID_GROUP_ID; + struct ath12k_hw_group *ag; + + lockdep_assert_held(&ath12k_hw_group_mutex); + + /* The grouping of multiple devices will be done based on device tree file. + * TODO: device tree file parsing to know about the devices involved in group. + * + * The platforms that do not have any valid group information would have each + * device to be part of its own invalid group. + * + * Currently, we are not parsing any device tree information and hence, grouping + * of multiple devices is not involved. Thus, single device is added to device + * group. 
+ */ + ag = ath12k_core_hw_group_alloc(group_id, 1); + if (!ag) { + ath12k_warn(ab, "unable to create new hw group\n"); + return NULL; + } + + ath12k_dbg(ab, ATH12K_DBG_BOOT, "single device added to hardware group\n"); + + ab->device_id = ag->num_probed++; + ag->ab[ab->device_id] = ab; + ab->ag = ag; + + return ag; +} + +void ath12k_core_hw_group_unassign(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag = ab->ag; + u8 device_id = ab->device_id; + int num_probed; + + if (!ag) + return; + + mutex_lock(&ag->mutex); + + if (WARN_ON(device_id >= ag->num_devices)) { + mutex_unlock(&ag->mutex); + return; + } + + if (WARN_ON(ag->ab[device_id] != ab)) { + mutex_unlock(&ag->mutex); + return; + } + + ag->ab[device_id] = NULL; + ab->ag = NULL; + ab->device_id = ATH12K_INVALID_DEVICE_ID; + + if (ag->num_probed) + ag->num_probed--; + + num_probed = ag->num_probed; + + mutex_unlock(&ag->mutex); + + if (!num_probed) + ath12k_core_hw_group_free(ag); +} + +static void ath12k_core_device_cleanup(struct ath12k_base *ab) +{ mutex_lock(&ab->core_lock); ath12k_hif_irq_disable(ab); @@ -1274,8 +1364,123 @@ void ath12k_core_deinit(struct ath12k_base *ab) ath12k_core_stop(ab); mutex_unlock(&ab->core_lock); +} - ath12k_core_soc_destroy(ab); +static void ath12k_core_hw_group_destroy(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + if (WARN_ON(!ag)) + return; + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + ath12k_core_soc_destroy(ab); + } +} + +static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + if (!ag) + return; + + mutex_lock(&ag->mutex); + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + ath12k_core_device_cleanup(ab); + } + + mutex_unlock(&ag->mutex); +} + +static int ath12k_core_hw_group_create(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i, ret; + + lockdep_assert_held(&ag->mutex); + + for (i = 0; i < 
ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + mutex_lock(&ab->core_lock); + + ret = ath12k_core_soc_create(ab); + if (ret) { + mutex_unlock(&ab->core_lock); + ath12k_err(ab, "failed to create soc core: %d\n", ret); + return ret; + } + + mutex_unlock(&ab->core_lock); + } + + return 0; +} + +int ath12k_core_init(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag; + int ret; + + ret = ath12k_core_panic_notifier_register(ab); + if (ret) + ath12k_warn(ab, "failed to register panic handler: %d\n", ret); + + mutex_lock(&ath12k_hw_group_mutex); + + ag = ath12k_core_hw_group_assign(ab); + if (!ag) { + mutex_unlock(&ath12k_hw_group_mutex); + ath12k_warn(ab, "unable to get hw group\n"); + return -ENODEV; + } + + mutex_unlock(&ath12k_hw_group_mutex); + + mutex_lock(&ag->mutex); + + ath12k_dbg(ab, ATH12K_DBG_BOOT, "num devices %d num probed %d\n", + ag->num_devices, ag->num_probed); + + if (ath12k_core_hw_group_create_ready(ag)) { + ret = ath12k_core_hw_group_create(ag); + if (ret) { + mutex_unlock(&ag->mutex); + ath12k_warn(ab, "unable to create hw group\n"); + goto err; + } + } + + mutex_unlock(&ag->mutex); + + return 0; + +err: + ath12k_core_hw_group_destroy(ab->ag); + ath12k_core_hw_group_unassign(ab); + return ret; +} + +void ath12k_core_deinit(struct ath12k_base *ab) +{ + ath12k_core_panic_notifier_unregister(ab); + ath12k_core_hw_group_cleanup(ab->ag); + ath12k_core_hw_group_destroy(ab->ag); + ath12k_core_hw_group_unassign(ab); } void ath12k_core_free(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index d93ba844f61d..dca4b9a3538f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -816,6 +816,20 @@ struct ath12k_soc_dp_stats { struct ath12k_soc_dp_tx_err_stats tx_err; }; +/* Holds info on the group of devices that are registered as a single + * wiphy, protected with struct ath12k_hw_group::mutex. 
+ */ +struct ath12k_hw_group { + struct list_head list; + u8 id; + u8 num_devices; + u8 num_probed; + struct ath12k_base *ab[ATH12K_MAX_SOCS]; + + /* protects access to this struct */ + struct mutex mutex; +}; + /* Master structure to hold the hw data which may be used in core module */ struct ath12k_base { enum ath12k_hw_rev hw_rev; @@ -1005,6 +1019,8 @@ struct ath12k_base { struct notifier_block panic_nb; + struct ath12k_hw_group *ag; + /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); }; @@ -1035,6 +1051,7 @@ int ath12k_core_resume_early(struct ath12k_base *ab); int ath12k_core_resume(struct ath12k_base *ab); int ath12k_core_suspend(struct ath12k_base *ab); int ath12k_core_suspend_late(struct ath12k_base *ab); +void ath12k_core_hw_group_unassign(struct ath12k_base *ab); const struct firmware *ath12k_core_firmware_request(struct ath12k_base *ab, const char *filename); diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 8dbc7377ae7c..06cff3849ab8 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -1725,6 +1725,7 @@ static void ath12k_pci_remove(struct pci_dev *pdev) if (test_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags)) { ath12k_pci_power_down(ab, false); ath12k_qmi_deinit_service(ab); + ath12k_core_hw_group_unassign(ab); goto qmi_fail; } From ee146e11b4d9183e01d8b7e4963941730ed4af6d Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:13 +0200 Subject: [PATCH 0159/1386] wifi: ath12k: refactor core start based on hardware group Currently, mac allocate/register and core_pdev_create are initiated immediately when QMI firmware ready event is received for a particular device. With hardware device group abstraction, QMI firmware ready event can be received simultaneously for different devices in the group and so, it should not be registered immediately rather it has to be deferred until all devices in the group has received QMI firmware ready. 
To handle this, refactor the code of core start to have registering within ath12k_core_hw_group_start() and unregistering in ath12k_core_hw_group_stop(). Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Co-developed-by: Harshitha Prem Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 199 ++++++++++++++++++------- drivers/net/wireless/ath/ath12k/core.h | 22 +++ drivers/net/wireless/ath/ath12k/qmi.c | 4 +- 3 files changed, 172 insertions(+), 53 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 41e3454b60f5..dea2c53bcc07 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -604,6 +604,8 @@ u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab) static void ath12k_core_stop(struct ath12k_base *ab) { + ath12k_core_stopped(ab); + if (!test_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags)) ath12k_qmi_firmware_stop(ab); @@ -743,6 +745,8 @@ static int ath12k_core_start(struct ath12k_base *ab, { int ret; + lockdep_assert_held(&ab->core_lock); + ret = ath12k_wmi_attach(ab); if (ret) { ath12k_err(ab, "failed to attach wmi: %d\n", ret); @@ -836,6 +840,10 @@ static int ath12k_core_start(struct ath12k_base *ab, /* ACPI is optional so continue in case of an error */ ath12k_dbg(ab, ATH12K_DBG_BOOT, "acpi failed: %d\n", ret); + if (!test_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags)) + /* Indicate the core start in the appropriate group */ + ath12k_core_started(ab); + return 0; err_reo_cleanup: @@ -847,6 +855,96 @@ err_wmi_detach: return ret; } +static void ath12k_core_device_cleanup(struct ath12k_base *ab) +{ + mutex_lock(&ab->core_lock); + + ath12k_hif_irq_disable(ab); + 
ath12k_core_pdev_destroy(ab); + ath12k_mac_unregister(ab); + ath12k_mac_destroy(ab); + + mutex_unlock(&ab->core_lock); +} + +static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + lockdep_assert_held(&ag->mutex); + + for (i = ag->num_devices - 1; i >= 0; i--) { + ab = ag->ab[i]; + if (!ab) + continue; + ath12k_core_device_cleanup(ab); + } +} + +static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int ret, i; + + lockdep_assert_held(&ag->mutex); + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + mutex_lock(&ab->core_lock); + + /* Check if already registered or not, since same flow + * execute for HW restart case. + */ + if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) + goto core_pdev_create; + + ret = ath12k_mac_allocate(ab); + if (ret) { + ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n", + ret); + mutex_unlock(&ab->core_lock); + return ret; + } + + ret = ath12k_mac_register(ab); + if (ret) { + ath12k_err(ab, "failed to register radio with mac80211: %d\n", + ret); + mutex_unlock(&ab->core_lock); + goto err; + } + +core_pdev_create: + ret = ath12k_core_pdev_create(ab); + if (ret) { + ath12k_err(ab, "failed to create pdev core %d\n", ret); + mutex_unlock(&ab->core_lock); + goto err; + } + + ath12k_hif_irq_enable(ab); + + ret = ath12k_core_rfkill_config(ab); + if (ret && ret != -EOPNOTSUPP) { + mutex_unlock(&ab->core_lock); + goto err; + } + + mutex_unlock(&ab->core_lock); + } + + return 0; + +err: + ath12k_core_hw_group_stop(ag); + + return ret; +} + static int ath12k_core_start_firmware(struct ath12k_base *ab, enum ath12k_firmware_mode mode) { @@ -864,9 +962,18 @@ static int ath12k_core_start_firmware(struct ath12k_base *ab, return ret; } +static inline +bool ath12k_core_hw_group_start_ready(struct ath12k_hw_group *ag) +{ + lockdep_assert_held(&ag->mutex); + + return (ag->num_started == ag->num_devices); +} + 
int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab) { - int ret; + struct ath12k_hw_group *ag = ath12k_ab_to_ag(ab); + int ret, i; ret = ath12k_core_start_firmware(ab, ATH12K_FIRMWARE_MODE_NORMAL); if (ret) { @@ -886,59 +993,50 @@ int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab) goto err_firmware_stop; } + mutex_lock(&ag->mutex); mutex_lock(&ab->core_lock); + ret = ath12k_core_start(ab, ATH12K_FIRMWARE_MODE_NORMAL); if (ret) { ath12k_err(ab, "failed to start core: %d\n", ret); goto err_dp_free; } - ret = ath12k_mac_allocate(ab); - if (ret) { - ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n", - ret); - goto err_core_stop; - } - - ret = ath12k_mac_register(ab); - if (ret) { - ath12k_err(ab, "failed register the radio with mac80211: %d\n", ret); - goto err_mac_destroy; - } - - ret = ath12k_core_pdev_create(ab); - if (ret) { - ath12k_err(ab, "failed to create pdev core: %d\n", ret); - goto err_mac_unregister; - } - - ath12k_hif_irq_enable(ab); - - ret = ath12k_core_rfkill_config(ab); - if (ret && ret != -EOPNOTSUPP) { - ath12k_err(ab, "failed to config rfkill: %d\n", ret); - goto err_hif_irq_disable; - } - mutex_unlock(&ab->core_lock); + if (ath12k_core_hw_group_start_ready(ag)) { + ret = ath12k_core_hw_group_start(ag); + if (ret) { + ath12k_warn(ab, "unable to start hw group\n"); + goto err_core_stop; + } + ath12k_dbg(ab, ATH12K_DBG_BOOT, "group %d started\n", ag->id); + } + + mutex_unlock(&ag->mutex); + return 0; -err_hif_irq_disable: - ath12k_hif_irq_disable(ab); - ath12k_core_pdev_destroy(ab); -err_mac_unregister: - ath12k_mac_unregister(ab); -err_mac_destroy: - ath12k_mac_destroy(ab); err_core_stop: - ath12k_core_stop(ab); + for (i = ag->num_devices - 1; i >= 0; i--) { + ab = ag->ab[i]; + if (!ab) + continue; + + mutex_lock(&ab->core_lock); + ath12k_core_stop(ab); + mutex_unlock(&ab->core_lock); + } + goto exit; + err_dp_free: ath12k_dp_free(ab); mutex_unlock(&ab->core_lock); err_firmware_stop: ath12k_qmi_firmware_stop(ab); 
+exit: + mutex_unlock(&ag->mutex); return ret; } @@ -1135,6 +1233,14 @@ static void ath12k_core_restart(struct work_struct *work) } if (ab->is_reset) { + if (!test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) { + atomic_dec(&ab->reset_count); + complete(&ab->reset_complete); + ab->is_reset = false; + atomic_set(&ab->fail_cont_count, 0); + ath12k_dbg(ab, ATH12K_DBG_BOOT, "reset success\n"); + } + for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); ieee80211_restart_hw(ah->hw); @@ -1319,7 +1425,7 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a void ath12k_core_hw_group_unassign(struct ath12k_base *ab) { - struct ath12k_hw_group *ag = ab->ag; + struct ath12k_hw_group *ag = ath12k_ab_to_ag(ab); u8 device_id = ab->device_id; int num_probed; @@ -1353,19 +1459,6 @@ void ath12k_core_hw_group_unassign(struct ath12k_base *ab) ath12k_core_hw_group_free(ag); } -static void ath12k_core_device_cleanup(struct ath12k_base *ab) -{ - mutex_lock(&ab->core_lock); - - ath12k_hif_irq_disable(ab); - ath12k_core_pdev_destroy(ab); - ath12k_mac_unregister(ab); - ath12k_mac_destroy(ab); - ath12k_core_stop(ab); - - mutex_unlock(&ab->core_lock); -} - static void ath12k_core_hw_group_destroy(struct ath12k_hw_group *ag) { struct ath12k_base *ab; @@ -1393,12 +1486,16 @@ static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) mutex_lock(&ag->mutex); + ath12k_core_hw_group_stop(ag); + for (i = 0; i < ag->num_devices; i++) { ab = ag->ab[i]; if (!ab) continue; - ath12k_core_device_cleanup(ab); + mutex_lock(&ab->core_lock); + ath12k_core_stop(ab); + mutex_unlock(&ab->core_lock); } mutex_unlock(&ag->mutex); diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index dca4b9a3538f..b8b1ee8d3302 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -824,6 +824,8 @@ struct ath12k_hw_group { u8 id; u8 num_devices; u8 num_probed; + u8 num_started; + 
unsigned long flags; struct ath12k_base *ab[ATH12K_MAX_SOCS]; /* protects access to this struct */ @@ -1175,4 +1177,24 @@ static inline int ath12k_get_num_hw(struct ath12k_base *ab) { return ab->num_hw; } + +static inline struct ath12k_hw_group *ath12k_ab_to_ag(struct ath12k_base *ab) +{ + return ab->ag; +} + +static inline void ath12k_core_started(struct ath12k_base *ab) +{ + lockdep_assert_held(&ab->ag->mutex); + + ab->ag->num_started++; +} + +static inline void ath12k_core_stopped(struct ath12k_base *ab) +{ + lockdep_assert_held(&ab->ag->mutex); + + ab->ag->num_started--; +} + #endif /* _CORE_H_ */ diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index efcf2dfac4ac..8b4d500fe426 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3321,7 +3321,6 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) break; case ATH12K_QMI_EVENT_SERVER_EXIT: set_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); - set_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); break; case ATH12K_QMI_EVENT_REQUEST_MEM: ret = ath12k_qmi_event_mem_request(qmi); @@ -3338,13 +3337,14 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) if (test_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, &ab->dev_flags)) { if (ab->is_reset) ath12k_hal_dump_srng_stats(ab); + + set_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); queue_work(ab->workqueue, &ab->restart_work); break; } clear_bit(ATH12K_FLAG_CRASH_FLUSH, &ab->dev_flags); - clear_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags); ret = ath12k_core_qmi_firmware_ready(ab); if (!ret) set_bit(ATH12K_FLAG_QMI_FW_READY_COMPLETE, From a343d97f27f514015e6d5e789672cf4ab4111720 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:14 +0200 Subject: [PATCH 0160/1386] wifi: ath12k: move struct ath12k_hw from per device to group Currently, hardware abstractions (ah) of different radio bands are tightly coupled to a single device (ab). 
But, with hardware device group abstraction (ag), multiple radios across different devices in a group can form different combinations of hardware abstractions (ah) within the group. Hence, the mapping between ah and ab can be removed and instead ah can be mapped to struct ath12k_hw_group (ag). Current mapping between struct ath12k_hw (ah), struct ath12k_base (ab) and struct ath12k_hw_group (ag): +------------------------------------------------+ | +-------------------------------------+ | | | +---------------+ +---------------+ | | | | |ath12k_hw (ah) | |ath12k_hw (ah) | | | | | +---------------+ +---------------+ | | | | | | | | +-----------+ | +-----------+ | | | | | ar (2GHz) | | | ar (5GHz) | | | | | +-----------+ | +-----------+ | | | | Dual band device-1 (ab) | | | +-------------------------------------+ | | ath12k_hw_group (ag) based on group id | +------------------------------------------------+ Mapping after the hardware device group abstraction, with the ah array moved out of ab into ag: +----------------------------------------------+ | +---------------+ +---------------+ | | |ath12k_hw (ah) | |ath12k_hw (ah) | | | +---------------+ +---------------+ | | +-------------------------------------+ | | | +-----------+ +-----------+ | | | | | ar (2GHz) | | ar (5GHz) | | | | | +-----------+ +-----------+ | | | | Dual band device-1 (ab) | | | +-------------------------------------+ | | ath12k_hw_group (ag) based on group id | +----------------------------------------------+ This decoupling of struct ath12k_hw (ah) from struct ath12k_base (ab) and mapping it to struct ath12k_hw_group (ag) will help in forming different combinations of multi-link devices.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-6-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 49 +++++++------- drivers/net/wireless/ath/ath12k/core.h | 29 +++++---- drivers/net/wireless/ath/ath12k/dp.c | 19 ++---- drivers/net/wireless/ath/ath12k/dp.h | 2 +- drivers/net/wireless/ath/ath12k/mac.c | 89 ++++++++++++++++++-------- drivers/net/wireless/ath/ath12k/mac.h | 9 +-- 6 files changed, 115 insertions(+), 82 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index dea2c53bcc07..bbfa57d097af 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -861,8 +861,6 @@ static void ath12k_core_device_cleanup(struct ath12k_base *ab) ath12k_hif_irq_disable(ab); ath12k_core_pdev_destroy(ab); - ath12k_mac_unregister(ab); - ath12k_mac_destroy(ab); mutex_unlock(&ab->core_lock); } @@ -874,12 +872,18 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) lockdep_assert_held(&ag->mutex); + clear_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags); + + ath12k_mac_unregister(ag); + for (i = ag->num_devices - 1; i >= 0; i--) { ab = ag->ab[i]; if (!ab) continue; ath12k_core_device_cleanup(ab); } + + ath12k_mac_destroy(ag); } static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) @@ -889,6 +893,20 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) lockdep_assert_held(&ag->mutex); + if (test_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags)) + goto core_pdev_create; + + ret = ath12k_mac_allocate(ag); + if (WARN_ON(ret)) + return ret; + + ret = ath12k_mac_register(ag); + if (WARN_ON(ret)) + goto err_mac_destroy; + + set_bit(ATH12K_GROUP_FLAG_REGISTERED, 
&ag->flags); + +core_pdev_create: for (i = 0; i < ag->num_devices; i++) { ab = ag->ab[i]; if (!ab) @@ -896,29 +914,6 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) mutex_lock(&ab->core_lock); - /* Check if already registered or not, since same flow - * execute for HW restart case. - */ - if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) - goto core_pdev_create; - - ret = ath12k_mac_allocate(ab); - if (ret) { - ath12k_err(ab, "failed to create new hw device with mac80211 :%d\n", - ret); - mutex_unlock(&ab->core_lock); - return ret; - } - - ret = ath12k_mac_register(ab); - if (ret) { - ath12k_err(ab, "failed to register radio with mac80211: %d\n", - ret); - mutex_unlock(&ab->core_lock); - goto err; - } - -core_pdev_create: ret = ath12k_core_pdev_create(ab); if (ret) { ath12k_err(ab, "failed to create pdev core %d\n", ret); @@ -941,6 +936,10 @@ core_pdev_create: err: ath12k_core_hw_group_stop(ag); + return ret; + +err_mac_destroy: + ath12k_mac_destroy(ag); return ret; } diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index b8b1ee8d3302..64252d6491cd 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -64,6 +64,7 @@ #define ATH12K_RECOVER_START_TIMEOUT_HZ (20 * HZ) #define ATH12K_MAX_SOCS 3 +#define ATH12K_GROUP_MAX_RADIO (ATH12K_MAX_SOCS * MAX_RADIOS) #define ATH12K_INVALID_GROUP_ID 0xFF #define ATH12K_INVALID_DEVICE_ID 0xFF @@ -216,6 +217,10 @@ enum ath12k_scan_state { ATH12K_SCAN_ABORTING, }; +enum ath12k_hw_group_flags { + ATH12K_GROUP_FLAG_REGISTERED, +}; + enum ath12k_dev_flags { ATH12K_CAC_RUNNING, ATH12K_FLAG_CRASH_FLUSH, @@ -830,6 +835,15 @@ struct ath12k_hw_group { /* protects access to this struct */ struct mutex mutex; + + /* Holds information of wiphy (hw) registration. + * + * In Multi/Single Link Operation case, all pdevs are registered as + * a single wiphy. 
In other (legacy/Non-MLO) cases, each pdev is + * registered as separate wiphys. + */ + struct ath12k_hw *ah[ATH12K_GROUP_MAX_RADIO]; + u8 num_hw; }; /* Master structure to hold the hw data which may be used in core module */ @@ -895,15 +909,6 @@ struct ath12k_base { struct ath12k_pdev __rcu *pdevs_active[MAX_RADIOS]; - /* Holds information of wiphy (hw) registration. - * - * In Multi/Single Link Operation case, all pdevs are registered as - * a single wiphy. In other (legacy/Non-MLO) cases, each pdev is - * registered as separate wiphys. - */ - struct ath12k_hw *ah[MAX_RADIOS]; - u8 num_hw; - struct ath12k_wmi_hal_reg_capabilities_ext_arg hal_reg_cap[MAX_RADIOS]; unsigned long long free_vdev_map; unsigned long long free_vdev_stats_id_map; @@ -1164,18 +1169,18 @@ static inline struct ieee80211_hw *ath12k_ar_to_hw(struct ath12k *ar) static inline struct ath12k_hw *ath12k_ab_to_ah(struct ath12k_base *ab, int idx) { - return ab->ah[idx]; + return ab->ag->ah[idx]; } static inline void ath12k_ab_set_ah(struct ath12k_base *ab, int idx, struct ath12k_hw *ah) { - ab->ah[idx] = ah; + ab->ag->ah[idx] = ah; } static inline int ath12k_get_num_hw(struct ath12k_base *ab) { - return ab->num_hw; + return ab->ag->num_hw; } static inline struct ath12k_hw_group *ath12k_ab_to_ag(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index ce823b1c175f..68abe9d4ab45 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -991,21 +991,14 @@ void ath12k_dp_pdev_free(struct ath12k_base *ab) ath12k_dp_rx_pdev_free(ab, i); } -void ath12k_dp_pdev_pre_alloc(struct ath12k_base *ab) +void ath12k_dp_pdev_pre_alloc(struct ath12k *ar) { - struct ath12k *ar; - struct ath12k_pdev_dp *dp; - int i; + struct ath12k_pdev_dp *dp = &ar->dp; - for (i = 0; i < ab->num_radios; i++) { - ar = ab->pdevs[i].ar; - dp = &ar->dp; - dp->mac_id = i; - atomic_set(&dp->num_tx_pending, 0); - 
init_waitqueue_head(&dp->tx_empty_waitq); - - /* TODO: Add any RXDMA setup required per pdev */ - } + dp->mac_id = ar->pdev_idx; + atomic_set(&dp->num_tx_pending, 0); + init_waitqueue_head(&dp->tx_empty_waitq); + /* TODO: Add any RXDMA setup required per pdev */ } bool ath12k_dp_wmask_compaction_rx_tlv_supported(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index a120b7a8477d..021cd9e8ee1d 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -1806,7 +1806,7 @@ void ath12k_dp_free(struct ath12k_base *ab); int ath12k_dp_alloc(struct ath12k_base *ab); void ath12k_dp_cc_config(struct ath12k_base *ab); int ath12k_dp_pdev_alloc(struct ath12k_base *ab); -void ath12k_dp_pdev_pre_alloc(struct ath12k_base *ab); +void ath12k_dp_pdev_pre_alloc(struct ath12k *ar); void ath12k_dp_pdev_free(struct ath12k_base *ab); int ath12k_dp_tx_htt_srng_setup(struct ath12k_base *ab, u32 ring_id, int mac_id, enum hal_ring_type ring_type); diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1180070278da..1cf724a530b5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10818,19 +10818,13 @@ static void ath12k_mac_setup(struct ath12k *ar) skb_queue_head_init(&ar->wmi_mgmt_tx_queue); } -int ath12k_mac_register(struct ath12k_base *ab) +int ath12k_mac_register(struct ath12k_hw_group *ag) { + struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; int ret; - if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) - return 0; - - /* Initialize channel counters frequency value in hertz */ - ab->cc_freq_hz = 320000; - ab->free_vdev_map = (1LL << (ab->num_radios * TARGET_NUM_VDEVS)) - 1; - for (i = 0; i < ath12k_get_num_hw(ab); i++) { ah = ath12k_ab_to_ah(ab, i); @@ -10855,8 +10849,9 @@ err: return ret; } -void ath12k_mac_unregister(struct ath12k_base *ab) +void ath12k_mac_unregister(struct 
ath12k_hw_group *ag) { + struct ath12k_base *ab = ag->ab[0]; struct ath12k_hw *ah; int i; @@ -10876,12 +10871,13 @@ static void ath12k_mac_hw_destroy(struct ath12k_hw *ah) ieee80211_free_hw(ah->hw); } -static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_base *ab, +static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_hw_group *ag, struct ath12k_pdev_map *pdev_map, u8 num_pdev_map) { struct ieee80211_hw *hw; struct ath12k *ar; + struct ath12k_base *ab; struct ath12k_pdev *pdev; struct ath12k_hw *ah; int i; @@ -10913,23 +10909,30 @@ static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_base *ab, pdev->ar = ar; ath12k_mac_setup(ar); + ath12k_dp_pdev_pre_alloc(ar); } return ah; } -void ath12k_mac_destroy(struct ath12k_base *ab) +void ath12k_mac_destroy(struct ath12k_hw_group *ag) { struct ath12k_pdev *pdev; + struct ath12k_base *ab = ag->ab[0]; + int i, j; struct ath12k_hw *ah; - int i; - for (i = 0; i < ab->num_radios; i++) { - pdev = &ab->pdevs[i]; - if (!pdev->ar) + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) continue; - pdev->ar = NULL; + for (j = 0; j < ab->num_radios; j++) { + pdev = &ab->pdevs[j]; + if (!pdev->ar) + continue; + pdev->ar = NULL; + } } for (i = 0; i < ath12k_get_num_hw(ab); i++) { @@ -10942,26 +10945,59 @@ void ath12k_mac_destroy(struct ath12k_base *ab) } } -int ath12k_mac_allocate(struct ath12k_base *ab) +static void ath12k_mac_set_device_defaults(struct ath12k_base *ab) { + /* Initialize channel counters frequency value in hertz */ + ab->cc_freq_hz = 320000; + ab->free_vdev_map = (1LL << (ab->num_radios * TARGET_NUM_VDEVS)) - 1; +} + +int ath12k_mac_allocate(struct ath12k_hw_group *ag) +{ + struct ath12k_pdev_map pdev_map[ATH12K_GROUP_MAX_RADIO]; + int mac_id, device_id, total_radio, num_hw; + struct ath12k_base *ab; struct ath12k_hw *ah; - struct ath12k_pdev_map pdev_map[MAX_RADIOS]; int ret, i, j; u8 radio_per_hw; - if (test_bit(ATH12K_FLAG_REGISTERED, &ab->dev_flags)) - return 0; + 
total_radio = 0; + for (i = 0; i < ag->num_devices; i++) + total_radio += ag->ab[i]->num_radios; - ab->num_hw = ab->num_radios; + /* All pdev get combined and register as single wiphy based on + * hardware group which participate in multi-link operation else + * each pdev get register separately. + * + * Currently, registering as single pdevs. + */ radio_per_hw = 1; + num_hw = total_radio / radio_per_hw; - for (i = 0; i < ath12k_get_num_hw(ab); i++) { + if (WARN_ON(num_hw >= ATH12K_GROUP_MAX_RADIO)) + return -ENOSPC; + + ag->num_hw = 0; + device_id = 0; + mac_id = 0; + for (i = 0; i < num_hw; i++) { for (j = 0; j < radio_per_hw; j++) { + ab = ag->ab[device_id]; pdev_map[j].ab = ab; - pdev_map[j].pdev_idx = (i * radio_per_hw) + j; + pdev_map[j].pdev_idx = mac_id; + mac_id++; + + /* If mac_id falls beyond the current device MACs then + * move to next device + */ + if (mac_id >= ab->num_radios) { + mac_id = 0; + device_id++; + ath12k_mac_set_device_defaults(ab); + } } - ah = ath12k_mac_hw_allocate(ab, pdev_map, radio_per_hw); + ah = ath12k_mac_hw_allocate(ag, pdev_map, radio_per_hw); if (!ah) { ath12k_warn(ab, "failed to allocate mac80211 hw device for hw_idx %d\n", i); @@ -10971,11 +11007,10 @@ int ath12k_mac_allocate(struct ath12k_base *ab) ah->dev = ab->dev; - ath12k_ab_set_ah(ab, i, ah); + ag->ah[i] = ah; + ag->num_hw++; } - ath12k_dp_pdev_pre_alloc(ab); - return 0; err: diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index abdc9a6c0740..ccfc215d83ff 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -14,6 +14,7 @@ struct ath12k; struct ath12k_base; struct ath12k_hw; +struct ath12k_hw_group; struct ath12k_pdev_map; struct ath12k_generic_iter { @@ -60,10 +61,10 @@ enum ath12k_supported_bw { extern const struct htt_rx_ring_tlv_filter ath12k_mac_mon_status_filter_default; -void ath12k_mac_destroy(struct ath12k_base *ab); -void ath12k_mac_unregister(struct ath12k_base *ab); -int 
ath12k_mac_register(struct ath12k_base *ab); -int ath12k_mac_allocate(struct ath12k_base *ab); +void ath12k_mac_destroy(struct ath12k_hw_group *ag); +void ath12k_mac_unregister(struct ath12k_hw_group *ag); +int ath12k_mac_register(struct ath12k_hw_group *ag); +int ath12k_mac_allocate(struct ath12k_hw_group *ag); int ath12k_mac_hw_ratecode_to_legacy_rate(u8 hw_rc, u8 preamble, u8 *rateidx, u16 *rate); u8 ath12k_mac_bitrate_to_idx(const struct ieee80211_supported_band *sband, From d302ac65ac938516487f57ae20f11e9cf6327606 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:15 +0200 Subject: [PATCH 0161/1386] wifi: ath12k: send QMI host capability after device group is ready QMI host capability has the information regarding MLO parameters such as device_id, MLO capability, group id and the information of each device involved in the group, and is sent immediately on QMI server arrive event. Currently, only one device is involved in the group and hence, single device information is sent as part of MLO capability of host. But, in future when multi device group abstraction is introduced, host should send all the device information involved in the group as part of QMI MLO host capability rather than single device. Hence, sending QMI host capability immediately on server arrive of a device might not be ideal for multi device group abstraction as the details of other devices in the group would not be available. Hence, once QMI server arrive event is received, request for QMI PHY capabilities of device, and defer the host capability send for that device. After QMI PHY capability is received for all the devices in the group, trigger the host capability event for the deferred devices in the group. Hence, add changes to defer the QMI host capability event until the device group is ready and then resume the QMI exchange for all the devices with host capabilities.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-7-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 21 ++++++ drivers/net/wireless/ath/ath12k/qmi.c | 98 ++++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/qmi.h | 20 ++++++ 3 files changed, 134 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index bbfa57d097af..18b29515c6ae 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -969,6 +969,25 @@ bool ath12k_core_hw_group_start_ready(struct ath12k_hw_group *ag) return (ag->num_started == ag->num_devices); } +static void ath12k_core_trigger_partner(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag = ab->ag; + struct ath12k_base *partner_ab; + bool found = false; + int i; + + for (i = 0; i < ag->num_devices; i++) { + partner_ab = ag->ab[i]; + if (!partner_ab) + continue; + + if (found) + ath12k_qmi_trigger_host_cap(partner_ab); + + found = (partner_ab == ab); + } +} + int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab) { struct ath12k_hw_group *ag = ath12k_ab_to_ag(ab); @@ -1010,6 +1029,8 @@ int ath12k_core_qmi_firmware_ready(struct ath12k_base *ab) goto err_core_stop; } ath12k_dbg(ab, ATH12K_DBG_BOOT, "group %d started\n", ag->id); + } else { + ath12k_core_trigger_partner(ab); } mutex_unlock(&ag->mutex); diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 8b4d500fe426..2591d132a3fc 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -3088,11 +3088,69 @@ ath12k_qmi_driver_event_post(struct ath12k_qmi *qmi, return 0; } +void 
ath12k_qmi_trigger_host_cap(struct ath12k_base *ab) +{ + struct ath12k_qmi *qmi = &ab->qmi; + + spin_lock(&qmi->event_lock); + + if (ath12k_qmi_get_event_block(qmi)) + ath12k_qmi_set_event_block(qmi, false); + + spin_unlock(&qmi->event_lock); + + ath12k_dbg(ab, ATH12K_DBG_QMI, "trigger host cap for device id %d\n", + ab->device_id); + + ath12k_qmi_driver_event_post(qmi, ATH12K_QMI_EVENT_HOST_CAP, NULL); +} + +static bool ath12k_qmi_hw_group_host_cap_ready(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + + if (!(ab && ab->qmi.num_radios != U8_MAX)) + return false; + } + + return true; +} + +static struct ath12k_base *ath12k_qmi_hw_group_find_blocked(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab; + int i; + + lockdep_assert_held(&ag->mutex); + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + spin_lock(&ab->qmi.event_lock); + + if (ath12k_qmi_get_event_block(&ab->qmi)) { + spin_unlock(&ab->qmi.event_lock); + return ab; + } + + spin_unlock(&ab->qmi.event_lock); + } + + return NULL; +} + /* clang stack usage explodes if this is inlined */ static noinline_for_stack int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) { - struct ath12k_base *ab = qmi->ab; + struct ath12k_base *ab = qmi->ab, *block_ab; + struct ath12k_hw_group *ag = ab->ag; int ret; ath12k_qmi_phy_cap_send(ab); @@ -3103,12 +3161,22 @@ int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) return ret; } - ret = ath12k_qmi_host_cap_send(ab); - if (ret < 0) { - ath12k_warn(ab, "qmi failed to send host cap QMI:%d\n", ret); - return ret; + spin_lock(&qmi->event_lock); + + ath12k_qmi_set_event_block(qmi, true); + + spin_unlock(&qmi->event_lock); + + mutex_lock(&ag->mutex); + + if (ath12k_qmi_hw_group_host_cap_ready(ag)) { + block_ab = ath12k_qmi_hw_group_find_blocked(ag); + if (block_ab) + ath12k_qmi_trigger_host_cap(block_ab); } + mutex_unlock(&ag->mutex); + return ret; } @@ 
-3295,6 +3363,21 @@ static const struct qmi_ops ath12k_qmi_ops = { .del_server = ath12k_qmi_ops_del_server, }; +static int ath12k_qmi_event_host_cap(struct ath12k_qmi *qmi) +{ + struct ath12k_base *ab = qmi->ab; + int ret; + + ret = ath12k_qmi_host_cap_send(ab); + if (ret < 0) { + ath12k_warn(ab, "failed to send qmi host cap for device id %d: %d\n", + ab->device_id, ret); + return ret; + } + + return ret; +} + static void ath12k_qmi_driver_event_work(struct work_struct *work) { struct ath12k_qmi *qmi = container_of(work, struct ath12k_qmi, @@ -3351,6 +3434,11 @@ static void ath12k_qmi_driver_event_work(struct work_struct *work) &ab->dev_flags); break; + case ATH12K_QMI_EVENT_HOST_CAP: + ret = ath12k_qmi_event_host_cap(qmi); + if (ret < 0) + set_bit(ATH12K_FLAG_QMI_FAIL, &ab->dev_flags); + break; default: ath12k_warn(ab, "invalid event type: %d", event->type); break; diff --git a/drivers/net/wireless/ath/ath12k/qmi.h b/drivers/net/wireless/ath/ath12k/qmi.h index 0dfcbd8cb59b..98f6009ab21e 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.h +++ b/drivers/net/wireless/ath/ath12k/qmi.h @@ -68,6 +68,7 @@ enum ath12k_qmi_event_type { ATH12K_QMI_EVENT_FORCE_FW_ASSERT, ATH12K_QMI_EVENT_POWER_UP, ATH12K_QMI_EVENT_POWER_DOWN, + ATH12K_QMI_EVENT_HOST_CAP, ATH12K_QMI_EVENT_MAX, }; @@ -142,6 +143,10 @@ struct ath12k_qmi { u32 target_mem_mode; bool target_mem_delayed; u8 cal_done; + + /* protected with struct ath12k_qmi::event_lock */ + bool block_event; + u8 num_radios; struct target_info target; struct m3_mem_region m3_mem; @@ -594,11 +599,26 @@ struct qmi_wlanfw_wlan_ini_resp_msg_v01 { struct qmi_response_type_v01 resp; }; +static inline void ath12k_qmi_set_event_block(struct ath12k_qmi *qmi, bool block) +{ + lockdep_assert_held(&qmi->event_lock); + + qmi->block_event = block; +} + +static inline bool ath12k_qmi_get_event_block(struct ath12k_qmi *qmi) +{ + lockdep_assert_held(&qmi->event_lock); + + return qmi->block_event; +} + int ath12k_qmi_firmware_start(struct ath12k_base 
*ab, u32 mode); void ath12k_qmi_firmware_stop(struct ath12k_base *ab); void ath12k_qmi_deinit_service(struct ath12k_base *ab); int ath12k_qmi_init_service(struct ath12k_base *ab); void ath12k_qmi_free_resource(struct ath12k_base *ab); +void ath12k_qmi_trigger_host_cap(struct ath12k_base *ab); #endif From da8656797ae10b524a7a0c3d5eeb6237fa3ddd70 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 4 Dec 2024 18:32:16 +0200 Subject: [PATCH 0162/1386] wifi: ath12k: introduce mlo_capable flag for device group Currently, during mac80211 allocate and register, a single device is considered for the registration. But, in future, when multi device group abstraction is introduced, all the devices have to be combined together as a single abstraction and then hardware should be allocated. All the devices in the group should be combined together only if the group supports inter device MLO capability. The decision of whether to combine the devices or not can be based on the mlo capability flag in ath12k_hw_group. By default, mlo_capable flag in the group would be set as false. During QMI PHY capability exchange, mlo_capable flag in the group will be enabled only when there is more than one chip in the group or, if there is only one chip, when that chip supports intra chip MLO. Add changes to introduce mlo_capable flag for device group and refactor ath12k_mac_hw_allocate() api based on device group (ag) rather than device (ab).
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241204163216.433795-8-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 17 +++++++++++++++++ drivers/net/wireless/ath/ath12k/core.h | 3 +++ drivers/net/wireless/ath/ath12k/mac.c | 8 +++++--- drivers/net/wireless/ath/ath12k/qmi.c | 6 ++++-- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 18b29515c6ae..49d1ac15cb7a 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1439,6 +1439,7 @@ static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *a ab->device_id = ag->num_probed++; ag->ab[ab->device_id] = ab; ab->ag = ag; + ag->mlo_capable = false; return ag; } @@ -1548,6 +1549,22 @@ static int ath12k_core_hw_group_create(struct ath12k_hw_group *ag) return 0; } +void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) +{ + lockdep_assert_held(&ag->mutex); + + /* If more than one devices are grouped, then inter MLO + * functionality can work still independent of whether internally + * each device supports single_chip_mlo or not. 
+ * Only when there is one device, then it depends whether the + * device can support intra chip MLO or not + */ + if (ag->num_devices > 1) + ag->mlo_capable = true; + else + ag->mlo_capable = ag->ab[0]->single_chip_mlo_supp; +} + int ath12k_core_init(struct ath12k_base *ab) { struct ath12k_hw_group *ag; diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 64252d6491cd..458e3d0071a8 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -844,6 +844,7 @@ struct ath12k_hw_group { */ struct ath12k_hw *ah[ATH12K_GROUP_MAX_RADIO]; u8 num_hw; + bool mlo_capable; }; /* Master structure to hold the hw data which may be used in core module */ @@ -1066,6 +1067,8 @@ u32 ath12k_core_get_max_station_per_radio(struct ath12k_base *ab); u32 ath12k_core_get_max_peers_per_radio(struct ath12k_base *ab); u32 ath12k_core_get_max_num_tids(struct ath12k_base *ab); +void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag); + static inline const char *ath12k_scan_state_str(enum ath12k_scan_state state) { switch (state) { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1cf724a530b5..c4eab4c1c10e 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10968,10 +10968,12 @@ int ath12k_mac_allocate(struct ath12k_hw_group *ag) /* All pdev get combined and register as single wiphy based on * hardware group which participate in multi-link operation else * each pdev get register separately. - * - * Currently, registering as single pdevs. 
*/ - radio_per_hw = 1; + if (ag->mlo_capable) + radio_per_hw = total_radio; + else + radio_per_hw = 1; + num_hw = total_radio / radio_per_hw; if (WARN_ON(num_hw >= ATH12K_GROUP_MAX_RADIO)) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 2591d132a3fc..ba3cd2342465 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2023,9 +2023,9 @@ static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, u8 hw_link_id = 0; int i; - if (!ab->single_chip_mlo_supp) { + if (!ab->ag->mlo_capable) { ath12k_dbg(ab, ATH12K_DBG_QMI, - "intra device MLO is disabled hence skip QMI MLO cap"); + "MLO is disabled hence skip QMI MLO cap"); return; } @@ -3170,6 +3170,8 @@ int ath12k_qmi_event_server_arrive(struct ath12k_qmi *qmi) mutex_lock(&ag->mutex); if (ath12k_qmi_hw_group_host_cap_ready(ag)) { + ath12k_core_hw_group_set_mlo_capable(ag); + block_ab = ath12k_qmi_hw_group_find_blocked(ag); if (block_ab) ath12k_qmi_trigger_host_cap(block_ab); From 0bee36d1a51366fa57b731f8975f26f92943b43e Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Thu, 5 Dec 2024 12:42:58 +0800 Subject: [PATCH 0163/1386] selftests/bpf: Actuate tx_metadata_len in xdp_hw_metadata set XDP_UMEM_TX_METADATA_LEN flag to reserve tx_metadata_len bytes of per-chunk metadata. 
Fixes: d5e726d9143c ("xsk: Require XDP_UMEM_TX_METADATA_LEN to actuate tx_metadata_len") Signed-off-by: Song Yoong Siang Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241205044258.3155799-1-yoong.siang.song@intel.com --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 6f9956eed797..ad6c08dfd6c8 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE, - .flags = XSK_UMEM__DEFAULT_FLAGS, + .flags = XDP_UMEM_TX_METADATA_LEN, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; __u32 idx = 0; From 2309132fc5d9d87deb15bda3497326aded6bfe4a Mon Sep 17 00:00:00 2001 From: Song Yoong Siang Date: Thu, 5 Dec 2024 13:19:36 +0800 Subject: [PATCH 0164/1386] selftests/bpf: Enable Tx hwtstamp in xdp_hw_metadata Currently, the user needs to manually enable the transmit hardware timestamp feature of certain Ethernet drivers, e.g. stmmac and igc drivers, through the following command after running the xdp_hw_metadata app. sudo hwstamp_ctl -i eth0 -t 1 To simplify the test steps of xdp_hw_metadata, set tx_type to HWTSTAMP_TX_ON to enable hardware timestamping for all outgoing packets, so that the user no longer needs to execute the hwstamp_ctl command.
Signed-off-by: Song Yoong Siang Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241205051936.3156307-1-yoong.siang.song@intel.com --- tools/testing/selftests/bpf/xdp_hw_metadata.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index ad6c08dfd6c8..e38675d9b118 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname) { struct hwtstamp_config cfg = { .rx_filter = HWTSTAMP_FILTER_ALL, + .tx_type = HWTSTAMP_TX_ON, }; hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg); From 5765c7f6e3173eb894889a29963a497aeb721c5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2024 17:19:50 +0000 Subject: [PATCH 0165/1386] net_sched: sch_fq: add three drop_reason MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new drop_reason, more precise than generic QDISC_DROP: "tc -s qd" show aggregate counters, it might be more useful to use drop_reason infrastructure for bug hunting. 1) SKB_DROP_REASON_FQ_BAND_LIMIT Whenever a packet is added while its band limit is hit. Corresponding value in "tc -s qd" is bandX_drops XXXX 2) SKB_DROP_REASON_FQ_HORIZON_LIMIT Whenever a packet has a timestamp too far in the future. Corresponding value in "tc -s qd" is horizon_drops XXXX 3) SKB_DROP_REASON_FQ_FLOW_LIMIT Whenever a flow has reached its limit. 
Corresponding value in "tc -s qd" is flows_plimit XXXX Tested: tc qd replace dev eth1 root fq flow_limit 10 limit 100000 perf record -a -e skb:kfree_skb sleep 1; perf script udp_stream 12329 [004] 216.929492: skb:kfree_skb: skbaddr=0xffff888eabe17e00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12385 [006] 216.929593: skb:kfree_skb: skbaddr=0xffff888ef8827f00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12389 [005] 216.929871: skb:kfree_skb: skbaddr=0xffff888ecb9ba500 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12316 [009] 216.930398: skb:kfree_skb: skbaddr=0xffff888eca286b00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12400 [008] 216.930490: skb:kfree_skb: skbaddr=0xffff888eabf93d00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT tc qd replace dev eth1 root fq flow_limit 100 limit 10000 perf record -a -e skb:kfree_skb sleep 1; perf script udp_stream 18074 [001] 1058.318040: skb:kfree_skb: skbaddr=0xffffa23c881fc000 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18126 [005] 1058.320651: skb:kfree_skb: skbaddr=0xffffa23c6aad4000 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18118 [006] 1058.321065: skb:kfree_skb: skbaddr=0xffffa23df0d48a00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18074 [001] 1058.321126: skb:kfree_skb: skbaddr=0xffffa23c881ffa00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 15815 [003] 1058.321224: skb:kfree_skb: skbaddr=0xffffa23c9835db00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT tc -s -d qd sh dev eth1 qdisc fq 8023: root refcnt 257 limit 10000p flow_limit 100p buckets 1024 orphan_mask 1023 bands 3 priomap 
1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 weights 589824 196608 65536 quantum 18Kb initial_quantum 92120b low_rate_threshold 550Kbit refill_delay 40ms timer_slack 10us horizon 10s horizon_drop Sent 492439603330 bytes 336953991 pkt (dropped 61724094, overlimits 0 requeues 4463) backlog 14611228b 9995p requeues 4463 flows 2965 (inactive 1151 throttled 0) band0_pkts 0 band1_pkts 9993 band2_pkts 0 gc 6347 highprio 0 fastpath 30 throttled 5 latency 2.32us flows_plimit 7403693 band1_drops 54320401 Signed-off-by: Eric Dumazet Reviewed-by: Victor Nogueira Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20241204171950.89829-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 18 ++++++++++++++++++ include/net/sch_generic.h | 8 ++++++++ net/sched/sch_fq.c | 14 ++++++++++---- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 6c5a1ea209a2..c29282fabae6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -58,6 +58,9 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(FQ_BAND_LIMIT) \ + FN(FQ_HORIZON_LIMIT) \ + FN(FQ_FLOW_LIMIT) \ FN(CPU_BACKLOG) \ FN(XDP) \ FN(TC_INGRESS) \ @@ -311,6 +314,21 @@ enum skb_drop_reason { * failed to enqueue to current qdisc) */ SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band + * limit is reached. + */ + SKB_DROP_REASON_FQ_BAND_LIMIT, + /** + * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet + * timestamp is too far in the future. + */ + SKB_DROP_REASON_FQ_HORIZON_LIMIT, + /** + * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow + * exceeds its limits. + */ + SKB_DROP_REASON_FQ_FLOW_LIMIT, /** * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU * backlog queue. 
This can be caused by backlog queue full (see diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5d74fa7e694c..8074322dd636 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1245,6 +1245,14 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_DROP; } +static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free, + enum skb_drop_reason reason) +{ + tcf_set_drop_reason(skb, reason); + return qdisc_drop(skb, sch, to_free); +} + static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a5e87f9ea986..2ca5332cfcc5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -537,6 +537,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } +#define FQDR(reason) SKB_DROP_REASON_FQ_##reason + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -548,7 +550,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(BAND_LIMIT)); } now = ktime_get_ns(); @@ -558,8 +561,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check if packet timestamp is too far in the future. 
*/ if (fq_packet_beyond_horizon(skb, q, now)) { if (q->horizon_drop) { - q->stat_horizon_drops++; - return qdisc_drop(skb, sch, to_free); + q->stat_horizon_drops++; + return qdisc_drop_reason(skb, sch, to_free, + FQDR(HORIZON_LIMIT)); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; @@ -572,7 +576,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (f != &q->internal) { if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(FLOW_LIMIT)); } if (fq_flow_is_detached(f)) { @@ -597,6 +602,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } +#undef FQDR static void fq_check_throttled(struct fq_sched_data *q, u64 now) { From 10685681bafce6febb39770f3387621bf5d67d0b Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 3 Dec 2024 19:05:19 -0800 Subject: [PATCH 0166/1386] net_sched: sch_sfq: don't allow 1 packet limit The current implementation does not work correctly with a limit of 1. iproute2 actually checks for this and this patch adds the check in kernel as well. 
This fixes the following syzkaller reported crash: UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:210:6 index 65535 is out of range for type 'struct sfq_head[128]' CPU: 0 PID: 2569 Comm: syz-executor101 Not tainted 5.10.0-smp-DEV #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:79 [inline] dump_stack+0x125/0x19f lib/dump_stack.c:120 ubsan_epilogue lib/ubsan.c:148 [inline] __ubsan_handle_out_of_bounds+0xed/0x120 lib/ubsan.c:347 sfq_link net/sched/sch_sfq.c:210 [inline] sfq_dec+0x528/0x600 net/sched/sch_sfq.c:238 sfq_dequeue+0x39b/0x9d0 net/sched/sch_sfq.c:500 sfq_reset+0x13/0x50 net/sched/sch_sfq.c:525 qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 tbf_reset+0x3d/0x100 net/sched/sch_tbf.c:319 qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 dev_reset_queue+0x8c/0x140 net/sched/sch_generic.c:1296 netdev_for_each_tx_queue include/linux/netdevice.h:2350 [inline] dev_deactivate_many+0x6dc/0xc20 net/sched/sch_generic.c:1362 __dev_close_many+0x214/0x350 net/core/dev.c:1468 dev_close_many+0x207/0x510 net/core/dev.c:1506 unregister_netdevice_many+0x40f/0x16b0 net/core/dev.c:10738 unregister_netdevice_queue+0x2be/0x310 net/core/dev.c:10695 unregister_netdevice include/linux/netdevice.h:2893 [inline] __tun_detach+0x6b6/0x1600 drivers/net/tun.c:689 tun_detach drivers/net/tun.c:705 [inline] tun_chr_close+0x104/0x1b0 drivers/net/tun.c:3640 __fput+0x203/0x840 fs/file_table.c:280 task_work_run+0x129/0x1b0 kernel/task_work.c:185 exit_task_work include/linux/task_work.h:33 [inline] do_exit+0x5ce/0x2200 kernel/exit.c:931 do_group_exit+0x144/0x310 kernel/exit.c:1046 __do_sys_exit_group kernel/exit.c:1057 [inline] __se_sys_exit_group kernel/exit.c:1055 [inline] __x64_sys_exit_group+0x3b/0x40 kernel/exit.c:1055 do_syscall_64+0x6c/0xd0 entry_SYSCALL_64_after_hwframe+0x61/0xcb RIP: 0033:0x7fe5e7b52479 Code: Unable to access opcode bytes at RIP 0x7fe5e7b5244f. 
RSP: 002b:00007ffd3c800398 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe5e7b52479 RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 RBP: 00007fe5e7bcd2d0 R08: ffffffffffffffb8 R09: 0000000000000014 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe5e7bcd2d0 R13: 0000000000000000 R14: 00007fe5e7bcdd20 R15: 00007fe5e7b24270 The crash can also be reproduced with the following (with a tc recompiled to allow for sfq limits of 1): tc qdisc add dev dummy0 handle 1: root tbf rate 1Kbit burst 100b lat 1s ../iproute2-6.9.0/tc/tc qdisc add dev dummy0 handle 2: parent 1:10 sfq limit 1 ifconfig dummy0 up ping -I dummy0 -f -c2 -W0.1 8.8.8.8 sleep 1 Scenario that triggers the crash: * the first packet is sent and queued in TBF and SFQ; qdisc qlen is 1 * TBF dequeues: it peeks from SFQ which moves the packet to the gso_skb list and keeps qdisc qlen set to 1. TBF is out of tokens so it schedules itself for later. * the second packet is sent and TBF tries to queue it to SFQ. qdisc qlen is now 2 and because the SFQ limit is 1 the packet is dropped by SFQ. At this point qlen is 1, and all of the SFQ slots are empty, however q->tail is not NULL. At this point, assuming no more packets are queued, when sfq_dequeue runs again it will decrement the qlen for the current empty slot causing an underflow and the subsequent out of bounds access.
Reported-by: syzbot Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Octavian Purdila Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241204030520.2084663-2-tavip@google.com Signed-off-by: Jakub Kicinski --- net/sched/sch_sfq.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index a4b8296a2fa1..65d5b59da583 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt, if (!p) return -ENOMEM; } + if (ctl->limit == 1) { + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } sch_tree_lock(sch); if (ctl->quantum) q->quantum = ctl->quantum; From 1e7e1f0e8be147ae98fe88ec82150c97265965a6 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Tue, 3 Dec 2024 19:05:20 -0800 Subject: [PATCH 0167/1386] selftests/tc-testing: sfq: test that kernel rejects limit of 1 Add test to check that the kernel rejects a configuration with the limit set to 1. Signed-off-by: Octavian Purdila Link: https://patch.msgid.link/20241204030520.2084663-3-tavip@google.com Signed-off-by: Jakub Kicinski --- .../tc-testing/scripts/sfq_rejects_limit_1.py | 21 +++++++++++++++++++ .../tc-testing/tc-tests/qdiscs/sfq.json | 20 ++++++++++++++++++ 2 files changed, 41 insertions(+) create mode 100755 tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py new file mode 100755 index 000000000000..0f44a6199495 --- /dev/null +++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Script that checks that SFQ rejects a limit of 1 at the kernel +# level. We can't use iproute2's tc because it does not accept a limit +# of 1. 
+ +import sys +import os + +from pyroute2 import IPRoute +from pyroute2.netlink.exceptions import NetlinkError + +ip = IPRoute() +ifidx = ip.link_lookup(ifname=sys.argv[1]) + +try: + ip.tc('add', 'sfq', ifidx, limit=1) + sys.exit(1) +except NetlinkError: + sys.exit(0) diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 16d51936b385..50e8d72781cb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -208,5 +208,25 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "4d6f", + "name": "Check that limit of 1 is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [ + ] } ] From ca5c94949facce1f67a4a9a9528a27f635ff3e78 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:24 +0100 Subject: [PATCH 0168/1386] xsk: align &xdp_buff_xsk harder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the series "XSk buff on a diet" by Maciej, the greatest pow-2 which &xdp_buff_xsk can be divided got reduced from 16 to 8 on x86_64. Also, sizeof(xdp_buff_xsk) now is 120 bytes, which, taking the previous sentence into account, leads to that it leaves 8 bytes at the end of cacheline, which means an array of buffs will have its elements messed between the cachelines chaotically. Use __aligned_largest for this struct. This alignment is usually 16 bytes, which makes it fill two full cachelines and align an array nicely. 
___cacheline_aligned may be excessive here, especially on arches with 128-256 byte CLs, as well as 32-bit arches (76 -> 96 bytes on MIPS32R2), while not doing better than _largest. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xsk_buff_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index bb03cee716b3..7637799b6c19 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -29,7 +29,7 @@ struct xdp_buff_xsk { dma_addr_t frame_dma; struct xsk_buff_pool *pool; struct list_head list_node; -}; +} __aligned_largest; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) #define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) From 7cd1107f48e2a246c6a628c2381e1b8aafa4675a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:25 +0100 Subject: [PATCH 0169/1386] bpf, xdp: constify some bpf_prog * function arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In lots of places, bpf_prog pointer is used only for tracing or other stuff that doesn't modify the structure itself. Same for net_device. Address at least some of them and add `const` attributes there. The object code didn't change, but that may prevent unwanted data modifications and also allow more helpers to have const arguments. 
Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Jakub Kicinski --- include/linux/bpf.h | 12 ++++++------ include/linux/filter.h | 9 +++++---- include/linux/netdevice.h | 6 +++--- include/linux/skbuff.h | 2 +- kernel/bpf/devmap.c | 8 ++++---- net/core/dev.c | 10 +++++----- net/core/filter.c | 29 ++++++++++++++++------------- net/core/skbuff.c | 2 +- 8 files changed, 41 insertions(+), 37 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eaee2a819f4c..ec3acb16359e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2591,10 +2591,10 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress); + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress); void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -2864,15 +2864,15 @@ struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 3a21947f2fd4..9a5d23ae3855 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1179,17 +1179,18 @@ static inline int xdp_ok_fwd_dev(const struct 
net_device *fwd, * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *prog); + struct xdp_buff *xdp, const struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *prog); + const struct bpf_prog *prog); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, - struct bpf_prog *prog); + const struct bpf_prog *prog); void xdp_do_flush(void); -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecc686409161..ecca21387a68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3958,9 +3958,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog); -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); + const struct bpf_prog *xdp_prog); +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 58009fa66102..95452d1a07fc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3627,7 +3627,7 @@ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct 
page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog); + const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7878be18e9d2..effde52bc857 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -678,7 +678,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, } int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { int err; @@ -701,7 +701,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct sk_buff *nskb; int err; @@ -720,8 +720,8 @@ static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst, } int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 45a8c3dd4a64..40a2332e3fa0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4931,7 +4931,7 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; @@ -5033,7 +5033,7 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, } static int -netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) +netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog) { struct sk_buff 
*skb = *pskb; int err, hroom, troom; @@ -5057,7 +5057,7 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) static u32 netif_receive_generic_xdp(struct sk_buff **pskb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct sk_buff *skb = *pskb; u32 mac_len, act = XDP_DROP; @@ -5110,7 +5110,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb, * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX * queues, so they do not have this starvation issue. */ -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -5135,7 +5135,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; diff --git a/net/core/filter.c b/net/core/filter.c index 6625b3f563a4..fac245065b0a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4348,9 +4348,9 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) EXPORT_SYMBOL_GPL(xdp_master_redirect); static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, - struct net_device *dev, + const struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; @@ -4371,10 +4371,10 @@ err: return err; } -static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, - struct net_device *dev, - struct xdp_frame *xdpf, - struct bpf_prog *xdp_prog) +static __always_inline int +__xdp_do_redirect_frame(struct bpf_redirect_info *ri, struct net_device *dev, + struct xdp_frame *xdpf, + const struct bpf_prog 
*xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; @@ -4443,7 +4443,7 @@ err: } int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -4457,7 +4457,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, EXPORT_SYMBOL_GPL(xdp_do_redirect); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, - struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) + struct xdp_frame *xdpf, + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -4472,9 +4473,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, void *fwd, - enum bpf_map_type map_type, u32 map_id, - u32 flags) + const struct bpf_prog *xdp_prog, + void *fwd, enum bpf_map_type map_type, + u32 map_id, u32 flags) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct bpf_map *map; @@ -4528,7 +4529,8 @@ err: } int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *xdp_prog) + struct xdp_buff *xdp, + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -9075,7 +9077,8 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act) +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act) { const u32 act_max = XDP_REDIRECT; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6841e61a6bd0..a441613a1e6c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1009,7 
+1009,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog) + const struct bpf_prog *prog) { if (!prog->aux->xdp_has_frags) return -EINVAL; From dcf3827cde8621d2317a7f98e069adbdc2112982 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:26 +0100 Subject: [PATCH 0170/1386] xdp, xsk: constify read-only arguments of some static inline helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lots of read-only helpers for &xdp_buff and &xdp_frame, such as getting the frame length, skb_shared_info etc., don't have their arguments marked with `const` for no reason. Add the missing annotations to leave less place for mistakes and more for optimization. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 29 +++++++++++++++++------------ include/net/xdp_sock_drv.h | 11 ++++++----- include/net/xsk_buff_pool.h | 2 +- 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index e6770dd40c91..197808df1ee1 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -88,7 +88,7 @@ struct xdp_buff { u32 flags; /* supported values defined in xdp_buff_flags */ }; -static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp) { return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); } @@ -103,7 +103,8 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; } -static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +static __always_inline bool +xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp) { return !!(xdp->flags & 
XDP_FLAGS_FRAGS_PF_MEMALLOC); } @@ -144,15 +145,16 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) static inline struct skb_shared_info * -xdp_get_shared_info_from_buff(struct xdp_buff *xdp) +xdp_get_shared_info_from_buff(const struct xdp_buff *xdp) { return (struct skb_shared_info *)xdp_data_hard_end(xdp); } -static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +static __always_inline unsigned int +xdp_get_buff_len(const struct xdp_buff *xdp) { unsigned int len = xdp->data_end - xdp->data; - struct skb_shared_info *sinfo; + const struct skb_shared_info *sinfo; if (likely(!xdp_buff_has_frags(xdp))) goto out; @@ -177,12 +179,13 @@ struct xdp_frame { u32 flags; /* supported values defined in xdp_buff_flags */ }; -static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) { return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); } -static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +static __always_inline bool +xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) { return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); } @@ -201,7 +204,7 @@ static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) } static inline struct skb_shared_info * -xdp_get_shared_info_from_frame(struct xdp_frame *frame) +xdp_get_shared_info_from_frame(const struct xdp_frame *frame) { void *data_hard_start = frame->data - frame->headroom - sizeof(*frame); @@ -249,7 +252,8 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp); struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf); static inline -void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) +void xdp_convert_frame_to_buff(const struct xdp_frame *frame, + struct xdp_buff *xdp) { xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame); xdp->data = frame->data; @@ -260,7 
+264,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) } static inline -int xdp_update_frame_from_buff(struct xdp_buff *xdp, +int xdp_update_frame_from_buff(const struct xdp_buff *xdp, struct xdp_frame *xdp_frame) { int metasize, headroom; @@ -317,9 +321,10 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) +static __always_inline unsigned int +xdp_get_frame_len(const struct xdp_frame *xdpf) { - struct skb_shared_info *sinfo; + const struct skb_shared_info *sinfo; unsigned int len = xdpf->len; if (likely(!xdp_frame_has_frags(xdpf))) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 40085afd9160..f3175a5d28f7 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -101,7 +101,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return xp_alloc(pool); } -static inline bool xsk_is_eop_desc(struct xdp_desc *desc) +static inline bool xsk_is_eop_desc(const struct xdp_desc *desc) { return !xp_mb_desc(desc); } @@ -143,7 +143,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) list_add_tail(&frag->list_node, &frag->pool->xskb_list); } -static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) +static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) { struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); struct xdp_buff *ret = NULL; @@ -200,7 +200,8 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) XDP_TXMD_FLAGS_CHECKSUM | \ 0) -static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) +static inline bool +xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta) { return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); } @@ -337,7 +338,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct 
xsk_buff_pool *pool) return NULL; } -static inline bool xsk_is_eop_desc(struct xdp_desc *desc) +static inline bool xsk_is_eop_desc(const struct xdp_desc *desc) { return false; } @@ -360,7 +361,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { } -static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) +static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) { return NULL; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 7637799b6c19..50779406bc2d 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -183,7 +183,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } -static inline bool xp_mb_desc(struct xdp_desc *desc) +static inline bool xp_mb_desc(const struct xdp_desc *desc) { return desc->options & XDP_PKT_CONTD; } From f65966fe0178c06065d354c22fb456fc4370b527 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:27 +0100 Subject: [PATCH 0171/1386] xdp: allow attaching already registered memory model to xdp_rxq_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One may need to register memory model separately from xdp_rxq_info. One simple example may be XDP test run code, but in general, it might be useful when memory model registering is managed by one layer and then XDP RxQ info by a different one. Allow such scenarios by adding a simple helper which "attaches" already registered memory model to the desired xdp_rxq_info. As this is mostly needed for Page Pool, add a special function to do that for a &page_pool pointer. 
Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-5-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 32 +++++++++++++++++++++++++++ net/core/xdp.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/include/net/xdp.h b/include/net/xdp.h index 197808df1ee1..1253fe21ede7 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -356,6 +356,38 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); int xdp_reg_mem_model(struct xdp_mem_info *mem, enum xdp_mem_type type, void *allocator); void xdp_unreg_mem_model(struct xdp_mem_info *mem); +int xdp_reg_page_pool(struct page_pool *pool); +void xdp_unreg_page_pool(const struct page_pool *pool); +void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq, + const struct page_pool *pool); + +/** + * xdp_rxq_info_attach_mem_model - attach registered mem info to RxQ info + * @xdp_rxq: XDP RxQ info to attach the memory info to + * @mem: already registered memory info + * + * If the driver registers its memory providers manually, it must use this + * function instead of xdp_rxq_info_reg_mem_model(). + */ +static inline void +xdp_rxq_info_attach_mem_model(struct xdp_rxq_info *xdp_rxq, + const struct xdp_mem_info *mem) +{ + xdp_rxq->mem = *mem; +} + +/** + * xdp_rxq_info_detach_mem_model - detach registered mem info from RxQ info + * @xdp_rxq: XDP RxQ info to detach the memory info from + * + * If the driver registers its memory providers manually and then attaches it + * via xdp_rxq_info_attach_mem_model(), it must call this function before + * xdp_rxq_info_unreg(). + */ +static inline void xdp_rxq_info_detach_mem_model(struct xdp_rxq_info *xdp_rxq) +{ + xdp_rxq->mem = (struct xdp_mem_info){ }; +} /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. 
diff --git a/net/core/xdp.c b/net/core/xdp.c index bcc5551c6424..885a2a664bce 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -365,6 +365,62 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); +/** + * xdp_reg_page_pool - register &page_pool as a memory provider for XDP + * @pool: &page_pool to register + * + * Can be used to register pools manually without connecting to any XDP RxQ + * info, so that the XDP layer will be aware of them. Then, they can be + * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool(). + * + * Return: %0 on success, -errno on error. + */ +int xdp_reg_page_pool(struct page_pool *pool) +{ + struct xdp_mem_info mem; + + return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool); +} +EXPORT_SYMBOL_GPL(xdp_reg_page_pool); + +/** + * xdp_unreg_page_pool - unregister &page_pool from the memory providers list + * @pool: &page_pool to unregister + * + * A shorthand for manual unregistering page pools. If the pool was previously + * attached to an RxQ info, it must be detached first. + */ +void xdp_unreg_page_pool(const struct page_pool *pool) +{ + struct xdp_mem_info mem = { + .type = MEM_TYPE_PAGE_POOL, + .id = pool->xdp_mem_id, + }; + + xdp_unreg_mem_model(&mem); +} +EXPORT_SYMBOL_GPL(xdp_unreg_page_pool); + +/** + * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info + * @xdp_rxq: XDP RxQ info to attach the pool to + * @pool: pool to attach + * + * If the pool was registered manually, this function must be called instead + * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info. 
+ */ +void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq, + const struct page_pool *pool) +{ + struct xdp_mem_info mem = { + .type = MEM_TYPE_PAGE_POOL, + .id = pool->xdp_mem_id, + }; + + xdp_rxq_info_attach_mem_model(xdp_rxq, &mem); +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); + /* XDP RX runs under NAPI protection, and in different delivery error * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean From 9e25dd9d65d27aa94220831fe6453d935988801c Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:28 +0100 Subject: [PATCH 0172/1386] xsk: allow attaching XSk pool via xdp_rxq_info_reg_mem_model() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When you register an XSk pool as XDP Rxq info memory model, you then need to manually attach it after the registration. Let the user combine both actions into one by just passing a pointer to the pool directly to xdp_rxq_info_reg_mem_model(), which will take care of calling xsk_pool_set_rxq_info(). This looks similar to how a &page_pool gets registered and reduces repetitive driver code. 
Acked-by: Maciej Fijalkowski Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-6-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- net/core/xdp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/xdp.c b/net/core/xdp.c index 885a2a664bce..de1e9cb78718 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -358,6 +358,9 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, if (IS_ERR(xdp_alloc)) return PTR_ERR(xdp_alloc); + if (type == MEM_TYPE_XSK_BUFF_POOL && allocator) + xsk_pool_set_rxq_info(allocator, xdp_rxq); + if (trace_mem_connect_enabled() && xdp_alloc) trace_mem_connect(xdp_alloc, xdp_rxq); return 0; From e77d9aee951341119be16a991fcfc76d1154d22a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Tue, 3 Dec 2024 18:37:29 +0100 Subject: [PATCH 0173/1386] xdp: register system page pool as an XDP memory model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the system page pool usable as a source for allocating XDP frames, we need to register it with xdp_reg_mem_model(), so that page return works correctly. This is done in preparation for using the system page_pool to convert XDP_PASS XSk frames to skbs; for the same reason, make the per-cpu variable non-static so we can access it from other source files as well (but w/o exporting). 
Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-7-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecca21387a68..d1a8d98b132c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3322,6 +3322,7 @@ struct softnet_data { }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +DECLARE_PER_CPU(struct page_pool *, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) diff --git a/net/core/dev.c b/net/core/dev.c index 40a2332e3fa0..c7f3dea3e0eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -460,7 +460,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). */ -static DEFINE_PER_CPU(struct page_pool *, system_page_pool); +DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* @@ -12152,11 +12152,18 @@ static int net_page_pool_create(int cpuid) .nid = cpu_to_mem(cpuid), }; struct page_pool *pp_ptr; + int err; pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid); if (IS_ERR(pp_ptr)) return -ENOMEM; + err = xdp_reg_page_pool(pp_ptr); + if (err) { + page_pool_destroy(pp_ptr); + return err; + } + per_cpu(system_page_pool, cpuid) = pp_ptr; #endif return 0; @@ -12290,6 +12297,7 @@ out: if (!pp_ptr) continue; + xdp_unreg_page_pool(pp_ptr); page_pool_destroy(pp_ptr); per_cpu(system_page_pool, i) = NULL; } From 9bd9f72a74344b54cfb6fcabf1173e6c6e5c6952 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:30 +0100 Subject: [PATCH 0174/1386] netmem: add a couple of page helper wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following netmem 
counterparts: * virt_to_netmem() -- simple page_to_netmem(virt_to_page()) wrapper; * netmem_is_pfmemalloc() -- page_is_pfmemalloc() for page-backed netmems, false otherwise; and the following "unsafe" versions: * __netmem_to_page() * __netmem_get_pp() * __netmem_address() They do the same as their non-underscored buddies, but assume the netmem is always page-backed. When working with header &page_pools, you don't need to check whether netmem belongs to the host memory and you can never get NULL instead of &page. Checks for the LSB, clearing the LSB, branches take cycles and increase object code size, sometimes significantly. When you're sure your PP is always host, you can avoid this by using the underscored counterparts. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-8-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 78 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 8a6e20be4b9d..1b58faa4f20f 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -72,6 +72,22 @@ static inline bool netmem_is_net_iov(const netmem_ref netmem) return (__force unsigned long)netmem & NET_IOV; } +/** + * __netmem_to_page - unsafely get pointer to the &page backing @netmem + * @netmem: netmem reference to convert + * + * Unsafe version of netmem_to_page(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (no check for the LSB, no WARN). When @netmem points to IOV, + * provokes undefined behaviour. + * + * Return: pointer to the &page (garbage if @netmem is not page-backed). + */ +static inline struct page *__netmem_to_page(netmem_ref netmem) +{ + return (__force struct page *)netmem; +} + /* This conversion fails (returns NULL) if the netmem_ref is not struct page * backed. 
*/ @@ -80,7 +96,7 @@ static inline struct page *netmem_to_page(netmem_ref netmem) if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return NULL; - return (__force struct page *)netmem; + return __netmem_to_page(netmem); } static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) @@ -103,6 +119,17 @@ static inline netmem_ref page_to_netmem(struct page *page) return (__force netmem_ref)page; } +/** + * virt_to_netmem - convert virtual memory pointer to a netmem reference + * @data: host memory pointer to convert + * + * Return: netmem reference to the &page backing this virtual address. + */ +static inline netmem_ref virt_to_netmem(const void *data) +{ + return page_to_netmem(virt_to_page(data)); +} + static inline int netmem_ref_count(netmem_ref netmem) { /* The non-pp refcount of net_iov is always 1. On net_iov, we only @@ -127,6 +154,22 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); } +/** + * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem + * @netmem: netmem reference to get the pointer from + * + * Unsafe version of netmem_get_pp(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (avoids clearing the LSB). When @netmem points to IOV, + * provokes invalid memory access. + * + * Return: pointer to the &page_pool (garbage if @netmem is not page-backed). 
+ */ +static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) +{ + return __netmem_to_page(netmem)->pp; +} + static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->pp; @@ -158,12 +201,43 @@ static inline netmem_ref netmem_compound_head(netmem_ref netmem) return page_to_netmem(compound_head(netmem_to_page(netmem))); } +/** + * __netmem_address - unsafely get pointer to the memory backing @netmem + * @netmem: netmem reference to get the pointer for + * + * Unsafe version of netmem_address(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (no check for the LSB). When @netmem points to IOV, provokes + * undefined behaviour. + * + * Return: pointer to the memory (garbage if @netmem is not page-backed). + */ +static inline void *__netmem_address(netmem_ref netmem) +{ + return page_address(__netmem_to_page(netmem)); +} + static inline void *netmem_address(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return NULL; - return page_address(netmem_to_page(netmem)); + return __netmem_address(netmem); +} + +/** + * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure + * @netmem: netmem reference to check + * + * Return: true if @netmem is page-backed and the page was allocated under + * memory pressure, false otherwise. 
+ */ +static inline bool netmem_is_pfmemalloc(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return false; + + return page_is_pfmemalloc(netmem_to_page(netmem)); } static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) From 024bfd2e9d80d7131f1178eb2235030b96f7ef0e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:31 +0100 Subject: [PATCH 0175/1386] page_pool: make page_pool_put_page_bulk() handle array of netmems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, page_pool_put_page_bulk() indeed takes an array of pointers to the data, not pages, despite the name. As one side effect, when you're freeing frags from &skb_shared_info, xdp_return_frame_bulk() converts page pointers to virtual addresses and then page_pool_put_page_bulk() converts them back. Moreover, data pointers assume every frag is placed in the host memory, making this function non-universal. Make page_pool_put_page_bulk() handle array of netmems. Pass frag netmems directly and use virt_to_netmem() when freeing xdpf->data, so that the PP core will then get the compound netmem and take care of the rest. 
Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-9-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 8 ++++---- include/net/xdp.h | 2 +- net/core/page_pool.c | 30 +++++++++++++++--------------- net/core/xdp.c | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index c022c410abe3..1ea16b0e9c79 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,8 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count); +void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, + u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +272,8 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +static inline void page_pool_put_netmem_bulk(struct page_pool *pool, + netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index 1253fe21ede7..f4020b29122f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -194,7 +194,7 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) struct xdp_frame_bulk { int count; void *xa; - void *q[XDP_BULK_QUEUE_SIZE]; + netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f89cf93f6eb4..4c85b77cfdac 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -840,22 +840,22 @@ void page_pool_put_unrefed_page(struct page_pool *pool, 
struct page *page, EXPORT_SYMBOL(page_pool_put_unrefed_page); /** - * page_pool_put_page_bulk() - release references on multiple pages + * page_pool_put_netmem_bulk() - release references on multiple netmems * @pool: pool from which pages were allocated - * @data: array holding page pointers - * @count: number of pages in @data + * @data: array holding netmem references + * @count: number of entries in @data * - * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring - * producer lock. If the ptr_ring is full, page_pool_put_page_bulk() - * will release leftover pages to the page allocator. - * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx + * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring + * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() + * will release leftover netmems to the memory provider. + * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx * completion loop for the XDP_REDIRECT use case. * * Please note the caller must not use data area after running - * page_pool_put_page_bulk(), as this function overwrites it. + * page_pool_put_netmem_bulk(), as this function overwrites it. 
*/ -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, + u32 count) { int i, bulk_len = 0; bool allow_direct; @@ -864,7 +864,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, allow_direct = page_pool_napi_local(pool); for (i = 0; i < count; i++) { - netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); + netmem_ref netmem = netmem_compound_head(data[i]); /* It is not the last user for the page frag case */ if (!page_pool_is_last_ref(netmem)) @@ -873,7 +873,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (netmem) - data[bulk_len++] = (__force void *)netmem; + data[bulk_len++] = netmem; } if (!bulk_len) @@ -882,7 +882,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; @@ -899,9 +899,9 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, (__force netmem_ref)data[i]); + page_pool_return_page(pool, data[i]); } -EXPORT_SYMBOL(page_pool_put_page_bulk); +EXPORT_SYMBOL(page_pool_put_netmem_bulk); static netmem_ref page_pool_drain_frag(struct page_pool *pool, netmem_ref netmem) diff --git a/net/core/xdp.c b/net/core/xdp.c index de1e9cb78718..938ad15c9857 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -518,7 +518,7 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) if (unlikely(!xa || !bq->count)) return; - 
page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); + page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ bq->count = 0; } @@ -559,12 +559,12 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; - bq->q[bq->count++] = skb_frag_address(frag); + bq->q[bq->count++] = skb_frag_netmem(frag); if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); } } - bq->q[bq->count++] = xdpf->data; + bq->q[bq->count++] = virt_to_netmem(xdpf->data); } EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); From 3e42bb998c6d574cbd683bd2d4ba1a2abf5aa044 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:12 +0100 Subject: [PATCH 0176/1386] net: freescale: ucc_geth: Drop support for the "interface" DT property In april 2007, ucc_geth was converted to phylib with : commit 728de4c927a3 ("ucc_geth: migrate ucc_geth to phylib"). In that commit, the device-tree property "interface", that could be used to retrieve the PHY interface mode was deprecated. DTS files that still used that property were converted along the way, in the following commit, also dating from april 2007 : commit 0fd8c47cccb1 ("[POWERPC] Replace undocumented interface properties in dts files") 17 years later, there's no users of that property left and I hope it's safe to say we can remove support from that in the ucc_geth driver, making the probe() function a bit simpler. Should there be any users that have a DT that was generated when 2.6.21 was cutting-edge, print an error message with hints on how to convert the devicetree if the 'interface' property is found. With that property gone, we can greatly simplify the parsing of the phy-interface-mode from the devicetree by using of_get_phy_mode(), allowing the removal of the open-coded parsing in the driver. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 63 +++++------------------ 1 file changed, 12 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 6663c1768089..b023a1a1dc5c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3469,32 +3469,6 @@ static int ucc_geth_resume(struct platform_device *ofdev) #define ucc_geth_resume NULL #endif -static phy_interface_t to_phy_interface(const char *phy_connection_type) -{ - if (strcasecmp(phy_connection_type, "mii") == 0) - return PHY_INTERFACE_MODE_MII; - if (strcasecmp(phy_connection_type, "gmii") == 0) - return PHY_INTERFACE_MODE_GMII; - if (strcasecmp(phy_connection_type, "tbi") == 0) - return PHY_INTERFACE_MODE_TBI; - if (strcasecmp(phy_connection_type, "rmii") == 0) - return PHY_INTERFACE_MODE_RMII; - if (strcasecmp(phy_connection_type, "rgmii") == 0) - return PHY_INTERFACE_MODE_RGMII; - if (strcasecmp(phy_connection_type, "rgmii-id") == 0) - return PHY_INTERFACE_MODE_RGMII_ID; - if (strcasecmp(phy_connection_type, "rgmii-txid") == 0) - return PHY_INTERFACE_MODE_RGMII_TXID; - if (strcasecmp(phy_connection_type, "rgmii-rxid") == 0) - return PHY_INTERFACE_MODE_RGMII_RXID; - if (strcasecmp(phy_connection_type, "rtbi") == 0) - return PHY_INTERFACE_MODE_RTBI; - if (strcasecmp(phy_connection_type, "sgmii") == 0) - return PHY_INTERFACE_MODE_SGMII; - - return PHY_INTERFACE_MODE_MII; -} - static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { struct ucc_geth_private *ugeth = netdev_priv(dev); @@ -3564,19 +3538,6 @@ static int ucc_geth_probe(struct platform_device* ofdev) int err, ucc_num, max_speed = 0; const unsigned int *prop; phy_interface_t phy_interface; - static const int enet_to_speed[] = { - SPEED_10, SPEED_10, SPEED_10, - SPEED_100, SPEED_100, SPEED_100, - SPEED_1000, SPEED_1000, SPEED_1000, SPEED_1000, - }; - static const phy_interface_t 
enet_to_phy_interface[] = { - PHY_INTERFACE_MODE_MII, PHY_INTERFACE_MODE_RMII, - PHY_INTERFACE_MODE_RGMII, PHY_INTERFACE_MODE_MII, - PHY_INTERFACE_MODE_RMII, PHY_INTERFACE_MODE_RGMII, - PHY_INTERFACE_MODE_GMII, PHY_INTERFACE_MODE_RGMII, - PHY_INTERFACE_MODE_TBI, PHY_INTERFACE_MODE_RTBI, - PHY_INTERFACE_MODE_SGMII, - }; ugeth_vdbg("%s: IN", __func__); @@ -3627,18 +3588,18 @@ static int ucc_geth_probe(struct platform_device* ofdev) /* Find the TBI PHY node. If it's not there, we don't support SGMII */ ug_info->tbi_node = of_parse_phandle(np, "tbi-handle", 0); - /* get the phy interface type, or default to MII */ - prop = of_get_property(np, "phy-connection-type", NULL); - if (!prop) { - /* handle interface property present in old trees */ - prop = of_get_property(ug_info->phy_node, "interface", NULL); - if (prop != NULL) { - phy_interface = enet_to_phy_interface[*prop]; - max_speed = enet_to_speed[*prop]; - } else - phy_interface = PHY_INTERFACE_MODE_MII; - } else { - phy_interface = to_phy_interface((const char *)prop); + prop = of_get_property(ug_info->phy_node, "interface", NULL); + if (prop) { + dev_err(&ofdev->dev, + "Device-tree property 'interface' is no longer supported. Please use 'phy-connection-type' instead."); + err = -EINVAL; + goto err_deregister_fixed_link; + } + + err = of_get_phy_mode(np, &phy_interface); + if (err) { + dev_err(&ofdev->dev, "Invalid phy-connection-type"); + goto err_deregister_fixed_link; } /* get speed, or derive from PHY interface */ From 1e59fd163100c2e21a65004c96f81b458e86b457 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:13 +0100 Subject: [PATCH 0177/1386] net: freescale: ucc_geth: split adjust_link for phylink conversion Preparing the phylink conversion, split the adjust_link callback by clearly separating the mac configuration, link_up and link_down phases. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 180 +++++++++++----------- 1 file changed, 93 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index b023a1a1dc5c..bf9f5901b405 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1548,6 +1548,94 @@ static void ugeth_activate(struct ucc_geth_private *ugeth) __netdev_watchdog_up(ugeth->ndev); } +static void ugeth_link_up(struct ucc_geth_private *ugeth, + struct phy_device *phy, + phy_interface_t interface, int speed, int duplex) +{ + struct ucc_geth __iomem *ug_regs = ugeth->ug_regs; + struct ucc_fast __iomem *uf_regs = ugeth->uccf->uf_regs; + u32 tempval = in_be32(&ug_regs->maccfg2); + u32 upsmr = in_be32(&uf_regs->upsmr); + int new_state = 0; + + /* Now we make sure that we can be in full duplex mode. + * If not, we operate in half-duplex mode. + */ + if (duplex != ugeth->oldduplex) { + new_state = 1; + if (duplex == DUPLEX_HALF) + tempval &= ~(MACCFG2_FDX); + else + tempval |= MACCFG2_FDX; + ugeth->oldduplex = duplex; + } + + if (speed != ugeth->oldspeed) { + new_state = 1; + switch (speed) { + case SPEED_1000: + tempval = ((tempval & + ~(MACCFG2_INTERFACE_MODE_MASK)) | + MACCFG2_INTERFACE_MODE_BYTE); + break; + case SPEED_100: + case SPEED_10: + tempval = ((tempval & + ~(MACCFG2_INTERFACE_MODE_MASK)) | + MACCFG2_INTERFACE_MODE_NIBBLE); + /* if reduced mode, re-set UPSMR.R10M */ + if (interface == PHY_INTERFACE_MODE_RMII || + phy_interface_mode_is_rgmii(interface) || + interface == PHY_INTERFACE_MODE_RTBI) { + if (speed == SPEED_10) + upsmr |= UCC_GETH_UPSMR_R10M; + else + upsmr &= ~UCC_GETH_UPSMR_R10M; + } + break; + default: + if (netif_msg_link(ugeth)) + pr_warn("%s: Speed (%d) is not 10/100/1000!", + netdev_name(ugeth->ndev), speed); + break; + } + ugeth->oldspeed = speed; + } + + if (!ugeth->oldlink) { + new_state = 1; + ugeth->oldlink = 1; + } + + if (new_state) { + /* 
+ * To change the MAC configuration we need to disable + * the controller. To do so, we have to either grab + * ugeth->lock, which is a bad idea since 'graceful + * stop' commands might take quite a while, or we can + * quiesce driver's activity. + */ + ugeth_quiesce(ugeth); + ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); + + out_be32(&ug_regs->maccfg2, tempval); + out_be32(&uf_regs->upsmr, upsmr); + + ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); + ugeth_activate(ugeth); + } + + if (netif_msg_link(ugeth)) + phy_print_status(phy); +} + +static void ugeth_link_down(struct ucc_geth_private *ugeth) +{ + ugeth->oldlink = 0; + ugeth->oldspeed = 0; + ugeth->oldduplex = -1; +} + /* Called every time the controller might need to be made * aware of new link state. The PHY code conveys this * information through variables in the ugeth structure, and this @@ -1558,95 +1646,13 @@ static void ugeth_activate(struct ucc_geth_private *ugeth) static void adjust_link(struct net_device *dev) { struct ucc_geth_private *ugeth = netdev_priv(dev); - struct ucc_geth __iomem *ug_regs; - struct ucc_fast __iomem *uf_regs; struct phy_device *phydev = ugeth->phydev; - int new_state = 0; - ug_regs = ugeth->ug_regs; - uf_regs = ugeth->uccf->uf_regs; - - if (phydev->link) { - u32 tempval = in_be32(&ug_regs->maccfg2); - u32 upsmr = in_be32(&uf_regs->upsmr); - /* Now we make sure that we can be in full duplex mode. - * If not, we operate in half-duplex mode. 
*/ - if (phydev->duplex != ugeth->oldduplex) { - new_state = 1; - if (!(phydev->duplex)) - tempval &= ~(MACCFG2_FDX); - else - tempval |= MACCFG2_FDX; - ugeth->oldduplex = phydev->duplex; - } - - if (phydev->speed != ugeth->oldspeed) { - new_state = 1; - switch (phydev->speed) { - case SPEED_1000: - tempval = ((tempval & - ~(MACCFG2_INTERFACE_MODE_MASK)) | - MACCFG2_INTERFACE_MODE_BYTE); - break; - case SPEED_100: - case SPEED_10: - tempval = ((tempval & - ~(MACCFG2_INTERFACE_MODE_MASK)) | - MACCFG2_INTERFACE_MODE_NIBBLE); - /* if reduced mode, re-set UPSMR.R10M */ - if ((ugeth->phy_interface == PHY_INTERFACE_MODE_RMII) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_ID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) { - if (phydev->speed == SPEED_10) - upsmr |= UCC_GETH_UPSMR_R10M; - else - upsmr &= ~UCC_GETH_UPSMR_R10M; - } - break; - default: - if (netif_msg_link(ugeth)) - pr_warn( - "%s: Ack! Speed (%d) is not 10/100/1000!", - dev->name, phydev->speed); - break; - } - ugeth->oldspeed = phydev->speed; - } - - if (!ugeth->oldlink) { - new_state = 1; - ugeth->oldlink = 1; - } - - if (new_state) { - /* - * To change the MAC configuration we need to disable - * the controller. To do so, we have to either grab - * ugeth->lock, which is a bad idea since 'graceful - * stop' commands might take quite a while, or we can - * quiesce driver's activity. 
- */ - ugeth_quiesce(ugeth); - ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); - - out_be32(&ug_regs->maccfg2, tempval); - out_be32(&uf_regs->upsmr, upsmr); - - ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); - ugeth_activate(ugeth); - } - } else if (ugeth->oldlink) { - new_state = 1; - ugeth->oldlink = 0; - ugeth->oldspeed = 0; - ugeth->oldduplex = -1; - } - - if (new_state && netif_msg_link(ugeth)) - phy_print_status(phydev); + if (phydev->link) + ugeth_link_up(ugeth, phydev, phydev->interface, + phydev->speed, phydev->duplex); + else + ugeth_link_down(ugeth); } /* Initialize TBI PHY interface for communicating with the From 43068024cc2a2abdf39c73d3c1ed63a77207ae31 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:14 +0100 Subject: [PATCH 0178/1386] net: freescale: ucc_geth: Use netdev->phydev to access the PHY As this driver pre-dates phylib, it uses a private pointer to get a reference to the attached phy_device. Drop that pointer and use the netdev's pointer instead. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 27 ++++++++----------- drivers/net/ethernet/freescale/ucc_geth.h | 1 - .../net/ethernet/freescale/ucc_geth_ethtool.c | 17 ++++++------ 3 files changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index bf9f5901b405..cc5f9ca42a78 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1646,7 +1646,7 @@ static void ugeth_link_down(struct ucc_geth_private *ugeth) static void adjust_link(struct net_device *dev) { struct ucc_geth_private *ugeth = netdev_priv(dev); - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = dev->phydev; if (phydev->link) ugeth_link_up(ugeth, phydev, phydev->interface, @@ -1727,8 +1727,6 @@ static int init_phy(struct net_device *dev) phy_set_max_speed(phydev, priv->max_speed); - priv->phydev = phydev; - return 0; } @@ -2001,7 +1999,7 @@ static void ucc_geth_set_multi(struct net_device *dev) static void ucc_geth_stop(struct ucc_geth_private *ugeth) { struct ucc_geth __iomem *ug_regs = ugeth->ug_regs; - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = ugeth->ndev->phydev; ugeth_vdbg("%s: IN", __func__); @@ -3316,13 +3314,13 @@ static int ucc_geth_open(struct net_device *dev) goto err; } - phy_start(ugeth->phydev); + phy_start(dev->phydev); napi_enable(&ugeth->napi); netdev_reset_queue(dev); netif_start_queue(dev); device_set_wakeup_capable(&dev->dev, - qe_alive_during_sleep() || ugeth->phydev->irq); + qe_alive_during_sleep() || dev->phydev->irq); device_set_wakeup_enable(&dev->dev, ugeth->wol_en); return err; @@ -3343,8 +3341,7 @@ static int ucc_geth_close(struct net_device *dev) cancel_work_sync(&ugeth->timeout_work); ucc_geth_stop(ugeth); - phy_disconnect(ugeth->phydev); - ugeth->phydev = NULL; + phy_disconnect(dev->phydev); free_irq(ugeth->ug_info->uf_info.irq, ugeth->ndev); @@ -3378,7 +3375,7 @@ static void 
ucc_geth_timeout_work(struct work_struct *work) ucc_geth_stop(ugeth); ucc_geth_init_mac(ugeth); /* Must start PHY here */ - phy_start(ugeth->phydev); + phy_start(dev->phydev); netif_tx_start_all_queues(dev); } @@ -3421,7 +3418,7 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state) setbits32(&ugeth->ug_regs->maccfg2, MACCFG2_MPE); ucc_fast_enable(ugeth->uccf, COMM_DIR_RX_AND_TX); } else if (!(ugeth->wol_en & WAKE_PHY)) { - phy_stop(ugeth->phydev); + phy_stop(ndev->phydev); } return 0; @@ -3461,8 +3458,8 @@ static int ucc_geth_resume(struct platform_device *ofdev) ugeth->oldspeed = 0; ugeth->oldduplex = -1; - phy_stop(ugeth->phydev); - phy_start(ugeth->phydev); + phy_stop(ndev->phydev); + phy_start(ndev->phydev); napi_enable(&ugeth->napi); netif_device_attach(ndev); @@ -3477,15 +3474,13 @@ static int ucc_geth_resume(struct platform_device *ofdev) static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct ucc_geth_private *ugeth = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!ugeth->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(ugeth->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } static const struct net_device_ops ucc_geth_netdev_ops = { diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 4294ed096ebb..c08a56b7c9fe 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1210,7 +1210,6 @@ struct ucc_geth_private { u16 skb_dirtytx[NUM_TX_QUEUES]; struct ugeth_mii_info *mii_info; - struct phy_device *phydev; phy_interface_t phy_interface; int max_speed; uint32_t msg_enable; diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 699f346faf5c..fb5254d7d1ba 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -103,8 
+103,7 @@ static const char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { static int uec_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = netdev->phydev; if (!phydev) return -ENODEV; @@ -118,8 +117,7 @@ static int uec_set_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { - struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = netdev->phydev; if (!phydev) return -ENODEV; @@ -132,8 +130,10 @@ uec_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct ucc_geth_private *ugeth = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; - pause->autoneg = ugeth->phydev->autoneg; + if (phydev) + pause->autoneg = phydev->autoneg; if (ugeth->ug_info->receiveFlowControl) pause->rx_pause = 1; @@ -146,12 +146,13 @@ uec_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct ucc_geth_private *ugeth = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; int ret = 0; ugeth->ug_info->receiveFlowControl = pause->rx_pause; ugeth->ug_info->transmitFlowControl = pause->tx_pause; - if (ugeth->phydev->autoneg) { + if (phydev && phydev->autoneg) { if (netif_running(netdev)) { /* FIXME: automatically restart */ netdev_info(netdev, "Please re-open the interface\n"); @@ -343,7 +344,7 @@ uec_get_drvinfo(struct net_device *netdev, static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = netdev->phydev; if (phydev && phydev->irq) wol->supported |= WAKE_PHY; @@ -356,7 +357,7 @@ static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) static int uec_set_wol(struct net_device 
*netdev, struct ethtool_wolinfo *wol) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = ugeth->phydev; + struct phy_device *phydev = netdev->phydev; if (wol->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) return -EINVAL; From d2adc441a19a592ce104e2b257ad9d002eaec53f Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:15 +0100 Subject: [PATCH 0179/1386] net: freescale: ucc_geth: Fix WOL configuration The get/set_wol ethtool ops rely on querying the PHY for its WoL capabilities, checking for the presence of a PHY and a PHY interrupts isn't enough. Address that by cleaning up the WoL configuration sequence. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 4 +- drivers/net/ethernet/freescale/ucc_geth.h | 1 + .../net/ethernet/freescale/ucc_geth_ethtool.c | 38 +++++++++++++++---- 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index cc5f9ca42a78..587bcbc079da 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3413,11 +3413,11 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state) */ ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); - if (ugeth->wol_en & WAKE_MAGIC) { + if (ugeth->wol_en & WAKE_MAGIC && !ugeth->phy_wol_en) { setbits32(ugeth->uccf->p_uccm, UCC_GETH_UCCE_MPD); setbits32(&ugeth->ug_regs->maccfg2, MACCFG2_MPE); ucc_fast_enable(ugeth->uccf, COMM_DIR_RX_AND_TX); - } else if (!(ugeth->wol_en & WAKE_PHY)) { + } else if (!ugeth->phy_wol_en) { phy_stop(ndev->phydev); } diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index c08a56b7c9fe..e08cfc8d8904 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1217,6 +1217,7 @@ struct ucc_geth_private { int 
oldduplex; int oldlink; int wol_en; + u32 phy_wol_en; struct device_node *node; }; diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index fb5254d7d1ba..89b323ef8145 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -346,26 +346,48 @@ static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) struct ucc_geth_private *ugeth = netdev_priv(netdev); struct phy_device *phydev = netdev->phydev; - if (phydev && phydev->irq) - wol->supported |= WAKE_PHY; + wol->supported = 0; + wol->wolopts = 0; + + if (phydev) + phy_ethtool_get_wol(phydev, wol); + if (qe_alive_during_sleep()) wol->supported |= WAKE_MAGIC; - wol->wolopts = ugeth->wol_en; + wol->wolopts |= ugeth->wol_en; } static int uec_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct ucc_geth_private *ugeth = netdev_priv(netdev); struct phy_device *phydev = netdev->phydev; + int ret = 0; - if (wol->wolopts & ~(WAKE_PHY | WAKE_MAGIC)) - return -EINVAL; - else if (wol->wolopts & WAKE_PHY && (!phydev || !phydev->irq)) - return -EINVAL; - else if (wol->wolopts & WAKE_MAGIC && !qe_alive_during_sleep()) + if (phydev) { + ret = phy_ethtool_set_wol(phydev, wol); + if (ret == -EOPNOTSUPP) { + ugeth->phy_wol_en = 0; + } else if (ret) { + return ret; + } else { + ugeth->phy_wol_en = wol->wolopts; + goto out; + } + } + + /* If the PHY isn't handling the WoL and the MAC is asked to more than + * WAKE_MAGIC, error-out + */ + if (!ugeth->phy_wol_en && + wol->wolopts & ~WAKE_MAGIC) return -EINVAL; + if (wol->wolopts & WAKE_MAGIC && + !qe_alive_during_sleep()) + return -EINVAL; + +out: ugeth->wol_en = wol->wolopts; device_set_wakeup_enable(&netdev->dev, ugeth->wol_en); From 420d56e4de5247b78fa1e1d5084f246e547e95a9 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:16 +0100 Subject: [PATCH 0180/1386] net: freescale: ucc_geth: Use the 
correct type to store WoL opts The WoL opts are represented through a bitmask stored in a u32. As this mask is copied as-is in the driver, make sure we use the exact same type to store them internally. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index e08cfc8d8904..60fd804a616a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1216,7 +1216,7 @@ struct ucc_geth_private { int oldspeed; int oldduplex; int oldlink; - int wol_en; + u32 wol_en; u32 phy_wol_en; struct device_node *node; From 270ec339126a09564fab67209da8330a20fe446f Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:17 +0100 Subject: [PATCH 0181/1386] net: freescale: ucc_geth: Simplify frame length check The frame length check is configured when the phy interface is set up. However, it's configured according to an internal flag that is always false. So, just make it so that we disable the relevant bit in the MACCFG2 register upon accessing it for other MAC configuration operations. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 21 +++------------------ drivers/net/ethernet/freescale/ucc_geth.h | 1 - 2 files changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 587bcbc079da..566f53e24d28 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1205,22 +1205,6 @@ static int init_mac_station_addr_regs(u8 address_byte_0, return 0; } -static int init_check_frame_length_mode(int length_check, - u32 __iomem *maccfg2_register) -{ - u32 value = 0; - - value = in_be32(maccfg2_register); - - if (length_check) - value |= MACCFG2_LC; - else - value &= ~MACCFG2_LC; - - out_be32(maccfg2_register, value); - return 0; -} - static int init_preamble_length(u8 preamble_length, u32 __iomem *maccfg2_register) { @@ -1304,6 +1288,9 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) /* Set MACCFG2 */ maccfg2 = in_be32(&ug_regs->maccfg2); + + /* Disable frame length check */ + maccfg2 &= ~MACCFG2_LC; maccfg2 &= ~MACCFG2_INTERFACE_MODE_MASK; if ((ugeth->max_speed == SPEED_10) || (ugeth->max_speed == SPEED_100)) @@ -1365,8 +1352,6 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) put_device(&tbiphy->mdio.dev); } - init_check_frame_length_mode(ug_info->lengthCheckRx, &ug_regs->maccfg2); - ret_val = init_preamble_length(ug_info->prel, &ug_regs->maccfg2); if (ret_val != 0) { if (netif_msg_probe(ugeth)) diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 60fd804a616a..2365b61c743a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1088,7 +1088,6 @@ struct ucc_geth_info { u8 miminumInterFrameGapEnforcement; u8 backToBackInterFrameGap; int ipAddressAlignment; - int lengthCheckRx; u32 mblinterval; u16 nortsrbytetime; u8 fracsiz; From dba25f75383fd8c2fe6f0390aa7f7a3b0dd72a63 Mon Sep 17 
00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:18 +0100 Subject: [PATCH 0182/1386] net: freescale: ucc_geth: Hardcode the preamble length to 7 bytes The preamble length can be configured in ucc_geth, however it just ends-up always being configured to 7 bytes, as nothing ever changes the default value of 7. Make that value the default value when the MACCFG2 register gets initialized, and remove the code to configure that value altogether. Reviewed-by: Andrew Lunn Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 21 --------------------- drivers/net/ethernet/freescale/ucc_geth.h | 4 ++-- 2 files changed, 2 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 566f53e24d28..81aefe291d80 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -132,7 +132,6 @@ static const struct ucc_geth_info ugeth_primary_info = { .transmitFlowControl = 1, .maxGroupAddrInHash = 4, .maxIndAddrInHash = 4, - .prel = 7, .maxFrameLength = 1518+16, /* Add extra bytes for VLANs etc. */ .minFrameLength = 64, .maxD1Length = 1520+16, /* Add extra bytes for VLANs etc. 
*/ @@ -1205,18 +1204,6 @@ static int init_mac_station_addr_regs(u8 address_byte_0, return 0; } -static int init_preamble_length(u8 preamble_length, - u32 __iomem *maccfg2_register) -{ - if ((preamble_length < 3) || (preamble_length > 7)) - return -EINVAL; - - clrsetbits_be32(maccfg2_register, MACCFG2_PREL_MASK, - preamble_length << MACCFG2_PREL_SHIFT); - - return 0; -} - static int init_rx_parameters(int reject_broadcast, int receive_short_frames, int promiscuous, u32 __iomem *upsmr_register) @@ -1276,7 +1263,6 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) struct ucc_geth_info *ug_info; struct ucc_geth __iomem *ug_regs; struct ucc_fast __iomem *uf_regs; - int ret_val; u32 upsmr, maccfg2; u16 value; @@ -1352,13 +1338,6 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) put_device(&tbiphy->mdio.dev); } - ret_val = init_preamble_length(ug_info->prel, &ug_regs->maccfg2); - if (ret_val != 0) { - if (netif_msg_probe(ugeth)) - pr_err("Preamble length must be between 3 and 7 inclusive\n"); - return ret_val; - } - return 0; } diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index 2365b61c743a..dfb727327093 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -921,7 +921,8 @@ struct ucc_geth_hardware_statistics { #define UCC_GETH_UPSMR_INIT UCC_GETH_UPSMR_RES1 #define UCC_GETH_MACCFG1_INIT 0 -#define UCC_GETH_MACCFG2_INIT (MACCFG2_RESERVED_1) +#define UCC_GETH_MACCFG2_INIT (MACCFG2_RESERVED_1 | \ + (7 << MACCFG2_PREL_SHIFT)) /* Ethernet Address Type. 
*/ enum enet_addr_type { @@ -1113,7 +1114,6 @@ struct ucc_geth_info { int transmitFlowControl; u8 maxGroupAddrInHash; u8 maxIndAddrInHash; - u8 prel; u16 maxFrameLength; u16 minFrameLength; u16 maxD1Length; From efc52055b756a231b2c4c6fdec4369c8903afa1e Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:19 +0100 Subject: [PATCH 0183/1386] net: freescale: ucc_geth: Move the serdes configuration around The uec_configure_serdes() function deals with serialized linkmodes settings. It's used during the link bringup sequence. It is planned to be used during the phylink conversion for mac configuration, but it needs to be moved around in the process. To make the phylink port clearer, this commit moves the function without any feature change. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 93 +++++++++++------------ 1 file changed, 46 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 81aefe291d80..f6dd36dc03fe 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1512,6 +1512,52 @@ static void ugeth_activate(struct ucc_geth_private *ugeth) __netdev_watchdog_up(ugeth->ndev); } +/* Initialize TBI PHY interface for communicating with the + * SERDES lynx PHY on the chip. We communicate with this PHY + * through the MDIO bus on each controller, treating it as a + * "normal" PHY at the address found in the UTBIPA register. We assume + * that the UTBIPA register is valid. Either the MDIO bus code will set + * it to a value that doesn't conflict with other PHYs on the bus, or the + * value doesn't matter, as there are no other PHYs on the bus. 
+ */ +static void uec_configure_serdes(struct net_device *dev) +{ + struct ucc_geth_private *ugeth = netdev_priv(dev); + struct ucc_geth_info *ug_info = ugeth->ug_info; + struct phy_device *tbiphy; + + if (!ug_info->tbi_node) { + dev_warn(&dev->dev, "SGMII mode requires that the device tree specify a tbi-handle\n"); + return; + } + + tbiphy = of_phy_find_device(ug_info->tbi_node); + if (!tbiphy) { + dev_err(&dev->dev, "error: Could not get TBI device\n"); + return; + } + + /* + * If the link is already up, we must already be ok, and don't need to + * configure and reset the TBI<->SerDes link. Maybe U-Boot configured + * everything for us? Resetting it takes the link down and requires + * several seconds for it to come back. + */ + if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) { + put_device(&tbiphy->mdio.dev); + return; + } + + /* Single clk mode, mii mode off(for serdes communication) */ + phy_write(tbiphy, ENET_TBI_MII_ANA, TBIANA_SETTINGS); + + phy_write(tbiphy, ENET_TBI_MII_TBICON, TBICON_CLK_SELECT); + + phy_write(tbiphy, ENET_TBI_MII_CR, TBICR_SETTINGS); + + put_device(&tbiphy->mdio.dev); +} + static void ugeth_link_up(struct ucc_geth_private *ugeth, struct phy_device *phy, phy_interface_t interface, int speed, int duplex) @@ -1619,53 +1665,6 @@ static void adjust_link(struct net_device *dev) ugeth_link_down(ugeth); } -/* Initialize TBI PHY interface for communicating with the - * SERDES lynx PHY on the chip. We communicate with this PHY - * through the MDIO bus on each controller, treating it as a - * "normal" PHY at the address found in the UTBIPA register. We assume - * that the UTBIPA register is valid. Either the MDIO bus code will set - * it to a value that doesn't conflict with other PHYs on the bus, or the - * value doesn't matter, as there are no other PHYs on the bus. 
- */ -static void uec_configure_serdes(struct net_device *dev) -{ - struct ucc_geth_private *ugeth = netdev_priv(dev); - struct ucc_geth_info *ug_info = ugeth->ug_info; - struct phy_device *tbiphy; - - if (!ug_info->tbi_node) { - dev_warn(&dev->dev, "SGMII mode requires that the device " - "tree specify a tbi-handle\n"); - return; - } - - tbiphy = of_phy_find_device(ug_info->tbi_node); - if (!tbiphy) { - dev_err(&dev->dev, "error: Could not get TBI device\n"); - return; - } - - /* - * If the link is already up, we must already be ok, and don't need to - * configure and reset the TBI<->SerDes link. Maybe U-Boot configured - * everything for us? Resetting it takes the link down and requires - * several seconds for it to come back. - */ - if (phy_read(tbiphy, ENET_TBI_MII_SR) & TBISR_LSTATUS) { - put_device(&tbiphy->mdio.dev); - return; - } - - /* Single clk mode, mii mode off(for serdes communication) */ - phy_write(tbiphy, ENET_TBI_MII_ANA, TBIANA_SETTINGS); - - phy_write(tbiphy, ENET_TBI_MII_TBICON, TBICON_CLK_SELECT); - - phy_write(tbiphy, ENET_TBI_MII_CR, TBICR_SETTINGS); - - put_device(&tbiphy->mdio.dev); -} - /* Configure the PHY for dev. * returns 0 if success. -1 if failure */ From 02d4a6498b3028f64f93ac61c0ff346e8ab661e0 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:20 +0100 Subject: [PATCH 0184/1386] net: freescale: ucc_geth: Introduce a helper to check Reduced modes A number of parallel MII interfaces also exist in a "Reduced" mode, usually with higher clock rates and fewer data lines, to ease the hardware design. This is what the 'R' stands for in RGMII, RMII, RTBI, RXAUI, etc. The UCC Geth controller has a special configuration bit that needs to be set when the MII mode is one of the supported reduced modes. Add a local helper for that. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index f6dd36dc03fe..57debcba124c 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -1258,6 +1258,13 @@ static int init_min_frame_len(u16 min_frame_length, return 0; } +static bool phy_interface_mode_is_reduced(phy_interface_t interface) +{ + return phy_interface_mode_is_rgmii(interface) || + interface == PHY_INTERFACE_MODE_RMII || + interface == PHY_INTERFACE_MODE_RTBI; +} + static int adjust_enet_interface(struct ucc_geth_private *ugeth) { struct ucc_geth_info *ug_info; @@ -1290,12 +1297,7 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth) upsmr = in_be32(&uf_regs->upsmr); upsmr &= ~(UCC_GETH_UPSMR_RPM | UCC_GETH_UPSMR_R10M | UCC_GETH_UPSMR_TBIM | UCC_GETH_UPSMR_RMM); - if ((ugeth->phy_interface == PHY_INTERFACE_MODE_RMII) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_ID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_RXID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RGMII_TXID) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) { + if (phy_interface_mode_is_reduced(ugeth->phy_interface)) { if (ugeth->phy_interface != PHY_INTERFACE_MODE_RMII) upsmr |= UCC_GETH_UPSMR_RPM; switch (ugeth->max_speed) { @@ -1594,9 +1596,7 @@ static void ugeth_link_up(struct ucc_geth_private *ugeth, ~(MACCFG2_INTERFACE_MODE_MASK)) | MACCFG2_INTERFACE_MODE_NIBBLE); /* if reduced mode, re-set UPSMR.R10M */ - if (interface == PHY_INTERFACE_MODE_RMII || - phy_interface_mode_is_rgmii(interface) || - interface == PHY_INTERFACE_MODE_RTBI) { + if (phy_interface_mode_is_reduced(interface)) { if (speed == SPEED_10) upsmr |= UCC_GETH_UPSMR_R10M; else From 53036aa8d03178a8d056a24a52a301ad290877d4 Mon Sep 17 00:00:00 2001 
From: Maxime Chevallier Date: Tue, 3 Dec 2024 13:43:21 +0100 Subject: [PATCH 0185/1386] net: freescale: ucc_geth: phylink conversion ucc_geth is quite capable in terms of supported interfaces, and even includes an externally controlled PCS (well, TBI). Port that driver to phylink. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/Kconfig | 3 +- drivers/net/ethernet/freescale/ucc_geth.c | 457 ++++++++---------- drivers/net/ethernet/freescale/ucc_geth.h | 13 +- .../net/ethernet/freescale/ucc_geth_ethtool.c | 73 +-- 4 files changed, 215 insertions(+), 331 deletions(-) diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index 75401d2a5fb4..a2d7300925a8 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -81,8 +81,7 @@ config UCC_GETH tristate "Freescale QE Gigabit Ethernet" depends on QUICC_ENGINE && PPC32 select FSL_PQ_MDIO - select PHYLIB - select FIXED_PHY + select PHYLINK help This driver supports the Gigabit Ethernet mode of the QUICC Engine, which is available on some Freescale SOCs. 
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 57debcba124c..f47f8177a93b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -34,6 +34,7 @@ #include #include #include +#include #include #include @@ -1265,84 +1266,6 @@ static bool phy_interface_mode_is_reduced(phy_interface_t interface) interface == PHY_INTERFACE_MODE_RTBI; } -static int adjust_enet_interface(struct ucc_geth_private *ugeth) -{ - struct ucc_geth_info *ug_info; - struct ucc_geth __iomem *ug_regs; - struct ucc_fast __iomem *uf_regs; - u32 upsmr, maccfg2; - u16 value; - - ugeth_vdbg("%s: IN", __func__); - - ug_info = ugeth->ug_info; - ug_regs = ugeth->ug_regs; - uf_regs = ugeth->uccf->uf_regs; - - /* Set MACCFG2 */ - maccfg2 = in_be32(&ug_regs->maccfg2); - - /* Disable frame length check */ - maccfg2 &= ~MACCFG2_LC; - maccfg2 &= ~MACCFG2_INTERFACE_MODE_MASK; - if ((ugeth->max_speed == SPEED_10) || - (ugeth->max_speed == SPEED_100)) - maccfg2 |= MACCFG2_INTERFACE_MODE_NIBBLE; - else if (ugeth->max_speed == SPEED_1000) - maccfg2 |= MACCFG2_INTERFACE_MODE_BYTE; - maccfg2 |= ug_info->padAndCrc; - out_be32(&ug_regs->maccfg2, maccfg2); - - /* Set UPSMR */ - upsmr = in_be32(&uf_regs->upsmr); - upsmr &= ~(UCC_GETH_UPSMR_RPM | UCC_GETH_UPSMR_R10M | - UCC_GETH_UPSMR_TBIM | UCC_GETH_UPSMR_RMM); - if (phy_interface_mode_is_reduced(ugeth->phy_interface)) { - if (ugeth->phy_interface != PHY_INTERFACE_MODE_RMII) - upsmr |= UCC_GETH_UPSMR_RPM; - switch (ugeth->max_speed) { - case SPEED_10: - upsmr |= UCC_GETH_UPSMR_R10M; - fallthrough; - case SPEED_100: - if (ugeth->phy_interface != PHY_INTERFACE_MODE_RTBI) - upsmr |= UCC_GETH_UPSMR_RMM; - } - } - if ((ugeth->phy_interface == PHY_INTERFACE_MODE_TBI) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) { - upsmr |= UCC_GETH_UPSMR_TBIM; - } - if (ugeth->phy_interface 
== PHY_INTERFACE_MODE_SGMII) - upsmr |= UCC_GETH_UPSMR_SGMM; - - out_be32(&uf_regs->upsmr, upsmr); - - /* Disable autonegotiation in tbi mode, because by default it - comes up in autonegotiation mode. */ - /* Note that this depends on proper setting in utbipar register. */ - if ((ugeth->phy_interface == PHY_INTERFACE_MODE_TBI) || - (ugeth->phy_interface == PHY_INTERFACE_MODE_RTBI)) { - struct ucc_geth_info *ug_info = ugeth->ug_info; - struct phy_device *tbiphy; - - if (!ug_info->tbi_node) - pr_warn("TBI mode requires that the device tree specify a tbi-handle\n"); - - tbiphy = of_phy_find_device(ug_info->tbi_node); - if (!tbiphy) - pr_warn("Could not get TBI device\n"); - - value = phy_read(tbiphy, ENET_TBI_MII_CR); - value &= ~0x1000; /* Turn off autonegotiation */ - phy_write(tbiphy, ENET_TBI_MII_CR, value); - - put_device(&tbiphy->mdio.dev); - } - - return 0; -} - static int ugeth_graceful_stop_tx(struct ucc_geth_private *ugeth) { struct ucc_fast_private *uccf; @@ -1560,64 +1483,62 @@ static void uec_configure_serdes(struct net_device *dev) put_device(&tbiphy->mdio.dev); } -static void ugeth_link_up(struct ucc_geth_private *ugeth, - struct phy_device *phy, - phy_interface_t interface, int speed, int duplex) +static void ugeth_mac_link_up(struct phylink_config *config, struct phy_device *phy, + unsigned int mode, phy_interface_t interface, + int speed, int duplex, bool tx_pause, bool rx_pause) { + struct net_device *ndev = to_net_dev(config->dev); + struct ucc_geth_private *ugeth = netdev_priv(ndev); + struct ucc_geth_info *ug_info = ugeth->ug_info; struct ucc_geth __iomem *ug_regs = ugeth->ug_regs; struct ucc_fast __iomem *uf_regs = ugeth->uccf->uf_regs; - u32 tempval = in_be32(&ug_regs->maccfg2); - u32 upsmr = in_be32(&uf_regs->upsmr); - int new_state = 0; + u32 old_maccfg2, maccfg2 = in_be32(&ug_regs->maccfg2); + u32 old_upsmr, upsmr = in_be32(&uf_regs->upsmr); - /* Now we make sure that we can be in full duplex mode. - * If not, we operate in half-duplex mode. 
- */ - if (duplex != ugeth->oldduplex) { - new_state = 1; - if (duplex == DUPLEX_HALF) - tempval &= ~(MACCFG2_FDX); - else - tempval |= MACCFG2_FDX; - ugeth->oldduplex = duplex; - } + old_maccfg2 = maccfg2; + old_upsmr = upsmr; + + /* No length check */ + maccfg2 &= ~MACCFG2_LC; + maccfg2 &= ~MACCFG2_INTERFACE_MODE_MASK; + upsmr &= ~(UCC_GETH_UPSMR_RPM | UCC_GETH_UPSMR_R10M | + UCC_GETH_UPSMR_TBIM | UCC_GETH_UPSMR_RMM); + + if (speed == SPEED_10 || speed == SPEED_100) + maccfg2 |= MACCFG2_INTERFACE_MODE_NIBBLE; + else if (speed == SPEED_1000) + maccfg2 |= MACCFG2_INTERFACE_MODE_BYTE; + + maccfg2 |= ug_info->padAndCrc; + + if (phy_interface_mode_is_reduced(interface)) { + + if (interface != PHY_INTERFACE_MODE_RMII) + upsmr |= UCC_GETH_UPSMR_RPM; - if (speed != ugeth->oldspeed) { - new_state = 1; switch (speed) { - case SPEED_1000: - tempval = ((tempval & - ~(MACCFG2_INTERFACE_MODE_MASK)) | - MACCFG2_INTERFACE_MODE_BYTE); - break; - case SPEED_100: case SPEED_10: - tempval = ((tempval & - ~(MACCFG2_INTERFACE_MODE_MASK)) | - MACCFG2_INTERFACE_MODE_NIBBLE); - /* if reduced mode, re-set UPSMR.R10M */ - if (phy_interface_mode_is_reduced(interface)) { - if (speed == SPEED_10) - upsmr |= UCC_GETH_UPSMR_R10M; - else - upsmr &= ~UCC_GETH_UPSMR_R10M; - } - break; - default: - if (netif_msg_link(ugeth)) - pr_warn("%s: Speed (%d) is not 10/100/1000!", - netdev_name(ugeth->ndev), speed); - break; + upsmr |= UCC_GETH_UPSMR_R10M; + fallthrough; + case SPEED_100: + if (interface != PHY_INTERFACE_MODE_RTBI) + upsmr |= UCC_GETH_UPSMR_RMM; } - ugeth->oldspeed = speed; } - if (!ugeth->oldlink) { - new_state = 1; - ugeth->oldlink = 1; - } + if (interface == PHY_INTERFACE_MODE_TBI || + interface == PHY_INTERFACE_MODE_RTBI) + upsmr |= UCC_GETH_UPSMR_TBIM; - if (new_state) { + if (interface == PHY_INTERFACE_MODE_SGMII) + upsmr |= UCC_GETH_UPSMR_SGMM; + + if (duplex == DUPLEX_HALF) + maccfg2 &= ~(MACCFG2_FDX); + else + maccfg2 |= MACCFG2_FDX; + + if (maccfg2 != old_maccfg2 || upsmr != 
old_upsmr) { /* * To change the MAC configuration we need to disable * the controller. To do so, we have to either grab @@ -1628,69 +1549,79 @@ static void ugeth_link_up(struct ucc_geth_private *ugeth, ugeth_quiesce(ugeth); ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); - out_be32(&ug_regs->maccfg2, tempval); + out_be32(&ug_regs->maccfg2, maccfg2); out_be32(&uf_regs->upsmr, upsmr); ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); ugeth_activate(ugeth); } - if (netif_msg_link(ugeth)) - phy_print_status(phy); -} + if (interface == PHY_INTERFACE_MODE_SGMII) + uec_configure_serdes(ndev); -static void ugeth_link_down(struct ucc_geth_private *ugeth) -{ - ugeth->oldlink = 0; - ugeth->oldspeed = 0; - ugeth->oldduplex = -1; -} + if (!phylink_autoneg_inband(mode)) { + ug_info->aufc = 0; + ug_info->receiveFlowControl = rx_pause; + ug_info->transmitFlowControl = tx_pause; -/* Called every time the controller might need to be made - * aware of new link state. The PHY code conveys this - * information through variables in the ugeth structure, and this - * function converts those variables into the appropriate - * register values, and can bring down the device if needed. - */ - -static void adjust_link(struct net_device *dev) -{ - struct ucc_geth_private *ugeth = netdev_priv(dev); - struct phy_device *phydev = dev->phydev; - - if (phydev->link) - ugeth_link_up(ugeth, phydev, phydev->interface, - phydev->speed, phydev->duplex); - else - ugeth_link_down(ugeth); -} - -/* Configure the PHY for dev. - * returns 0 if success. 
-1 if failure - */ -static int init_phy(struct net_device *dev) -{ - struct ucc_geth_private *priv = netdev_priv(dev); - struct ucc_geth_info *ug_info = priv->ug_info; - struct phy_device *phydev; - - priv->oldlink = 0; - priv->oldspeed = 0; - priv->oldduplex = -1; - - phydev = of_phy_connect(dev, ug_info->phy_node, &adjust_link, 0, - priv->phy_interface); - if (!phydev) { - dev_err(&dev->dev, "Could not attach to PHY\n"); - return -ENODEV; + init_flow_control_params(ug_info->aufc, + ug_info->receiveFlowControl, + ug_info->transmitFlowControl, + ug_info->pausePeriod, + ug_info->extensionField, + &ugeth->uccf->uf_regs->upsmr, + &ugeth->ug_regs->uempr, + &ugeth->ug_regs->maccfg1); } - if (priv->phy_interface == PHY_INTERFACE_MODE_SGMII) - uec_configure_serdes(dev); + ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); +} - phy_set_max_speed(phydev, priv->max_speed); +static void ugeth_mac_link_down(struct phylink_config *config, + unsigned int mode, phy_interface_t interface) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ucc_geth_private *ugeth = netdev_priv(ndev); - return 0; + ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); +} + +static void ugeth_mac_config(struct phylink_config *config, unsigned int mode, + const struct phylink_link_state *state) +{ + struct net_device *ndev = to_net_dev(config->dev); + struct ucc_geth_private *ugeth = netdev_priv(ndev); + struct ucc_geth_info *ug_info = ugeth->ug_info; + u16 value; + + if (state->interface == PHY_INTERFACE_MODE_TBI || + state->interface == PHY_INTERFACE_MODE_RTBI) { + struct phy_device *tbiphy; + + if (!ug_info->tbi_node) + pr_warn("TBI mode requires that the device tree specify a tbi-handle\n"); + + tbiphy = of_phy_find_device(ug_info->tbi_node); + if (!tbiphy) + pr_warn("Could not get TBI device\n"); + + value = phy_read(tbiphy, ENET_TBI_MII_CR); + value &= ~0x1000; /* Turn off autonegotiation */ + phy_write(tbiphy, ENET_TBI_MII_CR, value); + + put_device(&tbiphy->mdio.dev); + } + + if 
(phylink_autoneg_inband(mode)) { + ug_info->aufc = 1; + + init_flow_control_params(ug_info->aufc, 1, 1, + ug_info->pausePeriod, + ug_info->extensionField, + &ugeth->uccf->uf_regs->upsmr, + &ugeth->ug_regs->uempr, + &ugeth->ug_regs->maccfg1); + } } static void ugeth_dump_regs(struct ucc_geth_private *ugeth) @@ -1962,7 +1893,6 @@ static void ucc_geth_set_multi(struct net_device *dev) static void ucc_geth_stop(struct ucc_geth_private *ugeth) { struct ucc_geth __iomem *ug_regs = ugeth->ug_regs; - struct phy_device *phydev = ugeth->ndev->phydev; ugeth_vdbg("%s: IN", __func__); @@ -1971,7 +1901,7 @@ static void ucc_geth_stop(struct ucc_geth_private *ugeth) * Must be done before disabling the controller * or deadlock may happen. */ - phy_stop(phydev); + phylink_stop(ugeth->phylink); /* Disable the controller */ ugeth_disable(ugeth, COMM_DIR_RX_AND_TX); @@ -3213,12 +3143,6 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth) goto err; } - err = adjust_enet_interface(ugeth); - if (err) { - netif_err(ugeth, ifup, dev, "Cannot configure net device, aborting\n"); - goto err; - } - /* Set MACSTNADDR1, MACSTNADDR2 */ /* For more details see the hardware spec. 
*/ init_mac_station_addr_regs(dev->dev_addr[0], @@ -3230,12 +3154,6 @@ static int ucc_geth_init_mac(struct ucc_geth_private *ugeth) &ugeth->ug_regs->macstnaddr1, &ugeth->ug_regs->macstnaddr2); - err = ugeth_enable(ugeth, COMM_DIR_RX_AND_TX); - if (err) { - netif_err(ugeth, ifup, dev, "Cannot enable net device, aborting\n"); - goto err; - } - return 0; err: ucc_geth_stop(ugeth); @@ -3258,10 +3176,10 @@ static int ucc_geth_open(struct net_device *dev) return -EINVAL; } - err = init_phy(dev); + err = phylink_of_phy_connect(ugeth->phylink, ugeth->dev->of_node, 0); if (err) { - netif_err(ugeth, ifup, dev, "Cannot initialize PHY, aborting\n"); - return err; + dev_err(&dev->dev, "Could not attach to PHY\n"); + return -ENODEV; } err = ucc_geth_init_mac(ugeth); @@ -3277,7 +3195,7 @@ static int ucc_geth_open(struct net_device *dev) goto err; } - phy_start(dev->phydev); + phylink_start(ugeth->phylink); napi_enable(&ugeth->napi); netdev_reset_queue(dev); netif_start_queue(dev); @@ -3304,7 +3222,7 @@ static int ucc_geth_close(struct net_device *dev) cancel_work_sync(&ugeth->timeout_work); ucc_geth_stop(ugeth); - phy_disconnect(dev->phydev); + phylink_disconnect_phy(ugeth->phylink); free_irq(ugeth->ug_info->uf_info.irq, ugeth->ndev); @@ -3338,7 +3256,7 @@ static void ucc_geth_timeout_work(struct work_struct *work) ucc_geth_stop(ugeth); ucc_geth_init_mac(ugeth); /* Must start PHY here */ - phy_start(dev->phydev); + phylink_start(ugeth->phylink); netif_tx_start_all_queues(dev); } @@ -3363,6 +3281,7 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state) { struct net_device *ndev = platform_get_drvdata(ofdev); struct ucc_geth_private *ugeth = netdev_priv(ndev); + bool mac_wol = false; if (!netif_running(ndev)) return 0; @@ -3380,10 +3299,13 @@ static int ucc_geth_suspend(struct platform_device *ofdev, pm_message_t state) setbits32(ugeth->uccf->p_uccm, UCC_GETH_UCCE_MPD); setbits32(&ugeth->ug_regs->maccfg2, MACCFG2_MPE); ucc_fast_enable(ugeth->uccf, 
COMM_DIR_RX_AND_TX); - } else if (!ugeth->phy_wol_en) { - phy_stop(ndev->phydev); + mac_wol = true; } + rtnl_lock(); + phylink_suspend(ugeth->phylink, mac_wol); + rtnl_unlock(); + return 0; } @@ -3417,12 +3339,9 @@ static int ucc_geth_resume(struct platform_device *ofdev) } } - ugeth->oldlink = 0; - ugeth->oldspeed = 0; - ugeth->oldduplex = -1; - - phy_stop(ndev->phydev); - phy_start(ndev->phydev); + rtnl_lock(); + phylink_resume(ugeth->phylink); + rtnl_unlock(); napi_enable(&ugeth->napi); netif_device_attach(ndev); @@ -3437,13 +3356,12 @@ static int ucc_geth_resume(struct platform_device *ofdev) static int ucc_geth_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { + struct ucc_geth_private *ugeth = netdev_priv(dev); + if (!netif_running(dev)) return -EINVAL; - if (!dev->phydev) - return -ENODEV; - - return phy_mii_ioctl(dev->phydev, rq, cmd); + return phylink_mii_ioctl(ugeth->phylink, rq, cmd); } static const struct net_device_ops ucc_geth_netdev_ops = { @@ -3451,7 +3369,6 @@ static const struct net_device_ops ucc_geth_netdev_ops = { .ndo_stop = ucc_geth_close, .ndo_start_xmit = ucc_geth_start_xmit, .ndo_validate_addr = eth_validate_addr, - .ndo_change_carrier = fixed_phy_change_carrier, .ndo_set_mac_address = ucc_geth_set_mac_addr, .ndo_set_rx_mode = ucc_geth_set_multi, .ndo_tx_timeout = ucc_geth_timeout, @@ -3491,6 +3408,12 @@ static int ucc_geth_parse_clock(struct device_node *np, const char *which, return 0; } +struct phylink_mac_ops ugeth_mac_ops = { + .mac_link_up = ugeth_mac_link_up, + .mac_link_down = ugeth_mac_link_down, + .mac_config = ugeth_mac_config, +}; + static int ucc_geth_probe(struct platform_device* ofdev) { struct device *device = &ofdev->dev; @@ -3498,8 +3421,10 @@ static int ucc_geth_probe(struct platform_device* ofdev) struct net_device *dev = NULL; struct ucc_geth_private *ugeth = NULL; struct ucc_geth_info *ug_info; + struct device_node *phy_node; + struct phylink *phylink; struct resource res; - int err, ucc_num, max_speed = 0; + 
int err, ucc_num; const unsigned int *prop; phy_interface_t phy_interface; @@ -3537,57 +3462,35 @@ static int ucc_geth_probe(struct platform_device* ofdev) ug_info->uf_info.regs = res.start; ug_info->uf_info.irq = irq_of_parse_and_map(np, 0); - ug_info->phy_node = of_parse_phandle(np, "phy-handle", 0); - if (!ug_info->phy_node && of_phy_is_fixed_link(np)) { - /* - * In the case of a fixed PHY, the DT node associated - * to the PHY is the Ethernet MAC DT node. - */ - err = of_phy_register_fixed_link(np); - if (err) - return err; - ug_info->phy_node = of_node_get(np); - } - /* Find the TBI PHY node. If it's not there, we don't support SGMII */ ug_info->tbi_node = of_parse_phandle(np, "tbi-handle", 0); - prop = of_get_property(ug_info->phy_node, "interface", NULL); - if (prop) { - dev_err(&ofdev->dev, - "Device-tree property 'interface' is no longer supported. Please use 'phy-connection-type' instead."); - err = -EINVAL; - goto err_deregister_fixed_link; + phy_node = of_parse_phandle(np, "phy-handle", 0); + if (phy_node) { + prop = of_get_property(phy_node, "interface", NULL); + if (prop) { + dev_err(&ofdev->dev, + "Device-tree property 'interface' is no longer supported. 
Please use 'phy-connection-type' instead."); + of_node_put(phy_node); + err = -EINVAL; + goto err_put_tbi; + } + of_node_put(phy_node); } err = of_get_phy_mode(np, &phy_interface); if (err) { dev_err(&ofdev->dev, "Invalid phy-connection-type"); - goto err_deregister_fixed_link; + goto err_put_tbi; } - /* get speed, or derive from PHY interface */ - if (max_speed == 0) - switch (phy_interface) { - case PHY_INTERFACE_MODE_GMII: - case PHY_INTERFACE_MODE_RGMII: - case PHY_INTERFACE_MODE_RGMII_ID: - case PHY_INTERFACE_MODE_RGMII_RXID: - case PHY_INTERFACE_MODE_RGMII_TXID: - case PHY_INTERFACE_MODE_TBI: - case PHY_INTERFACE_MODE_RTBI: - case PHY_INTERFACE_MODE_SGMII: - max_speed = SPEED_1000; - break; - default: - max_speed = SPEED_100; - break; - } - - if (max_speed == SPEED_1000) { + if (phy_interface == PHY_INTERFACE_MODE_GMII || + phy_interface_mode_is_rgmii(phy_interface) || + phy_interface == PHY_INTERFACE_MODE_TBI || + phy_interface == PHY_INTERFACE_MODE_RTBI || + phy_interface == PHY_INTERFACE_MODE_SGMII) { unsigned int snums = qe_get_num_of_snums(); - /* configure muram FIFOs for gigabit operation */ ug_info->uf_info.urfs = UCC_GETH_URFS_GIGA_INIT; ug_info->uf_info.urfet = UCC_GETH_URFET_GIGA_INIT; ug_info->uf_info.urfset = UCC_GETH_URFSET_GIGA_INIT; @@ -3616,7 +3519,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) dev = devm_alloc_etherdev(&ofdev->dev, sizeof(*ugeth)); if (!dev) { err = -ENOMEM; - goto err_deregister_fixed_link; + goto err_put_tbi; } ugeth = netdev_priv(dev); @@ -3643,23 +3546,50 @@ static int ucc_geth_probe(struct platform_device* ofdev) dev->max_mtu = 1518; ugeth->msg_enable = netif_msg_init(debug.msg_enable, UGETH_MSG_DEFAULT); - ugeth->phy_interface = phy_interface; - ugeth->max_speed = max_speed; - /* Carrier starts down, phylib will bring it up */ - netif_carrier_off(dev); + ugeth->phylink_config.dev = &dev->dev; + ugeth->phylink_config.type = PHYLINK_NETDEV; + + ugeth->phylink_config.mac_capabilities = + MAC_SYM_PAUSE | 
MAC_10 | MAC_100 | MAC_1000FD; + + __set_bit(PHY_INTERFACE_MODE_MII, + ugeth->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_RMII, + ugeth->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_GMII, + ugeth->phylink_config.supported_interfaces); + phy_interface_set_rgmii(ugeth->phylink_config.supported_interfaces); + + if (ug_info->tbi_node) { + __set_bit(PHY_INTERFACE_MODE_SGMII, + ugeth->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_TBI, + ugeth->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_RTBI, + ugeth->phylink_config.supported_interfaces); + } + + phylink = phylink_create(&ugeth->phylink_config, dev_fwnode(&dev->dev), + phy_interface, &ugeth_mac_ops); + if (IS_ERR(phylink)) { + err = PTR_ERR(phylink); + goto err_put_tbi; + } + + ugeth->phylink = phylink; err = devm_register_netdev(&ofdev->dev, dev); if (err) { if (netif_msg_probe(ugeth)) pr_err("%s: Cannot register net device, aborting\n", dev->name); - goto err_deregister_fixed_link; + goto err_destroy_phylink; } err = of_get_ethdev_address(np, dev); if (err == -EPROBE_DEFER) - goto err_deregister_fixed_link; + goto err_destroy_phylink; ugeth->ug_info = ug_info; ugeth->dev = device; @@ -3668,11 +3598,11 @@ static int ucc_geth_probe(struct platform_device* ofdev) return 0; -err_deregister_fixed_link: - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); +err_destroy_phylink: + phylink_destroy(phylink); +err_put_tbi: of_node_put(ug_info->tbi_node); - of_node_put(ug_info->phy_node); + return err; } @@ -3680,13 +3610,10 @@ static void ucc_geth_remove(struct platform_device* ofdev) { struct net_device *dev = platform_get_drvdata(ofdev); struct ucc_geth_private *ugeth = netdev_priv(dev); - struct device_node *np = ofdev->dev.of_node; ucc_geth_memclean(ugeth); - if (of_phy_is_fixed_link(np)) - of_phy_deregister_fixed_link(np); + phylink_destroy(ugeth->phylink); of_node_put(ugeth->ug_info->tbi_node); - 
of_node_put(ugeth->ug_info->phy_node); } static const struct of_device_id ucc_geth_match[] = { diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index dfb727327093..38789faae706 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -1074,6 +1075,9 @@ struct ucc_geth_tad_params { u16 vid; }; +struct phylink; +struct phylink_config; + /* GETH protocol initialization structure */ struct ucc_geth_info { struct ucc_fast_info uf_info; @@ -1124,7 +1128,6 @@ struct ucc_geth_info { u32 eventRegMask; u16 pausePeriod; u16 extensionField; - struct device_node *phy_node; struct device_node *tbi_node; u8 weightfactor[NUM_TX_QUEUES]; u8 interruptcoalescingmaxvalue[NUM_RX_QUEUES]; @@ -1209,15 +1212,13 @@ struct ucc_geth_private { u16 skb_dirtytx[NUM_TX_QUEUES]; struct ugeth_mii_info *mii_info; - phy_interface_t phy_interface; - int max_speed; uint32_t msg_enable; - int oldspeed; - int oldduplex; - int oldlink; u32 wol_en; u32 phy_wol_en; + struct phylink *phylink; + struct phylink_config phylink_config; + struct device_node *node; }; diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 89b323ef8145..1fb49e5a414a 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -103,26 +103,18 @@ static const char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { static int uec_get_ksettings(struct net_device *netdev, struct ethtool_link_ksettings *cmd) { - struct phy_device *phydev = netdev->phydev; + struct ucc_geth_private *ugeth = netdev_priv(netdev); - if (!phydev) - return -ENODEV; - - phy_ethtool_ksettings_get(phydev, cmd); - - return 0; + return phylink_ethtool_ksettings_get(ugeth->phylink, cmd); } static int uec_set_ksettings(struct net_device *netdev, const struct ethtool_link_ksettings *cmd) { 
- struct phy_device *phydev = netdev->phydev; + struct ucc_geth_private *ugeth = netdev_priv(netdev); - if (!phydev) - return -ENODEV; - - return phy_ethtool_ksettings_set(phydev, cmd); + return phylink_ethtool_ksettings_set(ugeth->phylink, cmd); } static void @@ -130,15 +122,8 @@ uec_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - if (phydev) - pause->autoneg = phydev->autoneg; - - if (ugeth->ug_info->receiveFlowControl) - pause->rx_pause = 1; - if (ugeth->ug_info->transmitFlowControl) - pause->tx_pause = 1; + return phylink_ethtool_get_pauseparam(ugeth->phylink, pause); } static int @@ -146,31 +131,11 @@ uec_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - int ret = 0; ugeth->ug_info->receiveFlowControl = pause->rx_pause; ugeth->ug_info->transmitFlowControl = pause->tx_pause; - if (phydev && phydev->autoneg) { - if (netif_running(netdev)) { - /* FIXME: automatically restart */ - netdev_info(netdev, "Please re-open the interface\n"); - } - } else { - struct ucc_geth_info *ug_info = ugeth->ug_info; - - ret = init_flow_control_params(ug_info->aufc, - ug_info->receiveFlowControl, - ug_info->transmitFlowControl, - ug_info->pausePeriod, - ug_info->extensionField, - &ugeth->uccf->uf_regs->upsmr, - &ugeth->ug_regs->uempr, - &ugeth->ug_regs->maccfg1); - } - - return ret; + return phylink_ethtool_set_pauseparam(ugeth->phylink, pause); } static uint32_t @@ -344,13 +309,8 @@ uec_get_drvinfo(struct net_device *netdev, static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; - wol->supported = 0; - wol->wolopts = 0; - - if (phydev) - phy_ethtool_get_wol(phydev, wol); + 
phylink_ethtool_get_wol(ugeth->phylink, wol); if (qe_alive_during_sleep()) wol->supported |= WAKE_MAGIC; @@ -361,19 +321,16 @@ static void uec_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) static int uec_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct ucc_geth_private *ugeth = netdev_priv(netdev); - struct phy_device *phydev = netdev->phydev; int ret = 0; - if (phydev) { - ret = phy_ethtool_set_wol(phydev, wol); - if (ret == -EOPNOTSUPP) { - ugeth->phy_wol_en = 0; - } else if (ret) { - return ret; - } else { - ugeth->phy_wol_en = wol->wolopts; - goto out; - } + ret = phylink_ethtool_set_wol(ugeth->phylink, wol); + if (ret == -EOPNOTSUPP) { + ugeth->phy_wol_en = 0; + } else if (ret) { + return ret; + } else { + ugeth->phy_wol_en = wol->wolopts; + goto out; } /* If the PHY isn't handling the WoL and the MAC is asked to more than From e36d46b9af682bac7c376638cf0fd98d18b98653 Mon Sep 17 00:00:00 2001 From: Rosen Penev Date: Tue, 3 Dec 2024 15:13:37 -0800 Subject: [PATCH 0186/1386] net: simplify resource acquisition + ioremap get resource + request_mem_region + ioremap can all be done by a single function. Replace them with devm_platform_get_and_ioremap_resource or devm_platform_ioremap_resource where res is not used.
Signed-off-by: Rosen Penev Reviewed-by: Vincent Mailhol # sja1000_platform.c Link: https://patch.msgid.link/20241203231337.182391-1-rosenp@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/can/sja1000/sja1000_platform.c | 15 ++-------- drivers/net/ethernet/freescale/fman/fman.c | 35 +++++----------------- drivers/net/ethernet/lantiq_etop.c | 25 ++-------------- drivers/net/mdio/mdio-octeon.c | 25 +++------------- 4 files changed, 17 insertions(+), 83 deletions(-) diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c index c42ebe9da55a..2d555f854008 100644 --- a/drivers/net/can/sja1000/sja1000_platform.c +++ b/drivers/net/can/sja1000/sja1000_platform.c @@ -230,18 +230,9 @@ static int sp_probe(struct platform_device *pdev) return -ENODEV; } - res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res_mem) - return -ENODEV; - - if (!devm_request_mem_region(&pdev->dev, res_mem->start, - resource_size(res_mem), DRV_NAME)) - return -EBUSY; - - addr = devm_ioremap(&pdev->dev, res_mem->start, - resource_size(res_mem)); - if (!addr) - return -ENOMEM; + addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res_mem); + if (IS_ERR(addr)) + return PTR_ERR(addr); if (of) { irq = platform_get_irq(pdev, 0); diff --git a/drivers/net/ethernet/freescale/fman/fman.c b/drivers/net/ethernet/freescale/fman/fman.c index fb416d60dcd7..11887458f050 100644 --- a/drivers/net/ethernet/freescale/fman/fman.c +++ b/drivers/net/ethernet/freescale/fman/fman.c @@ -2690,13 +2690,12 @@ static struct fman *read_dts_node(struct platform_device *of_dev) { struct fman *fman; struct device_node *fm_node, *muram_node; + void __iomem *base_addr; struct resource *res; u32 val, range[2]; int err, irq; struct clk *clk; u32 clk_rate; - phys_addr_t phys_base_addr; - resource_size_t mem_size; fman = kzalloc(sizeof(*fman), GFP_KERNEL); if (!fman) @@ -2724,18 +2723,6 @@ static struct fman *read_dts_node(struct platform_device *of_dev) goto 
fman_node_put; fman->dts_params.err_irq = err; - /* Get the FM address */ - res = platform_get_resource(of_dev, IORESOURCE_MEM, 0); - if (!res) { - err = -EINVAL; - dev_err(&of_dev->dev, "%s: Can't get FMan memory resource\n", - __func__); - goto fman_node_put; - } - - phys_base_addr = res->start; - mem_size = resource_size(res); - clk = of_clk_get(fm_node, 0); if (IS_ERR(clk)) { err = PTR_ERR(clk); @@ -2803,24 +2790,16 @@ static struct fman *read_dts_node(struct platform_device *of_dev) } } - fman->dts_params.res = - devm_request_mem_region(&of_dev->dev, phys_base_addr, - mem_size, "fman"); - if (!fman->dts_params.res) { - err = -EBUSY; - dev_err(&of_dev->dev, "%s: request_mem_region() failed\n", - __func__); - goto fman_free; - } - - fman->dts_params.base_addr = - devm_ioremap(&of_dev->dev, phys_base_addr, mem_size); - if (!fman->dts_params.base_addr) { - err = -ENOMEM; + base_addr = devm_platform_get_and_ioremap_resource(of_dev, 0, &res); + if (IS_ERR(base_addr)) { + err = PTR_ERR(base_addr); dev_err(&of_dev->dev, "%s: devm_ioremap() failed\n", __func__); goto fman_free; } + fman->dts_params.base_addr = base_addr; + fman->dts_params.res = res; + fman->dev = &of_dev->dev; err = of_platform_populate(fm_node, NULL, NULL, &of_dev->dev); diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c index 660dff5426e7..83ce3bfefa5c 100644 --- a/drivers/net/ethernet/lantiq_etop.c +++ b/drivers/net/ethernet/lantiq_etop.c @@ -90,7 +90,6 @@ struct ltq_etop_priv { struct net_device *netdev; struct platform_device *pdev; struct ltq_eth_data *pldata; - struct resource *res; struct mii_bus *mii_bus; @@ -643,31 +642,14 @@ ltq_etop_probe(struct platform_device *pdev) { struct net_device *dev; struct ltq_etop_priv *priv; - struct resource *res; int err; int i; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "failed to get etop resource\n"); - err = -ENOENT; - goto err_out; - } - - res = 
devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), dev_name(&pdev->dev)); - if (!res) { - dev_err(&pdev->dev, "failed to request etop resource\n"); - err = -EBUSY; - goto err_out; - } - - ltq_etop_membase = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!ltq_etop_membase) { + ltq_etop_membase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(ltq_etop_membase)) { dev_err(&pdev->dev, "failed to remap etop engine %d\n", pdev->id); - err = -ENOMEM; + err = PTR_ERR(ltq_etop_membase); goto err_out; } @@ -679,7 +661,6 @@ ltq_etop_probe(struct platform_device *pdev) dev->netdev_ops = &ltq_eth_netdev_ops; dev->ethtool_ops = &ltq_etop_ethtool_ops; priv = netdev_priv(dev); - priv->res = res; priv->pdev = pdev; priv->pldata = dev_get_platdata(&pdev->dev); priv->netdev = dev; diff --git a/drivers/net/mdio/mdio-octeon.c b/drivers/net/mdio/mdio-octeon.c index 2beb83154d39..cb53dccbde1a 100644 --- a/drivers/net/mdio/mdio-octeon.c +++ b/drivers/net/mdio/mdio-octeon.c @@ -17,37 +17,20 @@ static int octeon_mdiobus_probe(struct platform_device *pdev) { struct cavium_mdiobus *bus; struct mii_bus *mii_bus; - struct resource *res_mem; - resource_size_t mdio_phys; - resource_size_t regsize; union cvmx_smix_en smi_en; - int err = -ENOENT; + int err; mii_bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*bus)); if (!mii_bus) return -ENOMEM; - res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res_mem == NULL) { - dev_err(&pdev->dev, "found no memory resource\n"); - return -ENXIO; - } - bus = mii_bus->priv; bus->mii_bus = mii_bus; - mdio_phys = res_mem->start; - regsize = resource_size(res_mem); - if (!devm_request_mem_region(&pdev->dev, mdio_phys, regsize, - res_mem->name)) { - dev_err(&pdev->dev, "request_mem_region failed\n"); - return -ENXIO; - } - - bus->register_base = devm_ioremap(&pdev->dev, mdio_phys, regsize); - if (!bus->register_base) { + bus->register_base = devm_platform_ioremap_resource(pdev, 0); + if
(IS_ERR(bus->register_base)) { dev_err(&pdev->dev, "dev_ioremap failed\n"); - return -ENOMEM; + return PTR_ERR(bus->register_base); } smi_en.u64 = 0; From 6c36b5c244d6cb22ef8ea2f6b5da46f5171b37a5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2024 21:02:34 +0000 Subject: [PATCH 0187/1386] net: tipc: remove one synchronize_net() from tipc_nametbl_stop() tipc_exit_net() is very slow and is abused by syzbot. tipc_nametbl_stop() is called for each netns being dismantled. Calling synchronize_net() right before freeing tn->nametbl is a big hammer. Replace this with kfree_rcu(). Note that RCU is not properly used here, otherwise tn->nametbl should be cleared before the synchronize_net() or kfree_rcu(), or even before the cleanup loop. We might need to fix this at some point. Also note tipc uses other synchronize_rcu() calls, more work is needed to make tipc_exit_net() much faster. List of remaining calls to synchronize_rcu() tipc_detach_loopback() (dev_remove_pack()) tipc_bcast_stop() tipc_sk_rht_destroy() Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241204210234.319484-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/tipc/name_table.c | 4 ++-- net/tipc/name_table.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index d1180370fdf4..e74940eab3a4 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -949,8 +949,8 @@ void tipc_nametbl_stop(struct net *net) } spin_unlock_bh(&tn->nametbl_lock); - synchronize_net(); - kfree(nt); + /* TODO: clear tn->nametbl, implement proper RCU rules ? 
*/ + kfree_rcu(nt, rcu); } static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 3bcd9ef8cee3..7ff6eeebaae6 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -90,6 +90,7 @@ struct publication { /** * struct name_table - table containing all existing port name publications + * @rcu: RCU callback head used for deferred freeing * @services: name sequence hash lists * @node_scope: all local publications with node scope * - used by name_distr during re-init of name table @@ -102,6 +103,7 @@ struct publication { * @snd_nxt: next sequence number to be used */ struct name_table { + struct rcu_head rcu; struct hlist_head services[TIPC_NAMETBL_SIZE]; struct list_head node_scope; struct list_head cluster_scope; From 48697bdfb65d21bab8c686830b04bf2e47b96d52 Mon Sep 17 00:00:00 2001 From: Joe Damato Date: Wed, 4 Dec 2024 16:32:39 +0000 Subject: [PATCH 0188/1386] selftests: net: cleanup busy_poller.c Fix various integer type conversions by using strtoull and a temporary variable which is bounds checked before being cast into the appropriate cfg_* variable for use by the test program. While here: - free the strdup'd cfg string for overall hygiene. - initialize napi_id = 0 in setup_queue to avoid warnings on some compilers.
Signed-off-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204163239.294123-1-jdamato@fastly.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/busy_poller.c | 88 +++++++++++++---------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c index 99b0e8c17fca..04c7ff577bb8 100644 --- a/tools/testing/selftests/net/busy_poller.c +++ b/tools/testing/selftests/net/busy_poller.c @@ -54,16 +54,16 @@ struct epoll_params { #define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) #endif -static uint32_t cfg_port = 8000; +static uint16_t cfg_port = 8000; static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; static char *cfg_outfile; static int cfg_max_events = 8; -static int cfg_ifindex; +static uint32_t cfg_ifindex; /* busy poll params */ static uint32_t cfg_busy_poll_usecs; -static uint32_t cfg_busy_poll_budget; -static uint32_t cfg_prefer_busy_poll; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; /* IRQ params */ static uint32_t cfg_defer_hard_irqs; @@ -79,6 +79,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + unsigned long long tmp; int ret; int c; @@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. 
+ */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + switch (c) { case 'u': - cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_usecs == ULONG_MAX || - cfg_busy_poll_usecs > UINT32_MAX) + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; break; case 'P': - cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); - if (cfg_prefer_busy_poll == ULONG_MAX || - cfg_prefer_busy_poll > 1) + if (tmp == ULLONG_MAX || tmp > 1) error(1, ERANGE, "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; break; case 'g': - cfg_busy_poll_budget = strtoul(optarg, NULL, 0); - if (cfg_busy_poll_budget == ULONG_MAX || - cfg_busy_poll_budget > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; break; case 'p': - cfg_port = strtoul(optarg, NULL, 0); - if (cfg_port > UINT16_MAX) + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; break; case 'b': ret = inet_aton(optarg, &cfg_bind_addr); @@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv) error(1, 0, "outfile invalid"); break; case 'm': - cfg_max_events = strtol(optarg, NULL, 0); - - if (cfg_max_events == LONG_MIN || - cfg_max_events == LONG_MAX || - cfg_max_events <= 0) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "max events must be > 0 and < LONG_MAX"); + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; break; case 'd': - cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); - - if (cfg_defer_hard_irqs == ULONG_MAX || - cfg_defer_hard_irqs > INT32_MAX) + if (tmp == ULLONG_MAX || tmp > INT32_MAX) error(1, ERANGE, "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; break; case 'r': - cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); - - if 
(cfg_gro_flush_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, - "gro_flush_timeout must be < ULLONG_MAX"); + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; break; case 's': - cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); - - if (cfg_irq_suspend_timeout == ULLONG_MAX) + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) error(1, ERANGE, "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; break; case 'i': - cfg_ifindex = strtoul(optarg, NULL, 0); - if (cfg_ifindex == ULONG_MAX) + if (tmp == ULLONG_MAX || tmp > INT_MAX) error(1, ERANGE, - "ifindex must be < ULONG_MAX"); + "ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; break; } } @@ -215,7 +223,7 @@ static void setup_queue(void) struct netdev_napi_set_req *set_req = NULL; struct ynl_sock *ys; struct ynl_error yerr; - uint32_t napi_id; + uint32_t napi_id = 0; ys = ynl_sock_create(&ynl_netdev_family, &yerr); if (!ys) @@ -277,8 +285,8 @@ static void run_poller(void) * here */ epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; - epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; - epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; epoll_params.__pad = 0; val = 1; @@ -342,5 +350,9 @@ int main(int argc, char *argv[]) parse_opts(argc, argv); setup_queue(); run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + return 0; } From bac3d0f21c5a42f042ac9b9f6dcbc11544efdefa Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 10:42:00 +0000 Subject: [PATCH 0189/1386] net: phy: marvell: use phydev->eee_cfg.eee_enabled Rather than calling genphy_c45_ethtool_get_eee() to retrieve whether EEE is enabled, use the value stored in the phy_device eee_cfg structure. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/E1tJ9J2-006LIh-Fl@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/marvell.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b885bc0fe6e0..ffe223ad9e5f 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1550,7 +1550,6 @@ static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs) static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs) { - struct ethtool_keee eee; int val, ret; if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF) @@ -1560,8 +1559,7 @@ static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs) /* According to the Marvell data sheet EEE must be disabled for * Fast Link Down detection to work properly */ - ret = genphy_c45_ethtool_get_eee(phydev, &eee); - if (!ret && eee.eee_enabled) { + if (phydev->eee_cfg.eee_enabled) { phydev_warn(phydev, "Fast Link Down detection requires EEE to be disabled!\n"); return -EBUSY; } From 92f7acb825ec272261a2057e0f9e0b1c76198dae Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 10:42:05 +0000 Subject: [PATCH 0190/1386] net: phy: avoid genphy_c45_ethtool_get_eee() setting eee_enabled genphy_c45_ethtool_get_eee() is only called from phy_ethtool_get_eee(), which then calls eeecfg_to_eee(). eeecfg_to_eee() will overwrite keee.eee_enabled, so there's no point setting keee.eee_enabled in genphy_c45_ethtool_get_eee(). Remove this assignment. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/E1tJ9J7-006LIn-Jr@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index 944ae98ad110..d162f78bc68d 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1521,15 +1521,13 @@ EXPORT_SYMBOL(genphy_c45_eee_is_active); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) { - bool is_enabled; int ret; ret = genphy_c45_eee_is_active(phydev, data->advertised, - data->lp_advertised, &is_enabled); + data->lp_advertised, NULL); if (ret < 0) return ret; - data->eee_enabled = is_enabled; data->eee_active = phydev->eee_active; linkmode_copy(data->supported, phydev->supported_eee); From 8f1c716090a7ed20fea802b63b37758169d59b81 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 10:42:10 +0000 Subject: [PATCH 0191/1386] net: phy: remove genphy_c45_eee_is_active()'s is_enabled arg All callers to genphy_c45_eee_is_active() now pass NULL as the is_enabled argument, which means we never use the value computed in this function. Remove the argument and clean up this function. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/E1tJ9JC-006LIt-Ne@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 12 ++++-------- drivers/net/phy/phy.c | 5 ++--- include/linux/phy.h | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index d162f78bc68d..0dac08e85304 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1469,18 +1469,17 @@ EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status); * @phydev: target phy_device struct * @adv: variable to store advertised linkmodes * @lp: variable to store LP advertised linkmodes - * @is_enabled: variable to store EEE enabled/disabled configuration value * * Description: this function will read local and link partner PHY * advertisements. Compare them return current EEE state. */ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled) + unsigned long *lp) { __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_adv) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_lp) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - bool eee_enabled, eee_active; + bool eee_active; int ret; ret = genphy_c45_read_eee_adv(phydev, tmp_adv); @@ -1491,9 +1490,8 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, if (ret) return ret; - eee_enabled = !linkmode_empty(tmp_adv); linkmode_and(common, tmp_adv, tmp_lp); - if (eee_enabled && !linkmode_empty(common)) + if (!linkmode_empty(tmp_adv) && !linkmode_empty(common)) eee_active = phy_check_valid(phydev->speed, phydev->duplex, common); else @@ -1503,8 +1501,6 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, linkmode_copy(adv, tmp_adv); if (lp) linkmode_copy(lp, tmp_lp); - if (is_enabled) - *is_enabled = eee_enabled; return eee_active; } @@ -1524,7 +1520,7 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, int ret; ret = 
genphy_c45_eee_is_active(phydev, data->advertised, - data->lp_advertised, NULL); + data->lp_advertised); if (ret < 0) return ret; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0c228aa18019..4cf344254237 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -988,8 +988,7 @@ static int phy_check_link_status(struct phy_device *phydev) if (phydev->link && phydev->state != PHY_RUNNING) { phy_check_downshift(phydev); phydev->state = PHY_RUNNING; - err = genphy_c45_eee_is_active(phydev, - NULL, NULL, NULL); + err = genphy_c45_eee_is_active(phydev, NULL, NULL); phydev->eee_active = err > 0; phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled && phydev->eee_active; @@ -1658,7 +1657,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) if (!phydev->drv) return -EIO; - ret = genphy_c45_eee_is_active(phydev, NULL, NULL, NULL); + ret = genphy_c45_eee_is_active(phydev, NULL, NULL); if (ret < 0) return ret; if (!ret) diff --git a/include/linux/phy.h b/include/linux/phy.h index 61a1bc81f597..bb157136351e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1991,7 +1991,7 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled); + unsigned long *lp); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, From f899c594e138eda72804b16babbdeff92707d7b0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 10:42:15 +0000 Subject: [PATCH 0192/1386] net: phy: update phy_ethtool_get_eee() documentation Update the phy_ethtool_get_eee() documentation to make it clear that all members of struct ethtool_keee are written by this function. 
keee.supported, keee.advertised, keee.lp_advertised and keee.eee_active are all written by genphy_c45_ethtool_get_eee(). keee.tx_lpi_timer, keee.tx_lpi_enabled and keee.eee_enabled are all written by eeecfg_to_eee(). Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tJ9JH-006LIz-SO@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 4cf344254237..e4b04cdaa995 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1701,8 +1701,8 @@ EXPORT_SYMBOL(phy_get_eee_err); * @phydev: target phy_device struct * @data: ethtool_keee data * - * Description: reports the Supported/Advertisement/LP Advertisement - * capabilities, etc. + * Description: get the current EEE settings, filling in all members of + * @data. */ int phy_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data) { From 7b60c3bf93fa813e6522686025aae31ab54db2d2 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:33 +0100 Subject: [PATCH 0193/1386] net: usb: lan78xx: Remove LAN8835 PHY fixup Remove the PHY fixup for the LAN8835 PHY in the lan78xx driver due to the following reasons: - There is no publicly available information about the LAN8835 PHY. However, it appears to be the integrated PHY used in the LAN7800 and LAN7850 USB Ethernet controllers. These PHYs use the GMII interface, not RGMII as configured by the fixup. - The correct driver for handling the LAN8835 PHY functionality is the Microchip PHY driver (`drivers/net/phy/microchip.c`), which properly supports these integrated PHYs. - The PHY ID `0x0007C130` is actually used by the LAN8742A PHY, which only supports RMII. This interface is incompatible with the LAN78xx MAC, as the LAN7801 (the only LAN78xx version without an integrated PHY) supports only RGMII. 
- The mask applied for this fixup is overly broad, inadvertently covering both Microchip LAN88xx PHYs and unrelated SMSC LAN8742A PHYs, leading to potential conflicts with other devices. - Testing has shown that removing this fixup for LAN7800 and LAN7850 does not result in any noticeable difference in functionality, as the Microchip PHY driver (`drivers/net/phy/microchip.c`) handles all necessary configurations for these integrated PHYs. - Registering this fixup globally (not limited to USB devices) risks conflicts by unintentionally modifying other interfaces whenever a LAN7801 adapter is connected to the system. Note that both LAN7800 and LAN7850 USB Ethernet controllers use an integrated PHY with the ID `0x0007C132`. Additionally, the LAN7515, a specialized part for Raspberry Pi, includes an integrated LAN7800 USB Ethernet controller and USB hub in a multifunctional chip design, and it also uses the same PHY ID (`0x0007C132`). Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 531b1b6a37d1..6e468e77d796 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -473,7 +473,6 @@ struct lan78xx_net { }; /* define external phy id */ -#define PHY_LAN8835 (0x0007C130) #define PHY_KSZ9031RNX (0x00221620) /* use ethtool to change the level for any given device */ @@ -2234,29 +2233,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) dev->domain_data.irqdomain = NULL; } -static int lan8835_fixup(struct phy_device *phydev) -{ - int buf; - struct lan78xx_net *dev = netdev_priv(phydev->attached_dev); - - /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */ - buf = phy_read_mmd(phydev, MDIO_MMD_PCS, 0x8010); - buf &= ~0x1800; - buf |= 0x0800; - 
phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf); - - /* RGMII MAC TXC Delay Enable */ - lan78xx_write_reg(dev, MAC_RGMII_ID, - MAC_RGMII_ID_TXC_DELAY_EN_); - - /* RGMII TX DLL Tune Adjust */ - lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); - - dev->interface = PHY_INTERFACE_MODE_RGMII_TXID; - - return 1; -} - static int ksz9031rnx_fixup(struct phy_device *phydev) { struct lan78xx_net *dev = netdev_priv(phydev->attached_dev); @@ -2315,14 +2291,6 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n"); return NULL; } - /* external PHY fixup for LAN8835 */ - ret = phy_register_fixup_for_uid(PHY_LAN8835, 0xfffffff0, - lan8835_fixup); - if (ret < 0) { - netdev_err(dev->net, "Failed to register fixup for PHY_LAN8835\n"); - return NULL; - } - /* add more external PHY fixup here if needed */ phydev->is_internal = false; } @@ -2384,8 +2352,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) } else { phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); - phy_unregister_fixup_for_uid(PHY_LAN8835, - 0xfffffff0); } } return -EIO; @@ -4243,7 +4209,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) phydev = net->phydev; phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); - phy_unregister_fixup_for_uid(PHY_LAN8835, 0xfffffff0); phy_disconnect(net->phydev); From 6782d06a47ad6f8844e71f3912ab60a47f7bc7c3 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:34 +0100 Subject: [PATCH 0194/1386] net: usb: lan78xx: Remove KSZ9031 PHY fixup Remove the KSZ9031RNX PHY fixup from the lan78xx driver. The fixup applied specific RGMII pad skew configurations globally, but these settings violate the RGMII specification and cause more harm than benefit. Key issues with the fixup: 1. 
**Non-Compliant Timing**: The fixup's delay settings fall outside the RGMII specification requirements of 1.5 ns to 2.0 ns: - RX Path: Total delay of **2.16 ns** (PHY internal delay of 1.2 ns + 0.96 ns skew). - TX Path: Total delay of **0.96 ns**, significantly below the RGMII minimum of 1.5 ns. 2. **Redundant or Incorrect Configurations**: - The RGMII skew registers written by the fixup do not meaningfully alter the PHY's default behavior and fail to account for its internal delays. - The TX_DATA pad skew was not configured, relying on power-on defaults that are insufficient for RGMII compliance. 3. **Micrel Driver Support**: By setting `PHY_INTERFACE_MODE_RGMII_ID`, the Micrel driver can calculate and assign appropriate skew values for the KSZ9031 PHY. This ensures better timing configurations without relying on external fixups. 4. **System Interference**: The fixup applied globally, reconfiguring all KSZ9031 PHYs in the system, even those unrelated to the LAN78xx adapter. This could lead to unintended and harmful behavior on unrelated interfaces. While the fixup is removed, a better mechanism is still needed to dynamically determine the optimal combination of PHY and MAC delays to fully meet RGMII requirements without relying on Device Tree or global fixups. This would allow for robust operation across different hardware configurations. The Micrel driver is capable of using the interface mode value to calculate and apply better skew values, providing a configuration much closer to the RGMII specification than the fixup. Removing the fixup ensures better default behavior and prevents harm to other system interfaces. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 38 +++++--------------------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6e468e77d796..918b88bd9524 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -472,9 +472,6 @@ struct lan78xx_net { struct irq_domain_data domain_data; }; -/* define external phy id */ -#define PHY_KSZ9031RNX (0x00221620) - /* use ethtool to change the level for any given device */ static int msg_level = -1; module_param(msg_level, int, 0); @@ -2233,23 +2230,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) dev->domain_data.irqdomain = NULL; } -static int ksz9031rnx_fixup(struct phy_device *phydev) -{ - struct lan78xx_net *dev = netdev_priv(phydev->attached_dev); - - /* Micrel9301RNX PHY configuration */ - /* RGMII Control Signal Pad Skew */ - phy_write_mmd(phydev, MDIO_MMD_WIS, 4, 0x0077); - /* RGMII RX Data Pad Skew */ - phy_write_mmd(phydev, MDIO_MMD_WIS, 5, 0x7777); - /* RGMII RX Clock Pad Skew */ - phy_write_mmd(phydev, MDIO_MMD_WIS, 8, 0x1FF); - - dev->interface = PHY_INTERFACE_MODE_RGMII_RXID; - - return 1; -} - static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) { u32 buf; @@ -2283,14 +2263,11 @@ static struct phy_device *lan7801_phy_init(struct lan78xx_net *dev) netdev_err(dev->net, "no PHY driver found\n"); return NULL; } - dev->interface = PHY_INTERFACE_MODE_RGMII; - /* external PHY fixup for KSZ9031RNX */ - ret = phy_register_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0, - ksz9031rnx_fixup); - if (ret < 0) { - netdev_err(dev->net, "Failed to register fixup for PHY_KSZ9031RNX\n"); - return NULL; - } + dev->interface = PHY_INTERFACE_MODE_RGMII_ID; + /* The PHY driver is responsible to configure proper RGMII + * interface delays. 
Disable RGMII delays on MAC side. + */ + lan78xx_write_reg(dev, MAC_RGMII_ID, 0); phydev->is_internal = false; } @@ -2349,9 +2326,6 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (phy_is_pseudo_fixed_link(phydev)) { fixed_phy_unregister(phydev); phy_device_free(phydev); - } else { - phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, - 0xfffffff0); } } return -EIO; @@ -4208,8 +4182,6 @@ static void lan78xx_disconnect(struct usb_interface *intf) phydev = net->phydev; - phy_unregister_fixup_for_uid(PHY_KSZ9031RNX, 0xfffffff0); - phy_disconnect(net->phydev); if (phy_is_pseudo_fixed_link(phydev)) { From 39aa1d620d10cdd276f4728da50f136dbe939643 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:35 +0100 Subject: [PATCH 0195/1386] net: usb: lan78xx: move functions to avoid forward definitions Move the following functions to avoid forward declarations in the code: - lan78xx_start_hw() - lan78xx_stop_hw() - lan78xx_flush_fifo() - lan78xx_start_tx_path() - lan78xx_stop_tx_path() - lan78xx_flush_tx_fifo() - lan78xx_start_rx_path() - lan78xx_stop_rx_path() - lan78xx_flush_rx_fifo() These functions will be used in an upcoming PHYlink migration patch. No modifications to the functionality of the code are made.
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-4-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 300 +++++++++++++++++++------------------- 1 file changed, 150 insertions(+), 150 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 918b88bd9524..dd9b5d3abcb3 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -808,6 +808,156 @@ static void lan78xx_update_stats(struct lan78xx_net *dev) usb_autopm_put_interface(dev->intf); } +static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable) +{ + return lan78xx_update_reg(dev, reg, hw_enable, hw_enable); +} + +static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled, + u32 hw_disabled) +{ + unsigned long timeout; + bool stopped = true; + int ret; + u32 buf; + + /* Stop the h/w block (if not already stopped) */ + + ret = lan78xx_read_reg(dev, reg, &buf); + if (ret < 0) + return ret; + + if (buf & hw_enabled) { + buf &= ~hw_enabled; + + ret = lan78xx_write_reg(dev, reg, buf); + if (ret < 0) + return ret; + + stopped = false; + timeout = jiffies + HW_DISABLE_TIMEOUT; + do { + ret = lan78xx_read_reg(dev, reg, &buf); + if (ret < 0) + return ret; + + if (buf & hw_disabled) + stopped = true; + else + msleep(HW_DISABLE_DELAY_MS); + } while (!stopped && !time_after(jiffies, timeout)); + } + + ret = stopped ? 
0 : -ETIME; + + return ret; +} + +static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush) +{ + return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush); +} + +static int lan78xx_start_tx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "start tx path"); + + /* Start the MAC transmitter */ + + ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_); + if (ret < 0) + return ret; + + /* Start the Tx FIFO */ + + ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_); + if (ret < 0) + return ret; + + return 0; +} + +static int lan78xx_stop_tx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "stop tx path"); + + /* Stop the Tx FIFO */ + + ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_); + if (ret < 0) + return ret; + + /* Stop the MAC transmitter */ + + ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_); + if (ret < 0) + return ret; + + return 0; +} + +/* The caller must ensure the Tx path is stopped before calling + * lan78xx_flush_tx_fifo(). 
+ */ +static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev) +{ + return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_); +} + +static int lan78xx_start_rx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "start rx path"); + + /* Start the Rx FIFO */ + + ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_); + if (ret < 0) + return ret; + + /* Start the MAC receiver*/ + + ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_); + if (ret < 0) + return ret; + + return 0; +} + +static int lan78xx_stop_rx_path(struct lan78xx_net *dev) +{ + int ret; + + netif_dbg(dev, drv, dev->net, "stop rx path"); + + /* Stop the MAC receiver */ + + ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_); + if (ret < 0) + return ret; + + /* Stop the Rx FIFO */ + + ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_); + if (ret < 0) + return ret; + + return 0; +} + +/* The caller must ensure the Rx path is stopped before calling + * lan78xx_flush_rx_fifo(). 
+ */ +static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev) +{ + return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_); +} + /* Loop until the read is completed with timeout called with phy_mutex held */ static int lan78xx_phy_wait_not_busy(struct lan78xx_net *dev) { @@ -2662,156 +2812,6 @@ static int lan78xx_urb_config_init(struct lan78xx_net *dev) return result; } -static int lan78xx_start_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enable) -{ - return lan78xx_update_reg(dev, reg, hw_enable, hw_enable); -} - -static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled, - u32 hw_disabled) -{ - unsigned long timeout; - bool stopped = true; - int ret; - u32 buf; - - /* Stop the h/w block (if not already stopped) */ - - ret = lan78xx_read_reg(dev, reg, &buf); - if (ret < 0) - return ret; - - if (buf & hw_enabled) { - buf &= ~hw_enabled; - - ret = lan78xx_write_reg(dev, reg, buf); - if (ret < 0) - return ret; - - stopped = false; - timeout = jiffies + HW_DISABLE_TIMEOUT; - do { - ret = lan78xx_read_reg(dev, reg, &buf); - if (ret < 0) - return ret; - - if (buf & hw_disabled) - stopped = true; - else - msleep(HW_DISABLE_DELAY_MS); - } while (!stopped && !time_after(jiffies, timeout)); - } - - ret = stopped ? 
0 : -ETIME; - - return ret; -} - -static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush) -{ - return lan78xx_update_reg(dev, reg, fifo_flush, fifo_flush); -} - -static int lan78xx_start_tx_path(struct lan78xx_net *dev) -{ - int ret; - - netif_dbg(dev, drv, dev->net, "start tx path"); - - /* Start the MAC transmitter */ - - ret = lan78xx_start_hw(dev, MAC_TX, MAC_TX_TXEN_); - if (ret < 0) - return ret; - - /* Start the Tx FIFO */ - - ret = lan78xx_start_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_); - if (ret < 0) - return ret; - - return 0; -} - -static int lan78xx_stop_tx_path(struct lan78xx_net *dev) -{ - int ret; - - netif_dbg(dev, drv, dev->net, "stop tx path"); - - /* Stop the Tx FIFO */ - - ret = lan78xx_stop_hw(dev, FCT_TX_CTL, FCT_TX_CTL_EN_, FCT_TX_CTL_DIS_); - if (ret < 0) - return ret; - - /* Stop the MAC transmitter */ - - ret = lan78xx_stop_hw(dev, MAC_TX, MAC_TX_TXEN_, MAC_TX_TXD_); - if (ret < 0) - return ret; - - return 0; -} - -/* The caller must ensure the Tx path is stopped before calling - * lan78xx_flush_tx_fifo(). 
- */ -static int lan78xx_flush_tx_fifo(struct lan78xx_net *dev) -{ - return lan78xx_flush_fifo(dev, FCT_TX_CTL, FCT_TX_CTL_RST_); -} - -static int lan78xx_start_rx_path(struct lan78xx_net *dev) -{ - int ret; - - netif_dbg(dev, drv, dev->net, "start rx path"); - - /* Start the Rx FIFO */ - - ret = lan78xx_start_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_); - if (ret < 0) - return ret; - - /* Start the MAC receiver*/ - - ret = lan78xx_start_hw(dev, MAC_RX, MAC_RX_RXEN_); - if (ret < 0) - return ret; - - return 0; -} - -static int lan78xx_stop_rx_path(struct lan78xx_net *dev) -{ - int ret; - - netif_dbg(dev, drv, dev->net, "stop rx path"); - - /* Stop the MAC receiver */ - - ret = lan78xx_stop_hw(dev, MAC_RX, MAC_RX_RXEN_, MAC_RX_RXD_); - if (ret < 0) - return ret; - - /* Stop the Rx FIFO */ - - ret = lan78xx_stop_hw(dev, FCT_RX_CTL, FCT_RX_CTL_EN_, FCT_RX_CTL_DIS_); - if (ret < 0) - return ret; - - return 0; -} - -/* The caller must ensure the Rx path is stopped before calling - * lan78xx_flush_rx_fifo(). - */ -static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev) -{ - return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_); -} - static int lan78xx_reset(struct lan78xx_net *dev) { struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); From 9bcdc610cfabe8784f80b8c84f950cc5693f146b Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:36 +0100 Subject: [PATCH 0196/1386] net: usb: lan78xx: Improve error reporting with %pe specifier Replace integer error codes with the `%pe` format specifier in register read and write error messages. This change provides human-readable error strings, making logs more informative and debugging easier. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-5-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index dd9b5d3abcb3..94320deaaeea 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -621,8 +621,8 @@ static int lan78xx_read_reg(struct lan78xx_net *dev, u32 index, u32 *data) *data = *buf; } else if (net_ratelimit()) { netdev_warn(dev->net, - "Failed to read register index 0x%08x. ret = %d", - index, ret); + "Failed to read register index 0x%08x. ret = %pe", + index, ERR_PTR(ret)); } kfree(buf); @@ -652,8 +652,8 @@ static int lan78xx_write_reg(struct lan78xx_net *dev, u32 index, u32 data) if (unlikely(ret < 0) && net_ratelimit()) { netdev_warn(dev->net, - "Failed to write register index 0x%08x. ret = %d", - index, ret); + "Failed to write register index 0x%08x. ret = %pe", + index, ERR_PTR(ret)); } kfree(buf); From 32ee0dc764505278229078e496e7b56a6d65224b Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:37 +0100 Subject: [PATCH 0197/1386] net: usb: lan78xx: Fix error handling in MII read/write functions Ensure proper error handling in `lan78xx_mdiobus_read` and `lan78xx_mdiobus_write` by checking return values of register read/write operations and returning errors to the caller. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-6-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 94320deaaeea..ee308be1e618 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2136,12 +2136,16 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) /* set the address, index & direction (read from PHY) */ addr = mii_access(phy_id, idx, MII_READ); ret = lan78xx_write_reg(dev, MII_ACC, addr); + if (ret < 0) + goto done; ret = lan78xx_phy_wait_not_busy(dev); if (ret < 0) goto done; ret = lan78xx_read_reg(dev, MII_DATA, &val); + if (ret < 0) + goto done; ret = (int)(val & 0xFFFF); @@ -2172,10 +2176,14 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, val = (u32)regval; ret = lan78xx_write_reg(dev, MII_DATA, val); + if (ret < 0) + goto done; /* set the address, index & direction (write to PHY) */ addr = mii_access(phy_id, idx, MII_WRITE); ret = lan78xx_write_reg(dev, MII_ACC, addr); + if (ret < 0) + goto done; ret = lan78xx_phy_wait_not_busy(dev); if (ret < 0) @@ -2184,7 +2192,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, done: mutex_unlock(&dev->phy_mutex); usb_autopm_put_interface(dev->intf); - return 0; + return ret; } static int lan78xx_mdio_init(struct lan78xx_net *dev) From 8b1b2ca83b200fa46fdfb81e80ad5fe34537e6d4 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:38 +0100 Subject: [PATCH 0198/1386] net: usb: lan78xx: Improve error handling in EEPROM and OTP operations Refine error handling in EEPROM and OTP read/write functions by: - Return error values immediately upon detection. - Avoid overwriting correct error codes with `-EIO`. 
- Preserve initial error codes as they were appropriate for specific failures. - Use `-ETIMEDOUT` for timeout conditions instead of `-EIO`. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-7-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 240 ++++++++++++++++++++++++-------------- 1 file changed, 152 insertions(+), 88 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index ee308be1e618..29f6e1a36e20 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1000,8 +1000,8 @@ static int lan78xx_wait_eeprom(struct lan78xx_net *dev) do { ret = lan78xx_read_reg(dev, E2P_CMD, &val); - if (unlikely(ret < 0)) - return -EIO; + if (ret < 0) + return ret; if (!(val & E2P_CMD_EPC_BUSY_) || (val & E2P_CMD_EPC_TIMEOUT_)) @@ -1011,7 +1011,7 @@ static int lan78xx_wait_eeprom(struct lan78xx_net *dev) if (val & (E2P_CMD_EPC_TIMEOUT_ | E2P_CMD_EPC_BUSY_)) { netdev_warn(dev->net, "EEPROM read operation timeout"); - return -EIO; + return -ETIMEDOUT; } return 0; @@ -1025,8 +1025,8 @@ static int lan78xx_eeprom_confirm_not_busy(struct lan78xx_net *dev) do { ret = lan78xx_read_reg(dev, E2P_CMD, &val); - if (unlikely(ret < 0)) - return -EIO; + if (ret < 0) + return ret; if (!(val & E2P_CMD_EPC_BUSY_)) return 0; @@ -1035,75 +1035,81 @@ static int lan78xx_eeprom_confirm_not_busy(struct lan78xx_net *dev) } while (!time_after(jiffies, start_time + HZ)); netdev_warn(dev->net, "EEPROM is busy"); - return -EIO; + return -ETIMEDOUT; } static int lan78xx_read_raw_eeprom(struct lan78xx_net *dev, u32 offset, u32 length, u8 *data) { - u32 val; - u32 saved; + u32 val, saved; int i, ret; - int retval; /* depends on chip, some EEPROM pins are muxed with LED function. * disable & restore LED function to access EEPROM. 
*/ ret = lan78xx_read_reg(dev, HW_CFG, &val); + if (ret < 0) + return ret; + saved = val; if (dev->chipid == ID_REV_CHIP_ID_7800_) { val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_); ret = lan78xx_write_reg(dev, HW_CFG, val); + if (ret < 0) + return ret; } - retval = lan78xx_eeprom_confirm_not_busy(dev); - if (retval) - return retval; + ret = lan78xx_eeprom_confirm_not_busy(dev); + if (ret == -ETIMEDOUT) + goto read_raw_eeprom_done; + /* If USB fails, there is nothing to do */ + if (ret < 0) + return ret; for (i = 0; i < length; i++) { val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_READ_; val |= (offset & E2P_CMD_EPC_ADDR_MASK_); ret = lan78xx_write_reg(dev, E2P_CMD, val); - if (unlikely(ret < 0)) { - retval = -EIO; - goto exit; - } + if (ret < 0) + return ret; - retval = lan78xx_wait_eeprom(dev); - if (retval < 0) - goto exit; + ret = lan78xx_wait_eeprom(dev); + /* Looks like not USB specific error, try to recover */ + if (ret == -ETIMEDOUT) + goto read_raw_eeprom_done; + /* If USB fails, there is nothing to do */ + if (ret < 0) + return ret; ret = lan78xx_read_reg(dev, E2P_DATA, &val); - if (unlikely(ret < 0)) { - retval = -EIO; - goto exit; - } + if (ret < 0) + return ret; data[i] = val & 0xFF; offset++; } - retval = 0; -exit: +read_raw_eeprom_done: if (dev->chipid == ID_REV_CHIP_ID_7800_) - ret = lan78xx_write_reg(dev, HW_CFG, saved); + return lan78xx_write_reg(dev, HW_CFG, saved); - return retval; + return 0; } static int lan78xx_read_eeprom(struct lan78xx_net *dev, u32 offset, u32 length, u8 *data) { - u8 sig; int ret; + u8 sig; ret = lan78xx_read_raw_eeprom(dev, 0, 1, &sig); - if ((ret == 0) && (sig == EEPROM_INDICATOR)) - ret = lan78xx_read_raw_eeprom(dev, offset, length, data); - else - ret = -EINVAL; + if (ret < 0) + return ret; - return ret; + if (sig != EEPROM_INDICATOR) + return -ENODATA; + + return lan78xx_read_raw_eeprom(dev, offset, length, data); } static int lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset, @@ -1112,113 +1118,144 @@ static int 
lan78xx_write_raw_eeprom(struct lan78xx_net *dev, u32 offset, u32 val; u32 saved; int i, ret; - int retval; /* depends on chip, some EEPROM pins are muxed with LED function. * disable & restore LED function to access EEPROM. */ ret = lan78xx_read_reg(dev, HW_CFG, &val); + if (ret < 0) + return ret; + saved = val; if (dev->chipid == ID_REV_CHIP_ID_7800_) { val &= ~(HW_CFG_LED1_EN_ | HW_CFG_LED0_EN_); ret = lan78xx_write_reg(dev, HW_CFG, val); + if (ret < 0) + return ret; } - retval = lan78xx_eeprom_confirm_not_busy(dev); - if (retval) - goto exit; + ret = lan78xx_eeprom_confirm_not_busy(dev); + /* Looks like not USB specific error, try to recover */ + if (ret == -ETIMEDOUT) + goto write_raw_eeprom_done; + /* If USB fails, there is nothing to do */ + if (ret < 0) + return ret; /* Issue write/erase enable command */ val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_EWEN_; ret = lan78xx_write_reg(dev, E2P_CMD, val); - if (unlikely(ret < 0)) { - retval = -EIO; - goto exit; - } + if (ret < 0) + return ret; - retval = lan78xx_wait_eeprom(dev); - if (retval < 0) - goto exit; + ret = lan78xx_wait_eeprom(dev); + /* Looks like not USB specific error, try to recover */ + if (ret == -ETIMEDOUT) + goto write_raw_eeprom_done; + /* If USB fails, there is nothing to do */ + if (ret < 0) + return ret; for (i = 0; i < length; i++) { /* Fill data register */ val = data[i]; ret = lan78xx_write_reg(dev, E2P_DATA, val); - if (ret < 0) { - retval = -EIO; - goto exit; - } + if (ret < 0) + return ret; /* Send "write" command */ val = E2P_CMD_EPC_BUSY_ | E2P_CMD_EPC_CMD_WRITE_; val |= (offset & E2P_CMD_EPC_ADDR_MASK_); ret = lan78xx_write_reg(dev, E2P_CMD, val); - if (ret < 0) { - retval = -EIO; - goto exit; - } + if (ret < 0) + return ret; - retval = lan78xx_wait_eeprom(dev); - if (retval < 0) - goto exit; + ret = lan78xx_wait_eeprom(dev); + /* Looks like not USB specific error, try to recover */ + if (ret == -ETIMEDOUT) + goto write_raw_eeprom_done; + /* If USB fails, there is nothing to do */ + 
if (ret < 0) + return ret; offset++; } - retval = 0; -exit: +write_raw_eeprom_done: if (dev->chipid == ID_REV_CHIP_ID_7800_) - ret = lan78xx_write_reg(dev, HW_CFG, saved); + return lan78xx_write_reg(dev, HW_CFG, saved); - return retval; + return 0; } static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, u32 length, u8 *data) { - int i; - u32 buf; unsigned long timeout; + int ret, i; + u32 buf; - lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (ret < 0) + return ret; if (buf & OTP_PWR_DN_PWRDN_N_) { /* clear it and wait to be cleared */ - lan78xx_write_reg(dev, OTP_PWR_DN, 0); + ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { usleep_range(1, 10); - lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on OTP_PWR_DN"); - return -EIO; + return -ETIMEDOUT; } } while (buf & OTP_PWR_DN_PWRDN_N_); } for (i = 0; i < length; i++) { - lan78xx_write_reg(dev, OTP_ADDR1, - ((offset + i) >> 8) & OTP_ADDR1_15_11); - lan78xx_write_reg(dev, OTP_ADDR2, - ((offset + i) & OTP_ADDR2_10_3)); + ret = lan78xx_write_reg(dev, OTP_ADDR1, + ((offset + i) >> 8) & OTP_ADDR1_15_11); + if (ret < 0) + return ret; - lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); - lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + ret = lan78xx_write_reg(dev, OTP_ADDR2, + ((offset + i) & OTP_ADDR2_10_3)); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { udelay(1); - lan78xx_read_reg(dev, OTP_STATUS, &buf); + ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on 
OTP_STATUS"); - return -EIO; + return -ETIMEDOUT; } } while (buf & OTP_STATUS_BUSY_); - lan78xx_read_reg(dev, OTP_RD_DATA, &buf); + ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf); + if (ret < 0) + return ret; data[i] = (u8)(buf & 0xFF); } @@ -1232,45 +1269,72 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, int i; u32 buf; unsigned long timeout; + int ret; - lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (ret < 0) + return ret; if (buf & OTP_PWR_DN_PWRDN_N_) { /* clear it and wait to be cleared */ - lan78xx_write_reg(dev, OTP_PWR_DN, 0); + ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { udelay(1); - lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "timeout on OTP_PWR_DN completion"); - return -EIO; + return -ETIMEDOUT; } } while (buf & OTP_PWR_DN_PWRDN_N_); } /* set to BYTE program mode */ - lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); + ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); + if (ret < 0) + return ret; for (i = 0; i < length; i++) { - lan78xx_write_reg(dev, OTP_ADDR1, - ((offset + i) >> 8) & OTP_ADDR1_15_11); - lan78xx_write_reg(dev, OTP_ADDR2, - ((offset + i) & OTP_ADDR2_10_3)); - lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); - lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); - lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + ret = lan78xx_write_reg(dev, OTP_ADDR1, + ((offset + i) >> 8) & OTP_ADDR1_15_11); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, OTP_ADDR2, + ((offset + i) & OTP_ADDR2_10_3)); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); + if (ret < 0) + return ret; + + ret = 
lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + if (ret < 0) + return ret; timeout = jiffies + HZ; do { udelay(1); - lan78xx_read_reg(dev, OTP_STATUS, &buf); + ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); + if (ret < 0) + return ret; + if (time_after(jiffies, timeout)) { netdev_warn(dev->net, "Timeout on OTP_STATUS completion"); - return -EIO; + return -ETIMEDOUT; } } while (buf & OTP_STATUS_BUSY_); } From 77586156b517c1d38a22c0a8662fe9401ab0f580 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:39 +0100 Subject: [PATCH 0199/1386] net: usb: lan78xx: Add error handling to lan78xx_init_ltm Convert `lan78xx_init_ltm` to return error codes and handle errors properly. Previously, errors during the LTM initialization process were not propagated, potentially leading to undetected issues. This patch ensures: - Errors in `lan78xx_read_reg` and `lan78xx_write_reg` are checked and handled. - Errors are logged with detailed messages using `%pe` for clarity. - The function exits immediately on error, returning the error code. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-8-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 50 ++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 29f6e1a36e20..33cda7f3dd12 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2807,13 +2807,16 @@ static int lan78xx_vlan_rx_kill_vid(struct net_device *netdev, return 0; } -static void lan78xx_init_ltm(struct lan78xx_net *dev) +static int lan78xx_init_ltm(struct lan78xx_net *dev) { + u32 regs[6] = { 0 }; int ret; u32 buf; - u32 regs[6] = { 0 }; ret = lan78xx_read_reg(dev, USB_CFG1, &buf); + if (ret < 0) + goto init_ltm_failed; + if (buf & USB_CFG1_LTM_ENABLE_) { u8 temp[2]; /* Get values from EEPROM first */ @@ -2824,7 +2827,7 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev) 24, (u8 *)regs); if (ret < 0) - return; + return ret; } } else if (lan78xx_read_otp(dev, 0x3F, 2, temp) == 0) { if (temp[0] == 24) { @@ -2833,17 +2836,40 @@ static void lan78xx_init_ltm(struct lan78xx_net *dev) 24, (u8 *)regs); if (ret < 0) - return; + return ret; } } } - lan78xx_write_reg(dev, LTM_BELT_IDLE0, regs[0]); - lan78xx_write_reg(dev, LTM_BELT_IDLE1, regs[1]); - lan78xx_write_reg(dev, LTM_BELT_ACT0, regs[2]); - lan78xx_write_reg(dev, LTM_BELT_ACT1, regs[3]); - lan78xx_write_reg(dev, LTM_INACTIVE0, regs[4]); - lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]); + ret = lan78xx_write_reg(dev, LTM_BELT_IDLE0, regs[0]); + if (ret < 0) + goto init_ltm_failed; + + ret = lan78xx_write_reg(dev, LTM_BELT_IDLE1, regs[1]); + if (ret < 0) + goto init_ltm_failed; + + ret = lan78xx_write_reg(dev, LTM_BELT_ACT0, regs[2]); + if (ret < 0) + goto init_ltm_failed; + + ret = lan78xx_write_reg(dev, LTM_BELT_ACT1, regs[3]); + if (ret < 0) + goto init_ltm_failed; + + ret = lan78xx_write_reg(dev, LTM_INACTIVE0, regs[4]); + 
if (ret < 0) + goto init_ltm_failed; + + ret = lan78xx_write_reg(dev, LTM_INACTIVE1, regs[5]); + if (ret < 0) + goto init_ltm_failed; + + return 0; + +init_ltm_failed: + netdev_err(dev->net, "Failed to init LTM with error %pe\n", ERR_PTR(ret)); + return ret; } static int lan78xx_urb_config_init(struct lan78xx_net *dev) @@ -2939,7 +2965,9 @@ static int lan78xx_reset(struct lan78xx_net *dev) return ret; /* Init LTM */ - lan78xx_init_ltm(dev); + ret = lan78xx_init_ltm(dev); + if (ret < 0) + return ret; ret = lan78xx_write_reg(dev, BURST_CAP, dev->burst_cap); if (ret < 0) From 65520a70cb09200d916464ddaa04e996e689c576 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:40 +0100 Subject: [PATCH 0200/1386] net: usb: lan78xx: Add error handling to set_rx_max_frame_length and set_mtu Improve error handling in `lan78xx_set_rx_max_frame_length` by: - Checking return values from register read/write operations and propagating errors. - Exiting immediately on failure to ensure proper error reporting. In `lan78xx_change_mtu`, log errors when changing MTU fails, using `%pe` for clear error representation. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-9-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 33cda7f3dd12..2d16c1fc850e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2599,27 +2599,36 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size) { - u32 buf; bool rxenabled; + u32 buf; + int ret; - lan78xx_read_reg(dev, MAC_RX, &buf); + ret = lan78xx_read_reg(dev, MAC_RX, &buf); + if (ret < 0) + return ret; rxenabled = ((buf & MAC_RX_RXEN_) != 0); if (rxenabled) { buf &= ~MAC_RX_RXEN_; - lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; } /* add 4 to size for FCS */ buf &= ~MAC_RX_MAX_SIZE_MASK_; buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); - lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; if (rxenabled) { buf |= MAC_RX_RXEN_; - lan78xx_write_reg(dev, MAC_RX, buf); + ret = lan78xx_write_reg(dev, MAC_RX, buf); + if (ret < 0) + return ret; } return 0; @@ -2685,7 +2694,10 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) return ret; ret = lan78xx_set_rx_max_frame_length(dev, max_frame_len); - if (!ret) + if (ret < 0) + netdev_err(dev->net, "MTU changed to %d from %d failed with %pe\n", + new_mtu, netdev->mtu, ERR_PTR(ret)); + else WRITE_ONCE(netdev->mtu, new_mtu); usb_autopm_put_interface(dev->intf); From 0da202e6a56f6ec137fde151c1a1a9d39a4135c0 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:41 +0100 Subject: [PATCH 0201/1386] net: usb: lan78xx: Add error handling to lan78xx_irq_bus_sync_unlock Update 
`lan78xx_irq_bus_sync_unlock` to handle errors in register read/write operations. If an error occurs, log it and exit the function appropriately. This ensures proper handling of failures during IRQ synchronization. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-10-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 2d16c1fc850e..2ae9565b5044 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2382,13 +2382,22 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd) struct lan78xx_net *dev = container_of(data, struct lan78xx_net, domain_data); u32 buf; + int ret; /* call register access here because irq_bus_lock & irq_bus_sync_unlock * are only two callbacks executed in non-atomic contex. */ - lan78xx_read_reg(dev, INT_EP_CTL, &buf); + ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); + if (ret < 0) + goto irq_bus_sync_unlock; + if (buf != data->irqenable) - lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); + ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); + +irq_bus_sync_unlock: + if (ret < 0) + netdev_err(dev->net, "Failed to sync IRQ enable register: %pe\n", + ERR_PTR(ret)); mutex_unlock(&data->irq_lock); } From 48fb3d3c4be602f0977f81d20de7deb0e3807575 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Wed, 4 Dec 2024 09:41:42 +0100 Subject: [PATCH 0202/1386] net: usb: lan78xx: Improve error handling in dataport and multicast writes Update `lan78xx_dataport_write` and `lan78xx_deferred_multicast_write` to: - Handle errors during register read/write operations. - Exit immediately on errors and log them using `%pe` for clarity. - Avoid silent failures by propagating error codes properly. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241204084142.1152696-11-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 67 ++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 22 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 2ae9565b5044..d5f6367d3714 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1371,7 +1371,7 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev) ret = lan78xx_read_reg(dev, DP_SEL, &dp_sel); if (unlikely(ret < 0)) - return -EIO; + return ret; if (dp_sel & DP_SEL_DPRDY_) return 0; @@ -1381,44 +1381,51 @@ static int lan78xx_dataport_wait_not_busy(struct lan78xx_net *dev) netdev_warn(dev->net, "%s timed out", __func__); - return -EIO; + return -ETIMEDOUT; } static int lan78xx_dataport_write(struct lan78xx_net *dev, u32 ram_select, u32 addr, u32 length, u32 *buf) { struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); - u32 dp_sel; int i, ret; - if (usb_autopm_get_interface(dev->intf) < 0) - return 0; + ret = usb_autopm_get_interface(dev->intf); + if (ret < 0) + return ret; mutex_lock(&pdata->dataport_mutex); ret = lan78xx_dataport_wait_not_busy(dev); if (ret < 0) - goto done; + goto dataport_write; - ret = lan78xx_read_reg(dev, DP_SEL, &dp_sel); - - dp_sel &= ~DP_SEL_RSEL_MASK_; - dp_sel |= ram_select; - ret = lan78xx_write_reg(dev, DP_SEL, dp_sel); + ret = lan78xx_update_reg(dev, DP_SEL, DP_SEL_RSEL_MASK_, ram_select); + if (ret < 0) + goto dataport_write; for (i = 0; i < length; i++) { ret = lan78xx_write_reg(dev, DP_ADDR, addr + i); + if (ret < 0) + goto dataport_write; ret = lan78xx_write_reg(dev, DP_DATA, buf[i]); + if (ret < 0) + goto dataport_write; ret = lan78xx_write_reg(dev, DP_CMD, DP_CMD_WRITE_); + if (ret < 0) + goto dataport_write; ret = lan78xx_dataport_wait_not_busy(dev); if (ret < 0) - goto done; + goto dataport_write; } -done: 
+dataport_write: + if (ret < 0) + netdev_warn(dev->net, "dataport write failed %pe", ERR_PTR(ret)); + mutex_unlock(&pdata->dataport_mutex); usb_autopm_put_interface(dev->intf); @@ -1454,23 +1461,39 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param) struct lan78xx_priv *pdata = container_of(param, struct lan78xx_priv, set_multicast); struct lan78xx_net *dev = pdata->dev; - int i; + int i, ret; netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n", pdata->rfe_ctl); - lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, DP_SEL_VHF_VLAN_LEN, - DP_SEL_VHF_HASH_LEN, pdata->mchash_table); + ret = lan78xx_dataport_write(dev, DP_SEL_RSEL_VLAN_DA_, + DP_SEL_VHF_VLAN_LEN, + DP_SEL_VHF_HASH_LEN, pdata->mchash_table); + if (ret < 0) + goto multicast_write_done; for (i = 1; i < NUM_OF_MAF; i++) { - lan78xx_write_reg(dev, MAF_HI(i), 0); - lan78xx_write_reg(dev, MAF_LO(i), - pdata->pfilter_table[i][1]); - lan78xx_write_reg(dev, MAF_HI(i), - pdata->pfilter_table[i][0]); + ret = lan78xx_write_reg(dev, MAF_HI(i), 0); + if (ret < 0) + goto multicast_write_done; + + ret = lan78xx_write_reg(dev, MAF_LO(i), + pdata->pfilter_table[i][1]); + if (ret < 0) + goto multicast_write_done; + + ret = lan78xx_write_reg(dev, MAF_HI(i), + pdata->pfilter_table[i][0]); + if (ret < 0) + goto multicast_write_done; } - lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + +multicast_write_done: + if (ret < 0) + netdev_warn(dev->net, "multicast write failed %pe", ERR_PTR(ret)); + return; } static void lan78xx_set_multicast(struct net_device *netdev) From 18eabadd73ae60023ab05e376246bd725fb0c113 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 4 Dec 2024 13:11:21 +0100 Subject: [PATCH 0203/1386] vrf: Make pcpu_dstats update functions available to other modules. Currently vrf is the only module that uses NETDEV_PCPU_STAT_DSTATS. 
In order to make this kind of statistics available to other modules, we need to define the update functions in netdevice.h. Therefore, let's define dev_dstats_*() functions for RX and TX packet updates (packets, bytes and drops). Use these new functions in vrf.c instead of vrf_rx_stats() and the other manual counter updates. While there, update the type of the "len" variables to "unsigned int", so that they're aligned with both skb->len and the new dstats update functions. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/d7a552ee382c79f4854e7fcc224cf176cd21150d.1733313925.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 49 +++++++++++---------------------------- include/linux/netdevice.h | 40 ++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 67d25f4f94ef..ca81b212a246 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -122,16 +122,6 @@ struct net_vrf { int ifindex; }; -static void vrf_rx_stats(struct net_device *dev, int len) -{ - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - u64_stats_inc(&dstats->rx_packets); - u64_stats_add(&dstats->rx_bytes, len); - u64_stats_update_end(&dstats->syncp); -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; @@ -369,7 +359,7 @@ static bool qdisc_tx_is_default(const struct net_device *dev) static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { - int len = skb->len; + unsigned int len = skb->len; skb_orphan(skb); @@ -382,15 +372,10 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { - vrf_rx_stats(dev, len); - } else { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); -
u64_stats_inc(&dstats->rx_drops); - u64_stats_update_end(&dstats->syncp); - } + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + dev_dstats_rx_add(dev, len); + else + dev_dstats_rx_dropped(dev); return NETDEV_TX_OK; } @@ -578,20 +563,14 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + unsigned int len = skb->len; + netdev_tx_t ret; - int len = skb->len; - netdev_tx_t ret = is_ip_tx_frame(skb, dev); - - u64_stats_update_begin(&dstats->syncp); - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - - u64_stats_inc(&dstats->tx_packets); - u64_stats_add(&dstats->tx_bytes, len); - } else { - u64_stats_inc(&dstats->tx_drops); - } - u64_stats_update_end(&dstats->syncp); + ret = is_ip_tx_frame(skb, dev); + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) + dev_dstats_tx_add(dev, len); + else + dev_dstats_tx_dropped(dev); return ret; } @@ -1364,7 +1343,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (!is_ndisc) { struct net_device *orig_dev = skb->dev; - vrf_rx_stats(vrf_dev, skb->len); + dev_dstats_rx_add(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1420,7 +1399,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } - vrf_rx_stats(vrf_dev, skb->len); + dev_dstats_rx_add(vrf_dev, skb->len); if (!list_empty(&vrf_dev->ptype_all)) { int err; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d1a8d98b132c..135105441681 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2854,6 +2854,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) u64_stats_update_end(&lstats->syncp); } +static inline void dev_dstats_rx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + 
u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_rx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_drops); + u64_stats_update_end(&dstats->syncp); +} + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ From be226352e8dc77d3313c096b2d8e7f69bf6980fc Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 4 Dec 2024 13:11:27 +0100 Subject: [PATCH 0204/1386] vxlan: Handle stats using NETDEV_PCPU_STAT_DSTATS. VXLAN uses the TSTATS infrastructure (dev_sw_netstats_*()) for RX and TX packet counters. It also uses the device core stats (dev_core_stats_*()) for RX and TX drops. Let's consolidate that using the DSTATS infrastructure, which can handle both packet counters and packet drops. Statistics that don't fit DSTATS are still updated atomically with DEV_STATS_INC(). While there, convert the "len" variable of vxlan_encap_bypass() to unsigned int, to respect the types of skb->len and dev_dstats_[rt]x_add(). 
Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/145558b184b3cda77911ca5682b6eb83c3ffed8e.1733313925.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 9ea63059d52d..b46a799bd390 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1818,14 +1818,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (unlikely(!(vxlan->dev->flags & IFF_UP))) { rcu_read_unlock(); - dev_core_stats_rx_dropped_inc(vxlan->dev); + dev_dstats_rx_dropped(vxlan->dev); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX_DROPS, 0); reason = SKB_DROP_REASON_DEV_READY; goto drop; } - dev_sw_netstats_rx_add(vxlan->dev, skb->len); + dev_dstats_rx_add(vxlan->dev, skb->len); vxlan_vnifilter_count(vxlan, vni, vninode, VXLAN_VNI_STATS_RX, skb->len); gro_cells_receive(&vxlan->gro_cells, skb); @@ -1880,7 +1880,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (!pskb_may_pull(skb, arp_hdr_len(dev))) { - dev_core_stats_tx_dropped_inc(dev); + dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); goto out; @@ -1938,7 +1938,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) reply->pkt_type = PACKET_HOST; if (netif_rx(reply) == NET_RX_DROP) { - dev_core_stats_rx_dropped_inc(dev); + dev_dstats_rx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2097,7 +2097,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) goto out; if (netif_rx(reply) == NET_RX_DROP) { - dev_core_stats_rx_dropped_inc(dev); + dev_dstats_rx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2271,8 +2271,8 @@ static void 
vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, { union vxlan_addr loopback; union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip; + unsigned int len = skb->len; struct net_device *dev; - int len = skb->len; skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; @@ -2299,16 +2299,16 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop) vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni); - dev_sw_netstats_tx_add(src_vxlan->dev, 1, len); + dev_dstats_tx_add(src_vxlan->dev, len); vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len); if (__netif_rx(skb) == NET_RX_SUCCESS) { - dev_sw_netstats_rx_add(dst_vxlan->dev, len); + dev_dstats_rx_add(dst_vxlan->dev, len); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX, len); } else { drop: - dev_core_stats_rx_dropped_inc(dev); + dev_dstats_rx_dropped(dev); vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX_DROPS, 0); } @@ -2621,7 +2621,7 @@ out_unlock: return; drop: - dev_core_stats_tx_dropped_inc(dev); + dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb_reason(skb, reason); return; @@ -2666,7 +2666,7 @@ static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev, return; drop: - dev_core_stats_tx_dropped_inc(dev); + dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2704,7 +2704,7 @@ static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev, return NETDEV_TX_OK; drop: - dev_core_stats_tx_dropped_inc(dev); + dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(netdev_priv(dev), vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); dev_kfree_skb(skb); @@ -2801,7 +2801,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) !is_multicast_ether_addr(eth->h_dest)) vxlan_fdb_miss(vxlan, 
eth->h_dest); - dev_core_stats_tx_dropped_inc(dev); + dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); @@ -3371,7 +3371,7 @@ static void vxlan_setup(struct net_device *dev) dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = ETH_MAX_MTU; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; INIT_LIST_HEAD(&vxlan->next); timer_setup(&vxlan->age_timer, vxlan_cleanup, TIMER_DEFERRABLE); From 6fa6de30224619f836f4cb1209b5af3f5319806e Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 4 Dec 2024 13:11:30 +0100 Subject: [PATCH 0205/1386] geneve: Handle stats using NETDEV_PCPU_STAT_DSTATS. Geneve uses the TSTATS infrastructure (dev_sw_netstats_*()) for RX packet counters. All other counters are handled using atomic increments with DEV_STATS_INC(). Let's convert packet stats handling to DSTATS, which has a per-cpu counter for packet drops too, to avoid the cost of atomic increments in these cases. Statistics that don't fit DSTATS are still updated atomically with DEV_STATS_INC(). Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/7af5c09f3c26f0f231fbe383822ca5d1ce0278fa.1733313925.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index bc658bc60885..642155cb8315 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -235,7 +235,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) { - DEV_STATS_INC(geneve->dev, rx_dropped); + dev_dstats_rx_dropped(geneve->dev); goto drop; } /* Update tunnel dst according to Geneve options. 
*/ @@ -322,7 +322,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, len = skb->len; err = gro_cells_receive(&geneve->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) - dev_sw_netstats_rx_add(geneve->dev, len); + dev_dstats_rx_add(geneve->dev, len); return; drop: @@ -387,14 +387,14 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (unlikely((!geneve->cfg.inner_proto_inherit && inner_proto != htons(ETH_P_TEB)))) { - DEV_STATS_INC(geneve->dev, rx_dropped); + dev_dstats_rx_dropped(geneve->dev); goto drop; } opts_len = geneveh->opt_len * 4; if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto, !net_eq(geneve->net, dev_net(geneve->dev)))) { - DEV_STATS_INC(geneve->dev, rx_dropped); + dev_dstats_rx_dropped(geneve->dev); goto drop; } @@ -1023,7 +1023,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { netdev_dbg(dev, "no tunnel metadata\n"); dev_kfree_skb(skb); - DEV_STATS_INC(dev, tx_dropped); + dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } } else { @@ -1202,7 +1202,7 @@ static void geneve_setup(struct net_device *dev) dev->hw_features |= NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; /* MTU range: 68 - (something less than 65535) */ dev->min_mtu = ETH_MIN_MTU; /* The max_mtu calculation does not take account of GENEVE From c77200c074917d0fd51e5c029c50c76c07b6d310 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 4 Dec 2024 13:11:32 +0100 Subject: [PATCH 0206/1386] bareudp: Handle stats using NETDEV_PCPU_STAT_DSTATS. Bareudp uses the TSTATS infrastructure (dev_sw_netstats_*()) for RX packet counters. It was also recently converted to use the device core stats (dev_core_stats_*()) for RX and TX drops (see commit 788d5d655bc9 ("bareudp: Use pcpu stats to update rx_dropped counter.")). 
Since core stats are to be avoided in drivers, and for consistency with VXLAN and Geneve, let's convert packet stats handling to DSTATS, which can handle RX/TX stats and packet drops. Statistics that don't fit DSTATS are still updated atomically with DEV_STATS_INC(). Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/0f4f8448db3ff449ac6e939872b28cf3f8982da7.1733313925.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index a2abfade82dd..70814303aab8 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -84,7 +84,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (skb_copy_bits(skb, BAREUDP_BASE_HLEN, &ipversion, sizeof(ipversion))) { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } ipversion >>= 4; @@ -94,7 +94,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) } else if (ipversion == 6 && bareudp->multi_proto_mode) { proto = htons(ETH_P_IPV6); } else { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } } else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) { @@ -108,7 +108,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) ipv4_is_multicast(tunnel_hdr->daddr)) { proto = htons(ETH_P_MPLS_MC); } else { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } } else { @@ -124,7 +124,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) (addr_type & IPV6_ADDR_MULTICAST)) { proto = htons(ETH_P_MPLS_MC); } else { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } } @@ -136,7 +136,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) proto, !net_eq(bareudp->net, 
dev_net(bareudp->dev)))) { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } @@ -144,7 +144,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) tun_dst = udp_tun_rx_dst(skb, family, key, 0, 0); if (!tun_dst) { - dev_core_stats_rx_dropped_inc(bareudp->dev); + dev_dstats_rx_dropped(bareudp->dev); goto drop; } skb_dst_set(skb, &tun_dst->dst); @@ -194,7 +194,7 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) len = skb->len; err = gro_cells_receive(&bareudp->gro_cells, skb); if (likely(err == NET_RX_SUCCESS)) - dev_sw_netstats_rx_add(bareudp->dev, len); + dev_dstats_rx_add(bareudp->dev, len); return 0; drop: @@ -589,7 +589,7 @@ static void bareudp_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->lltx = true; dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], From 00ab246750821b226f14ebc94ad21431dc82820b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Dec 2024 11:30:56 +0100 Subject: [PATCH 0207/1386] tools: ynl-gen-c: annotate valid choices for --mode This makes argparse validate the input and helps users understand which modes are possible. 
Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241206113100.e2ab5cf6937c.Ie149a0ca5df713860964b44fe9d9ae547f2e1553@changeid Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 8098bcbb6f40..7f6e5157770d 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2706,7 +2706,8 @@ def find_kernel_root(full_path): def main(): parser = argparse.ArgumentParser(description='Netlink simple parsing generator') - parser.add_argument('--mode', dest='mode', type=str, required=True) + parser.add_argument('--mode', dest='mode', type=str, required=True, + choices=('user', 'kernel', 'uapi')) parser.add_argument('--spec', dest='spec', type=str, required=True) parser.add_argument('--header', dest='header', action='store_true', default=None) parser.add_argument('--source', dest='header', action='store_false') From 81d89e6e88d5d592c1792940753d69d9753b3a8a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 6 Dec 2024 11:30:57 +0100 Subject: [PATCH 0208/1386] tools: ynl-gen-c: don't require -o argument Without -o the tool currently crashes, but it's not marked as required. The only thing we can't do without it is to generate the correct #include for user source files, but we can put a placeholder instead. 
Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241206113100.89d35bf124d6.I9228fb704e6d5c9d8e046ef15025a47a48439c1e@changeid Signed-off-by: Jakub Kicinski --- tools/net/ynl/ynl-gen-c.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 7f6e5157770d..ec2288948795 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2761,7 +2761,10 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() - hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.out_file: + hdr_file = os.path.basename(args.out_file[:-2]) + ".h" + else: + hdr_file = "generated_header_file.h" if args.mode == 'kernel': cw.p('#include ') From 3ca459eaba1bf96a8c7878de84fa8872259a01e3 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 5 Dec 2024 10:36:14 +0300 Subject: [PATCH 0209/1386] tun: fix group permission check Currently tun checks the group permission even if the user has matched. Besides going against the usual permission semantics, this has a very interesting implication: if the tun group is not among the supplementary groups of the tun user, then effectively no one can access the tun device. CAP_SYS_ADMIN still can, but it's the same as not setting the tun ownership. This patch relaxes the group checking so that either the user match or the group match is enough. This avoids the situation when no one can access the device even though the ownership is properly set. 
Also I simplified the logic by removing the redundant inversions: tun_not_capable() --> !tun_capable() Signed-off-by: Stas Sergeev Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Link: https://patch.msgid.link/20241205073614.294773-1-stsp2@yandex.ru Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index d7a865ef370b..8e94df88392c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,14 +574,18 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_not_capable(struct tun_struct *tun) +static inline bool tun_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || - (gid_valid(tun->group) && !in_egroup_p(tun->group))) && - !ns_capable(net->user_ns, CAP_NET_ADMIN); + if (ns_capable(net->user_ns, CAP_NET_ADMIN)) + return 1; + if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) + return 1; + if (gid_valid(tun->group) && in_egroup_p(tun->group)) + return 1; + return 0; } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2778,7 +2782,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (tun_not_capable(tun)) + if (!tun_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) From 6561f0e547be221f411fda5eddfcc5bd8bb058a5 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 09:42:24 +0000 Subject: [PATCH 0210/1386] net: pcs: pcs-lynx: implement pcs_inband_caps() method Report the PCS in-band capabilities to phylink for the Lynx PCS. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tJ8NM-006L5J-AH@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-lynx.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index b79aedad855b..767a8c0714ac 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -35,6 +35,27 @@ enum sgmii_speed { #define phylink_pcs_to_lynx(pl_pcs) container_of((pl_pcs), struct lynx_pcs, pcs) #define lynx_to_phylink_pcs(lynx) (&(lynx)->pcs) +static unsigned int lynx_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_2500BASEX: + return LINK_INBAND_DISABLE; + + case PHY_INTERFACE_MODE_USXGMII: + return LINK_INBAND_ENABLE; + + default: + return 0; + } +} + static void lynx_pcs_get_state_usxgmii(struct mdio_device *pcs, struct phylink_link_state *state) { @@ -306,6 +327,7 @@ static void lynx_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, } static const struct phylink_pcs_ops lynx_pcs_phylink_ops = { + .pcs_inband_caps = lynx_pcs_inband_caps, .pcs_get_state = lynx_pcs_get_state, .pcs_config = lynx_pcs_config, .pcs_an_restart = lynx_pcs_an_restart, From 520d29bdda86915b3caf8c72825a574bff212553 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 09:42:29 +0000 Subject: [PATCH 0211/1386] net: pcs: pcs-mtk-lynxi: implement pcs_inband_caps() method Report the PCS in-band capabilities to phylink for the LynxI PCS. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tJ8NR-006L5P-E3@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-mtk-lynxi.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index 4f63abe638c4..7de804535229 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -88,6 +88,21 @@ static struct mtk_pcs_lynxi *pcs_to_mtk_pcs_lynxi(struct phylink_pcs *pcs) return container_of(pcs, struct mtk_pcs_lynxi, pcs); } +static unsigned int mtk_pcs_lynxi_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + switch (interface) { + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + default: + return 0; + } +} + static void mtk_pcs_lynxi_get_state(struct phylink_pcs *pcs, struct phylink_link_state *state) { @@ -241,6 +256,7 @@ static void mtk_pcs_lynxi_disable(struct phylink_pcs *pcs) } static const struct phylink_pcs_ops mtk_pcs_lynxi_ops = { + .pcs_inband_caps = mtk_pcs_lynxi_inband_caps, .pcs_get_state = mtk_pcs_lynxi_get_state, .pcs_config = mtk_pcs_lynxi_config, .pcs_an_restart = mtk_pcs_lynxi_restart_an, From 484d0170d6c6bbb5213d037664e9a551f793bacd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 09:42:34 +0000 Subject: [PATCH 0212/1386] net: pcs: xpcs: implement pcs_inband_caps() method Report the PCS inband capabilities to phylink for XPCS. 
Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tJ8NW-006L5V-I9@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index 7246a910728d..f70ca39f0905 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -567,6 +567,33 @@ static int xpcs_validate(struct phylink_pcs *pcs, unsigned long *supported, return 0; } +static unsigned int xpcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + struct dw_xpcs *xpcs = phylink_pcs_to_xpcs(pcs); + const struct dw_xpcs_compat *compat; + + compat = xpcs_find_compat(xpcs, interface); + if (!compat) + return 0; + + switch (compat->an_mode) { + case DW_AN_C73: + return LINK_INBAND_ENABLE; + + case DW_AN_C37_SGMII: + case DW_AN_C37_1000BASEX: + return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + + case DW_10GBASER: + case DW_2500BASEX: + return LINK_INBAND_DISABLE; + + default: + return 0; + } +} + void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) { const struct dw_xpcs_compat *compat; @@ -1306,6 +1333,7 @@ static const struct dw_xpcs_desc xpcs_desc_list[] = { static const struct phylink_pcs_ops xpcs_phylink_ops = { .pcs_validate = xpcs_validate, + .pcs_inband_caps = xpcs_inband_caps, .pcs_pre_config = xpcs_pre_config, .pcs_config = xpcs_config, .pcs_get_state = xpcs_get_state, From 7ea2745766d776866cfbc981b21ed3cfdf50124e Mon Sep 17 00:00:00 2001 From: Justin Lai Date: Fri, 6 Dec 2024 16:48:51 +0800 Subject: [PATCH 0213/1386] rtase: Refine the if statement Refine the if statement to improve readability. 
Signed-off-by: Justin Lai Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20241206084851.760475-1-justinlai0215@realtek.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/rtase/rtase_main.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/realtek/rtase/rtase_main.c b/drivers/net/ethernet/realtek/rtase/rtase_main.c index 6106aa5333bc..585d0b21c9e0 100644 --- a/drivers/net/ethernet/realtek/rtase/rtase_main.c +++ b/drivers/net/ethernet/realtek/rtase/rtase_main.c @@ -2018,7 +2018,7 @@ static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out, SET_NETDEV_DEV(dev, &pdev->dev); ret = pci_enable_device(pdev); - if (ret < 0) + if (ret) goto err_out_free_dev; /* make sure PCI base addr 1 is MMIO */ @@ -2034,7 +2034,7 @@ static int rtase_init_board(struct pci_dev *pdev, struct net_device **dev_out, } ret = pci_request_regions(pdev, KBUILD_MODNAME); - if (ret < 0) + if (ret) goto err_out_disable; ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); @@ -2110,7 +2110,7 @@ static int rtase_init_one(struct pci_dev *pdev, dev_dbg(&pdev->dev, "Automotive Switch Ethernet driver loaded\n"); ret = rtase_init_board(pdev, &dev, &ioaddr); - if (ret != 0) + if (ret) return ret; tp = netdev_priv(dev); @@ -2120,7 +2120,7 @@ static int rtase_init_one(struct pci_dev *pdev, /* identify chip attached to board */ ret = rtase_check_mac_version_valid(tp); - if (ret != 0) { + if (ret) { dev_err(&pdev->dev, "unknown chip version: 0x%08x, contact rtase maintainers (see MAINTAINERS file)\n", tp->hw_ver); @@ -2131,7 +2131,7 @@ static int rtase_init_one(struct pci_dev *pdev, rtase_init_hardware(tp); ret = rtase_alloc_interrupt(pdev, tp); - if (ret < 0) { + if (ret) { dev_err(&pdev->dev, "unable to alloc MSIX/MSI\n"); goto err_out_del_napi; } @@ -2176,7 +2176,7 @@ static int rtase_init_one(struct pci_dev *pdev, netif_carrier_off(dev); ret = register_netdev(dev); - if (ret != 0) + if (ret) goto 
err_out_free_dma; netdev_dbg(dev, "%pM, IRQ %d\n", dev->dev_addr, dev->irq); From 195c3d4631816f02071f0e01d2d2def51cf5067a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:52 +0530 Subject: [PATCH 0214/1386] octeontx2-pf: map skb data as device writeable Crypto hardware needs write permission for in-place encrypt or decrypt operation on skb-data to support IPsec crypto offload. This patch uses skb_unshare to make skb data writeable for ipsec crypto offload and maps skb fragment memory as device read-write. Signed-off-by: Bharat Bhushan Signed-off-by: David S. Miller --- .../ethernet/marvell/octeontx2/nic/otx2_txrx.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 04bc06a80e23..3b0457e52a6a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" @@ -83,10 +84,17 @@ static unsigned int frag_num(unsigned int i) static dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, struct sk_buff *skb, int seg, int *len) { + enum dma_data_direction dir = DMA_TO_DEVICE; const skb_frag_t *frag; struct page *page; int offset; + /* Crypto hardware need write permission for ipsec crypto offload */ + if (unlikely(xfrm_offload(skb))) { + dir = DMA_BIDIRECTIONAL; + skb = skb_unshare(skb, GFP_ATOMIC); + } + /* First segment is always skb->data */ if (!seg) { page = virt_to_page(skb->data); @@ -98,16 +106,22 @@ static dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, offset = skb_frag_off(frag); *len = skb_frag_size(frag); } - return otx2_dma_map_page(pfvf, page, offset, *len, DMA_TO_DEVICE); + return otx2_dma_map_page(pfvf, page, offset, *len, dir); } static void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg) { 
+ enum dma_data_direction dir = DMA_TO_DEVICE; + struct sk_buff *skb = NULL; int seg; + skb = (struct sk_buff *)sg->skb; + if (unlikely(xfrm_offload(skb))) + dir = DMA_BIDIRECTIONAL; + for (seg = 0; seg < sg->num_segs; seg++) { otx2_dma_unmap_page(pfvf, sg->dma_addr[seg], - sg->size[seg], DMA_TO_DEVICE); + sg->size[seg], dir); } sg->num_segs = 0; } From c460b7442a6b020c6f0a2a5f837436b1ce56e95b Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:53 +0530 Subject: [PATCH 0215/1386] octeontx2-pf: Move skb fragment map/unmap to common code Move skb fragment map/unmap function to common file so as to reuse same for outbound IPsec crypto offload Signed-off-by: Bharat Bhushan Signed-off-by: David S. Miller --- .../marvell/octeontx2/nic/otx2_common.c | 46 +++++++++++++++++++ .../marvell/octeontx2/nic/otx2_common.h | 4 ++ .../marvell/octeontx2/nic/otx2_txrx.c | 46 ------------------- 3 files changed, 50 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 523ecb798a7a..971115a5d2cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" @@ -1947,3 +1948,48 @@ EXPORT_SYMBOL(otx2_mbox_up_handler_ ## _fn_name); MBOX_UP_CGX_MESSAGES MBOX_UP_MCS_MESSAGES #undef M + +dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, + struct sk_buff *skb, int seg, int *len) +{ + enum dma_data_direction dir = DMA_TO_DEVICE; + const skb_frag_t *frag; + struct page *page; + int offset; + + /* Crypto hardware need write permission for ipsec crypto offload */ + if (unlikely(xfrm_offload(skb))) { + dir = DMA_BIDIRECTIONAL; + skb = skb_unshare(skb, GFP_ATOMIC); + } + + /* First segment is always skb->data */ + if (!seg) { + page = virt_to_page(skb->data); + offset = 
offset_in_page(skb->data); + *len = skb_headlen(skb); + } else { + frag = &skb_shinfo(skb)->frags[seg - 1]; + page = skb_frag_page(frag); + offset = skb_frag_off(frag); + *len = skb_frag_size(frag); + } + return otx2_dma_map_page(pfvf, page, offset, *len, dir); +} + +void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg) +{ + enum dma_data_direction dir = DMA_TO_DEVICE; + struct sk_buff *skb = NULL; + int seg; + + skb = (struct sk_buff *)sg->skb; + if (unlikely(xfrm_offload(skb))) + dir = DMA_BIDIRECTIONAL; + + for (seg = 0; seg < sg->num_segs; seg++) { + otx2_dma_unmap_page(pfvf, sg->dma_addr[seg], + sg->size[seg], dir); + } + sg->num_segs = 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 566848663fea..bb78d825046d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -1149,4 +1149,8 @@ static inline int mcam_entry_cmp(const void *a, const void *b) { return *(u16 *)a - *(u16 *)b; } + +dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, + struct sk_buff *skb, int seg, int *len); +void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg); #endif /* OTX2_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 3b0457e52a6a..a49041e55c33 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -11,7 +11,6 @@ #include #include #include -#include #include "otx2_reg.h" #include "otx2_common.h" @@ -81,51 +80,6 @@ static unsigned int frag_num(unsigned int i) #endif } -static dma_addr_t otx2_dma_map_skb_frag(struct otx2_nic *pfvf, - struct sk_buff *skb, int seg, int *len) -{ - enum dma_data_direction dir = DMA_TO_DEVICE; - const skb_frag_t *frag; - struct page *page; - int offset; - - /* Crypto hardware need 
write permission for ipsec crypto offload */ - if (unlikely(xfrm_offload(skb))) { - dir = DMA_BIDIRECTIONAL; - skb = skb_unshare(skb, GFP_ATOMIC); - } - - /* First segment is always skb->data */ - if (!seg) { - page = virt_to_page(skb->data); - offset = offset_in_page(skb->data); - *len = skb_headlen(skb); - } else { - frag = &skb_shinfo(skb)->frags[seg - 1]; - page = skb_frag_page(frag); - offset = skb_frag_off(frag); - *len = skb_frag_size(frag); - } - return otx2_dma_map_page(pfvf, page, offset, *len, dir); -} - -static void otx2_dma_unmap_skb_frags(struct otx2_nic *pfvf, struct sg_list *sg) -{ - enum dma_data_direction dir = DMA_TO_DEVICE; - struct sk_buff *skb = NULL; - int seg; - - skb = (struct sk_buff *)sg->skb; - if (unlikely(xfrm_offload(skb))) - dir = DMA_BIDIRECTIONAL; - - for (seg = 0; seg < sg->num_segs; seg++) { - otx2_dma_unmap_page(pfvf, sg->dma_addr[seg], - sg->size[seg], dir); - } - sg->num_segs = 0; -} - static void otx2_xdp_snd_pkt_handler(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, struct nix_cqe_tx_s *cqe) From a7ef63dbd5886c396aa1130d5ce42634ab1db91e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:54 +0530 Subject: [PATCH 0216/1386] octeontx2-af: Disable backpressure between CPT and NIX NIX can assert backpressure to CPT on the NIX<=>CPT link. Keep the backpressure disabled for now. NIX block anyways handles backpressure asserted by MAC due to PFC or flow control pkts. Signed-off-by: Bharat Bhushan Signed-off-by: David S. 
Miller --- .../net/ethernet/marvell/octeontx2/af/mbox.h | 4 ++ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 68 ++++++++++++++++--- .../marvell/octeontx2/nic/otx2_common.c | 44 ++++++++++-- .../marvell/octeontx2/nic/otx2_common.h | 1 + .../marvell/octeontx2/nic/otx2_dcbnl.c | 3 + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 3 + 6 files changed, 106 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 62c07407eb94..005ca8a056c0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -313,6 +313,10 @@ M(NIX_BANDPROF_FREE, 0x801e, nix_bandprof_free, nix_bandprof_free_req, \ msg_rsp) \ M(NIX_BANDPROF_GET_HWINFO, 0x801f, nix_bandprof_get_hwinfo, msg_req, \ nix_bandprof_get_hwinfo_rsp) \ +M(NIX_CPT_BP_ENABLE, 0x8020, nix_cpt_bp_enable, nix_bp_cfg_req, \ + nix_bp_cfg_rsp) \ +M(NIX_CPT_BP_DISABLE, 0x8021, nix_cpt_bp_disable, nix_bp_cfg_req, \ + msg_rsp) \ M(NIX_READ_INLINE_IPSEC_CFG, 0x8023, nix_read_inline_ipsec_cfg, \ msg_req, nix_inline_ipsec_cfg) \ M(NIX_MCAST_GRP_CREATE, 0x802b, nix_mcast_grp_create, nix_mcast_grp_create_req, \ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a5d1e2bddd58..613655fcd34f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -569,9 +569,17 @@ void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc) mutex_unlock(&rvu->rsrc_lock); } -int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, - struct nix_bp_cfg_req *req, - struct msg_rsp *rsp) +static u16 nix_get_channel(u16 chan, bool cpt_link) +{ + /* CPT channel for a given link channel is always + * assumed to be BIT(11) set in link channel. + */ + return cpt_link ? 
chan | BIT(11) : chan; +} + +static int nix_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp, bool cpt_link) { u16 pcifunc = req->hdr.pcifunc; int blkaddr, pf, type, err; @@ -579,6 +587,7 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, struct rvu_pfvf *pfvf; struct nix_hw *nix_hw; struct nix_bp *bp; + u16 chan_v; u64 cfg; pf = rvu_get_pf(pcifunc); @@ -589,6 +598,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, if (is_sdp_pfvf(pcifunc)) type = NIX_INTF_TYPE_SDP; + if (cpt_link && !rvu->hw->cpt_links) + return 0; + pfvf = rvu_get_pfvf(rvu, pcifunc); err = nix_get_struct_ptrs(rvu, pcifunc, &nix_hw, &blkaddr); if (err) @@ -597,8 +609,9 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, bp = &nix_hw->bp; chan_base = pfvf->rx_chan_base + req->chan_base; for (chan = chan_base; chan < (chan_base + req->chan_cnt); chan++) { - cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan)); - rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan), + chan_v = nix_get_channel(chan, cpt_link); + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v)); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v), cfg & ~BIT_ULL(16)); if (type == NIX_INTF_TYPE_LBK) { @@ -617,6 +630,20 @@ int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, return 0; } +int rvu_mbox_handler_nix_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp) +{ + return nix_bp_disable(rvu, req, rsp, false); +} + +int rvu_mbox_handler_nix_cpt_bp_disable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct msg_rsp *rsp) +{ + return nix_bp_disable(rvu, req, rsp, true); +} + static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, int type, int chan_id) { @@ -696,15 +723,17 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, return bpid; } -int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, - struct nix_bp_cfg_req *req, - struct nix_bp_cfg_rsp *rsp) +static int nix_bp_enable(struct rvu *rvu, + struct 
nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp, + bool cpt_link) { int blkaddr, pf, type, chan_id = 0; u16 pcifunc = req->hdr.pcifunc; struct rvu_pfvf *pfvf; u16 chan_base, chan; s16 bpid, bpid_base; + u16 chan_v; u64 cfg; pf = rvu_get_pf(pcifunc); @@ -717,6 +746,9 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, type != NIX_INTF_TYPE_SDP) return 0; + if (cpt_link && !rvu->hw->cpt_links) + return 0; + pfvf = rvu_get_pfvf(rvu, pcifunc); blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); @@ -730,9 +762,11 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, return -EINVAL; } - cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan)); + chan_v = nix_get_channel(chan, cpt_link); + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v)); cfg &= ~GENMASK_ULL(8, 0); - rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan), + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(chan_v), cfg | (bpid & GENMASK_ULL(8, 0)) | BIT_ULL(16)); chan_id++; bpid = rvu_nix_get_bpid(rvu, req, type, chan_id); @@ -750,6 +784,20 @@ int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, return 0; } +int rvu_mbox_handler_nix_bp_enable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp) +{ + return nix_bp_enable(rvu, req, rsp, false); +} + +int rvu_mbox_handler_nix_cpt_bp_enable(struct rvu *rvu, + struct nix_bp_cfg_req *req, + struct nix_bp_cfg_rsp *rsp) +{ + return nix_bp_enable(rvu, req, rsp, true); +} + static void nix_setup_lso_tso_l3(struct rvu *rvu, int blkaddr, u64 format, bool v4, u64 *fidx) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 971115a5d2cc..4c8774899eaf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -17,6 +17,11 @@ #include "otx2_struct.h" #include "cn10k.h" +static bool otx2_is_pfc_enabled(struct otx2_nic *pfvf) +{ + return IS_ENABLED(CONFIG_DCB) && !!pfvf->pfc_en; 
+} + static void otx2_nix_rq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) { @@ -1723,18 +1728,43 @@ int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable) return -ENOMEM; req->chan_base = 0; -#ifdef CONFIG_DCB - req->chan_cnt = pfvf->pfc_en ? IEEE_8021QAZ_MAX_TCS : 1; - req->bpid_per_chan = pfvf->pfc_en ? 1 : 0; -#else - req->chan_cnt = 1; - req->bpid_per_chan = 0; -#endif + if (otx2_is_pfc_enabled(pfvf)) { + req->chan_cnt = IEEE_8021QAZ_MAX_TCS; + req->bpid_per_chan = 1; + } else { + req->chan_cnt = 1; + req->bpid_per_chan = 0; + } return otx2_sync_mbox_msg(&pfvf->mbox); } EXPORT_SYMBOL(otx2_nix_config_bp); +int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable) +{ + struct nix_bp_cfg_req *req; + + if (enable) + req = otx2_mbox_alloc_msg_nix_cpt_bp_enable(&pfvf->mbox); + else + req = otx2_mbox_alloc_msg_nix_cpt_bp_disable(&pfvf->mbox); + + if (!req) + return -ENOMEM; + + req->chan_base = 0; + if (otx2_is_pfc_enabled(pfvf)) { + req->chan_cnt = IEEE_8021QAZ_MAX_TCS; + req->bpid_per_chan = 1; + } else { + req->chan_cnt = 1; + req->bpid_per_chan = 0; + } + + return otx2_sync_mbox_msg(&pfvf->mbox); +} +EXPORT_SYMBOL(otx2_nix_cpt_config_bp); + /* Mbox message handlers */ void mbox_handler_cgx_stats(struct otx2_nic *pfvf, struct cgx_stats_rsp *rsp) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index bb78d825046d..37a32ac0dae7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -985,6 +985,7 @@ int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable); void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); +int otx2_nix_cpt_config_bp(struct otx2_nic *pfvf, bool enable); void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue 
*cq, int qidx); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c index 294fba58b670..f110dfa42360 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c @@ -435,6 +435,9 @@ process_pfc: return err; } + /* Default disable backpressure on NIX-CPT */ + otx2_nix_cpt_config_bp(pfvf, false); + /* Request Per channel Bpids */ if (pfc->pfc_en) otx2_nix_config_bp(pfvf, true); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index e310f99b1736..8ec0296dee84 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1551,6 +1551,9 @@ int otx2_init_hw_resources(struct otx2_nic *pf) if (err) goto err_free_npa_lf; + /* Default disable backpressure on NIX-CPT */ + otx2_nix_cpt_config_bp(pf, false); + /* Enable backpressure for CGX mapped PF/VFs */ if (!is_otx2_lbkvf(pf->pdev)) otx2_nix_config_bp(pf, true); From fe079ab05d49ffaac1e333cb38cf2c2792f7cf40 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:55 +0530 Subject: [PATCH 0217/1386] cn10k-ipsec: Init hardware for outbound ipsec crypto offload One crypto hardware logical function (cpt-lf) per netdev is required for outbound ipsec crypto offload. Allocate, attach and initialize one crypto hardware function when enabling outbound ipsec crypto offload. Crypto hardware function will be detached and freed on disabling outbound ipsec crypto offload. Signed-off-by: Bharat Bhushan Signed-off-by: David S. 
Miller --- MAINTAINERS | 1 + .../ethernet/marvell/octeontx2/nic/Makefile | 1 + .../marvell/octeontx2/nic/cn10k_ipsec.c | 415 ++++++++++++++++++ .../marvell/octeontx2/nic/cn10k_ipsec.h | 109 +++++ .../marvell/octeontx2/nic/otx2_common.h | 18 + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 14 +- .../ethernet/marvell/octeontx2/nic/otx2_vf.c | 10 +- 7 files changed, 566 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c create mode 100644 drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h diff --git a/MAINTAINERS b/MAINTAINERS index 79756f2100e0..2d75560e64ac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13942,6 +13942,7 @@ M: Sunil Goutham M: Geetha sowjanya M: Subbaraya Sundeep M: hariprasad +M: Bharat Bhushan L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/marvell/octeontx2/nic/ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index dbc971266865..cb6513ab35e7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -15,5 +15,6 @@ rvu_rep-y := rep.o rvu_nicpf-$(CONFIG_DCB) += otx2_dcbnl.o rvu_nicpf-$(CONFIG_MACSEC) += cn10k_macsec.o +rvu_nicpf-$(CONFIG_XFRM_OFFLOAD) += cn10k_ipsec.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c new file mode 100644 index 000000000000..e09ce42075c7 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell IPSEC offload driver + * + * Copyright (C) 2024 Marvell. 
+ */ + +#include +#include +#include + +#include "otx2_common.h" +#include "cn10k_ipsec.h" + +static bool is_dev_support_ipsec_offload(struct pci_dev *pdev) +{ + return is_dev_cn10ka_b0(pdev) || is_dev_cn10kb(pdev); +} + +static bool cn10k_cpt_device_set_inuse(struct otx2_nic *pf) +{ + enum cn10k_cpt_hw_state_e state; + + while (true) { + state = atomic_cmpxchg(&pf->ipsec.cpt_state, + CN10K_CPT_HW_AVAILABLE, + CN10K_CPT_HW_IN_USE); + if (state == CN10K_CPT_HW_AVAILABLE) + return true; + if (state == CN10K_CPT_HW_UNAVAILABLE) + return false; + + mdelay(1); + } +} + +static void cn10k_cpt_device_set_available(struct otx2_nic *pf) +{ + atomic_set(&pf->ipsec.cpt_state, CN10K_CPT_HW_AVAILABLE); +} + +static void cn10k_cpt_device_set_unavailable(struct otx2_nic *pf) +{ + atomic_set(&pf->ipsec.cpt_state, CN10K_CPT_HW_UNAVAILABLE); +} + +static int cn10k_outb_cptlf_attach(struct otx2_nic *pf) +{ + struct rsrc_attach *attach; + int ret = -ENOMEM; + + mutex_lock(&pf->mbox.lock); + /* Get memory to put this msg */ + attach = otx2_mbox_alloc_msg_attach_resources(&pf->mbox); + if (!attach) + goto unlock; + + attach->cptlfs = true; + attach->modify = true; + + /* Send attach request to AF */ + ret = otx2_sync_mbox_msg(&pf->mbox); + +unlock: + mutex_unlock(&pf->mbox.lock); + return ret; +} + +static int cn10k_outb_cptlf_detach(struct otx2_nic *pf) +{ + struct rsrc_detach *detach; + int ret = -ENOMEM; + + mutex_lock(&pf->mbox.lock); + detach = otx2_mbox_alloc_msg_detach_resources(&pf->mbox); + if (!detach) + goto unlock; + + detach->partial = true; + detach->cptlfs = true; + + /* Send detach request to AF */ + ret = otx2_sync_mbox_msg(&pf->mbox); + +unlock: + mutex_unlock(&pf->mbox.lock); + return ret; +} + +static int cn10k_outb_cptlf_alloc(struct otx2_nic *pf) +{ + struct cpt_lf_alloc_req_msg *req; + int ret = -ENOMEM; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_cpt_lf_alloc(&pf->mbox); + if (!req) + goto unlock; + + /* PF function */ + req->nix_pf_func = 
pf->pcifunc; + /* Enable SE-IE Engine Group */ + req->eng_grpmsk = 1 << CN10K_DEF_CPT_IPSEC_EGRP; + + ret = otx2_sync_mbox_msg(&pf->mbox); + +unlock: + mutex_unlock(&pf->mbox.lock); + return ret; +} + +static void cn10k_outb_cptlf_free(struct otx2_nic *pf) +{ + mutex_lock(&pf->mbox.lock); + otx2_mbox_alloc_msg_cpt_lf_free(&pf->mbox); + otx2_sync_mbox_msg(&pf->mbox); + mutex_unlock(&pf->mbox.lock); +} + +static int cn10k_outb_cptlf_config(struct otx2_nic *pf) +{ + struct cpt_inline_ipsec_cfg_msg *req; + int ret = -ENOMEM; + + mutex_lock(&pf->mbox.lock); + req = otx2_mbox_alloc_msg_cpt_inline_ipsec_cfg(&pf->mbox); + if (!req) + goto unlock; + + req->dir = CPT_INLINE_OUTBOUND; + req->enable = 1; + req->nix_pf_func = pf->pcifunc; + ret = otx2_sync_mbox_msg(&pf->mbox); +unlock: + mutex_unlock(&pf->mbox.lock); + return ret; +} + +static void cn10k_outb_cptlf_iq_enable(struct otx2_nic *pf) +{ + u64 reg_val; + + /* Set Execution Enable of instruction queue */ + reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG); + reg_val |= BIT_ULL(16); + otx2_write64(pf, CN10K_CPT_LF_INPROG, reg_val); + + /* Set iqueue's enqueuing */ + reg_val = otx2_read64(pf, CN10K_CPT_LF_CTL); + reg_val |= BIT_ULL(0); + otx2_write64(pf, CN10K_CPT_LF_CTL, reg_val); +} + +static void cn10k_outb_cptlf_iq_disable(struct otx2_nic *pf) +{ + u32 inflight, grb_cnt, gwb_cnt; + u32 nq_ptr, dq_ptr; + int timeout = 20; + u64 reg_val; + int cnt; + + /* Disable instructions enqueuing */ + otx2_write64(pf, CN10K_CPT_LF_CTL, 0ull); + + /* Wait for instruction queue to become empty. 
+ * CPT_LF_INPROG.INFLIGHT count is zero + */ + do { + reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG); + inflight = FIELD_GET(CPT_LF_INPROG_INFLIGHT, reg_val); + if (!inflight) + break; + + usleep_range(10000, 20000); + if (timeout-- < 0) { + netdev_err(pf->netdev, "Timeout to cleanup CPT IQ\n"); + break; + } + } while (1); + + /* Disable executions in the LF's queue, + * the queue should be empty at this point + */ + reg_val &= ~BIT_ULL(16); + otx2_write64(pf, CN10K_CPT_LF_INPROG, reg_val); + + /* Wait for instruction queue to become empty */ + cnt = 0; + do { + reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG); + if (reg_val & BIT_ULL(31)) + cnt = 0; + else + cnt++; + reg_val = otx2_read64(pf, CN10K_CPT_LF_Q_GRP_PTR); + nq_ptr = FIELD_GET(CPT_LF_Q_GRP_PTR_NQ_PTR, reg_val); + dq_ptr = FIELD_GET(CPT_LF_Q_GRP_PTR_DQ_PTR, reg_val); + } while ((cnt < 10) && (nq_ptr != dq_ptr)); + + cnt = 0; + do { + reg_val = otx2_read64(pf, CN10K_CPT_LF_INPROG); + inflight = FIELD_GET(CPT_LF_INPROG_INFLIGHT, reg_val); + grb_cnt = FIELD_GET(CPT_LF_INPROG_GRB_CNT, reg_val); + gwb_cnt = FIELD_GET(CPT_LF_INPROG_GWB_CNT, reg_val); + if (inflight == 0 && gwb_cnt < 40 && + (grb_cnt == 0 || grb_cnt == 40)) + cnt++; + else + cnt = 0; + } while (cnt < 10); +} + +/* Allocate memory for CPT outbound Instruction queue.
+ * Instruction queue memory format is: + * ----------------------------- + * | Instruction Group memory | + * | (CPT_LF_Q_SIZE[SIZE_DIV40] | + * | x 16 Bytes) | + * | | + * ----------------------------- <-- CPT_LF_Q_BASE[ADDR] + * | Flow Control (128 Bytes) | + * | | + * ----------------------------- + * | Instruction Memory | + * | (CPT_LF_Q_SIZE[SIZE_DIV40] | + * | × 40 × 64 bytes) | + * | | + * ----------------------------- + */ +static int cn10k_outb_cptlf_iq_alloc(struct otx2_nic *pf) +{ + struct cn10k_cpt_inst_queue *iq = &pf->ipsec.iq; + + iq->size = CN10K_CPT_INST_QLEN_BYTES + CN10K_CPT_Q_FC_LEN + + CN10K_CPT_INST_GRP_QLEN_BYTES + OTX2_ALIGN; + + iq->real_vaddr = dma_alloc_coherent(pf->dev, iq->size, + &iq->real_dma_addr, GFP_KERNEL); + if (!iq->real_vaddr) + return -ENOMEM; + + /* iq->vaddr/dma_addr points to Flow Control location */ + iq->vaddr = iq->real_vaddr + CN10K_CPT_INST_GRP_QLEN_BYTES; + iq->dma_addr = iq->real_dma_addr + CN10K_CPT_INST_GRP_QLEN_BYTES; + + /* Align pointers */ + iq->vaddr = PTR_ALIGN(iq->vaddr, OTX2_ALIGN); + iq->dma_addr = PTR_ALIGN(iq->dma_addr, OTX2_ALIGN); + return 0; +} + +static void cn10k_outb_cptlf_iq_free(struct otx2_nic *pf) +{ + struct cn10k_cpt_inst_queue *iq = &pf->ipsec.iq; + + if (iq->real_vaddr) + dma_free_coherent(pf->dev, iq->size, iq->real_vaddr, + iq->real_dma_addr); + + iq->real_vaddr = NULL; + iq->vaddr = NULL; +} + +static int cn10k_outb_cptlf_iq_init(struct otx2_nic *pf) +{ + u64 reg_val; + int ret; + + /* Allocate Memory for CPT IQ */ + ret = cn10k_outb_cptlf_iq_alloc(pf); + if (ret) + return ret; + + /* Disable IQ */ + cn10k_outb_cptlf_iq_disable(pf); + + /* Set IQ base address */ + otx2_write64(pf, CN10K_CPT_LF_Q_BASE, pf->ipsec.iq.dma_addr); + + /* Set IQ size */ + reg_val = FIELD_PREP(CPT_LF_Q_SIZE_DIV40, CN10K_CPT_SIZE_DIV40 + + CN10K_CPT_EXTRA_SIZE_DIV40); + otx2_write64(pf, CN10K_CPT_LF_Q_SIZE, reg_val); + + return 0; +} + +static int cn10k_outb_cptlf_init(struct otx2_nic *pf) +{ + int ret; + + /* 
Initialize CPTLF Instruction Queue (IQ) */ + ret = cn10k_outb_cptlf_iq_init(pf); + if (ret) + return ret; + + /* Configure CPTLF for outbound ipsec offload */ + ret = cn10k_outb_cptlf_config(pf); + if (ret) + goto iq_clean; + + /* Enable CPTLF IQ */ + cn10k_outb_cptlf_iq_enable(pf); + return 0; +iq_clean: + cn10k_outb_cptlf_iq_free(pf); + return ret; +} + +static int cn10k_outb_cpt_init(struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + int ret; + + /* Attach a CPT LF for outbound ipsec offload */ + ret = cn10k_outb_cptlf_attach(pf); + if (ret) + return ret; + + /* Allocate a CPT LF for outbound ipsec offload */ + ret = cn10k_outb_cptlf_alloc(pf); + if (ret) + goto detach; + + /* Initialize the CPTLF for outbound ipsec offload */ + ret = cn10k_outb_cptlf_init(pf); + if (ret) + goto lf_free; + + pf->ipsec.io_addr = (__force u64)otx2_get_regaddr(pf, + CN10K_CPT_LF_NQX(0)); + + /* Set ipsec offload enabled for this device */ + pf->flags |= OTX2_FLAG_IPSEC_OFFLOAD_ENABLED; + + cn10k_cpt_device_set_available(pf); + return 0; + +lf_free: + cn10k_outb_cptlf_free(pf); +detach: + cn10k_outb_cptlf_detach(pf); + return ret; +} + +static int cn10k_outb_cpt_clean(struct otx2_nic *pf) +{ + int ret; + + if (!cn10k_cpt_device_set_inuse(pf)) { + netdev_err(pf->netdev, "CPT LF device unavailable\n"); + return -ENODEV; + } + + /* Set ipsec offload disabled for this device */ + pf->flags &= ~OTX2_FLAG_IPSEC_OFFLOAD_ENABLED; + + /* Disable CPTLF Instruction Queue (IQ) */ + cn10k_outb_cptlf_iq_disable(pf); + + /* Set IQ base address and size to 0 */ + otx2_write64(pf, CN10K_CPT_LF_Q_BASE, 0); + otx2_write64(pf, CN10K_CPT_LF_Q_SIZE, 0); + + /* Free CPTLF IQ */ + cn10k_outb_cptlf_iq_free(pf); + + /* Free and detach CPT LF */ + cn10k_outb_cptlf_free(pf); + ret = cn10k_outb_cptlf_detach(pf); + if (ret) + netdev_err(pf->netdev, "Failed to detach CPT LF\n"); + + cn10k_cpt_device_set_unavailable(pf); + return ret; +} + +int cn10k_ipsec_ethtool_init(struct net_device 
*netdev, bool enable) +{ + struct otx2_nic *pf = netdev_priv(netdev); + + /* IPsec offload supported on cn10k */ + if (!is_dev_support_ipsec_offload(pf->pdev)) + return -EOPNOTSUPP; + + /* Initialize CPT for outbound ipsec offload */ + if (enable) + return cn10k_outb_cpt_init(netdev); + + return cn10k_outb_cpt_clean(pf); +} + +int cn10k_ipsec_init(struct net_device *netdev) +{ + struct otx2_nic *pf = netdev_priv(netdev); + + if (!is_dev_support_ipsec_offload(pf->pdev)) + return 0; + + cn10k_cpt_device_set_unavailable(pf); + return 0; +} +EXPORT_SYMBOL(cn10k_ipsec_init); + +void cn10k_ipsec_clean(struct otx2_nic *pf) +{ + if (!is_dev_support_ipsec_offload(pf->pdev)) + return; + + if (!(pf->flags & OTX2_FLAG_IPSEC_OFFLOAD_ENABLED)) + return; + + cn10k_outb_cpt_clean(pf); +} +EXPORT_SYMBOL(cn10k_ipsec_clean); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h new file mode 100644 index 000000000000..f3eb5aee4b9d --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell IPSEC offload driver + * + * Copyright (C) 2024 Marvell. + */ + +#ifndef CN10K_IPSEC_H +#define CN10K_IPSEC_H + +#include + +/* CPT instruction size in bytes */ +#define CN10K_CPT_INST_SIZE 64 + +/* CPT instruction (CPT_INST_S) queue length */ +#define CN10K_CPT_INST_QLEN 8200 + +/* CPT instruction queue size passed to HW is in units of + * 40*CPT_INST_S messages. 
+ */ +#define CN10K_CPT_SIZE_DIV40 (CN10K_CPT_INST_QLEN / 40) + +/* CPT needs 320 free entries */ +#define CN10K_CPT_INST_QLEN_EXTRA_BYTES (320 * CN10K_CPT_INST_SIZE) +#define CN10K_CPT_EXTRA_SIZE_DIV40 (320 / 40) + +/* CPT instruction queue length in bytes */ +#define CN10K_CPT_INST_QLEN_BYTES \ + ((CN10K_CPT_SIZE_DIV40 * 40 * CN10K_CPT_INST_SIZE) + \ + CN10K_CPT_INST_QLEN_EXTRA_BYTES) + +/* CPT instruction group queue length in bytes */ +#define CN10K_CPT_INST_GRP_QLEN_BYTES \ + ((CN10K_CPT_SIZE_DIV40 + CN10K_CPT_EXTRA_SIZE_DIV40) * 16) + +/* CPT FC length in bytes */ +#define CN10K_CPT_Q_FC_LEN 128 + +/* Default CPT engine group for ipsec offload */ +#define CN10K_DEF_CPT_IPSEC_EGRP 1 + +/* CN10K CPT LF registers */ +#define CPT_LFBASE (BLKTYPE_CPT << RVU_FUNC_BLKADDR_SHIFT) +#define CN10K_CPT_LF_CTL (CPT_LFBASE | 0x10) +#define CN10K_CPT_LF_INPROG (CPT_LFBASE | 0x40) +#define CN10K_CPT_LF_Q_BASE (CPT_LFBASE | 0xf0) +#define CN10K_CPT_LF_Q_SIZE (CPT_LFBASE | 0x100) +#define CN10K_CPT_LF_Q_INST_PTR (CPT_LFBASE | 0x110) +#define CN10K_CPT_LF_Q_GRP_PTR (CPT_LFBASE | 0x120) +#define CN10K_CPT_LF_NQX(a) (CPT_LFBASE | 0x400 | (a) << 3) +#define CN10K_CPT_LF_CTX_FLUSH (CPT_LFBASE | 0x510) + +struct cn10k_cpt_inst_queue { + u8 *vaddr; + u8 *real_vaddr; + dma_addr_t dma_addr; + dma_addr_t real_dma_addr; + u32 size; +}; + +enum cn10k_cpt_hw_state_e { + CN10K_CPT_HW_UNAVAILABLE, + CN10K_CPT_HW_AVAILABLE, + CN10K_CPT_HW_IN_USE +}; + +struct cn10k_ipsec { + /* Outbound CPT */ + u64 io_addr; + atomic_t cpt_state; + struct cn10k_cpt_inst_queue iq; +}; + +/* CPT LF_INPROG Register */ +#define CPT_LF_INPROG_INFLIGHT GENMASK_ULL(8, 0) +#define CPT_LF_INPROG_GRB_CNT GENMASK_ULL(39, 32) +#define CPT_LF_INPROG_GWB_CNT GENMASK_ULL(47, 40) + +/* CPT LF_Q_GRP_PTR Register */ +#define CPT_LF_Q_GRP_PTR_DQ_PTR GENMASK_ULL(14, 0) +#define CPT_LF_Q_GRP_PTR_NQ_PTR GENMASK_ULL(46, 32) + +/* CPT LF_Q_BASE Register */ +#define CPT_LF_Q_BASE_ADDR GENMASK_ULL(52, 7) + +/* CPT LF_Q_SIZE Register
*/ +#define CPT_LF_Q_SIZE_DIV40 GENMASK_ULL(14, 0) + +#ifdef CONFIG_XFRM_OFFLOAD +int cn10k_ipsec_init(struct net_device *netdev); +void cn10k_ipsec_clean(struct otx2_nic *pf); +int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable); +#else +static inline __maybe_unused int cn10k_ipsec_init(struct net_device *netdev) +{ + return 0; +} + +static inline __maybe_unused void cn10k_ipsec_clean(struct otx2_nic *pf) +{ +} + +static inline __maybe_unused +int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable) +{ + return 0; +} +#endif +#endif // CN10K_IPSEC_H diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 37a32ac0dae7..5e2da67d58bb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -30,6 +30,7 @@ #include #include "qos.h" #include "rep.h" +#include "cn10k_ipsec.h" /* IPv4 flag more fragment bit */ #define IPV4_FLAG_MORE 0x20 @@ -40,6 +41,7 @@ #define PCI_DEVID_OCTEONTX2_RVU_AFVF 0xA0F8 #define PCI_SUBSYS_DEVID_96XX_RVU_PFVF 0xB200 +#define PCI_SUBSYS_DEVID_CN10K_A_RVU_PFVF 0xB900 #define PCI_SUBSYS_DEVID_CN10K_B_RVU_PFVF 0xBD00 #define PCI_DEVID_OCTEONTX2_SDP_REP 0xA0F7 @@ -448,6 +450,7 @@ struct otx2_nic { #define OTX2_FLAG_TC_MARK_ENABLED BIT_ULL(17) #define OTX2_FLAG_REP_MODE_ENABLED BIT_ULL(18) #define OTX2_FLAG_PORT_UP BIT_ULL(19) +#define OTX2_FLAG_IPSEC_OFFLOAD_ENABLED BIT_ULL(20) u64 flags; u64 *cq_op_addr; @@ -522,6 +525,9 @@ struct otx2_nic { u16 rep_pf_map[RVU_MAX_REP]; u16 esw_mode; #endif + + /* Inline ipsec */ + struct cn10k_ipsec ipsec; }; static inline bool is_otx2_lbkvf(struct pci_dev *pdev) @@ -572,6 +578,15 @@ static inline bool is_dev_cn10kb(struct pci_dev *pdev) return pdev->subsystem_device == PCI_SUBSYS_DEVID_CN10K_B_RVU_PFVF; } +static inline bool is_dev_cn10ka_b0(struct pci_dev *pdev) +{ + if (pdev->subsystem_device == 
PCI_SUBSYS_DEVID_CN10K_A_RVU_PFVF && + (pdev->revision & 0xFF) == 0x54) + return true; + + return false; +} + static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) { struct otx2_hw *hw = &pfvf->hw; @@ -621,6 +636,9 @@ static inline void __iomem *otx2_get_regaddr(struct otx2_nic *nic, u64 offset) case BLKTYPE_NPA: blkaddr = BLKADDR_NPA; break; + case BLKTYPE_CPT: + blkaddr = BLKADDR_CPT0; + break; default: blkaddr = BLKADDR_RVUM; break; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 8ec0296dee84..2f652035d854 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -26,6 +26,7 @@ #include "cn10k.h" #include "qos.h" #include +#include "cn10k_ipsec.h" #define DRV_NAME "rvu_nicpf" #define DRV_STRING "Marvell RVU NIC Physical Function Driver" @@ -2276,6 +2277,10 @@ static int otx2_set_features(struct net_device *netdev, return otx2_enable_rxvlan(pf, features & NETIF_F_HW_VLAN_CTAG_RX); + if (changed & NETIF_F_HW_ESP) + return cn10k_ipsec_ethtool_init(netdev, + features & NETIF_F_HW_ESP); + return otx2_handle_ntuple_tc_features(netdev, features); } @@ -3165,10 +3170,14 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* reset CGX/RPM MAC stats */ otx2_reset_mac_stats(pf); + err = cn10k_ipsec_init(netdev); + if (err) + goto err_mcs_free; + err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_mcs_free; + goto err_ipsec_clean; } err = otx2_wq_init(pf); @@ -3209,6 +3218,8 @@ err_mcam_flow_del: otx2_mcam_flow_del(pf); err_unreg_netdev: unregister_netdev(netdev); +err_ipsec_clean: + cn10k_ipsec_clean(pf); err_mcs_free: cn10k_mcs_free(pf); err_del_mcam_entries: @@ -3406,6 +3417,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_unregister_dl(pf); unregister_netdev(netdev); + cn10k_ipsec_clean(pf); cn10k_mcs_free(pf); 
otx2_sriov_disable(pf->pdev); otx2_sriov_vfcfg_cleanup(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 839fc77c11b2..e926c6ce96cf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -14,6 +14,7 @@ #include "otx2_reg.h" #include "otx2_ptp.h" #include "cn10k.h" +#include "cn10k_ipsec.h" #define DRV_NAME "rvu_nicvf" #define DRV_STRING "Marvell RVU NIC Virtual Function Driver" @@ -693,10 +694,14 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) pdev->bus->number, n); } + err = cn10k_ipsec_init(netdev); + if (err) + goto err_ptp_destroy; + err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_ptp_destroy; + goto err_ipsec_clean; } err = otx2_vf_wq_init(vf); @@ -730,6 +735,8 @@ err_shutdown_tc: otx2_shutdown_tc(vf); err_unreg_netdev: unregister_netdev(netdev); +err_ipsec_clean: + cn10k_ipsec_clean(vf); err_ptp_destroy: otx2_ptp_destroy(vf); err_detach_rsrc: @@ -782,6 +789,7 @@ static void otx2vf_remove(struct pci_dev *pdev) unregister_netdev(netdev); if (vf->otx2_wq) destroy_workqueue(vf->otx2_wq); + cn10k_ipsec_clean(vf); otx2_ptp_destroy(vf); otx2_mcam_flow_del(vf); otx2_shutdown_tc(vf); From c45211c2369734d1b03c75165988878d16867040 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:56 +0530 Subject: [PATCH 0218/1386] cn10k-ipsec: Add SA add/del support for outb ipsec crypto offload This patch adds support to add and delete Security Association (SA) xfrm ops. Hardware maintains SA context in memory allocated by software. Each SA context is 128 byte aligned and size of each context is multiple of 128-byte. Add support for transport and tunnel ipsec mode, ESP protocol, aead aes-gcm-icv16, key size 128/192/256-bits with 32bit salt. Signed-off-by: Bharat Bhushan Signed-off-by: David S. 
Miller --- .../marvell/octeontx2/nic/cn10k_ipsec.c | 409 ++++++++++++++++++ .../marvell/octeontx2/nic/cn10k_ipsec.h | 114 +++++ 2 files changed, 523 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index e09ce42075c7..106a241625dc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -375,6 +375,385 @@ static int cn10k_outb_cpt_clean(struct otx2_nic *pf) return ret; } +static void cn10k_cpt_inst_flush(struct otx2_nic *pf, struct cpt_inst_s *inst, + u64 size) +{ + struct otx2_lmt_info *lmt_info; + u64 val = 0, tar_addr = 0; + + lmt_info = per_cpu_ptr(pf->hw.lmt_info, smp_processor_id()); + /* FIXME: val[0:10] LMT_ID. + * [12:15] no of LMTST - 1 in the burst. + * [19:63] data size of each LMTST in the burst except first. + */ + val = (lmt_info->lmt_id & 0x7FF); + /* Target address for LMTST flush tells HW how many 128bit + * words are present. + * tar_addr[6:4] size of first LMTST - 1 in units of 128b. 
+ */ + tar_addr |= pf->ipsec.io_addr | (((size / 16) - 1) & 0x7) << 4; + dma_wmb(); + memcpy((u64 *)lmt_info->lmt_addr, inst, size); + cn10k_lmt_flush(val, tar_addr); +} + +static int cn10k_wait_for_cpt_respose(struct otx2_nic *pf, + struct cpt_res_s *res) +{ + unsigned long timeout = jiffies + msecs_to_jiffies(100); + u64 *completion_ptr = (u64 *)res; + + do { + if (time_after(jiffies, timeout)) { + netdev_err(pf->netdev, "CPT response timeout\n"); + return -EBUSY; + } + } while ((READ_ONCE(*completion_ptr) & CN10K_CPT_COMP_E_MASK) == + CN10K_CPT_COMP_E_NOTDONE); + + if (!(res->compcode == CN10K_CPT_COMP_E_GOOD || + res->compcode == CN10K_CPT_COMP_E_WARN) || res->uc_compcode) { + netdev_err(pf->netdev, "compcode=%x doneint=%x\n", + res->compcode, res->doneint); + netdev_err(pf->netdev, "uc_compcode=%x uc_info=%llx esn=%llx\n", + res->uc_compcode, (u64)res->uc_info, res->esn); + } + return 0; +} + +static int cn10k_outb_write_sa(struct otx2_nic *pf, struct qmem *sa_info) +{ + dma_addr_t res_iova, dptr_iova, sa_iova; + struct cn10k_tx_sa_s *sa_dptr; + struct cpt_inst_s inst = {}; + struct cpt_res_s *res; + u32 sa_size, off; + u64 *sptr, *dptr; + u64 reg_val; + int ret; + + sa_iova = sa_info->iova; + if (!sa_iova) + return -EINVAL; + + res = dma_alloc_coherent(pf->dev, sizeof(struct cpt_res_s), + &res_iova, GFP_ATOMIC); + if (!res) + return -ENOMEM; + + sa_size = sizeof(struct cn10k_tx_sa_s); + sa_dptr = dma_alloc_coherent(pf->dev, sa_size, &dptr_iova, GFP_ATOMIC); + if (!sa_dptr) { + dma_free_coherent(pf->dev, sizeof(struct cpt_res_s), res, + res_iova); + return -ENOMEM; + } + + sptr = (__force u64 *)sa_info->base; + dptr = (__force u64 *)sa_dptr; + for (off = 0; off < (sa_size / 8); off++) + *(dptr + off) = (__force u64)cpu_to_be64(*(sptr + off)); + + res->compcode = CN10K_CPT_COMP_E_NOTDONE; + inst.res_addr = res_iova; + inst.dptr = (u64)dptr_iova; + inst.param2 = sa_size >> 3; + inst.dlen = sa_size; + inst.opcode_major = CN10K_IPSEC_MAJOR_OP_WRITE_SA; + 
inst.opcode_minor = CN10K_IPSEC_MINOR_OP_WRITE_SA; + inst.cptr = sa_iova; + inst.ctx_val = 1; + inst.egrp = CN10K_DEF_CPT_IPSEC_EGRP; + + /* Check if CPT-LF available */ + if (!cn10k_cpt_device_set_inuse(pf)) { + ret = -ENODEV; + goto free_mem; + } + + cn10k_cpt_inst_flush(pf, &inst, sizeof(struct cpt_inst_s)); + dma_wmb(); + ret = cn10k_wait_for_cpt_respose(pf, res); + if (ret) + goto set_available; + + /* Trigger CTX flush to write dirty data back to DRAM */ + reg_val = FIELD_PREP(CPT_LF_CTX_FLUSH, sa_iova >> 7); + otx2_write64(pf, CN10K_CPT_LF_CTX_FLUSH, reg_val); + +set_available: + cn10k_cpt_device_set_available(pf); +free_mem: + dma_free_coherent(pf->dev, sa_size, sa_dptr, dptr_iova); + dma_free_coherent(pf->dev, sizeof(struct cpt_res_s), res, res_iova); + return ret; +} + +static int cn10k_ipsec_get_hw_ctx_offset(void) +{ + /* Offset on Hardware-context offset in word */ + return (offsetof(struct cn10k_tx_sa_s, hw_ctx) / sizeof(u64)) & 0x7F; +} + +static int cn10k_ipsec_get_ctx_push_size(void) +{ + /* Context push size is round up and in multiple of 8 Byte */ + return (roundup(offsetof(struct cn10k_tx_sa_s, hw_ctx), 8) / 8) & 0x7F; +} + +static int cn10k_ipsec_get_aes_key_len(int key_len) +{ + /* key_len is aes key length in bytes */ + switch (key_len) { + case 16: + return CN10K_IPSEC_SA_AES_KEY_LEN_128; + case 24: + return CN10K_IPSEC_SA_AES_KEY_LEN_192; + default: + return CN10K_IPSEC_SA_AES_KEY_LEN_256; + } +} + +static void cn10k_outb_prepare_sa(struct xfrm_state *x, + struct cn10k_tx_sa_s *sa_entry) +{ + int key_len = (x->aead->alg_key_len + 7) / 8; + struct net_device *netdev = x->xso.dev; + u8 *key = x->aead->alg_key; + struct otx2_nic *pf; + u32 *tmp_salt; + u64 *tmp_key; + int idx; + + memset(sa_entry, 0, sizeof(struct cn10k_tx_sa_s)); + + /* context size, 128 Byte aligned up */ + pf = netdev_priv(netdev); + sa_entry->ctx_size = (pf->ipsec.sa_size / OTX2_ALIGN) & 0xF; + sa_entry->hw_ctx_off = cn10k_ipsec_get_hw_ctx_offset(); + 
sa_entry->ctx_push_size = cn10k_ipsec_get_ctx_push_size(); + + /* Ucode to skip two words of CPT_CTX_HW_S */ + sa_entry->ctx_hdr_size = 1; + + /* Allow Atomic operation (AOP) */ + sa_entry->aop_valid = 1; + + /* Outbound, ESP TRANSPORT/TUNNEL Mode, AES-GCM with */ + sa_entry->sa_dir = CN10K_IPSEC_SA_DIR_OUTB; + sa_entry->ipsec_protocol = CN10K_IPSEC_SA_IPSEC_PROTO_ESP; + sa_entry->enc_type = CN10K_IPSEC_SA_ENCAP_TYPE_AES_GCM; + sa_entry->iv_src = CN10K_IPSEC_SA_IV_SRC_PACKET; + if (x->props.mode == XFRM_MODE_TUNNEL) + sa_entry->ipsec_mode = CN10K_IPSEC_SA_IPSEC_MODE_TUNNEL; + else + sa_entry->ipsec_mode = CN10K_IPSEC_SA_IPSEC_MODE_TRANSPORT; + + /* Last 4 bytes are salt */ + key_len -= 4; + sa_entry->aes_key_len = cn10k_ipsec_get_aes_key_len(key_len); + memcpy(sa_entry->cipher_key, key, key_len); + tmp_key = (u64 *)sa_entry->cipher_key; + + for (idx = 0; idx < key_len / 8; idx++) + tmp_key[idx] = (__force u64)cpu_to_be64(tmp_key[idx]); + + memcpy(&sa_entry->iv_gcm_salt, key + key_len, 4); + tmp_salt = (u32 *)&sa_entry->iv_gcm_salt; + *tmp_salt = (__force u32)cpu_to_be32(*tmp_salt); + + /* Write SA context data to memory before enabling */ + wmb(); + + /* Enable SA */ + sa_entry->sa_valid = 1; +} + +static int cn10k_ipsec_validate_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + if (x->props.aalgo != SADB_AALG_NONE) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload authenticated xfrm states"); + return -EINVAL; + } + if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { + NL_SET_ERR_MSG_MOD(extack, + "Only AES-GCM-ICV16 xfrm state may be offloaded"); + return -EINVAL; + } + if (x->props.calgo != SADB_X_CALG_NONE) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload compressed xfrm states"); + return -EINVAL; + } + if (x->props.flags & XFRM_STATE_ESN) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states"); + return -EINVAL; + } + if (x->props.family != AF_INET && x->props.family != AF_INET6) { + NL_SET_ERR_MSG_MOD(extack, + "Only IPv4/v6 xfrm 
states may be offloaded"); + return -EINVAL; + } + if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload other than crypto-mode"); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG_MOD(extack, + "Only tunnel/transport xfrm states may be offloaded"); + return -EINVAL; + } + if (x->id.proto != IPPROTO_ESP) { + NL_SET_ERR_MSG_MOD(extack, + "Only ESP xfrm state may be offloaded"); + return -EINVAL; + } + if (x->encap) { + NL_SET_ERR_MSG_MOD(extack, + "Encapsulated xfrm state may not be offloaded"); + return -EINVAL; + } + if (!x->aead) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states without aead"); + return -EINVAL; + } + + if (x->aead->alg_icv_len != 128) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states with AEAD ICV length other than 128bit"); + return -EINVAL; + } + if (x->aead->alg_key_len != 128 + 32 && + x->aead->alg_key_len != 192 + 32 && + x->aead->alg_key_len != 256 + 32) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states with AEAD key length other than 128/192/256bit"); + return -EINVAL; + } + if (x->tfcpad) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states with tfc padding"); + return -EINVAL; + } + if (!x->geniv) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states without geniv"); + return -EINVAL; + } + if (strcmp(x->geniv, "seqiv")) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot offload xfrm states with geniv other than seqiv"); + return -EINVAL; + } + return 0; +} + +static int cn10k_ipsec_inb_add_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG_MOD(extack, "xfrm inbound offload not supported"); + return -EOPNOTSUPP; +} + +static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + struct net_device *netdev = x->xso.dev; + struct cn10k_tx_sa_s *sa_entry; + struct qmem *sa_info; + struct otx2_nic *pf; + int err; + + err = 
cn10k_ipsec_validate_state(x, extack); + if (err) + return err; + + pf = netdev_priv(netdev); + + err = qmem_alloc(pf->dev, &sa_info, pf->ipsec.sa_size, OTX2_ALIGN); + if (err) + return err; + + sa_entry = (struct cn10k_tx_sa_s *)sa_info->base; + cn10k_outb_prepare_sa(x, sa_entry); + + err = cn10k_outb_write_sa(pf, sa_info); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Error writing outbound SA"); + qmem_free(pf->dev, sa_info); + return err; + } + + x->xso.offload_handle = (unsigned long)sa_info; + pf->ipsec.outb_sa_count++; + return 0; +} + +static int cn10k_ipsec_add_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) +{ + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) + return cn10k_ipsec_inb_add_state(x, extack); + else + return cn10k_ipsec_outb_add_state(x, extack); +} + +static void cn10k_ipsec_del_state(struct xfrm_state *x) +{ + struct net_device *netdev = x->xso.dev; + struct cn10k_tx_sa_s *sa_entry; + struct qmem *sa_info; + struct otx2_nic *pf; + int err; + + if (x->xso.dir == XFRM_DEV_OFFLOAD_IN) + return; + + pf = netdev_priv(netdev); + + sa_info = (struct qmem *)x->xso.offload_handle; + sa_entry = (struct cn10k_tx_sa_s *)sa_info->base; + memset(sa_entry, 0, sizeof(struct cn10k_tx_sa_s)); + /* Disable SA in CPT h/w */ + sa_entry->ctx_push_size = cn10k_ipsec_get_ctx_push_size(); + sa_entry->ctx_size = (pf->ipsec.sa_size / OTX2_ALIGN) & 0xF; + sa_entry->aop_valid = 1; + + err = cn10k_outb_write_sa(pf, sa_info); + if (err) + netdev_err(netdev, "Error (%d) deleting SA\n", err); + + x->xso.offload_handle = 0; + qmem_free(pf->dev, sa_info); + + /* If no more SA's then update netdev feature for potential change + * in NETIF_F_HW_ESP. 
+ */ + if (!--pf->ipsec.outb_sa_count) + queue_work(pf->ipsec.sa_workq, &pf->ipsec.sa_work); +} + +static const struct xfrmdev_ops cn10k_ipsec_xfrmdev_ops = { + .xdo_dev_state_add = cn10k_ipsec_add_state, + .xdo_dev_state_delete = cn10k_ipsec_del_state, +}; + +static void cn10k_ipsec_sa_wq_handler(struct work_struct *work) +{ + struct cn10k_ipsec *ipsec = container_of(work, struct cn10k_ipsec, + sa_work); + struct otx2_nic *pf = container_of(ipsec, struct otx2_nic, ipsec); + + rtnl_lock(); + netdev_update_features(pf->netdev); + rtnl_unlock(); +} + int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable) { struct otx2_nic *pf = netdev_priv(netdev); @@ -387,16 +766,41 @@ int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable) if (enable) return cn10k_outb_cpt_init(netdev); + /* Don't do CPT cleanup if SA installed */ + if (pf->ipsec.outb_sa_count) { + netdev_err(pf->netdev, "SA installed on this device\n"); + return -EBUSY; + } + return cn10k_outb_cpt_clean(pf); } int cn10k_ipsec_init(struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); + u32 sa_size; if (!is_dev_support_ipsec_offload(pf->pdev)) return 0; + /* Each SA entry size is 128 Byte round up in size */ + sa_size = sizeof(struct cn10k_tx_sa_s) % OTX2_ALIGN ? + (sizeof(struct cn10k_tx_sa_s) / OTX2_ALIGN + 1) * + OTX2_ALIGN : sizeof(struct cn10k_tx_sa_s); + pf->ipsec.sa_size = sa_size; + + INIT_WORK(&pf->ipsec.sa_work, cn10k_ipsec_sa_wq_handler); + pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq", 0, 0); + if (!pf->ipsec.sa_workq) { + netdev_err(pf->netdev, "SA alloc workqueue failed\n"); + return -ENOMEM; + } + + /* Set xfrm device ops + * NETIF_F_HW_ESP is not set as ipsec setup is not yet complete. 
+ */ + netdev->xfrmdev_ops = &cn10k_ipsec_xfrmdev_ops; + cn10k_cpt_device_set_unavailable(pf); return 0; } @@ -410,6 +814,11 @@ void cn10k_ipsec_clean(struct otx2_nic *pf) if (!(pf->flags & OTX2_FLAG_IPSEC_OFFLOAD_ENABLED)) return; + if (pf->ipsec.sa_workq) { + destroy_workqueue(pf->ipsec.sa_workq); + pf->ipsec.sa_workq = NULL; + } + cn10k_outb_cpt_clean(pf); } EXPORT_SYMBOL(cn10k_ipsec_clean); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h index f3eb5aee4b9d..5ac4de4ae974 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h @@ -50,6 +50,20 @@ #define CN10K_CPT_LF_NQX(a) (CPT_LFBASE | 0x400 | (a) << 3) #define CN10K_CPT_LF_CTX_FLUSH (CPT_LFBASE | 0x510) +/* IPSEC Instruction opcodes */ +#define CN10K_IPSEC_MAJOR_OP_WRITE_SA 0x01UL +#define CN10K_IPSEC_MINOR_OP_WRITE_SA 0x09UL + +enum cn10k_cpt_comp_e { + CN10K_CPT_COMP_E_NOTDONE = 0x00, + CN10K_CPT_COMP_E_GOOD = 0x01, + CN10K_CPT_COMP_E_FAULT = 0x02, + CN10K_CPT_COMP_E_HWERR = 0x04, + CN10K_CPT_COMP_E_INSTERR = 0x05, + CN10K_CPT_COMP_E_WARN = 0x06, + CN10K_CPT_COMP_E_MASK = 0x3F +}; + struct cn10k_cpt_inst_queue { u8 *vaddr; u8 *real_vaddr; @@ -69,6 +83,103 @@ struct cn10k_ipsec { u64 io_addr; atomic_t cpt_state; struct cn10k_cpt_inst_queue iq; + + /* SA info */ + u32 sa_size; + u32 outb_sa_count; + struct work_struct sa_work; + struct workqueue_struct *sa_workq; +}; + +/* CN10K IPSEC Security Association (SA) */ +/* SA direction */ +#define CN10K_IPSEC_SA_DIR_INB 0 +#define CN10K_IPSEC_SA_DIR_OUTB 1 +/* SA protocol */ +#define CN10K_IPSEC_SA_IPSEC_PROTO_AH 0 +#define CN10K_IPSEC_SA_IPSEC_PROTO_ESP 1 +/* SA Encryption Type */ +#define CN10K_IPSEC_SA_ENCAP_TYPE_AES_GCM 5 +/* SA IPSEC mode Transport/Tunnel */ +#define CN10K_IPSEC_SA_IPSEC_MODE_TRANSPORT 0 +#define CN10K_IPSEC_SA_IPSEC_MODE_TUNNEL 1 +/* SA AES Key Length */ +#define 
CN10K_IPSEC_SA_AES_KEY_LEN_128 1 +#define CN10K_IPSEC_SA_AES_KEY_LEN_192 2 +#define CN10K_IPSEC_SA_AES_KEY_LEN_256 3 +/* IV Source */ +#define CN10K_IPSEC_SA_IV_SRC_COUNTER 0 +#define CN10K_IPSEC_SA_IV_SRC_PACKET 3 + +struct cn10k_tx_sa_s { + u64 esn_en : 1; /* W0 */ + u64 rsvd_w0_1_8 : 8; + u64 hw_ctx_off : 7; + u64 ctx_id : 16; + u64 rsvd_w0_32_47 : 16; + u64 ctx_push_size : 7; + u64 rsvd_w0_55 : 1; + u64 ctx_hdr_size : 2; + u64 aop_valid : 1; + u64 rsvd_w0_59 : 1; + u64 ctx_size : 4; + u64 w1; /* W1 */ + u64 sa_valid : 1; /* W2 */ + u64 sa_dir : 1; + u64 rsvd_w2_2_3 : 2; + u64 ipsec_mode : 1; + u64 ipsec_protocol : 1; + u64 aes_key_len : 2; + u64 enc_type : 3; + u64 rsvd_w2_11_19 : 9; + u64 iv_src : 2; + u64 rsvd_w2_22_31 : 10; + u64 rsvd_w2_32_63 : 32; + u64 w3; /* W3 */ + u8 cipher_key[32]; /* W4 - W7 */ + u32 rsvd_w8_0_31; /* W8 : IV */ + u32 iv_gcm_salt; + u64 rsvd_w9_w30[22]; /* W9 - W30 */ + u64 hw_ctx[6]; /* W31 - W36 */ +}; + +/* CPT Instruction Structure */ +struct cpt_inst_s { + u64 nixtxl : 3; /* W0 */ + u64 doneint : 1; + u64 rsvd_w0_4_15 : 12; + u64 dat_offset : 8; + u64 ext_param1 : 8; + u64 nixtx_offset : 20; + u64 rsvd_w0_52_63 : 12; + u64 res_addr; /* W1 */ + u64 tag : 32; /* W2 */ + u64 tt : 2; + u64 grp : 10; + u64 rsvd_w2_44_47 : 4; + u64 rvu_pf_func : 16; + u64 qord : 1; /* W3 */ + u64 rsvd_w3_1_2 : 2; + u64 wqe_ptr : 61; + u64 dlen : 16; /* W4 */ + u64 param2 : 16; + u64 param1 : 16; + u64 opcode_major : 8; + u64 opcode_minor : 8; + u64 dptr; /* W5 */ + u64 rptr; /* W6 */ + u64 cptr : 60; /* W7 */ + u64 ctx_val : 1; + u64 egrp : 3; +}; + +/* CPT Instruction Result Structure */ +struct cpt_res_s { + u64 compcode : 7; /* W0 */ + u64 doneint : 1; + u64 uc_compcode : 8; + u64 uc_info : 48; + u64 esn; /* W1 */ }; /* CPT LF_INPROG Register */ @@ -86,6 +197,9 @@ struct cn10k_ipsec { /* CPT LF_Q_SIZE Register */ #define CPT_LF_Q_SIZE_DIV40 GENMASK_ULL(14, 0) +/* CPT LF CTX Flush Register */ +#define CPT_LF_CTX_FLUSH GENMASK_ULL(45, 0) + #ifdef 
CONFIG_XFRM_OFFLOAD int cn10k_ipsec_init(struct net_device *netdev); void cn10k_ipsec_clean(struct otx2_nic *pf); From 6a77a158848a8c68930df27b8840660db8531222 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:57 +0530 Subject: [PATCH 0219/1386] cn10k-ipsec: Process outbound ipsec crypto offload Prepare and submit a crypto hardware (CPT) instruction for outbound ipsec crypto offload. The CPT instruction specifies the authentication offset, IV offset and encapsulation offset within the input packet. It also provides the SA context pointer, which holds details about the algorithm, keys, salt, etc. The crypto hardware encrypts and authenticates the packet and provides the resulting ESP packet to the networking hardware. Signed-off-by: Bharat Bhushan Signed-off-by: David S. Miller --- .../marvell/octeontx2/nic/cn10k_ipsec.c | 219 ++++++++++++++++++ .../marvell/octeontx2/nic/cn10k_ipsec.h | 42 ++++ .../marvell/octeontx2/nic/otx2_common.c | 23 ++ .../marvell/octeontx2/nic/otx2_common.h | 3 + .../ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 + .../marvell/octeontx2/nic/otx2_txrx.c | 32 ++- .../marvell/octeontx2/nic/otx2_txrx.h | 3 + 7 files changed, 321 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index 106a241625dc..9a9b06f4c2cc 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -7,10 +7,15 @@ #include #include #include +#include +#include #include "otx2_common.h" +#include "otx2_struct.h" #include "cn10k_ipsec.h" +DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled); + static bool is_dev_support_ipsec_offload(struct pci_dev *pdev) { return is_dev_cn10ka_b0(pdev) || is_dev_cn10kb(pdev); @@ -690,6 +695,9 @@ static int cn10k_ipsec_outb_add_state(struct xfrm_state *x, } x->xso.offload_handle = (unsigned long)sa_info; + /* Enable static branch when first SA setup */ + if (!pf->ipsec.outb_sa_count) +
static_branch_enable(&cn10k_ipsec_sa_enabled); pf->ipsec.outb_sa_count++; return 0; } @@ -749,6 +757,8 @@ static void cn10k_ipsec_sa_wq_handler(struct work_struct *work) sa_work); struct otx2_nic *pf = container_of(ipsec, struct otx2_nic, ipsec); + /* Disable static branch when no more SA enabled */ + static_branch_disable(&cn10k_ipsec_sa_enabled); rtnl_lock(); netdev_update_features(pf->netdev); rtnl_unlock(); @@ -822,3 +832,212 @@ void cn10k_ipsec_clean(struct otx2_nic *pf) cn10k_outb_cpt_clean(pf); } EXPORT_SYMBOL(cn10k_ipsec_clean); + +static u16 cn10k_ipsec_get_ip_data_len(struct xfrm_state *x, + struct sk_buff *skb) +{ + struct ipv6hdr *ipv6h; + struct iphdr *iph; + u8 *src; + + src = (u8 *)skb->data + ETH_HLEN; + + if (x->props.family == AF_INET) { + iph = (struct iphdr *)src; + return ntohs(iph->tot_len); + } + + ipv6h = (struct ipv6hdr *)src; + return ntohs(ipv6h->payload_len) + sizeof(struct ipv6hdr); +} + +/* Prepare CPT and NIX SQE scatter/gather subdescriptor structure. + * SG of NIX and CPT are same in size. 
+ * Layout of a NIX SQE and CPT SG entry: + * ----------------------------- + * | CPT Scatter Gather | + * | (SQE SIZE) | + * | | + * ----------------------------- + * | NIX SQE | + * | (SQE SIZE) | + * | | + * ----------------------------- + */ +bool otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, int num_segs, int *offset) +{ + struct cpt_sg_s *cpt_sg = NULL; + struct nix_sqe_sg_s *sg = NULL; + u64 dma_addr, *iova = NULL; + u64 *cpt_iova = NULL; + u16 *sg_lens = NULL; + int seg, len; + + sq->sg[sq->head].num_segs = 0; + cpt_sg = (struct cpt_sg_s *)(sq->sqe_base - sq->sqe_size); + + for (seg = 0; seg < num_segs; seg++) { + if ((seg % MAX_SEGS_PER_SG) == 0) { + sg = (struct nix_sqe_sg_s *)(sq->sqe_base + *offset); + sg->ld_type = NIX_SEND_LDTYPE_LDD; + sg->subdc = NIX_SUBDC_SG; + sg->segs = 0; + sg_lens = (void *)sg; + iova = (void *)sg + sizeof(*sg); + /* Next subdc always starts at a 16byte boundary. + * So if sg->segs is whether 2 or 3, offset += 16bytes. 
+ */ + if ((num_segs - seg) >= (MAX_SEGS_PER_SG - 1)) + *offset += sizeof(*sg) + (3 * sizeof(u64)); + else + *offset += sizeof(*sg) + sizeof(u64); + + cpt_sg += (seg / MAX_SEGS_PER_SG) * 4; + cpt_iova = (void *)cpt_sg + sizeof(*cpt_sg); + } + dma_addr = otx2_dma_map_skb_frag(pfvf, skb, seg, &len); + if (dma_mapping_error(pfvf->dev, dma_addr)) + return false; + + sg_lens[seg % MAX_SEGS_PER_SG] = len; + sg->segs++; + *iova++ = dma_addr; + *cpt_iova++ = dma_addr; + + /* Save DMA mapping info for later unmapping */ + sq->sg[sq->head].dma_addr[seg] = dma_addr; + sq->sg[sq->head].size[seg] = len; + sq->sg[sq->head].num_segs++; + + *cpt_sg = *(struct cpt_sg_s *)sg; + cpt_sg->rsvd_63_50 = 0; + } + + sq->sg[sq->head].skb = (u64)skb; + return true; +} + +static u16 cn10k_ipsec_get_param1(u8 iv_offset) +{ + u16 param1_val; + + /* Set Crypto mode, disable L3/L4 checksum */ + param1_val = CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM | + CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM; + param1_val |= (u16)iv_offset << CN10K_IPSEC_INST_PARAM1_IV_OFFSET_SHIFT; + return param1_val; +} + +bool cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq, + struct otx2_snd_queue *sq, struct sk_buff *skb, + int num_segs, int size) +{ + struct cpt_inst_s inst; + struct cpt_res_s *res; + struct xfrm_state *x; + struct qmem *sa_info; + dma_addr_t dptr_iova; + struct sec_path *sp; + u8 encap_offset; + u8 auth_offset; + u8 gthr_size; + u8 iv_offset; + u16 dlen; + + /* Check for IPSEC offload enabled */ + if (!(pf->flags & OTX2_FLAG_IPSEC_OFFLOAD_ENABLED)) + goto drop; + + sp = skb_sec_path(skb); + if (unlikely(!sp->len)) + goto drop; + + x = xfrm_input_state(skb); + if (unlikely(!x)) + goto drop; + + if (x->props.mode != XFRM_MODE_TRANSPORT && + x->props.mode != XFRM_MODE_TUNNEL) + goto drop; + + dlen = cn10k_ipsec_get_ip_data_len(x, skb); + if (dlen == 0 && netif_msg_tx_err(pf)) { + netdev_err(pf->netdev, "Invalid IP header, ip-length zero\n"); + goto drop; + } + + /* Check for valid SA context */ + 
sa_info = (struct qmem *)x->xso.offload_handle; + if (!sa_info) + goto drop; + + memset(&inst, 0, sizeof(struct cpt_inst_s)); + + /* Get authentication offset */ + if (x->props.family == AF_INET) + auth_offset = sizeof(struct iphdr); + else + auth_offset = sizeof(struct ipv6hdr); + + /* IV offset is after ESP header */ + iv_offset = auth_offset + sizeof(struct ip_esp_hdr); + /* Encap will start after IV */ + encap_offset = iv_offset + GCM_RFC4106_IV_SIZE; + + /* CPT Instruction word-1 */ + res = (struct cpt_res_s *)(sq->cpt_resp->base + (64 * sq->head)); + res->compcode = 0; + inst.res_addr = sq->cpt_resp->iova + (64 * sq->head); + + /* CPT Instruction word-2 */ + inst.rvu_pf_func = pf->pcifunc; + + /* CPT Instruction word-3: + * Set QORD to force CPT_RES_S write completion + */ + inst.qord = 1; + + /* CPT Instruction word-4 */ + /* inst.dlen should not include ICV length */ + inst.dlen = dlen + ETH_HLEN - (x->aead->alg_icv_len / 8); + inst.opcode_major = CN10K_IPSEC_MAJOR_OP_OUTB_IPSEC; + inst.param1 = cn10k_ipsec_get_param1(iv_offset); + + inst.param2 = encap_offset << + CN10K_IPSEC_INST_PARAM2_ENC_DATA_OFFSET_SHIFT; + inst.param2 |= (u16)auth_offset << + CN10K_IPSEC_INST_PARAM2_AUTH_DATA_OFFSET_SHIFT; + + /* CPT Instruction word-5 */ + gthr_size = num_segs / MAX_SEGS_PER_SG; + gthr_size = (num_segs % MAX_SEGS_PER_SG) ? 
gthr_size + 1 : gthr_size; + + gthr_size &= 0xF; + dptr_iova = (sq->sqe_ring->iova + (sq->head * (sq->sqe_size * 2))); + inst.dptr = dptr_iova | ((u64)gthr_size << 60); + + /* CPT Instruction word-6 */ + inst.rptr = inst.dptr; + + /* CPT Instruction word-7 */ + inst.cptr = sa_info->iova; + inst.ctx_val = 1; + inst.egrp = CN10K_DEF_CPT_IPSEC_EGRP; + + /* CPT Instruction word-0 */ + inst.nixtxl = (size / 16) - 1; + inst.dat_offset = ETH_HLEN; + inst.nixtx_offset = sq->sqe_size; + + netdev_tx_sent_queue(txq, skb->len); + + /* Finally Flush the CPT instruction */ + sq->head++; + sq->head &= (sq->sqe_cnt - 1); + cn10k_cpt_inst_flush(pf, &inst, sizeof(struct cpt_inst_s)); + return true; +drop: + dev_kfree_skb_any(skb); + return false; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h index 5ac4de4ae974..9965df0faa3e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.h @@ -9,6 +9,8 @@ #include +DECLARE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled); + /* CPT instruction size in bytes */ #define CN10K_CPT_INST_SIZE 64 @@ -53,6 +55,7 @@ /* IPSEC Instruction opcodes */ #define CN10K_IPSEC_MAJOR_OP_WRITE_SA 0x01UL #define CN10K_IPSEC_MINOR_OP_WRITE_SA 0x09UL +#define CN10K_IPSEC_MAJOR_OP_OUTB_IPSEC 0x2AUL enum cn10k_cpt_comp_e { CN10K_CPT_COMP_E_NOTDONE = 0x00, @@ -143,6 +146,16 @@ struct cn10k_tx_sa_s { u64 hw_ctx[6]; /* W31 - W36 */ }; +/* CPT instruction parameter-1 */ +#define CN10K_IPSEC_INST_PARAM1_DIS_L4_CSUM 0x1 +#define CN10K_IPSEC_INST_PARAM1_DIS_L3_CSUM 0x2 +#define CN10K_IPSEC_INST_PARAM1_CRYPTO_MODE 0x20 +#define CN10K_IPSEC_INST_PARAM1_IV_OFFSET_SHIFT 8 + +/* CPT instruction parameter-2 */ +#define CN10K_IPSEC_INST_PARAM2_ENC_DATA_OFFSET_SHIFT 0 +#define CN10K_IPSEC_INST_PARAM2_AUTH_DATA_OFFSET_SHIFT 8 + /* CPT Instruction Structure */ struct cpt_inst_s { u64 nixtxl : 3; /* W0 */ @@ -182,6 +195,15 @@ struct 
cpt_res_s { u64 esn; /* W1 */ }; +/* CPT SG structure */ +struct cpt_sg_s { + u64 seg1_size : 16; + u64 seg2_size : 16; + u64 seg3_size : 16; + u64 segs : 2; + u64 rsvd_63_50 : 14; +}; + /* CPT LF_INPROG Register */ #define CPT_LF_INPROG_INFLIGHT GENMASK_ULL(8, 0) #define CPT_LF_INPROG_GRB_CNT GENMASK_ULL(39, 32) @@ -204,6 +226,11 @@ struct cpt_res_s { int cn10k_ipsec_init(struct net_device *netdev); void cn10k_ipsec_clean(struct otx2_nic *pf); int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable); +bool otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, int num_segs, int *offset); +bool cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq, + struct otx2_snd_queue *sq, struct sk_buff *skb, + int num_segs, int size); #else static inline __maybe_unused int cn10k_ipsec_init(struct net_device *netdev) { @@ -219,5 +246,20 @@ int cn10k_ipsec_ethtool_init(struct net_device *netdev, bool enable) { return 0; } + +static inline bool __maybe_unused +otx2_sqe_add_sg_ipsec(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, + struct sk_buff *skb, int num_segs, int *offset) +{ + return true; +} + +static inline bool __maybe_unused +cn10k_ipsec_transmit(struct otx2_nic *pf, struct netdev_queue *txq, + struct otx2_snd_queue *sq, struct sk_buff *skb, + int num_segs, int size) +{ + return true; +} #endif #endif // CN10K_IPSEC_H diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 4c8774899eaf..bf56888e7fe7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -970,6 +970,29 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) if (err) return err; + /* Allocate memory for NIX SQE (which includes NIX SG) and CPT SG. + * SG of NIX and CPT are same in size. Allocate memory for CPT SG + * same as NIX SQE for base address alignment. 
+ * Layout of a NIX SQE and CPT SG entry: + * ----------------------------- + * | CPT Scatter Gather | + * | (SQE SIZE) | + * | | + * ----------------------------- + * | NIX SQE | + * | (SQE SIZE) | + * | | + * ----------------------------- + */ + err = qmem_alloc(pfvf->dev, &sq->sqe_ring, qset->sqe_cnt, + sq->sqe_size * 2); + if (err) + return err; + + err = qmem_alloc(pfvf->dev, &sq->cpt_resp, qset->sqe_cnt, 64); + if (err) + return err; + if (qidx < pfvf->hw.tx_queues) { err = qmem_alloc(pfvf->dev, &sq->tso_hdrs, qset->sqe_cnt, TSO_HEADER_SIZE); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 5e2da67d58bb..44d737a0dd09 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -57,6 +57,9 @@ #define NIX_PF_PFC_PRIO_MAX 8 #endif +/* Number of segments per SG structure */ +#define MAX_SEGS_PER_SG 3 + enum arua_mapped_qtypes { AURA_NIX_RQ, AURA_NIX_SQ, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2f652035d854..e1dde93e8af8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1485,6 +1485,8 @@ static void otx2_free_sq_res(struct otx2_nic *pf) if (!sq->sqe) continue; qmem_free(pf->dev, sq->sqe); + qmem_free(pf->dev, sq->sqe_ring); + qmem_free(pf->dev, sq->cpt_resp); qmem_free(pf->dev, sq->tso_hdrs); kfree(sq->sg); kfree(sq->sqb_ptrs); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index a49041e55c33..4e0133d1d892 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "otx2_reg.h" #include "otx2_common.h" @@ -32,6 +33,17 @@ static bool 
otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, bool *need_xdp_flush); +static void otx2_sq_set_sqe_base(struct otx2_snd_queue *sq, + struct sk_buff *skb) +{ + if (static_branch_unlikely(&cn10k_ipsec_sa_enabled) && + (xfrm_offload(skb))) + sq->sqe_base = sq->sqe_ring->base + sq->sqe_size + + (sq->head * (sq->sqe_size * 2)); + else + sq->sqe_base = sq->sqe->base; +} + static int otx2_nix_cq_op_status(struct otx2_nic *pfvf, struct otx2_cq_queue *cq) { @@ -593,7 +605,6 @@ void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, sq->head &= (sq->sqe_cnt - 1); } -#define MAX_SEGS_PER_SG 3 /* Add SQE scatter/gather subdescriptor structure */ static bool otx2_sqe_add_sg(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, struct sk_buff *skb, int num_segs, int *offset) @@ -1129,6 +1140,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, int offset, num_segs, free_desc; struct nix_sqe_hdr_s *sqe_hdr; struct otx2_nic *pfvf = dev; + bool ret; /* Check if there is enough room between producer * and consumer index. @@ -1145,6 +1157,7 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, /* If SKB doesn't fit in a single SQE, linearize it. * TODO: Consider adding JUMP descriptor instead. */ + if (unlikely(num_segs > OTX2_MAX_FRAGS_IN_SQE)) { if (__skb_linearize(skb)) { dev_kfree_skb_any(skb); @@ -1164,6 +1177,9 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, return true; } + /* Set sqe base address */ + otx2_sq_set_sqe_base(sq, skb); + /* Set SQE's SEND_HDR. * Do not clear the first 64bit as it contains constant info. 
*/ @@ -1176,7 +1192,13 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, otx2_sqe_add_ext(pfvf, sq, skb, &offset); /* Add SG subdesc with data frags */ - if (!otx2_sqe_add_sg(pfvf, sq, skb, num_segs, &offset)) { + if (static_branch_unlikely(&cn10k_ipsec_sa_enabled) && + (xfrm_offload(skb))) + ret = otx2_sqe_add_sg_ipsec(pfvf, sq, skb, num_segs, &offset); + else + ret = otx2_sqe_add_sg(pfvf, sq, skb, num_segs, &offset); + + if (!ret) { otx2_dma_unmap_skb_frags(pfvf, &sq->sg[sq->head]); return false; } @@ -1185,11 +1207,15 @@ bool otx2_sq_append_skb(void *dev, struct netdev_queue *txq, sqe_hdr->sizem1 = (offset / 16) - 1; + if (static_branch_unlikely(&cn10k_ipsec_sa_enabled) && + (xfrm_offload(skb))) + return cn10k_ipsec_transmit(pfvf, txq, sq, skb, num_segs, + offset); + netdev_tx_sent_queue(txq, skb->len); /* Flush SQE to HW */ pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); - return true; } EXPORT_SYMBOL(otx2_sq_append_skb); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index e1db5f961877..d23810963fdb 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -101,6 +101,9 @@ struct otx2_snd_queue { struct queue_stats stats; u16 sqb_count; u64 *sqb_ptrs; + /* SQE ring and CPT response queue for Inline IPSEC */ + struct qmem *sqe_ring; + struct qmem *cpt_resp; } ____cacheline_aligned_in_smp; enum cq_type { From 32188be805d052a91b999a723fd93698d83a7fa5 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:58 +0530 Subject: [PATCH 0220/1386] cn10k-ipsec: Allow ipsec crypto offload for skb with SA Allow to use hardware offload for outbound ipsec crypto mode if security association (SA) is set for a given skb. Signed-off-by: Bharat Bhushan Signed-off-by: David S. 
Miller --- .../ethernet/marvell/octeontx2/nic/cn10k_ipsec.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index 9a9b06f4c2cc..e9bf4632695e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -746,9 +746,24 @@ static void cn10k_ipsec_del_state(struct xfrm_state *x) queue_work(pf->ipsec.sa_workq, &pf->ipsec.sa_work); } +static bool cn10k_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + if (x->props.family == AF_INET) { + /* Offload with IPv4 options is not supported yet */ + if (ip_hdr(skb)->ihl > 5) + return false; + } else { + /* Offload with IPv6 extension headers is not supported yet */ + if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr)) + return false; + } + return true; +} + static const struct xfrmdev_ops cn10k_ipsec_xfrmdev_ops = { .xdo_dev_state_add = cn10k_ipsec_add_state, .xdo_dev_state_delete = cn10k_ipsec_del_state, + .xdo_dev_offload_ok = cn10k_ipsec_offload_ok, }; static void cn10k_ipsec_sa_wq_handler(struct work_struct *work) From b3ae3dc3a30f3de78c0c3675ea980639b9ba212c Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 4 Dec 2024 11:26:59 +0530 Subject: [PATCH 0221/1386] cn10k-ipsec: Enable outbound ipsec crypto offload Hardware is initialized and netdev transmit flow is hooked up for outbound ipsec crypto offload, so finally enable ipsec offload. Signed-off-by: Bharat Bhushan Signed-off-by: David S.
Miller --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index e9bf4632695e..c333e04daad3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -821,10 +821,10 @@ int cn10k_ipsec_init(struct net_device *netdev) return -ENOMEM; } - /* Set xfrm device ops - * NETIF_F_HW_ESP is not set as ipsec setup is not yet complete. - */ + /* Set xfrm device ops */ netdev->xfrmdev_ops = &cn10k_ipsec_xfrmdev_ops; + netdev->hw_features |= NETIF_F_HW_ESP; + netdev->hw_enc_features |= NETIF_F_HW_ESP; cn10k_cpt_device_set_unavailable(pf); return 0; From 52e8726d6782a14c7f9e0fea5a5bc8e6a1992fd4 Mon Sep 17 00:00:00 2001 From: Norbert van Bolhuis Date: Fri, 8 Nov 2024 13:52:30 +0100 Subject: [PATCH 0222/1386] wifi: brcmfmac: fix scatter-gather handling by detecting end of sg list The scatter-gather handling uses a pre-allocated list (with nents entries). If the driver runs out of sg entries it will result in an oops. Let's detect this instead and make the SDIO block transfer fail. 
Signed-off-by: Norbert van Bolhuis Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241108125609.107016-1-nvbolhuis@gmail.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index 42d991d9f8cb..60eb95fc19a5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -455,6 +455,11 @@ static int brcmf_sdiod_sglist_rw(struct brcmf_sdio_dev *sdiodev, if (sg_data_sz > max_req_sz - req_sz) sg_data_sz = max_req_sz - req_sz; + if (!sgl) { + /* out of (pre-allocated) scatterlist entries */ + ret = -ENOMEM; + goto exit; + } sg_set_buf(sgl, pkt_data, sg_data_sz); sg_cnt++; From 01e767d6f7832f1ef171816953547b466bba9937 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 11 Nov 2024 14:40:35 +0100 Subject: [PATCH 0223/1386] wifi: wlcore: testmode: Constify struct nla_policy 'struct nla_policy' is not modified in this driver. Constifying this structure moves some data to a read-only section, which increases overall security, especially when the structure holds some function pointers.
On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 5062 528 0 5590 15d6 drivers/net/wireless/ti/wlcore/testmode.o After: ===== text data bss dec hex filename 5178 404 0 5582 15ce drivers/net/wireless/ti/wlcore/testmode.o Signed-off-by: Christophe JAILLET Reviewed-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/78810e3ebb74ddbd3a4538f182bf1143b89baba7.1731332414.git.christophe.jaillet@wanadoo.fr --- drivers/net/wireless/ti/wlcore/testmode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ti/wlcore/testmode.c b/drivers/net/wireless/ti/wlcore/testmode.c index 3f338b8096c7..fc8ea58bc165 100644 --- a/drivers/net/wireless/ti/wlcore/testmode.c +++ b/drivers/net/wireless/ti/wlcore/testmode.c @@ -45,7 +45,7 @@ enum wl1271_tm_attrs { }; #define WL1271_TM_ATTR_MAX (__WL1271_TM_ATTR_AFTER_LAST - 1) -static struct nla_policy wl1271_tm_policy[WL1271_TM_ATTR_MAX + 1] = { +static const struct nla_policy wl1271_tm_policy[WL1271_TM_ATTR_MAX + 1] = { [WL1271_TM_ATTR_CMD_ID] = { .type = NLA_U32 }, [WL1271_TM_ATTR_ANSWER] = { .type = NLA_U8 }, [WL1271_TM_ATTR_DATA] = { .type = NLA_BINARY, From aba23b0a6a0df84b06ed0323ce127bf7257e4025 Mon Sep 17 00:00:00 2001 From: Renjaya Raga Zenta Date: Thu, 21 Nov 2024 14:09:27 +0700 Subject: [PATCH 0224/1386] wifi: brcmfmac: fix brcmf_vif_clear_mgmt_ies when stopping AP This removes the following error log when stopping AP: ieee80211 phy0: brcmf_vif_set_mgmt_ie: vndr ie set error : -52 It happened if: 1) previously wlan interface was in station mode (wpa_supplicant) and connected to a hotspot 2) then started AP mode (hostapd) 3) and then stopped AP mode. The error happened when it tried to clear BRCMF_VNDR_IE_PRBREQ_FLAG. This flag is not set in `brcmf_config_ap_mgmt_ie`, but BRCMF_VNDR_IE_ASSOCRSP_FLAG is set instead. 
Signed-off-by: Renjaya Raga Zenta Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241121-brcmfmac-v1-1-02fc3fb427c2@gmail.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 689e779fe00f..902ac3108782 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4999,12 +4999,16 @@ exit: s32 brcmf_vif_clear_mgmt_ies(struct brcmf_cfg80211_vif *vif) { static const s32 pktflags[] = { - BRCMF_VNDR_IE_PRBREQ_FLAG, BRCMF_VNDR_IE_PRBRSP_FLAG, BRCMF_VNDR_IE_BEACON_FLAG }; int i; + if (vif->wdev.iftype == NL80211_IFTYPE_AP) + brcmf_vif_set_mgmt_ie(vif, BRCMF_VNDR_IE_ASSOCRSP_FLAG, NULL, 0); + else + brcmf_vif_set_mgmt_ie(vif, BRCMF_VNDR_IE_PRBREQ_FLAG, NULL, 0); + for (i = 0; i < ARRAY_SIZE(pktflags); i++) brcmf_vif_set_mgmt_ie(vif, pktflags[i], NULL, 0); From f143cece43dd05fa651fa14d97726b67b92e9d03 Mon Sep 17 00:00:00 2001 From: Pin-yen Lin Date: Wed, 27 Nov 2024 18:55:43 +0800 Subject: [PATCH 0225/1386] wifi: mwifiex: decrease timeout waiting for host sleep from 10s to 5s In commit 52250cbee7f6 ("mwifiex: use timeout variant for wait_event_interruptible") it was noted that sometimes we seemed to miss the signal that our host sleep settings took effect. A 10 second timeout was added to the code to make sure we didn't hang forever waiting. It appears that this problem still exists and we hit the timeout sometimes for Chromebooks in the field. Recently on ChromeOS we've started setting the DPM watchdog to trip if full system suspend takes over 10 seconds. Given the timeout in the original patch, obviously we're hitting the DPM watchdog before mwifiex gets a chance to timeout. 
While we could increase the DPM watchdog in ChromeOS to avoid this problem, it's probably better to simply decrease the timeout. Any time we're waiting several seconds for the firmware to respond it's likely that the firmware won't ever respond. With that in mind, decrease the timeout in mwifiex from 10 seconds to 5 seconds. Suggested-by: Doug Anderson Signed-off-by: Pin-yen Lin Reviewed-by: Douglas Anderson Acked-by: Brian Norris Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241127105709.4014302-1-treapking@chromium.org --- drivers/net/wireless/marvell/mwifiex/sta_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index e06a0622973e..f79589cafe57 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -545,7 +545,7 @@ int mwifiex_enable_hs(struct mwifiex_adapter *adapter) if (wait_event_interruptible_timeout(adapter->hs_activate_wait_q, adapter->hs_activate_wait_q_woken, - (10 * HZ)) <= 0) { + (5 * HZ)) <= 0) { mwifiex_dbg(adapter, ERROR, "hs_activate_wait_q terminated\n"); return false; From d1fd972914239996dbd15c5142d7f6e09d95a002 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:29 +0000 Subject: [PATCH 0226/1386] ktime: Add us_to_ktime() Add a us_to_ktime() helper to go with ms_to_ktime() and ns_to_ktime(). 
Signed-off-by: David Howells cc: Thomas Gleixner cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 3a4e723eae0f..383ed9985802 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -222,6 +222,11 @@ static inline ktime_t ns_to_ktime(u64 ns) return ns; } +static inline ktime_t us_to_ktime(u64 us) +{ + return us * NSEC_PER_USEC; +} + static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; From 0e56ebde245e4799ce74d38419426f2a80d39950 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:30 +0000 Subject: [PATCH 0227/1386] rxrpc: Fix handling of received connection abort Fix the handling of a connection abort that we've received. Though the abort is at the connection level, it needs propagating to the calls on that connection. Whilst the propagation bit is performed, the calls aren't then woken up to go and process their termination, and as no further input is forthcoming, they just hang. Also add some tracing for the logging of connection aborts. 
Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 25 +++++++++++++++++++++++++ net/rxrpc/conn_event.c | 12 ++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d03e0bd8c028..27c23873c881 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -117,6 +117,7 @@ #define rxrpc_call_poke_traces \ EM(rxrpc_call_poke_abort, "Abort") \ EM(rxrpc_call_poke_complete, "Compl") \ + EM(rxrpc_call_poke_conn_abort, "Conn-abort") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_set_timeout, "Set-timo") \ @@ -282,6 +283,7 @@ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ @@ -981,6 +983,29 @@ TRACE_EVENT(rxrpc_rx_abort, __entry->abort_code) ); +TRACE_EVENT(rxrpc_rx_conn_abort, + TP_PROTO(const struct rxrpc_connection *conn, const struct sk_buff *skb), + + TP_ARGS(conn, skb), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(u32, abort_code) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = rxrpc_skb(skb)->hdr.serial; + __entry->abort_code = skb->priority; + ), + + TP_printk("C=%08x ABORT %08x ac=%d", + __entry->conn, + __entry->serial, + __entry->abort_code) + ); + TRACE_EVENT(rxrpc_rx_challenge, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 nonce, u32 min_level), diff --git 
a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 598b4ee389fc..2a1396cd892f 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -63,11 +63,12 @@ int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, /* * Mark a connection as being remotely aborted. */ -static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn, +static void rxrpc_input_conn_abort(struct rxrpc_connection *conn, struct sk_buff *skb) { - return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, - RXRPC_CALL_REMOTELY_ABORTED); + trace_rxrpc_rx_conn_abort(conn, skb); + rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + RXRPC_CALL_REMOTELY_ABORTED); } /* @@ -202,11 +203,14 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) for (i = 0; i < RXRPC_MAXCALLS; i++) { call = conn->channels[i].call; - if (call) + if (call) { + rxrpc_see_call(call, rxrpc_call_see_conn_abort); rxrpc_set_call_completion(call, conn->completion, conn->abort_code, conn->error); + rxrpc_poke_call(call, rxrpc_call_poke_conn_abort); + } } _leave(""); From 29e03ec757292e55fa0f7efa051c84ddc4f3e668 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:31 +0000 Subject: [PATCH 0228/1386] rxrpc: Use umin() and umax() rather than min_t()/max_t() where possible Use umin() and umax() rather than min_t()/max_t() where the type specified is an unsigned type. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-4-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/call_event.c | 5 ++--- net/rxrpc/call_object.c | 4 ++-- net/rxrpc/conn_client.c | 2 +- net/rxrpc/input.c | 13 +++++-------- net/rxrpc/insecure.c | 2 +- net/rxrpc/io_thread.c | 2 +- net/rxrpc/output.c | 2 +- net/rxrpc/rtt.c | 6 +++--- net/rxrpc/rxkad.c | 6 +++--- net/rxrpc/rxperf.c | 2 +- net/rxrpc/sendmsg.c | 2 +- 11 files changed, 21 insertions(+), 25 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7bbb68504766..c4754cc9b8d4 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -233,8 +233,7 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) { - unsigned int winsize = min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra); + unsigned int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize; rxrpc_seq_t tx_top = call->tx_top; int space; @@ -467,7 +466,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) } else { unsigned long nowj = jiffies, delayj, nextj; - delayj = max(nsecs_to_jiffies(delay), 1); + delayj = umax(nsecs_to_jiffies(delay), 1); nextj = nowj + delayj; if (time_before(nextj, call->timer.expires) || !timer_pending(&call->timer)) { diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f9e983a12c14..0df647d1d3a2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -220,9 +220,9 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); if (p->timeouts.normal) - call->next_rx_timo = min(p->timeouts.normal, 1); + call->next_rx_timo = umin(p->timeouts.normal, 1); if (p->timeouts.idle) - call->next_req_timo = 
min(p->timeouts.idle, 1); + call->next_req_timo = umin(p->timeouts.idle, 1); if (p->timeouts.hard) call->hard_timo = p->timeouts.hard; diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index bb11e8289d6d..86fb18bcd188 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -231,7 +231,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) distance = id - id_cursor; if (distance < 0) distance = -distance; - limit = max_t(unsigned long, atomic_read(&rxnet->nr_conns) * 4, 1024); + limit = umax(atomic_read(&rxnet->nr_conns) * 4, 1024); if (distance > limit) goto mark_dont_reuse; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 16d49a861dbb..49e35be7dc13 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -44,8 +44,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; - call->cong_ssthresh = max_t(unsigned int, - summary->flight_size / 2, 2); + call->cong_ssthresh = umax(summary->flight_size / 2, 2); cwnd = 1; if (cwnd >= call->cong_ssthresh && call->cong_mode == RXRPC_CALL_SLOW_START) { @@ -113,8 +112,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, change = rxrpc_cong_begin_retransmission; call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; - call->cong_ssthresh = max_t(unsigned int, - summary->flight_size / 2, 2); + call->cong_ssthresh = umax(summary->flight_size / 2, 2); cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; @@ -206,9 +204,8 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; call->cong_mode = RXRPC_CALL_SLOW_START; - call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh, - call->cong_cwnd * 3 / 4); - call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND); + call->cong_ssthresh = umax(call->cong_ssthresh, call->cong_cwnd * 3 / 4); + 
call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); } /* @@ -709,7 +706,7 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); + mtu = umin(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); peer = call->peer; if (mtu < peer->maxdata) { diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 6716c021a532..751eb621021d 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -19,7 +19,7 @@ static int none_init_connection_security(struct rxrpc_connection *conn, */ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { - return rxrpc_alloc_data_txbuf(call, min_t(size_t, remain, RXRPC_JUMBO_DATALEN), 1, gfp); + return rxrpc_alloc_data_txbuf(call, umin(remain, RXRPC_JUMBO_DATALEN), 1, gfp); } static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 07c74c77d802..7af5adf53b25 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -558,7 +558,7 @@ int rxrpc_io_thread(void *data) } timeout = nsecs_to_jiffies(delay_ns); - timeout = max(timeout, 1UL); + timeout = umax(timeout, 1); schedule_timeout(timeout); __set_current_state(TASK_RUNNING); continue; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5ea9601efd05..85112ea31a39 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -118,7 +118,7 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, txb->kvec[1].iov_len = ack->nAcks; wrap = RXRPC_SACK_SIZE - sack; - to = min_t(unsigned int, ack->nAcks, RXRPC_SACK_SIZE); + to = umin(ack->nAcks, RXRPC_SACK_SIZE); if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index cdab7b7d08a0..6dc51486b5a6 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -27,7 +27,7 @@ static u32 
__rxrpc_set_rto(const struct rxrpc_peer *peer) static u32 rxrpc_bound_rto(u32 rto) { - return min(rto, RXRPC_RTO_MAX); + return umin(rto, RXRPC_RTO_MAX); } /* @@ -91,11 +91,11 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ - peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->rttvar_us = umax(peer->mdev_us, rxrpc_rto_min_us(peer)); peer->mdev_max_us = peer->rttvar_us; } - peer->srtt_us = max(1U, srtt); + peer->srtt_us = umax(srtt, 1); } /* diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 48a1475e6b06..e3194d73dd84 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -150,11 +150,11 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem struct rxrpc_txbuf *txb; size_t shdr, space; - remain = min(remain, 65535 - sizeof(struct rxrpc_wire_header)); + remain = umin(remain, 65535 - sizeof(struct rxrpc_wire_header)); switch (call->conn->security_level) { default: - space = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); + space = umin(remain, RXRPC_JUMBO_DATALEN); return rxrpc_alloc_data_txbuf(call, space, 1, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); @@ -164,7 +164,7 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem break; } - space = min_t(size_t, round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr); + space = umin(round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr); space = round_up(space, RXKAD_ALIGN); txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp); diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c index 085e7892d310..7ef93407be83 100644 --- a/net/rxrpc/rxperf.c +++ b/net/rxrpc/rxperf.c @@ -503,7 +503,7 @@ static int rxperf_process_call(struct rxperf_call *call) reply_len + sizeof(rxperf_magic_cookie)); while (reply_len > 0) { - len = min_t(size_t, 
reply_len, PAGE_SIZE); + len = umin(reply_len, PAGE_SIZE); bvec_set_page(&bv, ZERO_PAGE(0), len, 0); iov_iter_bvec(&msg.msg_iter, WRITE, &bv, 1, len); msg.msg_flags = MSG_MORE; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 6abb8eec1b2b..b04afb5df241 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -360,7 +360,7 @@ reload: /* append next segment of data to the current buffer */ if (msg_data_left(msg) > 0) { - size_t copy = min_t(size_t, txb->space, msg_data_left(msg)); + size_t copy = umin(txb->space, msg_data_left(msg)); _debug("add %zu", copy); if (!copy_from_iter_full(txb->kvec[0].iov_base + txb->offset, From efa95c32352b2ac7ff09d680144e22c0f25244cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:32 +0000 Subject: [PATCH 0229/1386] rxrpc: Clean up Tx header flags generation handling Clean up the generation of the header flags when building packet headers for transmission: (1) Assemble the flags in a local variable rather than in the txb->flags. (2) Do the flags masking and JUMBO-PACKET setting in one bit of code for both the main header and the jumbo headers. (3) Generate the REQUEST-ACK flag afresh each time. There's a possibility we might want to do jumbo retransmission packets in future. (4) Pass the local flags variable to the rxrpc_tx_data tracepoint rather than the combination of the txb flags and the wire header flags (the latter belong only to the first subpacket). 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 - net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 18 ++++++++++++------ net/rxrpc/proc.c | 3 +-- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 27c23873c881..62064f63d6eb 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -452,7 +452,6 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ - EM(rxrpc_reqack_already_on, "ALREADY-ON") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d0fd37bdcfe9..fcdfbc1d5aaf 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -110,7 +110,7 @@ struct rxrpc_net { atomic_t stat_tx_acks[256]; atomic_t stat_rx_acks[256]; - atomic_t stat_why_req_ack[8]; + atomic_t stat_why_req_ack[7]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 85112ea31a39..50d5f2a02458 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -330,6 +330,8 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; + bool last; + u8 flags; _enter("%x,{%d}", txb->seq, txb->len); @@ -339,6 +341,10 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t txb->seq == 1) whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + txb->flags &= ~RXRPC_REQUEST_ACK; + flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; + last = txb->flags & RXRPC_LAST_PACKET; + /* If our RTT cache needs working on, request an ACK. 
Also request * ACKs if a DATA packet appears to have been lost. * @@ -346,9 +352,7 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t * service call, lest OpenAFS incorrectly send us an ACK with some * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (txb->flags & RXRPC_REQUEST_ACK) - why = rxrpc_reqack_already_on; - else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb)) + if (last && rxrpc_sending_to_client(txb)) why = rxrpc_reqack_no_srv_last; else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) why = rxrpc_reqack_ack_lost; @@ -367,15 +371,17 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); trace_rxrpc_req_ack(call->debug_id, txb->seq, why); - if (why != rxrpc_reqack_no_srv_last) + if (why != rxrpc_reqack_no_srv_last) { txb->flags |= RXRPC_REQUEST_ACK; + flags |= RXRPC_REQUEST_ACK; + } dont_set_request_ack: - whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; + whdr->flags = flags; whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); } /* diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 263a2251e3d2..3b7e34dd4385 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -519,9 +519,8 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE])); seq_printf(seq, - "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt])); seq_printf(seq, From 
cbe0d89095c31afcede96e4ce9cd58c4bed62d63 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:33 +0000 Subject: [PATCH 0230/1386] rxrpc: Don't set the MORE-PACKETS rxrpc wire header flag The MORE-PACKETS rxrpc header flag hasn't actually been looked at by anything since 1988 and not all implementations generate it. Change rxrpc so that it doesn't set MORE-PACKETS at all rather than setting it inconsistently. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-6-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/sendmsg.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index b04afb5df241..546abb463c3f 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -385,9 +385,6 @@ reload: (msg_data_left(msg) == 0 && !more)) { if (msg_data_left(msg) == 0 && !more) txb->flags |= RXRPC_LAST_PACKET; - else if (call->tx_top - call->acks_hard_ack < - call->tx_winsize) - txb->flags |= RXRPC_MORE_PACKETS; ret = call->security->secure_packet(call, txb); if (ret < 0) From ff992adbc470c86d2dcb66f5ed837fbb3c1a561e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:34 +0000 Subject: [PATCH 0231/1386] rxrpc: Show stats counter for received reason-0 ACKs In /proc/net/rxrpc/stats, show the stats counter for received ACKs that have the reason code set to 0 as some implementations do this. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-7-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 3b7e34dd4385..cdf32f0d8e0e 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -508,7 +508,7 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]), atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE])); seq_printf(seq, - "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n", + "Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u z=%u\n", atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]), @@ -517,7 +517,8 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]), - atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE])); + atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]), + atomic_read(&rxnet->stat_rx_acks[0])); seq_printf(seq, "Why-Req-A: acklost=%u mrtt=%u ortt=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), From 8b5823ea437624b53ecf084b6dd582760f110394 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:35 +0000 Subject: [PATCH 0232/1386] rxrpc: Request an ACK on impending Tx stall Set the REQUEST-ACK flag on the DATA packet we're about to send if we're about to stall transmission because the app layer isn't keeping up supplying us with data to transmit. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-8-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 7 ++++++- net/rxrpc/proc.c | 5 +++-- 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 62064f63d6eb..d86b5f07d292 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -452,6 +452,7 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ + EM(rxrpc_reqack_app_stall, "APP-STALL ") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fcdfbc1d5aaf..d0fd37bdcfe9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -110,7 +110,7 @@ struct rxrpc_net { atomic_t stat_tx_acks[256]; atomic_t stat_rx_acks[256]; - atomic_t stat_why_req_ack[7]; + atomic_t stat_why_req_ack[8]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 50d5f2a02458..b93a5d50be3e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -330,7 +330,7 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - bool last; + bool last, more; u8 flags; _enter("%x,{%d}", txb->seq, txb->len); @@ -345,6 +345,9 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; last = txb->flags & RXRPC_LAST_PACKET; + more = (!list_is_last(&txb->call_link, &call->tx_buffer) || + !list_empty(&call->tx_sendmsg)); + /* If our RTT cache needs working on, request an ACK. 
Also request * ACKs if a DATA packet appears to have been lost. * @@ -366,6 +369,8 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t why = rxrpc_reqack_more_rtt; else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; + else if (!last && !more) + why = rxrpc_reqack_app_stall; else goto dont_set_request_ack; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index cdf32f0d8e0e..ce4d48bdfbe9 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -520,10 +520,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]), atomic_read(&rxnet->stat_rx_acks[0])); seq_printf(seq, - "Why-Req-A: acklost=%u mrtt=%u ortt=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt])); + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall])); seq_printf(seq, "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]), From 420f8af502877a34dd371a7c8b6b943594487ebb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:36 +0000 Subject: [PATCH 0233/1386] rxrpc: Use a large kvec[] in rxrpc_local rather than every rxrpc_txbuf Use a single large kvec[] in the rxrpc_local struct rather than one in every rxrpc_txbuf struct to build large packets to save on memory. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-9-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 6 ++++++ net/rxrpc/output.c | 45 ++++++++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d0fd37bdcfe9..ab8e565cb20b 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -320,6 +320,12 @@ struct rxrpc_local { struct list_head new_client_calls; /* Newly created client calls need connection */ spinlock_t client_call_lock; /* Lock for ->new_client_calls */ struct sockaddr_rxrpc srx; /* local address */ + /* Provide a kvec table sufficiently large to manage either a DATA + * packet with a maximum set of jumbo subpackets or a PING ACK padded + * out to 64K with zeropages for PMTUD. + */ + struct kvec kvec[RXRPC_MAX_NR_JUMBO > 3 + 16 ? + RXRPC_MAX_NR_JUMBO : 3 + 16]; }; /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b93a5d50be3e..f8bb5250e849 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -175,9 +175,11 @@ no_slot: /* * Transmit an ACK packet. 
*/ -static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, + int nr_kv) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct kvec *kv = call->local->kvec; + struct rxrpc_wire_header *whdr = kv[0].iov_base; struct rxrpc_connection *conn; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct msghdr msg; @@ -206,8 +208,9 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); - iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, txb->len); + iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len); rxrpc_local_dont_fragment(conn->local, false); + ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { @@ -233,6 +236,8 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) { struct rxrpc_txbuf *txb; + struct kvec *kv = call->local->kvec; + int nr_kv; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return; @@ -248,12 +253,19 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, txb->ack_why = why; rxrpc_fill_out_ack(call, txb, ack_reason, serial); + + nr_kv = txb->nr_kvec; + kv[0] = txb->kvec[0]; + kv[1] = txb->kvec[1]; + kv[2] = txb->kvec[2]; + // TODO: Extend a path MTU probe ACK + call->ackr_nr_unacked = 0; atomic_set(&call->ackr_nr_consumed, 0); clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb); + rxrpc_send_ack_packet(call, txb, nr_kv); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); } @@ -324,12 +336,15 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) /* * Prepare a (sub)packet for transmission. 
*/ -static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - rxrpc_serial_t serial) +static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, + rxrpc_serial_t serial, + int subpkt) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; + struct kvec *kv = &call->local->kvec[subpkt]; + size_t len = txb->len; bool last, more; u8 flags; @@ -385,8 +400,13 @@ dont_set_request_ack: whdr->flags = flags; whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; + whdr->serviceId = htons(conn->service_id); + kv->iov_base = whdr; + // TODO: Convert into a jumbo header for tail subpackets trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); + kv->iov_len = len; + return len; } /* @@ -395,13 +415,15 @@ dont_set_request_ack: static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { rxrpc_serial_t serial; + size_t len = 0; /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serial(call->conn); - rxrpc_prepare_data_subpacket(call, txb, serial); + len += rxrpc_prepare_data_subpacket(call, txb, serial, 0); + // TODO: Loop around adding tail subpackets - return txb->len; + return len; } /* @@ -442,7 +464,6 @@ static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbu */ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; struct msghdr msg; @@ -463,7 +484,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t } } - iov_iter_kvec(&msg.msg_iter, WRITE, txb->kvec, txb->nr_kvec, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ 
-480,7 +501,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (txb->len >= call->peer->maxdata) { + if (len >= sizeof(struct rxrpc_wire_header) + call->peer->maxdata) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; } else { @@ -503,7 +524,7 @@ retry: rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { - trace_rxrpc_tx_packet(call->debug_id, whdr, frag); + trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); } rxrpc_tx_backoff(call, ret); From eeaedc5449d9fccf2b56e844a018df9d3720d59e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:37 +0000 Subject: [PATCH 0234/1386] rxrpc: Implement path-MTU probing using padded PING ACKs (RFC8899) Implement path-MTU probing (along the lines of RFC8899) by padding some of the PING ACKs we send. PING ACKs get their own individual responses quite apart from the acking of data (though, as ACKs, they fulfil that role also). The probing concentrates on packet sizes that correspond to how many subpackets can be stuffed inside a jumbo packet as jumbo DATA packets are just aggregations of individual DATA packets and can be split easily for retransmission purposes. If we want to perform probing, we advertise this by setting the maximum number of jumbo subpackets to 0 in the ack trailer when we send an ACK and see if the peer is also advertising the service. This is interpreted by non-supporting Rx stacks as an indication that jumbo packets aren't supported. The MTU sizes advertised in the ACK trailer AF_RXRPC transmits are pegged at a maximum of 1444 unless pmtud is supported by both sides.
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 124 +++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 25 +++++-- net/rxrpc/call_event.c | 5 ++ net/rxrpc/conn_event.c | 17 +++-- net/rxrpc/conn_object.c | 6 ++ net/rxrpc/input.c | 26 +++++--- net/rxrpc/io_thread.c | 6 ++ net/rxrpc/misc.c | 4 +- net/rxrpc/output.c | 67 +++++++++++++++---- net/rxrpc/peer_event.c | 104 +++++++++++++++++++++++++++-- net/rxrpc/peer_object.c | 24 +++++-- net/rxrpc/proc.c | 9 +-- net/rxrpc/protocol.h | 13 ++-- net/rxrpc/sysctl.c | 6 +- net/rxrpc/txbuf.c | 3 +- 15 files changed, 382 insertions(+), 57 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d86b5f07d292..9dcadad88e76 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -364,6 +364,7 @@ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \ + EM(rxrpc_propose_ack_ping_for_mtu_probe, "MTUProb") \ EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \ @@ -478,6 +479,11 @@ EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") +#define rxrpc_pmtud_reduce_traces \ + EM(rxrpc_pmtud_reduce_ack, "Ack ") \ + EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ + E_(rxrpc_pmtud_reduce_route, "Route") + /* * Generate enums for tracing information. 
*/ @@ -498,6 +504,7 @@ enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte); enum rxrpc_conn_trace { rxrpc_conn_traces } __mode(byte); enum rxrpc_local_trace { rxrpc_local_traces } __mode(byte); enum rxrpc_peer_trace { rxrpc_peer_traces } __mode(byte); +enum rxrpc_pmtud_reduce_trace { rxrpc_pmtud_reduce_traces } __mode(byte); enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte); enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); @@ -534,6 +541,7 @@ rxrpc_congest_changes; rxrpc_congest_modes; rxrpc_conn_traces; rxrpc_local_traces; +rxrpc_pmtud_reduce_traces; rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; @@ -2040,6 +2048,122 @@ TRACE_EVENT(rxrpc_sack, __entry->sack) ); +TRACE_EVENT(rxrpc_pmtud_tx, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(unsigned short, pmtud_trial) + __field(unsigned short, pmtud_good) + __field(unsigned short, pmtud_bad) + ), + + TP_fast_assign( + __entry->peer_debug_id = call->peer->debug_id; + __entry->call_debug_id = call->debug_id; + __entry->ping_serial = call->conn->pmtud_probe; + __entry->pmtud_trial = call->peer->pmtud_trial; + __entry->pmtud_good = call->peer->pmtud_good; + __entry->pmtud_bad = call->peer->pmtud_bad; + ), + + TP_printk("P=%08x c=%08x pr=%08x %u-%u-%u", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->pmtud_good, + __entry->pmtud_trial, + __entry->pmtud_bad) + ); + +TRACE_EVENT(rxrpc_pmtud_rx, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial), + + TP_ARGS(conn, resp_serial), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(rxrpc_serial_t, resp_serial) + 
__field(unsigned short, max_data) + __field(u8, jumbo_max) + ), + + TP_fast_assign( + __entry->peer_debug_id = conn->peer->debug_id; + __entry->call_debug_id = conn->pmtud_call; + __entry->ping_serial = conn->pmtud_probe; + __entry->resp_serial = resp_serial; + __entry->max_data = conn->peer->max_data; + __entry->jumbo_max = conn->peer->pmtud_jumbo; + ), + + TP_printk("P=%08x c=%08x pr=%08x rr=%08x max=%u jm=%u", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->resp_serial, + __entry->max_data, + __entry->jumbo_max) + ); + +TRACE_EVENT(rxrpc_pmtud_lost, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial), + + TP_ARGS(conn, resp_serial), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(rxrpc_serial_t, resp_serial) + ), + + TP_fast_assign( + __entry->peer_debug_id = conn->peer->debug_id; + __entry->call_debug_id = conn->pmtud_call; + __entry->ping_serial = conn->pmtud_probe; + __entry->resp_serial = resp_serial; + ), + + TP_printk("P=%08x c=%08x pr=%08x rr=%08x", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->resp_serial) + ); + +TRACE_EVENT(rxrpc_pmtud_reduce, + TP_PROTO(struct rxrpc_peer *peer, rxrpc_serial_t serial, + unsigned int max_data, enum rxrpc_pmtud_reduce_trace reason), + + TP_ARGS(peer, serial, max_data, reason), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(rxrpc_serial_t, serial) + __field(unsigned int, max_data) + __field(enum rxrpc_pmtud_reduce_trace, reason) + ), + + TP_fast_assign( + __entry->peer_debug_id = peer->debug_id; + __entry->serial = serial; + __entry->max_data = max_data; + __entry->reason = reason; + ), + + TP_printk("P=%08x %s r=%08x m=%u", + __entry->peer_debug_id, + __print_symbolic(__entry->reason, rxrpc_pmtud_reduce_traces), + __entry->serial, __entry->max_data) + ); + #undef EM #undef E_ diff --git 
a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ab8e565cb20b..69e6f4b20bad 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,13 +344,25 @@ struct rxrpc_peer { time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ - unsigned int if_mtu; /* interface MTU for this peer */ - unsigned int mtu; /* network MTU for this peer */ - unsigned int maxdata; /* data size (MTU - hdrsize) */ - unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ struct sockaddr_rxrpc srx; /* remote address */ + /* Path MTU discovery [RFC8899] */ + unsigned int pmtud_trial; /* Current MTU probe size */ + unsigned int pmtud_good; /* Largest working MTU probe we've tried */ + unsigned int pmtud_bad; /* Smallest non-working MTU probe we've tried */ + bool pmtud_lost; /* T if MTU probe was lost */ + bool pmtud_probing; /* T if we have an active probe outstanding */ + bool pmtud_pending; /* T if a call to this peer should send a probe */ + u8 pmtud_jumbo; /* Max jumbo packets for the MTU */ + bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */ + unsigned int ackr_max_data; /* Maximum data advertised by peer */ + seqcount_t mtu_lock; /* Lockless MTU access management */ + unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */ + unsigned int max_data; /* Maximum packet data capacity for this peer */ + unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ + unsigned short tx_seg_max; /* Maximum number of transmissable segments */ + /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ @@ -531,6 +543,8 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ + rxrpc_serial_t pmtud_probe; /* Serial of MTU probe (or 0) */ + 
unsigned int pmtud_call; /* ID of call used for probe */ u32 service_id; /* Service ID, possibly upgraded */ u32 security_level; /* Security level selected */ u8 security_ix; /* security type */ @@ -1155,6 +1169,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) */ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); +void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); int rxrpc_send_abort_packet(struct rxrpc_call *); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); @@ -1166,6 +1181,8 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); */ void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *); void rxrpc_peer_keepalive_worker(struct work_struct *); +void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, + bool sendmsg_fail); /* * peer_object.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c4754cc9b8d4..1d889b6f0366 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -483,6 +483,11 @@ out: rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); + } else { + if (skb && + call->peer->ackr_adv_pmtud && + call->peer->pmtud_pending) + rxrpc_send_probe_for_pmtud(call); } if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 2a1396cd892f..f6c02cc44d98 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -92,7 +92,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct rxrpc_acktrailer trailer; size_t len; int ret, ioc; - u32 serial, mtu, call_id, padding; + u32 serial, max_mtu, if_mtu, call_id, padding; _enter("%d", conn->debug_id); @@ -150,8 +150,13 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; case 
RXRPC_PACKET_TYPE_ACK: - mtu = conn->peer->if_mtu; - mtu -= conn->peer->hdrsize; + if_mtu = conn->peer->if_mtu - conn->peer->hdrsize; + if (conn->peer->ackr_adv_pmtud) { + max_mtu = umax(conn->peer->max_data, rxrpc_rx_mtu); + } else { + if_mtu = umin(1444, if_mtu); + max_mtu = if_mtu; + } pkt.ack.bufferSpace = 0; pkt.ack.maxSkew = htons(skb ? skb->priority : 0); pkt.ack.firstPacket = htonl(chan->last_seq + 1); @@ -159,10 +164,10 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - trailer.maxMTU = htonl(rxrpc_rx_mtu); - trailer.ifMTU = htonl(mtu); + trailer.maxMTU = htonl(max_mtu); + trailer.ifMTU = htonl(if_mtu); trailer.rwind = htonl(rxrpc_rx_window_size); - trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max); + trailer.jumbo_max = 0; pkt.whdr.flags |= RXRPC_SLOW_START_OK; padding = 0; iov[0].iov_len += sizeof(pkt.ack); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 694c4df7a1a3..b0627398311b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -321,6 +321,12 @@ static void rxrpc_clean_up_connection(struct work_struct *work) list_del_init(&conn->proc_link); write_unlock(&rxnet->conn_lock); + if (conn->pmtud_probe) { + trace_rxrpc_pmtud_lost(conn, 0); + conn->peer->pmtud_probing = false; + conn->peer->pmtud_pending = true; + } + rxrpc_purge_queue(&conn->rx_queue); rxrpc_kill_client_conn(conn); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 49e35be7dc13..fd08d813ef29 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -692,8 +692,8 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb struct rxrpc_acktrailer *trailer) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - unsigned int mtu; + struct rxrpc_peer *peer = call->peer; + unsigned int max_data; bool wake = false; u32 rwind = ntohl(trailer->rwind); @@ -706,14 +706,22 
@@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - mtu = umin(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); + if (trailer->jumbo_max == 0) { + /* The peer says it supports pmtu discovery */ + peer->ackr_adv_pmtud = true; + } else { + peer->ackr_adv_pmtud = false; + } - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock(&peer->lock); + max_data = ntohl(trailer->maxMTU); + peer->ackr_max_data = max_data; + + if (max_data < peer->max_data) { + trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_data, + rxrpc_pmtud_reduce_ack); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); } if (wake) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 7af5adf53b25..bd6d4f5e97b4 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -364,6 +364,12 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (sp->hdr.callNumber == 0) return rxrpc_input_conn_packet(conn, skb); + /* Deal with path MTU discovery probing. */ + if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK && + conn->pmtud_probe && + after_eq(sp->ack.acked_serial, conn->pmtud_probe)) + rxrpc_input_probe_for_pmtud(conn, sp->ack.acked_serial, false); + /* Call-bound packets are routed by connection channel. */ channel = sp->hdr.cid & RXRPC_CHANNELMASK; chan = &conn->channels[channel]; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 657cf35089a6..8fcc8139d771 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -46,13 +46,13 @@ unsigned int rxrpc_rx_window_size = 255; * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet * made by gluing normal packets together that we're willing to handle. 
*/ -unsigned int rxrpc_rx_mtu = 5692; +unsigned int rxrpc_rx_mtu = RXRPC_JUMBO(46); /* * The maximum number of fragments in a received jumbo packet that we tell the * sender that we're willing to handle. */ -unsigned int rxrpc_rx_jumbo_max = 4; +unsigned int rxrpc_rx_jumbo_max = 46; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f8bb5250e849..a91be871ad96 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -82,10 +82,9 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); - unsigned int qsize, sack, wrap, to; + unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; rxrpc_seq_t window, wtop; int rsize; - u32 mtu, jmax; u8 *filler = txb->kvec[2].iov_base; u8 *sackp = txb->kvec[1].iov_base; @@ -132,16 +131,22 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, ack->reason = RXRPC_ACK_IDLE; } - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); txb->ack_rwind = rsize; - trailer->maxMTU = htonl(rxrpc_rx_mtu); - trailer->ifMTU = htonl(mtu); + + if_mtu = call->peer->if_mtu - call->peer->hdrsize; + if (call->peer->ackr_adv_pmtud) { + max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu); + } else { + if_mtu = umin(if_mtu, 1444); + max_mtu = if_mtu; + } + + trailer->maxMTU = htonl(max_mtu); + trailer->ifMTU = htonl(if_mtu); trailer->rwind = htonl(rsize); - trailer->jumbo_max = htonl(jmax); + trailer->jumbo_max = 0; /* Advertise pmtu discovery */ } /* @@ -176,7 +181,7 @@ no_slot: * Transmit an ACK packet. 
*/ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - int nr_kv) + int nr_kv, enum rxrpc_propose_ack_trace why) { struct kvec *kv = call->local->kvec; struct rxrpc_wire_header *whdr = kv[0].iov_base; @@ -209,13 +214,16 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len); - rxrpc_local_dont_fragment(conn->local, false); + rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_ack); + if (why == rxrpc_propose_ack_ping_for_mtu_probe && + ret == -EMSGSIZE) + rxrpc_input_probe_for_pmtud(conn, txb->serial, true); } else { trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); @@ -225,6 +233,13 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t if (txb->flags & RXRPC_REQUEST_ACK) call->peer->rtt_last_req = now; rxrpc_set_keepalive(call, now); + if (why == rxrpc_propose_ack_ping_for_mtu_probe) { + call->peer->pmtud_pending = false; + call->peer->pmtud_probing = true; + call->conn->pmtud_probe = txb->serial; + call->conn->pmtud_call = call->debug_id; + trace_rxrpc_pmtud_tx(call); + } } rxrpc_tx_backoff(call, ret); } @@ -254,21 +269,45 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_fill_out_ack(call, txb, ack_reason, serial); + /* Extend a path MTU probe ACK. 
*/ nr_kv = txb->nr_kvec; kv[0] = txb->kvec[0]; kv[1] = txb->kvec[1]; kv[2] = txb->kvec[2]; - // TODO: Extend a path MTU probe ACK + if (why == rxrpc_propose_ack_ping_for_mtu_probe) { + size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header); + + if (txb->len > probe_mtu) + goto skip; + while (txb->len < probe_mtu) { + size_t part = umin(probe_mtu - txb->len, PAGE_SIZE); + + kv[nr_kv].iov_base = page_address(ZERO_PAGE(0)); + kv[nr_kv].iov_len = part; + txb->len += part; + nr_kv++; + } + } call->ackr_nr_unacked = 0; atomic_set(&call->ackr_nr_consumed, 0); clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb, nr_kv); + rxrpc_send_ack_packet(call, txb, nr_kv, why); +skip: rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); } +/* + * Send an ACK probe for path MTU discovery. + */ +void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call) +{ + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_mtu_probe); +} + /* * Send an ABORT call packet. 
*/ @@ -501,7 +540,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (len >= sizeof(struct rxrpc_wire_header) + call->peer->maxdata) { + if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; } else { @@ -548,7 +587,7 @@ done: RX_USER_ABORT, ret); } - _leave(" = %d [%u]", ret, call->peer->maxdata); + _leave(" = %d [%u]", ret, call->peer->max_data); return ret; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 552ba84a255c..8fc9464a960c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -102,6 +102,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, */ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { + unsigned int max_data; + /* wind down the local interface MTU */ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) peer->if_mtu = mtu; @@ -120,11 +122,17 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) } } - if (mtu < peer->mtu) { - spin_lock(&peer->lock); - peer->mtu = mtu; - peer->maxdata = peer->mtu - peer->hdrsize; - spin_unlock(&peer->lock); + max_data = max_t(int, mtu - peer->hdrsize, 500); + if (max_data < peer->max_data) { + if (peer->pmtud_good > max_data) + peer->pmtud_good = max_data; + if (peer->pmtud_bad > max_data + 1) + peer->pmtud_bad = max_data + 1; + + trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); } } @@ -347,3 +355,89 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) _leave(""); } + +/* + * Do path MTU probing. 
+ */ +void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, + bool sendmsg_fail) +{ + struct rxrpc_peer *peer = conn->peer; + unsigned int max_data = peer->max_data; + int good, trial, bad, jumbo; + + good = peer->pmtud_good; + trial = peer->pmtud_trial; + bad = peer->pmtud_bad; + if (good >= bad - 1) { + conn->pmtud_probe = 0; + peer->pmtud_lost = false; + return; + } + + if (!peer->pmtud_probing) + goto send_probe; + + if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) { + /* Retry a lost probe. */ + if (!peer->pmtud_lost) { + trace_rxrpc_pmtud_lost(conn, acked_serial); + conn->pmtud_probe = 0; + peer->pmtud_lost = true; + goto send_probe; + } + + /* The probed size didn't seem to get through. */ + bad = trial; + peer->pmtud_bad = bad; + if (bad <= max_data) + max_data = bad - 1; + } else { + /* It did get through. */ + good = trial; + peer->pmtud_good = good; + if (good > max_data) + max_data = good; + } + + max_data = umin(max_data, peer->ackr_max_data); + if (max_data != peer->max_data) { + preempt_disable(); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); + preempt_enable(); + } + + jumbo = max_data + sizeof(struct rxrpc_jumbo_header); + jumbo /= RXRPC_JUMBO_SUBPKTLEN; + peer->pmtud_jumbo = jumbo; + + trace_rxrpc_pmtud_rx(conn, acked_serial); + conn->pmtud_probe = 0; + peer->pmtud_lost = false; + + if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2)) + trial = RXRPC_JUMBO(2); + else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4)) + trial = RXRPC_JUMBO(4); + else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3)) + trial = RXRPC_JUMBO(3); + else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6)) + trial = RXRPC_JUMBO(6); + else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5)) + trial = RXRPC_JUMBO(5); + else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8)) + trial = RXRPC_JUMBO(8); + else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7)) + trial = 
RXRPC_JUMBO(7); + else + trial = (good + bad) / 2; + peer->pmtud_trial = trial; + + if (good >= bad) + return; + +send_probe: + peer->pmtud_pending = true; +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 49dcda67a0d5..80ef6f06d512 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -162,6 +162,11 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local, #endif peer->if_mtu = 1500; + if (peer->max_data < peer->if_mtu - peer->hdrsize) { + trace_rxrpc_pmtud_reduce(peer, 0, peer->if_mtu - peer->hdrsize, + rxrpc_pmtud_reduce_route); + peer->max_data = peer->if_mtu - peer->hdrsize; + } memset(&fl, 0, sizeof(fl)); switch (peer->srx.transport.family) { @@ -199,8 +204,16 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local, } peer->if_mtu = dst_mtu(dst); + peer->hdrsize += dst->header_len + dst->trailer_len; + peer->tx_seg_max = dst->dev->gso_max_segs; dst_release(dst); + peer->max_data = umin(RXRPC_JUMBO(1), peer->if_mtu - peer->hdrsize); + peer->pmtud_good = 500; + peer->pmtud_bad = peer->if_mtu - peer->hdrsize + 1; + peer->pmtud_trial = umin(peer->max_data, peer->pmtud_bad - 1); + peer->pmtud_pending = true; + _leave(" [if_mtu %u]", peer->if_mtu); } @@ -223,6 +236,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); spin_lock_init(&peer->rtt_input_lock); + seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); rxrpc_peer_init_rtt(peer); @@ -242,9 +256,7 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(local, peer); - peer->mtu = peer->if_mtu; - peer->rtt_last_req = ktime_get_real(); + switch (peer->srx.transport.family) { case AF_INET: @@ -268,7 +280,11 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, } peer->hdrsize += sizeof(struct 
rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; + peer->max_data = peer->if_mtu - peer->hdrsize; + + rxrpc_assess_MTU_size(local, peer); + + peer->rtt_last_req = ktime_get_real(); } /* diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ce4d48bdfbe9..44722c226064 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -283,9 +283,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_puts(seq, - "Proto Local " - " Remote " - " Use SST MTU LastUse RTT RTO\n" + "Proto Local Remote Use SST Maxd LastUse RTT RTO\n" ); return 0; } @@ -298,13 +296,12 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, - "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %8u %8u\n", + "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8u %8u\n", lbuff, rbuff, refcount_read(&peer->ref), peer->cong_ssthresh, - peer->mtu, + peer->max_data, now - peer->last_tx_at, peer->srtt_us >> 3, peer->rto_us); diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 4fe6b4d20ada..42f70e4636f8 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -92,11 +92,16 @@ struct rxrpc_jumbo_header { /* * The maximum number of subpackets that can possibly fit in a UDP packet is: * - * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1 - * = ((65535 - 28 - 28) / 1416) + 1 - * = 46 non-terminal packets and 1 terminal packet. + * (max_UDP - wirehdr + jumbohdr) / (jumbohdr + 1412) + * = ((65535 - 28 + 4) / 1416) + * = 45 non-terminal packets and 1 terminal packet. 
*/ -#define RXRPC_MAX_NR_JUMBO 47 +#define RXRPC_MAX_NR_JUMBO 46 + +/* Size of a jumbo packet with N subpackets, excluding UDP+IP */ +#define RXRPC_JUMBO(N) ((int)sizeof(struct rxrpc_wire_header) + \ + RXRPC_JUMBO_DATALEN + \ + ((N) - 1) * RXRPC_JUMBO_SUBPKTLEN) /*****************************************************************************/ /* diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 9bf9a1f6e4cb..46a20cf4c402 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -11,6 +11,8 @@ #include "ar-internal.h" static struct ctl_table_header *rxrpc_sysctl_reg_table; +static const unsigned int rxrpc_rx_mtu_min = 500; +static const unsigned int rxrpc_jumbo_max = RXRPC_MAX_NR_JUMBO; static const unsigned int four = 4; static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; @@ -115,7 +117,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)SYSCTL_ONE, + .extra1 = (void *)&rxrpc_rx_mtu_min, .extra2 = (void *)&n_65535, }, { @@ -125,7 +127,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)SYSCTL_ONE, - .extra2 = (void *)&four, + .extra2 = (void *)&rxrpc_jumbo_max, }, }; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index c3913d8a50d3..2a4291617d40 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -179,7 +179,8 @@ static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); for (i = 0; i < txb->nr_kvec; i++) - if (txb->kvec[i].iov_base) + if (txb->kvec[i].iov_base && + !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base)))) page_frag_free(txb->kvec[i].iov_base); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); From 3d2bdf73cea57d7f6bf314aa1c948af11af94980 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:38 +0000 
Subject: [PATCH 0235/1386] rxrpc: Separate the packet length from the data length in rxrpc_txbuf Separate the packet length from the data length (txb->len) stored in the rxrpc_txbuf to make security calculations easier. Also store the allocation size as that's an upper bound on the size of the security wrapper and change a number of fields to unsigned short as the amount of data can't exceed the capacity of a UDP packet. Also, whilst we're at it, use kzalloc() for txbufs. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-11-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 8 +++++--- net/rxrpc/insecure.c | 1 + net/rxrpc/output.c | 7 ++++--- net/rxrpc/rxkad.c | 44 ++++++++++++++++++++++------------------- net/rxrpc/sendmsg.c | 1 - net/rxrpc/txbuf.c | 7 ++----- 6 files changed, 36 insertions(+), 32 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 69e6f4b20bad..a5c0bc917641 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -821,9 +821,11 @@ struct rxrpc_txbuf { rxrpc_serial_t serial; /* Last serial number transmitted with */ unsigned int call_debug_id; unsigned int debug_id; - unsigned int len; /* Amount of data in buffer */ - unsigned int space; /* Remaining data space */ - unsigned int offset; /* Offset of fill point */ + unsigned short len; /* Amount of data in buffer */ + unsigned short space; /* Remaining data space */ + unsigned short offset; /* Offset of fill point */ + unsigned short pkt_len; /* Size of packet content */ + unsigned short alloc_size; /* Amount of bufferage allocated */ unsigned int flags; #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index 751eb621021d..d665f486be5f 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -24,6 +24,7 @@ 
static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t rema static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { + txb->pkt_len = txb->len; return 0; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a91be871ad96..df9af4ad4260 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -383,11 +383,11 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; struct kvec *kv = &call->local->kvec[subpkt]; - size_t len = txb->len; + size_t len = txb->pkt_len; bool last, more; u8 flags; - _enter("%x,{%d}", txb->seq, txb->len); + _enter("%x,%zd", txb->seq, len); txb->serial = serial; @@ -441,6 +441,7 @@ dont_set_request_ack: whdr->cksum = txb->cksum; whdr->serviceId = htons(conn->service_id); kv->iov_base = whdr; + len += sizeof(*whdr); // TODO: Convert into a jumbo header for tail subpackets trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); @@ -509,7 +510,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t size_t len; int ret; - _enter("%x,{%d}", txb->seq, txb->len); + _enter("%x,{%d}", txb->seq, txb->pkt_len); len = rxrpc_prepare_data_packet(call, txb); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index e3194d73dd84..755897fab626 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -148,14 +148,14 @@ error: static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t remain, gfp_t gfp) { struct rxrpc_txbuf *txb; - size_t shdr, space; + size_t shdr, alloc, limit, part; remain = umin(remain, 65535 - sizeof(struct rxrpc_wire_header)); switch (call->conn->security_level) { default: - space = umin(remain, RXRPC_JUMBO_DATALEN); - return rxrpc_alloc_data_txbuf(call, space, 1, gfp); + alloc = umin(remain, RXRPC_JUMBO_DATALEN); + return rxrpc_alloc_data_txbuf(call, alloc, 1, gfp); case RXRPC_SECURITY_AUTH: shdr = sizeof(struct rxkad_level1_hdr); 
break; @@ -164,15 +164,21 @@ static struct rxrpc_txbuf *rxkad_alloc_txbuf(struct rxrpc_call *call, size_t rem break; } - space = umin(round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN), remain + shdr); - space = round_up(space, RXKAD_ALIGN); + limit = round_down(RXRPC_JUMBO_DATALEN, RXKAD_ALIGN) - shdr; + if (remain < limit) { + part = remain; + alloc = round_up(shdr + part, RXKAD_ALIGN); + } else { + part = limit; + alloc = RXRPC_JUMBO_DATALEN; + } - txb = rxrpc_alloc_data_txbuf(call, space, RXKAD_ALIGN, gfp); + txb = rxrpc_alloc_data_txbuf(call, alloc, RXKAD_ALIGN, gfp); if (!txb) return NULL; txb->offset += shdr; - txb->space -= shdr; + txb->space = part; return txb; } @@ -263,13 +269,13 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, check = txb->seq ^ call->call_id; hdr->data_size = htonl((u32)check << 16 | txb->len); - txb->len += sizeof(struct rxkad_level1_hdr); - pad = txb->len; + txb->pkt_len = sizeof(struct rxkad_level1_hdr) + txb->len; + pad = txb->pkt_len; pad = RXKAD_ALIGN - pad; pad &= RXKAD_ALIGN - 1; if (pad) { memset(txb->kvec[0].iov_base + txb->offset, 0, pad); - txb->len += pad; + txb->pkt_len += pad; } /* start the encryption afresh */ @@ -298,7 +304,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, struct rxkad_level2_hdr *rxkhdr = (void *)(whdr + 1); struct rxrpc_crypt iv; struct scatterlist sg; - size_t pad; + size_t content, pad; u16 check; int ret; @@ -309,23 +315,20 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, rxkhdr->data_size = htonl(txb->len | (u32)check << 16); rxkhdr->checksum = 0; - txb->len += sizeof(struct rxkad_level2_hdr); - pad = txb->len; - pad = RXKAD_ALIGN - pad; - pad &= RXKAD_ALIGN - 1; - if (pad) { + content = sizeof(struct rxkad_level2_hdr) + txb->len; + txb->pkt_len = round_up(content, RXKAD_ALIGN); + pad = txb->pkt_len - content; + if (pad) memset(txb->kvec[0].iov_base + txb->offset, 0, pad); - txb->len += pad; - } /* encrypt from the session key */ 
token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); - sg_init_one(&sg, rxkhdr, txb->len); + sg_init_one(&sg, rxkhdr, txb->pkt_len); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &sg, &sg, txb->len, iv.x); + skcipher_request_set_crypt(req, &sg, &sg, txb->pkt_len, iv.x); ret = crypto_skcipher_encrypt(req); skcipher_request_zero(req); return ret; @@ -384,6 +387,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: + txb->pkt_len = txb->len; ret = 0; break; case RXRPC_SECURITY_AUTH: diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 546abb463c3f..786c1fb1369a 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -391,7 +391,6 @@ reload: goto out; txb->kvec[0].iov_len += txb->len; - txb->len = txb->kvec[0].iov_len; rxrpc_queue_packet(rx, call, txb, notify_end_tx); txb = NULL; } diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 2a4291617d40..8b7c854ed3d7 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -24,7 +24,7 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ size_t total, hoff; void *buf; - txb = kmalloc(sizeof(*txb), gfp); + txb = kzalloc(sizeof(*txb), gfp); if (!txb) return NULL; @@ -49,14 +49,11 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ txb->last_sent = KTIME_MIN; txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); + txb->alloc_size = data_size; txb->space = data_size; - txb->len = 0; txb->offset = sizeof(*whdr); txb->flags = call->conn->out_clientflag; - txb->ack_why = 0; txb->seq = call->tx_prepared + 1; - txb->serial = 0; - txb->cksum = 0; txb->nr_kvec = 1; txb->kvec[0].iov_base = whdr; txb->kvec[0].iov_len = sizeof(*whdr); From b7313009c2e56d6e8bffd3d21c1a3a67a9149e2e Mon 
Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:39 +0000 Subject: [PATCH 0236/1386] rxrpc: Prepare to be able to send jumbo DATA packets Prepare to be able to send jumbo DATA packets if we decide to, but don't enable that yet. This will allow larger chunks of data to be sent without reducing the retryability as the subpackets in a jumbo packet can also be retransmitted individually. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-12-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 18 +++++++++- net/rxrpc/call_event.c | 48 ++++++++++++++----------- net/rxrpc/input.c | 36 +++++++++++-------- net/rxrpc/insecure.c | 2 ++ net/rxrpc/output.c | 80 ++++++++++++++++++++++++++++------------- net/rxrpc/rxkad.c | 13 +++++++ 6 files changed, 137 insertions(+), 60 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a5c0bc917641..4386b2e6cca5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -832,6 +832,7 @@ struct rxrpc_txbuf { __be16 cksum; /* Checksum to go in header */ unsigned short ack_rwind; /* ACK receive window */ u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ + bool jumboable; /* Can be non-terminal jumbo subpacket */ u8 nr_kvec; /* Amount of kvec[] used */ struct kvec kvec[3]; }; @@ -862,6 +863,21 @@ static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn return serial; } +/* + * Allocate the next serial n numbers on a connection. 0 must be skipped. 
+ */ +static inline rxrpc_serial_t rxrpc_get_next_serials(struct rxrpc_connection *conn, + unsigned int n) +{ + rxrpc_serial_t serial; + + serial = conn->tx_serial; + if (serial + n <= n) + serial = 1; + conn->tx_serial = serial + n; + return serial; +} + /* * af_rxrpc.c */ @@ -1176,7 +1192,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); -void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); +void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n); /* * peer_event.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1d889b6f0366..3379adfaaf65 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -124,7 +124,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) ktime_sub(resend_at, now)); txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_one(call, txb); + rxrpc_transmit_data(call, txb, 1); did_send = true; now = ktime_get_real(); @@ -164,7 +164,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) unacked = true; txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_one(call, txb); + rxrpc_transmit_data(call, txb, 1); did_send = true; rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); now = ktime_get_real(); @@ -231,15 +231,12 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) } } -static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) +static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) { - unsigned int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); - rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize; - rxrpc_seq_t tx_top = call->tx_top; - int space; + int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); + int in_flight = call->tx_top - call->acks_hard_ack; - space = wtop - 
tx_top; - return space > 0; + return max(winsize - in_flight, 0); } /* @@ -247,7 +244,7 @@ static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) */ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) { - struct rxrpc_txbuf *txb; + int space = rxrpc_tx_window_space(call); if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { if (list_empty(&call->tx_sendmsg)) @@ -255,22 +252,33 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) rxrpc_expose_client_call(call); } - while ((txb = list_first_entry_or_null(&call->tx_sendmsg, - struct rxrpc_txbuf, call_link))) { + while (space > 0) { + struct rxrpc_txbuf *head = NULL, *txb; + int count = 0, limit = min(space, 1); + + if (list_empty(&call->tx_sendmsg)) + break; + spin_lock(&call->tx_lock); - list_del(&txb->call_link); + do { + txb = list_first_entry(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link); + if (!head) + head = txb; + list_move_tail(&txb->call_link, &call->tx_buffer); + count++; + if (!txb->jumboable) + break; + } while (count < limit && !list_empty(&call->tx_sendmsg)); + spin_unlock(&call->tx_lock); call->tx_top = txb->seq; - list_add_tail(&txb->call_link, &call->tx_buffer); - if (txb->flags & RXRPC_LAST_PACKET) rxrpc_close_tx_phase(call); - rxrpc_transmit_one(call, txb); - - if (!rxrpc_tx_window_has_space(call)) - break; + space -= count; + rxrpc_transmit_data(call, head, count); } } @@ -285,7 +293,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) case RXRPC_CALL_SERVER_SEND_REPLY: case RXRPC_CALL_CLIENT_SEND_REQUEST: - if (!rxrpc_tx_window_has_space(call)) + if (!rxrpc_tx_window_space(call)) return; if (list_empty(&call->tx_sendmsg)) { rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index fd08d813ef29..8398fa10ee8d 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -693,9 +693,12 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb { struct rxrpc_skb_priv *sp = 
rxrpc_skb(skb); struct rxrpc_peer *peer = call->peer; - unsigned int max_data; + unsigned int max_data, capacity; bool wake = false; - u32 rwind = ntohl(trailer->rwind); + u32 max_mtu = ntohl(trailer->maxMTU); + //u32 if_mtu = ntohl(trailer->ifMTU); + u32 rwind = ntohl(trailer->rwind); + u32 jumbo_max = ntohl(trailer->jumbo_max); if (rwind > RXRPC_TX_MAX_WINDOW) rwind = RXRPC_TX_MAX_WINDOW; @@ -706,24 +709,29 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - if (trailer->jumbo_max == 0) { + max_mtu = clamp(max_mtu, 500, 65535); + peer->ackr_max_data = max_mtu; + + if (max_mtu < peer->max_data) { + trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu, + rxrpc_pmtud_reduce_ack); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_mtu; + write_seqcount_end(&peer->mtu_lock); + } + + max_data = umin(max_mtu, peer->max_data); + capacity = max_data; + capacity += sizeof(struct rxrpc_jumbo_header); /* First subpacket has main hdr, not jumbo */ + capacity /= sizeof(struct rxrpc_jumbo_header) + RXRPC_JUMBO_DATALEN; + + if (jumbo_max == 0) { /* The peer says it supports pmtu discovery */ peer->ackr_adv_pmtud = true; } else { peer->ackr_adv_pmtud = false; } - max_data = ntohl(trailer->maxMTU); - peer->ackr_max_data = max_data; - - if (max_data < peer->max_data) { - trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_data, - rxrpc_pmtud_reduce_ack); - write_seqcount_begin(&peer->mtu_lock); - peer->max_data = max_data; - write_seqcount_end(&peer->mtu_lock); - } - if (wake) wake_up(&call->waitq); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index d665f486be5f..e068f9b79d02 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -25,6 +25,8 @@ static struct rxrpc_txbuf *none_alloc_txbuf(struct rxrpc_call *call, size_t rema static int none_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { txb->pkt_len = txb->len; + if (txb->len == RXRPC_JUMBO_DATALEN) + txb->jumboable 
= true; return 0; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index df9af4ad4260..aededdd474d7 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -377,9 +377,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) */ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_serial_t serial, - int subpkt) + int subpkt, int nr_subpkts) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; struct kvec *kv = &call->local->kvec[subpkt]; @@ -399,6 +400,11 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; last = txb->flags & RXRPC_LAST_PACKET; + if (subpkt < nr_subpkts - 1) { + len = RXRPC_JUMBO_DATALEN; + goto dont_set_request_ack; + } + more = (!list_is_last(&txb->call_link, &call->tx_buffer) || !list_empty(&call->tx_sendmsg)); @@ -436,13 +442,25 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc } dont_set_request_ack: - whdr->flags = flags; - whdr->serial = htonl(txb->serial); - whdr->cksum = txb->cksum; - whdr->serviceId = htons(conn->service_id); - kv->iov_base = whdr; - len += sizeof(*whdr); - // TODO: Convert into a jumbo header for tail subpackets + /* The jumbo header overlays the wire header in the txbuf. 
*/ + if (subpkt < nr_subpkts - 1) + flags |= RXRPC_JUMBO_PACKET; + else + flags &= ~RXRPC_JUMBO_PACKET; + if (subpkt == 0) { + whdr->flags = flags; + whdr->serial = htonl(txb->serial); + whdr->cksum = txb->cksum; + whdr->serviceId = htons(conn->service_id); + kv->iov_base = whdr; + len += sizeof(*whdr); + } else { + jumbo->flags = flags; + jumbo->pad = 0; + jumbo->cksum = txb->cksum; + kv->iov_base = jumbo; + len += sizeof(*jumbo); + } trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); kv->iov_len = len; @@ -450,18 +468,22 @@ dont_set_request_ack: } /* - * Prepare a packet for transmission. + * Prepare a (jumbo) packet for transmission. */ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *head, int n) { + struct rxrpc_txbuf *txb = head; rxrpc_serial_t serial; size_t len = 0; /* Each transmission of a Tx packet needs a new serial number */ - serial = rxrpc_get_next_serial(call->conn); + serial = rxrpc_get_next_serials(call->conn, n); - len += rxrpc_prepare_data_subpacket(call, txb, serial, 0); - // TODO: Loop around adding tail subpackets + for (int i = 0; i < n; i++) { + len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n); + serial++; + txb = list_next_entry(txb, call_link); + } return len; } @@ -469,16 +491,24 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_tx /* * Set timeouts after transmitting a packet. 
*/ -static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) { + rxrpc_serial_t serial; ktime_t now = ktime_get_real(); bool ack_requested = txb->flags & RXRPC_REQUEST_ACK; + int i; call->tx_last_sent = now; - txb->last_sent = now; + + for (i = 0; i < n; i++) { + txb->last_sent = now; + ack_requested |= txb->flags & RXRPC_REQUEST_ACK; + serial = txb->serial; + txb = list_next_entry(txb, call_link); + } if (ack_requested) { - rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_data); + rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); call->peer->rtt_last_req = now; if (call->peer->rtt_count > 1) { @@ -502,7 +532,7 @@ static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbu /* * send a packet through the transport endpoint */ -static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) { struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; @@ -512,7 +542,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t _enter("%x,{%d}", txb->seq, txb->pkt_len); - len = rxrpc_prepare_data_packet(call, txb); + len = rxrpc_prepare_data_packet(call, txb, n); if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; @@ -524,7 +554,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t } } - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, 1, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -537,7 +567,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t * yet. 
*/ if (txb->seq == call->tx_transmitted + 1) - call->tx_transmitted = txb->seq; + call->tx_transmitted = txb->seq + n - 1; /* send the packet with the don't fragment bit set if we currently * think it's small enough */ @@ -568,7 +598,7 @@ retry: } rxrpc_tx_backoff(call, ret); - if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_frag) { + if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_nofrag) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; goto retry; @@ -576,7 +606,7 @@ retry: done: if (ret >= 0) { - rxrpc_tstamp_data_packets(call, txb); + rxrpc_tstamp_data_packets(call, txb, n); } else { /* Cancel the call if the initial transmission fails, * particularly if that's due to network routing issues that @@ -776,13 +806,13 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, } /* - * Transmit one packet. + * Transmit a packet, possibly gluing several subpackets together. */ -void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) { int ret; - ret = rxrpc_send_data_packet(call, txb); + ret = rxrpc_send_data_packet(call, txb, n); if (ret < 0) { switch (ret) { case -ENETUNREACH: diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 755897fab626..62b09d23ec08 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -392,15 +392,28 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) break; case RXRPC_SECURITY_AUTH: ret = rxkad_secure_packet_auth(call, txb, req); + if (txb->alloc_size == RXRPC_JUMBO_DATALEN) + txb->jumboable = true; break; case RXRPC_SECURITY_ENCRYPT: ret = rxkad_secure_packet_encrypt(call, txb, req); + if (txb->alloc_size == RXRPC_JUMBO_DATALEN) + txb->jumboable = true; break; default: ret = -EPERM; break; } + /* Clear excess space in the packet */ + if (txb->pkt_len < txb->alloc_size) { + struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; + size_t 
gap = txb->alloc_size - txb->pkt_len; + void *p = whdr + 1; + + memset(p + txb->pkt_len, 0, gap); + } + skcipher_request_free(req); _leave(" = %d [set %x]", ret, y); return ret; From 149d002bee706f51772bd320cda90c922844bb0e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:40 +0000 Subject: [PATCH 0237/1386] rxrpc: Add a tracepoint to show variables pertinent to jumbo packet size Add a tracepoint to be called right before packets are transmitted for the first time that shows variable values that are pertinent to how many subpackets will be added to a jumbo DATA packet. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-13-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 41 ++++++++++++++++++++++++++++++++++++ net/rxrpc/call_event.c | 2 ++ 2 files changed, 43 insertions(+) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9dcadad88e76..71f07e726a90 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -903,6 +903,47 @@ TRACE_EVENT(rxrpc_txqueue, __entry->tx_winsize) ); +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, int space), + + TP_ARGS(call, space), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, seq) + __field(u16, space) + __field(u16, tx_winsize) + __field(u16, cong_cwnd) + __field(u16, cong_extra) + __field(u16, in_flight) + __field(u16, prepared) + __field(u16, pmtud_jumbo) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->seq = call->tx_bottom; + __entry->space = space; + __entry->tx_winsize = call->tx_winsize; + __entry->cong_cwnd = call->cong_cwnd; + __entry->cong_extra = call->cong_extra; + __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->in_flight = call->tx_top - call->acks_hard_ack; + __entry->pmtud_jumbo = call->peer->pmtud_jumbo; + ), + + TP_printk("c=%08x q=%08x sp=%u tw=%u 
cw=%u+%u pr=%u if=%u pj=%u", + __entry->call, + __entry->seq, + __entry->space, + __entry->tx_winsize, + __entry->cong_cwnd, + __entry->cong_extra, + __entry->prepared, + __entry->in_flight, + __entry->pmtud_jumbo) + ); + TRACE_EVENT(rxrpc_rx_data, TP_PROTO(unsigned int call, rxrpc_seq_t seq, rxrpc_serial_t serial, u8 flags), diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3379adfaaf65..1f716f09d441 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -259,6 +259,8 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) if (list_empty(&call->tx_sendmsg)) break; + trace_rxrpc_transmit(call, space); + spin_lock(&call->tx_lock); do { txb = list_first_entry(&call->tx_sendmsg, From 9e3cccd176b5ec6ff78693287fb03097e453e69c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:41 +0000 Subject: [PATCH 0238/1386] rxrpc: Fix CPU time starvation in I/O thread Starvation can happen in the rxrpc I/O thread because it goes back to the top of the I/O loop after it does any one thing without trying to give any other connection or call CPU time. Also, because it processes one call packet at a time, it tries to do the retransmission loop after each ACK without checking to see if there are other ACKs already in the queue that can update the SACK state. Fix this by: (1) Add a received-packet queue on each call. (2) Distribute packets from the master Rx queue to the individual call, conn and error queues and 'poking' calls to add them to the attend queue first thing in the I/O thread. (3) Go through all the attention-seeking connections and calls before going back to the top of the I/O thread. Each queue is extracted as a whole and then gone through so that new additions to insert themselves into the queue. (4) Make the call event handler go through all the packets currently on the call's rx_queue before transmitting and retransmitting DATA packets. 
(5) Drop the skb argument from the call event handler as this is now replaced with the rx_queue. Instead, keep track of whether we received a packet or an ACK for the tests that used to rely on that. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-14-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 + net/rxrpc/ar-internal.h | 10 +++- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_event.c | 34 ++++++----- net/rxrpc/call_object.c | 2 + net/rxrpc/conn_client.c | 12 ++-- net/rxrpc/input.c | 2 +- net/rxrpc/io_thread.c | 108 ++++++++++++++++++----------------- net/rxrpc/peer_event.c | 2 +- 9 files changed, 98 insertions(+), 77 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 71f07e726a90..28fa7be31ff8 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -120,6 +120,7 @@ EM(rxrpc_call_poke_conn_abort, "Conn-abort") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ + EM(rxrpc_call_poke_rx_packet, "Rx-packet") \ EM(rxrpc_call_poke_set_timeout, "Set-timo") \ EM(rxrpc_call_poke_start, "Start") \ EM(rxrpc_call_poke_timer, "Timer") \ @@ -128,6 +129,7 @@ #define rxrpc_skb_traces \ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_last_nack, "GET last-nack") \ @@ -139,6 +141,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ diff --git a/net/rxrpc/ar-internal.h 
b/net/rxrpc/ar-internal.h index 4386b2e6cca5..55cc68dd1b40 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -705,6 +705,7 @@ struct rxrpc_call { /* Received data tracking */ struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ + struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ @@ -906,7 +907,7 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); -bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); +bool rxrpc_input_call_event(struct rxrpc_call *call); /* * call_object.c @@ -1352,6 +1353,13 @@ static inline bool after_eq(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) +{ + rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); + __skb_queue_tail(&call->rx_queue, skb); + rxrpc_poke_call(call, rxrpc_call_poke_rx_packet); +} + /* * debug tracing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0f5a1d77b890..a6776b1604ba 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -408,7 +408,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, } _leave(" = %p{%d}", call, call->debug_id); - rxrpc_input_call_event(call, skb); + rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1f716f09d441..ef47de3f41c6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -324,10 +324,11 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) /* * Handle retransmission and deferred ACK/abort generation. 
*/ -bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) +bool rxrpc_input_call_event(struct rxrpc_call *call) { + struct sk_buff *skb; ktime_t now, t; - bool resend = false; + bool resend = false, did_receive = false, saw_ack = false; s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -337,9 +338,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], call->events); - if (__rxrpc_call_is_complete(call)) - goto out; - /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ abort_code = smp_load_acquire(&call->send_abort); if (abort_code) { @@ -348,11 +346,21 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) goto out; } - if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) - goto out; + while ((skb = __skb_dequeue(&call->rx_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (__rxrpc_call_is_complete(call) || + skb->mark == RXRPC_SKB_MARK_ERROR) { + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + goto out; + } + + saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (skb) rxrpc_input_call_packet(call, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + did_receive = true; + } /* If we see our async-event poke, check for timeout trippage. 
*/ now = ktime_get_real(); @@ -418,12 +426,8 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_keepalive); } - if (skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) - rxrpc_congestion_degrade(call); - } + if (saw_ack) + rxrpc_congestion_degrade(call); if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) rxrpc_send_initial_ping(call); @@ -494,7 +498,7 @@ out: if (call->security) call->security->free_call_crypto(call); } else { - if (skb && + if (did_receive && call->peer->ackr_adv_pmtud && call->peer->pmtud_pending) rxrpc_send_probe_for_pmtud(call); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0df647d1d3a2..c026f16f891e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -148,6 +148,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, INIT_LIST_HEAD(&call->attend_link); INIT_LIST_HEAD(&call->tx_sendmsg); INIT_LIST_HEAD(&call->tx_buffer); + skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); @@ -536,6 +537,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) static void rxrpc_cleanup_ring(struct rxrpc_call *call) { rxrpc_purge_queue(&call->recvmsg_queue); + rxrpc_purge_queue(&call->rx_queue); rxrpc_purge_queue(&call->rx_oos_queue); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 86fb18bcd188..706631e6ac2f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -508,16 +508,18 @@ static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) void rxrpc_connect_client_calls(struct rxrpc_local *local) { struct rxrpc_call *call; + LIST_HEAD(new_client_calls); - while ((call = list_first_entry_or_null(&local->new_client_calls, - struct rxrpc_call, wait_link)) - ) { + spin_lock(&local->client_call_lock); + 
list_splice_tail_init(&local->new_client_calls, &new_client_calls); + spin_unlock(&local->client_call_lock); + + while ((call = list_first_entry_or_null(&new_client_calls, + struct rxrpc_call, wait_link))) { struct rxrpc_bundle *bundle = call->bundle; - spin_lock(&local->client_call_lock); list_move_tail(&call->wait_link, &bundle->waiting_calls); rxrpc_see_call(call, rxrpc_call_see_waiting_call); - spin_unlock(&local->client_call_lock); if (rxrpc_bundle_has_space(bundle)) rxrpc_activate_channels(bundle); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8398fa10ee8d..96fe005c5e81 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1124,5 +1124,5 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) break; } - rxrpc_input_call_event(call, skb); + rxrpc_input_call_event(call); } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index bd6d4f5e97b4..bc678a299bd8 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -338,7 +338,6 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; - bool ret; if (sp->hdr.securityIndex != conn->security_ix) return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, @@ -425,9 +424,9 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, peer_srx, skb); } - ret = rxrpc_input_call_event(call, skb); + rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return ret; + return true; } /* @@ -444,6 +443,8 @@ int rxrpc_io_thread(void *data) ktime_t now; #endif bool should_stop; + LIST_HEAD(conn_attend_q); + LIST_HEAD(call_attend_q); complete(&local->io_thread_ready); @@ -454,43 +455,25 @@ int rxrpc_io_thread(void *data) for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); - /* Deal with connections that want immediate attention. 
*/ - conn = list_first_entry_or_null(&local->conn_attend_q, - struct rxrpc_connection, - attend_link); - if (conn) { - spin_lock_bh(&local->lock); - list_del_init(&conn->attend_link); - spin_unlock_bh(&local->lock); + /* Inject a delay into packets if requested. */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + now = ktime_get_real(); + while ((skb = skb_peek(&local->rx_delay_queue))) { + if (ktime_before(now, skb->tstamp)) + break; + skb = skb_dequeue(&local->rx_delay_queue); + skb_queue_tail(&local->rx_queue, skb); + } +#endif - rxrpc_input_conn_event(conn, NULL); - rxrpc_put_connection(conn, rxrpc_conn_put_poke); - continue; + if (!skb_queue_empty(&local->rx_queue)) { + spin_lock_irq(&local->rx_queue.lock); + skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); + spin_unlock_irq(&local->rx_queue.lock); } - if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, - &local->client_conn_flags)) - rxrpc_discard_expired_client_conns(local); - - /* Deal with calls that want immediate attention. */ - if ((call = list_first_entry_or_null(&local->call_attend_q, - struct rxrpc_call, - attend_link))) { - spin_lock_bh(&local->lock); - list_del_init(&call->attend_link); - spin_unlock_bh(&local->lock); - - trace_rxrpc_call_poked(call); - rxrpc_input_call_event(call, NULL); - rxrpc_put_call(call, rxrpc_call_put_poke); - continue; - } - - if (!list_empty(&local->new_client_calls)) - rxrpc_connect_client_calls(local); - - /* Process received packets and errors. */ - if ((skb = __skb_dequeue(&rx_queue))) { + /* Distribute packets and errors. */ + while ((skb = __skb_dequeue(&rx_queue))) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: @@ -514,27 +497,46 @@ int rxrpc_io_thread(void *data) rxrpc_free_skb(skb, rxrpc_skb_put_unknown); break; } - continue; } - /* Inject a delay into packets if requested. 
*/ -#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY - now = ktime_get_real(); - while ((skb = skb_peek(&local->rx_delay_queue))) { - if (ktime_before(now, skb->tstamp)) - break; - skb = skb_dequeue(&local->rx_delay_queue); - skb_queue_tail(&local->rx_queue, skb); - } -#endif + /* Deal with connections that want immediate attention. */ + spin_lock_bh(&local->lock); + list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); + spin_unlock_bh(&local->lock); - if (!skb_queue_empty(&local->rx_queue)) { - spin_lock_irq(&local->rx_queue.lock); - skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); - spin_unlock_irq(&local->rx_queue.lock); - continue; + while ((conn = list_first_entry_or_null(&conn_attend_q, + struct rxrpc_connection, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&conn->attend_link); + spin_unlock_bh(&local->lock); + rxrpc_input_conn_event(conn, NULL); + rxrpc_put_connection(conn, rxrpc_conn_put_poke); } + if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) + rxrpc_discard_expired_client_conns(local); + + /* Deal with calls that want immediate attention. 
*/ + spin_lock_bh(&local->lock); + list_splice_tail_init(&local->call_attend_q, &call_attend_q); + spin_unlock_bh(&local->lock); + + while ((call = list_first_entry_or_null(&call_attend_q, + struct rxrpc_call, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&call->attend_link); + spin_unlock_bh(&local->lock); + trace_rxrpc_call_poked(call); + rxrpc_input_call_event(call); + rxrpc_put_call(call, rxrpc_call_put_poke); + } + + if (!list_empty(&local->new_client_calls)) + rxrpc_connect_client_calls(local); + set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 8fc9464a960c..ff30e0c05507 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -224,7 +224,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, rxrpc_see_call(call, rxrpc_call_see_distribute_error); rxrpc_set_call_completion(call, compl, 0, -err); - rxrpc_input_call_event(call, skb); + rxrpc_input_call_event(call); spin_lock(&peer->lock); } From cd69a07b6d186eeb7df20d8bcbef18d7bbd84c4b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:42 +0000 Subject: [PATCH 0239/1386] rxrpc: Fix injection of packet loss Fix the code that injects packet loss for testing to make sure call->tx_transmitted is updated. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-15-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/output.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index aededdd474d7..ca0da5e5d278 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -544,16 +544,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t len = rxrpc_prepare_data_packet(call, txb, n); - if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { - static int lose; - if ((lose++ & 7) == 7) { - ret = 0; - trace_rxrpc_tx_data(call, txb->seq, txb->serial, - txb->flags, true); - goto done; - } - } - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, n, len); msg.msg_name = &call->peer->srx.transport; @@ -579,6 +569,17 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t frag = rxrpc_tx_point_call_data_nofrag; } + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + + if ((lose++ & 7) == 7) { + ret = 0; + trace_rxrpc_tx_data(call, txb->seq, txb->serial, + txb->flags, true); + goto done; + } + } + retry: /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet From 81e7761be58aa915cc825afc6ff35dec63bf0b2f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:43 +0000 Subject: [PATCH 0240/1386] rxrpc: Only set DF=1 on initial DATA transmission Change how the DF flag is managed on DATA transmissions. Set it on initial transmission and don't set it on retransmissions. Then remove the handling for EMSGSIZE in rxrpc_send_data_packet() and just pretend it didn't happen, leaving it to the retransmission path to retry. The path-MTU discovery using PING ACKs is then used to probe for the maximum DATA size - though notification by ICMP will be used if one is received. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-16-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/output.c | 32 ++++++++++++++++---------------- net/rxrpc/proc.c | 5 +++-- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 55cc68dd1b40..84efa21f176c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -98,6 +98,7 @@ struct rxrpc_net { atomic_t stat_tx_data_send; atomic_t stat_tx_data_send_frag; atomic_t stat_tx_data_send_fail; + atomic_t stat_tx_data_send_msgsize; atomic_t stat_tx_data_underflow; atomic_t stat_tx_data_cwnd_reset; atomic_t stat_rx_data; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ca0da5e5d278..3d992023f80f 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -552,16 +552,11 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t msg.msg_controllen = 0; msg.msg_flags = MSG_SPLICE_PAGES; - /* Track what we've attempted to transmit at least once so that the - * retransmission algorithm doesn't try to resend what we haven't sent - * yet. + /* Send the packet with the don't fragment bit set unless we think it's + * too big or if this is a retransmission. 
*/ - if (txb->seq == call->tx_transmitted + 1) - call->tx_transmitted = txb->seq + n - 1; - - /* send the packet with the don't fragment bit set if we currently - * think it's small enough */ - if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { + if (txb->seq == call->tx_transmitted + 1 && + len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; } else { @@ -569,6 +564,13 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t frag = rxrpc_tx_point_call_data_nofrag; } + /* Track what we've attempted to transmit at least once so that the + * retransmission algorithm doesn't try to resend what we haven't sent + * yet. + */ + if (txb->seq == call->tx_transmitted + 1) + call->tx_transmitted = txb->seq + n - 1; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; @@ -580,7 +582,6 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t } } -retry: /* send the packet by UDP * - returns -EMSGSIZE if UDP would have to fragment the packet * to go out of the interface @@ -591,7 +592,11 @@ retry: ret = do_udp_sendmsg(conn->local->socket, &msg, len); conn->peer->last_tx_at = ktime_get_seconds(); - if (ret < 0) { + if (ret == -EMSGSIZE) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_msgsize); + trace_rxrpc_tx_packet(call->debug_id, call->local->kvec[0].iov_base, frag); + ret = 0; + } else if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, frag); } else { @@ -599,11 +604,6 @@ retry: } rxrpc_tx_backoff(call, ret); - if (ret == -EMSGSIZE && frag == rxrpc_tx_point_call_data_nofrag) { - rxrpc_local_dont_fragment(conn->local, false); - frag = rxrpc_tx_point_call_data_frag; - goto retry; - } done: if (ret >= 0) { diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 44722c226064..249e1ed9c5c9 100644 --- a/net/rxrpc/proc.c +++ 
b/net/rxrpc/proc.c @@ -473,10 +473,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq)); seq_printf(seq, - "Data : send=%u sendf=%u fail=%u\n", + "Data : send=%u sendf=%u fail=%u emsz=%u\n", atomic_read(&rxnet->stat_tx_data_send), atomic_read(&rxnet->stat_tx_data_send_frag), - atomic_read(&rxnet->stat_tx_data_send_fail)); + atomic_read(&rxnet->stat_tx_data_send_fail), + atomic_read(&rxnet->stat_tx_data_send_msgsize)); seq_printf(seq, "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n", atomic_read(&rxnet->stat_tx_data), From 976b0ca5aae741ef33f4cf4079a9a026331eae88 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:44 +0000 Subject: [PATCH 0241/1386] rxrpc: Timestamp DATA packets before transmitting them Move to setting the timestamp on DATA packets before transmitting them as part of the preparation. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-17-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/output.c | 56 ++++++++++++++-------------------------------- 1 file changed, 17 insertions(+), 39 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3d992023f80f..400c3389d492 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -377,7 +377,8 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) */ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_serial_t serial, - int subpkt, int nr_subpkts) + int subpkt, int nr_subpkts, + ktime_t now) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); @@ -437,8 +438,9 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) { - 
txb->flags |= RXRPC_REQUEST_ACK; flags |= RXRPC_REQUEST_ACK; + rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); + call->peer->rtt_last_req = now; } dont_set_request_ack: @@ -474,49 +476,25 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_tx { struct rxrpc_txbuf *txb = head; rxrpc_serial_t serial; + ktime_t now = ktime_get_real(); size_t len = 0; /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, n); for (int i = 0; i < n; i++) { - len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n); + txb->last_sent = now; + len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n, now); serial++; txb = list_next_entry(txb, call_link); } - return len; -} + /* Set timeouts */ + if (call->peer->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); -/* - * Set timeouts after transmitting a packet. - */ -static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) -{ - rxrpc_serial_t serial; - ktime_t now = ktime_get_real(); - bool ack_requested = txb->flags & RXRPC_REQUEST_ACK; - int i; - - call->tx_last_sent = now; - - for (i = 0; i < n; i++) { - txb->last_sent = now; - ack_requested |= txb->flags & RXRPC_REQUEST_ACK; - serial = txb->serial; - txb = list_next_entry(txb, call_link); - } - - if (ack_requested) { - rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); - - call->peer->rtt_last_req = now; - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); - - call->ack_lost_at = ktime_add(now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); - } + call->ack_lost_at = ktime_add(now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); } if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { @@ -527,6 +505,7 @@ static void rxrpc_tstamp_data_packets(struct rxrpc_call *call, struct rxrpc_txbu } 
rxrpc_set_keepalive(call, now); + return len; } /* @@ -538,6 +517,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t enum rxrpc_tx_point frag; struct msghdr msg; size_t len; + bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret; _enter("%x,{%d}", txb->seq, txb->pkt_len); @@ -605,20 +585,18 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_tx_backoff(call, ret); -done: - if (ret >= 0) { - rxrpc_tstamp_data_packets(call, txb, n); - } else { + if (ret < 0) { /* Cancel the call if the initial transmission fails, * particularly if that's due to network routing issues that * aren't going away anytime soon. The layer above can arrange * the retransmission. */ - if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) + if (new_call) rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_USER_ABORT, ret); } +done: _leave(" = %d [%u]", ret, call->peer->max_data); return ret; } From 6396b48ac0a77165f9c2c40ab03d6c8188c89739 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:45 +0000 Subject: [PATCH 0242/1386] rxrpc: Don't need barrier for ->tx_bottom and ->acks_hard_ack We don't need a barrier for the ->tx_bottom value (which indicates the lowest sequence still in the transmission queue) and the ->acks_hard_ack value (which tracks the DATA packets hard-ack'd by the latest ACK packet received and thus indicates which DATA packets can now be discarded) as the app thread doesn't use either value as a reference to memory to access. Rather, the app thread merely uses these as a guide to how much space is available in the transmission queue. Change the code to use READ/WRITE_ONCE() instead. Also, change rxrpc_check_tx_space() to use the same value for tx_bottom throughout.
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-18-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/sendmsg.c | 8 +++++--- net/rxrpc/txbuf.c | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 786c1fb1369a..467c9402882e 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -94,9 +94,11 @@ no_wait: */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) { + rxrpc_seq_t tx_bottom = READ_ONCE(call->tx_bottom); + if (_tx_win) - *_tx_win = call->tx_bottom; - return call->tx_prepared - call->tx_bottom < 256; + *_tx_win = tx_bottom; + return call->tx_prepared - tx_bottom < 256; } /* @@ -138,7 +140,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rtt = 2; timeout = rtt; - tx_start = smp_load_acquire(&call->acks_hard_ack); + tx_start = READ_ONCE(call->acks_hard_ack); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 8b7c854ed3d7..0cc8f49d69a9 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -214,14 +214,14 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) while ((txb = list_first_entry_or_null(&call->tx_buffer, struct rxrpc_txbuf, call_link))) { - hard_ack = smp_load_acquire(&call->acks_hard_ack); + hard_ack = call->acks_hard_ack; if (before(hard_ack, txb->seq)) break; if (txb->seq != call->tx_bottom + 1) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); - smp_store_release(&call->tx_bottom, call->tx_bottom + 1); + WRITE_ONCE(call->tx_bottom, call->tx_bottom + 1); list_del_rcu(&txb->call_link); trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); From b341a0263b1b804d329f864c2dc24815364510ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:46 +0000 Subject: [PATCH 0243/1386] rxrpc: Implement progressive 
transmission queue struct We need to scan the buffers in the transmission queue occasionally when processing ACKs, but the transmission queue is currently a linked list of transmission buffers which, when we eventually expand the Tx window to 8192 packets, will be very slow to walk. Instead, pull the fields we need to examine a lot (last sent time, retransmitted flag) into a new struct rxrpc_txqueue and make each one hold an array of 32 or 64 packets. The transmission queue is then a list of these structs, each pointing to a contiguous set of packets. Scanning is then a lot faster as the flags and timestamps are concentrated in the CPU dcache. The transmission timestamps are stored as a number of microseconds from a base ktime to reduce memory requirements. This should be fine provided we manage to transmit an entire buffer within an hour. This will make implementing RACK-TLP [RFC8985] easier as it will be less costly to scan the transmission buffers. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-19-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 98 ++++++++++++++--- net/rxrpc/ar-internal.h | 47 ++++++-- net/rxrpc/call_event.c | 202 ++++++++++++++++++++++------------- net/rxrpc/call_object.c | 38 ++++--- net/rxrpc/input.c | 74 ++++++++++--- net/rxrpc/output.c | 165 ++++++++++++++-------------- net/rxrpc/sendmsg.c | 69 +++++++++--- net/rxrpc/txbuf.c | 41 +------ 8 files changed, 468 insertions(+), 266 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 28fa7be31ff8..e6cf9ec940aa 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -297,7 +297,6 @@ #define rxrpc_txqueue_traces \ EM(rxrpc_txqueue_await_reply, "AWR") \ - EM(rxrpc_txqueue_dequeue, "DEQ") \ EM(rxrpc_txqueue_end, "END") \ EM(rxrpc_txqueue_queue, "QUE") \ EM(rxrpc_txqueue_queue_last, "QLS") \ @@ -482,6 +481,19 @@
EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") +#define rxrpc_tq_traces \ + EM(rxrpc_tq_alloc, "ALLOC") \ + EM(rxrpc_tq_cleaned, "CLEAN") \ + EM(rxrpc_tq_decant, "DCNT ") \ + EM(rxrpc_tq_decant_advance, "DCNT>") \ + EM(rxrpc_tq_queue, "QUEUE") \ + EM(rxrpc_tq_queue_dup, "QUE!!") \ + EM(rxrpc_tq_rotate, "ROT ") \ + EM(rxrpc_tq_rotate_and_free, "ROT-F") \ + EM(rxrpc_tq_rotate_and_keep, "ROT-K") \ + EM(rxrpc_tq_transmit, "XMIT ") \ + E_(rxrpc_tq_transmit_advance, "XMIT>") + #define rxrpc_pmtud_reduce_traces \ EM(rxrpc_pmtud_reduce_ack, "Ack ") \ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ @@ -518,6 +530,7 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); +enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); @@ -554,6 +567,7 @@ rxrpc_rtt_tx_traces; rxrpc_sack_traces; rxrpc_skb_traces; rxrpc_timer_traces; +rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; rxrpc_txqueue_traces; @@ -881,7 +895,7 @@ TRACE_EVENT(rxrpc_txqueue, __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_bottom) __field(rxrpc_seq_t, tx_top) - __field(rxrpc_seq_t, tx_prepared) + __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) ), @@ -891,7 +905,7 @@ TRACE_EVENT(rxrpc_txqueue, __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_bottom = call->tx_bottom; __entry->tx_top = call->tx_top; - __entry->tx_prepared = call->tx_prepared; + __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; ), @@ -902,14 +916,14 @@ TRACE_EVENT(rxrpc_txqueue, __entry->acks_hard_ack, __entry->tx_top - __entry->tx_bottom, __entry->tx_top - __entry->acks_hard_ack, - __entry->tx_prepared 
- __entry->tx_bottom, + __entry->send_top - __entry->tx_top, __entry->tx_winsize) ); TRACE_EVENT(rxrpc_transmit, - TP_PROTO(struct rxrpc_call *call, int space), + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t send_top, int space), - TP_ARGS(call, space), + TP_ARGS(call, send_top, space), TP_STRUCT__entry( __field(unsigned int, call) @@ -925,12 +939,12 @@ TRACE_EVENT(rxrpc_transmit, TP_fast_assign( __entry->call = call->debug_id; - __entry->seq = call->tx_bottom; + __entry->seq = call->tx_top + 1; __entry->space = space; __entry->tx_winsize = call->tx_winsize; __entry->cong_cwnd = call->cong_cwnd; __entry->cong_extra = call->cong_extra; - __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->prepared = send_top - call->tx_bottom; __entry->in_flight = call->tx_top - call->acks_hard_ack; __entry->pmtud_jumbo = call->peer->pmtud_jumbo; ), @@ -947,6 +961,32 @@ TRACE_EVENT(rxrpc_transmit, __entry->pmtud_jumbo) ); +TRACE_EVENT(rxrpc_tx_rotate, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t to), + + TP_ARGS(call, seq, to), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, seq) + __field(rxrpc_seq_t, to) + __field(rxrpc_seq_t, top) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->seq = seq; + __entry->to = to; + __entry->top = call->tx_top; + ), + + TP_printk("c=%08x q=%08x-%08x-%08x", + __entry->call, + __entry->seq, + __entry->to, + __entry->top) + ); + TRACE_EVENT(rxrpc_rx_data, TP_PROTO(unsigned int call, rxrpc_seq_t seq, rxrpc_serial_t serial, u8 flags), @@ -1621,10 +1661,11 @@ TRACE_EVENT(rxrpc_drop_ack, ); TRACE_EVENT(rxrpc_retransmit, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, ktime_t expiry), + TP_PROTO(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_txbuf *txb, ktime_t expiry), - TP_ARGS(call, seq, serial, expiry), + TP_ARGS(call, req, txb, expiry), TP_STRUCT__entry( __field(unsigned int, call) @@ -1635,8 +1676,8 @@ 
TRACE_EVENT(rxrpc_retransmit, TP_fast_assign( __entry->call = call->debug_id; - __entry->seq = seq; - __entry->serial = serial; + __entry->seq = req->seq; + __entry->serial = txb->serial; __entry->expiry = expiry; ), @@ -1714,9 +1755,9 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; - __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); - __entry->has_data = !list_empty(&call->tx_sendmsg); + __entry->has_data = call->tx_bottom != call->tx_top; ), TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", @@ -2024,6 +2065,33 @@ TRACE_EVENT(rxrpc_txbuf, __entry->ref) ); +TRACE_EVENT(rxrpc_tq, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, + rxrpc_seq_t seq, enum rxrpc_tq_trace trace), + + TP_ARGS(call, tq, seq, trace), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_tq_trace, trace) + ), + + TP_fast_assign( + __entry->call_debug_id = call->debug_id; + __entry->qbase = tq ? tq->qbase : call->tx_qbase; + __entry->seq = seq; + __entry->trace = trace; + ), + + TP_printk("c=%08x bq=%08x q=%08x %s", + __entry->call_debug_id, + __entry->qbase, + __entry->seq, + __print_symbolic(__entry->trace, rxrpc_tq_traces)) + ); + TRACE_EVENT(rxrpc_poke_call, TP_PROTO(struct rxrpc_call *call, bool busy, enum rxrpc_call_poke_trace what), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 84efa21f176c..bcce4862b0b7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -30,6 +30,7 @@ struct rxrpc_crypt { struct key_preparsed_payload; struct rxrpc_connection; struct rxrpc_txbuf; +struct rxrpc_txqueue; /* * Mark applied to socket buffers in skb->mark. 
skb->priority is used @@ -691,13 +692,17 @@ struct rxrpc_call { unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ + /* Sendmsg data tracking. */ + rxrpc_seq_t send_top; /* Highest Tx slot filled by sendmsg. */ + struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */ + /* Transmitted data tracking. */ spinlock_t tx_lock; /* Transmit queue lock */ - struct list_head tx_sendmsg; /* Sendmsg prepared packets */ - struct list_head tx_buffer; /* Buffer of transmissible packets */ + struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */ + struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */ + rxrpc_seq_t tx_qbase; /* First slot in tx_queue */ rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ - rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ u8 tx_winsize; /* Maximum size of Tx window */ @@ -815,9 +820,6 @@ struct rxrpc_send_params { * Buffer of data to be output as a packet. */ struct rxrpc_txbuf { - struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ - struct list_head tx_link; /* Link in live Enc queue or Tx queue */ - ktime_t last_sent; /* Time at which last transmitted */ refcount_t ref; rxrpc_seq_t seq; /* Sequence number of this packet */ rxrpc_serial_t serial; /* Last serial number transmitted with */ @@ -849,6 +851,36 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) return !rxrpc_sending_to_server(txb); } +/* + * Transmit queue element, including RACK [RFC8985] per-segment metadata. The + * transmission timestamp is in usec from the base. + */ +struct rxrpc_txqueue { + /* Start with the members we want to prefetch. 
*/ + struct rxrpc_txqueue *next; + ktime_t xmit_ts_base; + rxrpc_seq_t qbase; + + /* The arrays we want to pack into as few cache lines as possible. */ + struct { +#define RXRPC_NR_TXQUEUE BITS_PER_LONG +#define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) + struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; + unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; + } ____cacheline_aligned; +}; + +/* + * Data transmission request. + */ +struct rxrpc_send_data_req { + ktime_t now; /* Current time */ + struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ + rxrpc_seq_t seq; /* Sequence of first data */ + int n; /* Number of DATA packets to glue into jumbo */ + bool did_send; /* T if did actually send */ +}; + #include /* @@ -905,7 +937,6 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); bool rxrpc_input_call_event(struct rxrpc_call *call); @@ -1191,10 +1222,10 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); int rxrpc_send_abort_packet(struct rxrpc_call *); +void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); -void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n); /* * peer_event.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index ef47de3f41c6..90e3d9395675 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -62,57 +62,85 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) 
set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); } +/* + * Retransmit one or more packets. + */ +static void rxrpc_retransmit_data(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + ktime_t rto) +{ + struct rxrpc_txqueue *tq = req->tq; + unsigned int ix = req->seq & RXRPC_TXQ_MASK; + struct rxrpc_txbuf *txb = tq->bufs[ix]; + ktime_t xmit_ts, resend_at; + + _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id); + + xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); + resend_at = ktime_add(xmit_ts, rto); + trace_rxrpc_retransmit(call, req, txb, + ktime_sub(resend_at, req->now)); + + txb->flags |= RXRPC_TXBUF_RESENT; + rxrpc_send_data_packet(call, req); + rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); + + req->tq = NULL; + req->n = 0; + req->did_send = true; + req->now = ktime_get_real(); +} + /* * Perform retransmission of NAK'd and unack'd packets. */ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + }; struct rxrpc_ackpacket *ack = NULL; struct rxrpc_skb_priv *sp; + struct rxrpc_txqueue *tq; struct rxrpc_txbuf *txb; - rxrpc_seq_t transmitted = call->tx_transmitted; + rxrpc_seq_t transmitted = call->tx_transmitted, seq; ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - ktime_t resend_at = KTIME_MAX, now, delay; + ktime_t resend_at = KTIME_MAX, delay; bool unacked = false, did_send = false; - unsigned int i; + unsigned int qix; _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); - now = ktime_get_real(); - - if (list_empty(&call->tx_buffer)) + if (call->tx_bottom == call->tx_top) goto no_resend; trace_rxrpc_resend(call, ack_skb); - txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); + tq = call->tx_queue; + seq = call->tx_bottom; - /* Scan the soft ACK table without dropping the lock and resend any - * explicitly NAK'd packets. 
- */ + /* Scan the soft ACK table and resend any explicitly NAK'd packets. */ if (ack_skb) { sp = rxrpc_skb(ack_skb); ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - for (i = 0; i < sp->ack.nr_acks; i++) { - rxrpc_seq_t seq; + for (int i = 0; i < sp->ack.nr_acks; i++) { + rxrpc_seq_t aseq; if (ack->acks[i] & 1) continue; - seq = sp->ack.first_ack + i; - if (after(txb->seq, transmitted)) - break; - if (after(txb->seq, seq)) - continue; /* A new hard ACK probably came in */ - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { - if (txb->seq == seq) - goto found_txb; - } - goto no_further_resend; + aseq = sp->ack.first_ack + i; + while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE)) + tq = tq->next; + seq = aseq; + qix = seq - tq->qbase; + txb = tq->bufs[qix]; + if (after(seq, transmitted)) + goto no_further_resend; - found_txb: - resend_at = ktime_add(txb->last_sent, rto); + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); + resend_at = ktime_add(resend_at, rto); if (after(txb->serial, call->acks_highest_serial)) { - if (ktime_after(resend_at, now) && + if (ktime_after(resend_at, req.now) && ktime_before(resend_at, next_resend)) next_resend = resend_at; continue; /* Ack point not yet reached */ @@ -120,17 +148,13 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); - trace_rxrpc_retransmit(call, txb->seq, txb->serial, - ktime_sub(resend_at, now)); + req.tq = tq; + req.seq = seq; + req.n = 1; + rxrpc_retransmit_data(call, &req, rto); - txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_data(call, txb, 1); - did_send = true; - now = ktime_get_real(); - - if (list_is_last(&txb->call_link, &call->tx_buffer)) + if (after_eq(seq, call->tx_top)) goto no_further_resend; - txb = list_next_entry(txb, call_link); } } @@ -139,35 +163,43 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) * ACK'd or NACK'd in due course, so don't worry about it here; 
here we * need to consider retransmitting anything beyond that point. */ - if (after_eq(call->acks_prev_seq, call->tx_transmitted)) + seq = call->acks_prev_seq; + if (after_eq(seq, call->tx_transmitted)) goto no_further_resend; + seq++; - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { - resend_at = ktime_add(txb->last_sent, rto); + while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE)) + tq = tq->next; - if (before_eq(txb->seq, call->acks_prev_seq)) + while (before_eq(seq, call->tx_transmitted)) { + qix = seq - tq->qbase; + if (qix >= RXRPC_NR_TXQUEUE) { + tq = tq->next; continue; - if (after(txb->seq, call->tx_transmitted)) - break; /* Not transmitted yet */ + } + txb = tq->bufs[qix]; + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); + resend_at = ktime_add(resend_at, rto); if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && before(txb->serial, ntohl(ack->serial))) goto do_resend; /* Wasn't accounted for by a more recent ping. */ - if (ktime_after(resend_at, now)) { + if (ktime_after(resend_at, req.now)) { if (ktime_before(resend_at, next_resend)) next_resend = resend_at; + seq++; continue; } do_resend: unacked = true; - txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_data(call, txb, 1); - did_send = true; - rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); - now = ktime_get_real(); + req.tq = tq; + req.seq = seq; + req.n = 1; + rxrpc_retransmit_data(call, &req, rto); + seq++; } no_further_resend: @@ -175,7 +207,8 @@ no_resend: if (resend_at < KTIME_MAX) { delay = rxrpc_get_rto_backoff(call->peer, did_send); resend_at = ktime_add(resend_at, delay); - trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset); + trace_rxrpc_timer_set(call, resend_at - req.now, + rxrpc_timer_trace_resend_reset); } call->resend_at = resend_at; @@ -186,11 +219,11 @@ no_resend: * that an ACK got lost somewhere. Send a ping to find out instead of * retransmitting data. 
*/ - if (!did_send) { + if (!req.did_send) { ktime_t next_ping = ktime_add_us(call->acks_latest_ts, call->peer->srtt_us >> 3); - if (ktime_sub(next_ping, now) <= 0) + if (ktime_sub(next_ping, req.now) <= 0) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_0_retrans); } @@ -240,47 +273,68 @@ static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) } /* - * Decant some if the sendmsg prepared queue into the transmission buffer. + * Transmit some as-yet untransmitted data. */ -static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) { int space = rxrpc_tx_window_space(call); if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { - if (list_empty(&call->tx_sendmsg)) + if (call->send_top == call->tx_top) return; rxrpc_expose_client_call(call); } while (space > 0) { - struct rxrpc_txbuf *head = NULL, *txb; - int count = 0, limit = min(space, 1); + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + .seq = call->tx_transmitted + 1, + .n = 0, + }; + struct rxrpc_txqueue *tq; + struct rxrpc_txbuf *txb; + rxrpc_seq_t send_top, seq; + int limit = min(space, 1); - if (list_empty(&call->tx_sendmsg)) + /* Order send_top before the contents of the new txbufs and + * txqueue pointers + */ + send_top = smp_load_acquire(&call->send_top); + if (call->tx_top == send_top) break; - trace_rxrpc_transmit(call, space); + trace_rxrpc_transmit(call, send_top, space); + + tq = call->tx_qtail; + seq = call->tx_top; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant); - spin_lock(&call->tx_lock); do { - txb = list_first_entry(&call->tx_sendmsg, - struct rxrpc_txbuf, call_link); - if (!head) - head = txb; - list_move_tail(&txb->call_link, &call->tx_buffer); - count++; + int ix; + + seq++; + ix = seq & RXRPC_TXQ_MASK; + if (!ix) { + tq = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant_advance); + } + if (!req.tq) + req.tq = tq; + txb = tq->bufs[ix]; + req.n++; if (!txb->jumboable) 
break; - } while (count < limit && !list_empty(&call->tx_sendmsg)); + } while (req.n < limit && before(seq, send_top)); - spin_unlock(&call->tx_lock); - - call->tx_top = txb->seq; - if (txb->flags & RXRPC_LAST_PACKET) + if (txb->flags & RXRPC_LAST_PACKET) { rxrpc_close_tx_phase(call); + tq = NULL; + } + call->tx_qtail = tq; + call->tx_top = seq; - space -= count; - rxrpc_transmit_data(call, head, count); + space -= req.n; + rxrpc_send_data_packet(call, &req); } } @@ -288,7 +342,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: - if (list_empty(&call->tx_sendmsg)) + if (call->tx_bottom == READ_ONCE(call->send_top)) return; rxrpc_begin_service_reply(call); fallthrough; @@ -297,11 +351,11 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) case RXRPC_CALL_CLIENT_SEND_REQUEST: if (!rxrpc_tx_window_space(call)) return; - if (list_empty(&call->tx_sendmsg)) { + if (call->tx_bottom == READ_ONCE(call->send_top)) { rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_decant_prepared_tx(call); + rxrpc_transmit_fresh_data(call); break; default: return; @@ -503,8 +557,6 @@ out: call->peer->pmtud_pending) rxrpc_send_probe_for_pmtud(call); } - if (call->acks_hard_ack != call->tx_bottom) - rxrpc_shrink_call_tx_buffer(call); _leave(""); return true; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c026f16f891e..a9682b31a4f9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -146,8 +146,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); INIT_LIST_HEAD(&call->attend_link); - INIT_LIST_HEAD(&call->tx_sendmsg); - INIT_LIST_HEAD(&call->tx_buffer); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_oos_queue); @@ -532,9 +530,26 @@ void rxrpc_get_call(struct rxrpc_call *call, 
enum rxrpc_call_trace why) } /* - * Clean up the Rx skb ring. + * Clean up the transmission buffers. */ -static void rxrpc_cleanup_ring(struct rxrpc_call *call) +static void rxrpc_cleanup_tx_buffers(struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq, *next; + + for (tq = call->tx_queue; tq; tq = next) { + next = tq->next; + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + if (tq->bufs[i]) + rxrpc_put_txbuf(tq->bufs[i], rxrpc_txbuf_put_cleaned); + trace_rxrpc_tq(call, tq, 0, rxrpc_tq_cleaned); + kfree(tq); + } +} + +/* + * Clean up the receive buffers. + */ +static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) { rxrpc_purge_queue(&call->recvmsg_queue); rxrpc_purge_queue(&call->rx_queue); @@ -673,23 +688,12 @@ static void rxrpc_rcu_free_call(struct rcu_head *rcu) static void rxrpc_destroy_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); - struct rxrpc_txbuf *txb; del_timer_sync(&call->timer); rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - rxrpc_cleanup_ring(call); - while ((txb = list_first_entry_or_null(&call->tx_sendmsg, - struct rxrpc_txbuf, call_link))) { - list_del(&txb->call_link); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); - } - while ((txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link))) { - list_del(&txb->call_link); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); - } - + rxrpc_cleanup_tx_buffers(call); + rxrpc_cleanup_rx_buffers(call); rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); rxrpc_deactivate_bundle(call->bundle); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 96fe005c5e81..cfdd23042d4c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -214,24 +214,71 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { - struct rxrpc_txbuf 
*txb; + struct rxrpc_txqueue *tq = call->tx_queue; + rxrpc_seq_t seq = call->tx_bottom + 1; bool rot_last = false; - list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { - if (before_eq(txb->seq, call->acks_hard_ack)) - continue; - if (txb->flags & RXRPC_LAST_PACKET) { + _enter("%x,%x,%x", call->tx_bottom, call->acks_hard_ack, to); + + trace_rxrpc_tx_rotate(call, seq, to); + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); + + /* We may have a left over fully-consumed buffer at the front that we + * couldn't drop before (rotate_and_keep below). + */ + if (seq == call->tx_qbase + RXRPC_NR_TXQUEUE) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } + + do { + unsigned int ix = seq - call->tx_qbase; + + _debug("tq=%x seq=%x i=%d f=%x", tq->qbase, seq, ix, tq->bufs[ix]->flags); + if (tq->bufs[ix]->flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } - if (txb->seq == to) - break; + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); + tq->bufs[ix] = NULL; + + WRITE_ONCE(call->tx_bottom, seq); + WRITE_ONCE(call->acks_hard_ack, seq); + trace_rxrpc_txqueue(call, (rot_last ? 
+ rxrpc_txqueue_rotate_last : + rxrpc_txqueue_rotate)); + + seq++; + if (!(seq & RXRPC_TXQ_MASK)) { + prefetch(tq->next); + if (tq != call->tx_qtail) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } else { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_keep); + tq = NULL; + break; + } + } + + } while (before_eq(seq, to)); + + if (rot_last) { + set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); + if (tq) { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + call->tx_queue = NULL; + } } - if (rot_last) - set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); - - _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); + _debug("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); if (call->acks_lowest_nak == call->acks_hard_ack) { call->acks_lowest_nak = to; @@ -240,11 +287,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, call->acks_lowest_nak = to; } - smp_store_release(&call->acks_hard_ack, to); - - trace_rxrpc_txqueue(call, (rot_last ? - rxrpc_txqueue_rotate_last : - rxrpc_txqueue_rotate)); wake_up(&call->waitq); return rot_last; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 400c3389d492..c2044d593237 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -375,10 +375,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) /* * Prepare a (sub)packet for transmission. 
*/ -static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - rxrpc_serial_t serial, - int subpkt, int nr_subpkts, - ktime_t now) +static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_txbuf *txb, + rxrpc_serial_t serial, int subpkt) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); @@ -386,7 +386,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc struct rxrpc_connection *conn = call->conn; struct kvec *kv = &call->local->kvec[subpkt]; size_t len = txb->pkt_len; - bool last, more; + bool last; u8 flags; _enter("%x,%zd", txb->seq, len); @@ -401,14 +401,11 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; last = txb->flags & RXRPC_LAST_PACKET; - if (subpkt < nr_subpkts - 1) { + if (subpkt < req->n - 1) { len = RXRPC_JUMBO_DATALEN; goto dont_set_request_ack; } - more = (!list_is_last(&txb->call_link, &call->tx_buffer) || - !list_empty(&call->tx_sendmsg)); - /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. 
* @@ -430,7 +427,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc why = rxrpc_reqack_more_rtt; else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; - else if (!last && !more) + else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; else goto dont_set_request_ack; @@ -439,13 +436,13 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; - rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); - call->peer->rtt_last_req = now; + rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); + call->peer->rtt_last_req = req->now; } dont_set_request_ack: /* The jumbo header overlays the wire header in the txbuf. */ - if (subpkt < nr_subpkts - 1) + if (subpkt < req->n - 1) flags |= RXRPC_JUMBO_PACKET; else flags &= ~RXRPC_JUMBO_PACKET; @@ -469,62 +466,100 @@ dont_set_request_ack: return len; } +/* + * Prepare a transmission queue object for initial transmission. Returns the + * number of microseconds since the transmission queue base timestamp. + */ +static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, + struct rxrpc_send_data_req *req) +{ + if (!tq) + return 0; + if (tq->xmit_ts_base == KTIME_MIN) { + tq->xmit_ts_base = req->now; + return 0; + } + return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base)); +} + /* * Prepare a (jumbo) packet for transmission. 
*/ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *head, int n) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { - struct rxrpc_txbuf *txb = head; + struct rxrpc_txqueue *tq = req->tq; rxrpc_serial_t serial; - ktime_t now = ktime_get_real(); + unsigned int xmit_ts; + rxrpc_seq_t seq = req->seq; size_t len = 0; - /* Each transmission of a Tx packet needs a new serial number */ - serial = rxrpc_get_next_serials(call->conn, n); + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); - for (int i = 0; i < n; i++) { - txb->last_sent = now; - len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n, now); + /* Each transmission of a Tx packet needs a new serial number */ + serial = rxrpc_get_next_serials(call->conn, req->n); + + call->tx_last_sent = req->now; + xmit_ts = rxrpc_prepare_txqueue(tq, req); + prefetch(tq->next); + + for (int i = 0;;) { + int ix = seq & RXRPC_TXQ_MASK; + struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; + + _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); + tq->segment_xmit_ts[ix] = xmit_ts; + len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); serial++; - txb = list_next_entry(txb, call_link); + seq++; + i++; + if (i >= req->n) + break; + if (!(seq & RXRPC_TXQ_MASK)) { + tq = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance); + xmit_ts = rxrpc_prepare_txqueue(tq, req); + } } /* Set timeouts */ - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); - - call->ack_lost_at = ktime_add(now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); - } - if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); - call->expect_rx_by = ktime_add(now, delay); + call->expect_rx_by = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } + if (call->resend_at == 
KTIME_MAX) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); - rxrpc_set_keepalive(call, now); + call->resend_at = ktime_add(req->now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); + } + + rxrpc_set_keepalive(call, req->now); return len; } /* - * send a packet through the transport endpoint + * Send one or more packets through the transport endpoint */ -static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) +void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; + struct rxrpc_txqueue *tq = req->tq; + struct rxrpc_txbuf *txb; struct msghdr msg; + rxrpc_seq_t seq = req->seq; size_t len; bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret; - _enter("%x,{%d}", txb->seq, txb->pkt_len); + _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); - len = rxrpc_prepare_data_packet(call, txb, n); + len = rxrpc_prepare_data_packet(call, req); + txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, n, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -535,7 +570,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t /* Send the packet with the don't fragment bit set unless we think it's * too big or if this is a retransmission. */ - if (txb->seq == call->tx_transmitted + 1 && + if (seq == call->tx_transmitted + 1 && len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; @@ -548,8 +583,8 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t * retransmission algorithm doesn't try to resend what we haven't sent * yet. 
*/ - if (txb->seq == call->tx_transmitted + 1) - call->tx_transmitted = txb->seq + n - 1; + if (seq == call->tx_transmitted + 1) + call->tx_transmitted = seq + req->n - 1; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; @@ -586,19 +621,21 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_tx_backoff(call, ret); if (ret < 0) { - /* Cancel the call if the initial transmission fails, - * particularly if that's due to network routing issues that - * aren't going away anytime soon. The layer above can arrange - * the retransmission. + /* Cancel the call if the initial transmission fails or if we + * hit network routing issues that aren't going away + * anytime soon. The layer above can arrange the + * retransmission. */ - if (new_call) + if (new_call || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH || + ret == -ECONNREFUSED) rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_USER_ABORT, ret); } done: _leave(" = %d [%u]", ret, call->peer->max_data); - return ret; } /* @@ -773,41 +810,3 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) peer->last_tx_at = ktime_get_seconds(); _leave(""); } - -/* - * Schedule an instant Tx resend. - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call, - struct rxrpc_txbuf *txb) -{ - if (!__rxrpc_call_is_complete(call)) - kdebug("resend"); -} - -/* - * Transmit a packet, possibly gluing several subpackets together.
- */ -void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) -{ - int ret; - - ret = rxrpc_send_data_packet(call, txb, n); - if (ret < 0) { - switch (ret) { - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - 0, ret); - break; - default: - _debug("need instant resend %d", ret); - rxrpc_instant_resend(call, txb); - } - } else { - ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - - call->resend_at = ktime_add(ktime_get_real(), delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx); - } -} diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 467c9402882e..85b35b11755d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -98,7 +98,7 @@ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) if (_tx_win) *_tx_win = tx_bottom; - return call->tx_prepared - tx_bottom < 256; + return call->send_top - tx_bottom < 256; } /* @@ -242,36 +242,74 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { + struct rxrpc_txqueue *sq = call->send_queue; rxrpc_seq_t seq = txb->seq; bool poke, last = txb->flags & RXRPC_LAST_PACKET; - + int ix = seq & RXRPC_TXQ_MASK; rxrpc_inc_stat(call->rxnet, stat_tx_data); - ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); - - /* We have to set the timestamp before queueing as the retransmit - * algorithm can see the packet as soon as we queue it. 
- */ - txb->last_sent = ktime_get_real(); + ASSERTCMP(txb->seq, ==, call->send_top + 1); if (last) trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); + if (WARN_ON_ONCE(sq->bufs[ix])) + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue_dup); + else + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue); + /* Add the packet to the call's output buffer */ spin_lock(&call->tx_lock); - poke = list_empty(&call->tx_sendmsg); - list_add_tail(&txb->call_link, &call->tx_sendmsg); - call->tx_prepared = seq; - if (last) + poke = (READ_ONCE(call->tx_bottom) == call->send_top); + sq->bufs[ix] = txb; + /* Order send_top after the queue->next pointer and txb content. */ + smp_store_release(&call->send_top, seq); + if (last) { rxrpc_notify_end_tx(rx, call, notify_end_tx); + call->send_queue = NULL; + } spin_unlock(&call->tx_lock); if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } +/* + * Allocate a new txqueue unit and add it to the transmission queue. + */ +static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq; + + tq = kzalloc(sizeof(*tq), sk->sk_allocation); + if (!tq) + return -ENOMEM; + + tq->xmit_ts_base = KTIME_MIN; + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + tq->segment_xmit_ts[i] = UINT_MAX; + + if (call->send_queue) { + tq->qbase = call->send_top + 1; + call->send_queue->next = tq; + call->send_queue = tq; + } else if (WARN_ON(call->tx_queue)) { + kfree(tq); + return -ENOMEM; + } else { + tq->qbase = 0; + call->tx_qbase = 0; + call->send_queue = tq; + call->tx_qtail = tq; + call->tx_queue = tq; + } + + trace_rxrpc_tq(call, tq, call->send_top, rxrpc_tq_alloc); + return 0; +} + /* * send data through a socket * - must be called in process context @@ -346,6 +384,13 @@ reload: if (!rxrpc_check_tx_space(call, NULL)) goto wait_for_space; + /* See if we need to begin/extend the Tx queue. 
*/ + if (!call->send_queue || !((call->send_top + 1) & RXRPC_TXQ_MASK)) { + ret = rxrpc_alloc_txqueue(sk, call); + if (ret < 0) + goto maybe_error; + } + /* Work out the maximum size of a packet. Assume that * the security header is going to be in the padded * region (enc blocksize), but the trailer is not. diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 0cc8f49d69a9..067223c8c35f 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -43,17 +43,14 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ whdr = buf + hoff; - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); - txb->last_sent = KTIME_MIN; txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->alloc_size = data_size; txb->space = data_size; txb->offset = sizeof(*whdr); txb->flags = call->conn->out_clientflag; - txb->seq = call->tx_prepared + 1; + txb->seq = call->send_top + 1; txb->nr_kvec = 1; txb->kvec[0].iov_base = whdr; txb->kvec[0].iov_len = sizeof(*whdr); @@ -114,8 +111,6 @@ struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_s filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); @@ -200,37 +195,3 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) rxrpc_free_txbuf(txb); } } - -/* - * Shrink the transmit buffer. 
- */ -void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) -{ - struct rxrpc_txbuf *txb; - rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); - bool wake = false; - - _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); - - while ((txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link))) { - hard_ack = call->acks_hard_ack; - if (before(hard_ack, txb->seq)) - break; - - if (txb->seq != call->tx_bottom + 1) - rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); - ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); - WRITE_ONCE(call->tx_bottom, call->tx_bottom + 1); - list_del_rcu(&txb->call_link); - - trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); - - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); - if (after(call->acks_hard_ack, call->tx_bottom + 128)) - wake = true; - } - - if (wake) - wake_up(&call->waitq); -} From 692c4caa074c0d6092bd713babc6fc3872b5592a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:47 +0000 Subject: [PATCH 0244/1386] rxrpc: call->acks_hard_ack is now the same call->tx_bottom, so remove it Now that packets are removed from the Tx queue in the rotation function rather than being cleaned up later, call->acks_hard_ack now advances in step with call->tx_bottom, so remove it. Some of the places call->acks_hard_ack is used in the rxrpc tracepoints are replaced by call->acks_first_seq instead as that's the peer's reported idea of the hard-ACK point. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-20-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 20 ++++++++++---------- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_event.c | 4 ++-- net/rxrpc/input.c | 17 ++++++++--------- net/rxrpc/proc.c | 6 +++--- net/rxrpc/sendmsg.c | 6 +++--- 6 files changed, 26 insertions(+), 28 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e6cf9ec940aa..0f253287de00 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -892,8 +892,8 @@ TRACE_EVENT(rxrpc_txqueue, TP_STRUCT__entry( __field(unsigned int, call) __field(enum rxrpc_txqueue_trace, why) - __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_bottom) + __field(rxrpc_seq_t, acks_first_seq) __field(rxrpc_seq_t, tx_top) __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) @@ -902,8 +902,8 @@ TRACE_EVENT(rxrpc_txqueue, TP_fast_assign( __entry->call = call->debug_id; __entry->why = why; - __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_bottom = call->tx_bottom; + __entry->acks_first_seq = call->acks_first_seq; __entry->tx_top = call->tx_top; __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; @@ -913,9 +913,9 @@ TRACE_EVENT(rxrpc_txqueue, __entry->call, __print_symbolic(__entry->why, rxrpc_txqueue_traces), __entry->tx_bottom, - __entry->acks_hard_ack, - __entry->tx_top - __entry->tx_bottom, - __entry->tx_top - __entry->acks_hard_ack, + __entry->acks_first_seq, + __entry->acks_first_seq - __entry->tx_bottom, + __entry->tx_top - __entry->acks_first_seq, __entry->send_top - __entry->tx_top, __entry->tx_winsize) ); @@ -945,7 +945,7 @@ TRACE_EVENT(rxrpc_transmit, __entry->cong_cwnd = call->cong_cwnd; __entry->cong_extra = call->cong_extra; __entry->prepared = send_top - call->tx_bottom; - __entry->in_flight = call->tx_top - call->acks_hard_ack; + 
__entry->in_flight = call->tx_top - call->tx_bottom; __entry->pmtud_jumbo = call->peer->pmtud_jumbo; ), @@ -1707,7 +1707,7 @@ TRACE_EVENT(rxrpc_congest, TP_fast_assign( __entry->call = call->debug_id; __entry->change = change; - __entry->hard_ack = call->acks_hard_ack; + __entry->hard_ack = call->acks_first_seq; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; @@ -1754,7 +1754,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->mode = call->cong_mode; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; - __entry->hard_ack = call->acks_hard_ack; + __entry->hard_ack = call->acks_first_seq; __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); __entry->has_data = call->tx_bottom != call->tx_top; @@ -1855,7 +1855,7 @@ TRACE_EVENT(rxrpc_resend, TP_fast_assign( struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; - __entry->seq = call->acks_hard_ack; + __entry->seq = call->acks_first_seq; __entry->transmitted = call->tx_transmitted; __entry->ack_serial = sp ? 
sp->hdr.serial : 0; ), @@ -1944,7 +1944,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_id = call->call_id; __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->acks_hard_ack; + __entry->tx_seq = call->acks_first_seq; __entry->rx_seq = call->rx_highest_seq; ), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bcce4862b0b7..6683043cee3f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -759,7 +759,6 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_seq_t acks_first_seq; /* first sequence number received */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ - rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 90e3d9395675..2311e5c737e8 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -109,7 +109,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) bool unacked = false, did_send = false; unsigned int qix; - _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); + _enter("{%d,%d}", call->tx_bottom, call->tx_top); if (call->tx_bottom == call->tx_top) goto no_resend; @@ -267,7 +267,7 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) { int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); - int in_flight = call->tx_top - call->acks_hard_ack; + int in_flight = call->tx_top - call->tx_bottom; return max(winsize - in_flight, 0); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cfdd23042d4c..afb87a3322da 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -40,7 +40,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, bool resend = false; 
summary->flight_size = - (call->tx_top - call->acks_hard_ack) - summary->nr_acks; + (call->tx_top - call->tx_bottom) - summary->nr_acks; if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; @@ -175,7 +175,7 @@ send_extra_data: * state. */ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || - summary->nr_acks != call->tx_top - call->acks_hard_ack) { + summary->nr_acks != call->tx_top - call->tx_bottom) { call->cong_extra++; wake_up(&call->waitq); } @@ -218,7 +218,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rxrpc_seq_t seq = call->tx_bottom + 1; bool rot_last = false; - _enter("%x,%x,%x", call->tx_bottom, call->acks_hard_ack, to); + _enter("%x,%x", call->tx_bottom, to); trace_rxrpc_tx_rotate(call, seq, to); trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); @@ -246,7 +246,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, tq->bufs[ix] = NULL; WRITE_ONCE(call->tx_bottom, seq); - WRITE_ONCE(call->acks_hard_ack, seq); trace_rxrpc_txqueue(call, (rot_last ? 
rxrpc_txqueue_rotate_last : rxrpc_txqueue_rotate)); @@ -278,9 +277,9 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } } - _debug("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); + _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last); - if (call->acks_lowest_nak == call->acks_hard_ack) { + if (call->acks_lowest_nak == call->tx_bottom) { call->acks_lowest_nak = to; } else if (after(to, call->acks_lowest_nak)) { summary->new_low_nack = true; @@ -968,7 +967,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && first_soft_ack == 1 && prev_pkt == 0 && - call->acks_hard_ack == 0 && + call->tx_bottom == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); @@ -1033,13 +1032,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) goto send_response; } - if (before(hard_ack, call->acks_hard_ack) || + if (before(hard_ack, call->tx_bottom) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); - if (after(hard_ack, call->acks_hard_ack)) { + if (after(hard_ack, call->tx_bottom)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 249e1ed9c5c9..a8325b8e33c2 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -52,7 +52,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); enum rxrpc_call_state state; - rxrpc_seq_t acks_hard_ack; + rxrpc_seq_t tx_bottom; char lbuff[50], rbuff[50]; long timeout = 0; @@ -79,7 +79,7 @@ static int rxrpc_call_seq_show(struct seq_file 
*seq, void *v) if (state != RXRPC_CALL_SERVER_PREALLOC) timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real()); - acks_hard_ack = READ_ONCE(call->acks_hard_ack); + tx_bottom = READ_ONCE(call->tx_bottom); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n", @@ -93,7 +93,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rxrpc_call_states[state], call->abort_code, call->debug_id, - acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, + tx_bottom, READ_ONCE(call->tx_top) - tx_bottom, call->ackr_window, call->ackr_wtop - call->ackr_window, call->rx_serial, call->cong_cwnd, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 85b35b11755d..dfbf9f4b24b6 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -140,7 +140,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rtt = 2; timeout = rtt; - tx_start = READ_ONCE(call->acks_hard_ack); + tx_start = READ_ONCE(call->tx_bottom); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -197,8 +197,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%u,%u,%u,%u}", - call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize); + _enter(",{%u,%u,%u}", + call->tx_bottom, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); From 203457e11b591f80ada571f981dd5f4d683b0009 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:48 +0000 Subject: [PATCH 0245/1386] rxrpc: Replace call->acks_first_seq with tracking of the hard ACK point Replace the call->acks_first_seq variable (which holds ack.firstPacket from the latest ACK packet and indicates the sequence number of the first ack slot in the SACK table) with call->acks_hard_ack which will hold the highest sequence hard ACK'd. 
This is 1 less than call->acks_first_seq, but it fits in the same schema as the other tracking variables which hold the sequence of a packet, not one past it. This will fix the rxrpc_congest tracepoint's calculation of SACK window size which shows one fewer than it should - and will occasionally go to -1. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-21-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 68 +++++++++++++++++------------------- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/input.c | 56 ++++++++++++++--------------- 3 files changed, 59 insertions(+), 67 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0f253287de00..91108e0de3af 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -893,7 +893,7 @@ TRACE_EVENT(rxrpc_txqueue, __field(unsigned int, call) __field(enum rxrpc_txqueue_trace, why) __field(rxrpc_seq_t, tx_bottom) - __field(rxrpc_seq_t, acks_first_seq) + __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_top) __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) @@ -903,19 +903,19 @@ TRACE_EVENT(rxrpc_txqueue, __entry->call = call->debug_id; __entry->why = why; __entry->tx_bottom = call->tx_bottom; - __entry->acks_first_seq = call->acks_first_seq; + __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_top = call->tx_top; __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; ), - TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u", + TP_printk("c=%08x %s b=%08x h=%08x n=%u/%u/%u/%u", __entry->call, __print_symbolic(__entry->why, rxrpc_txqueue_traces), __entry->tx_bottom, - __entry->acks_first_seq, - __entry->acks_first_seq - __entry->tx_bottom, - __entry->tx_top - __entry->acks_first_seq, + __entry->acks_hard_ack, + __entry->acks_hard_ack - __entry->tx_bottom, + __entry->tx_top - __entry->acks_hard_ack, __entry->send_top - 
__entry->tx_top, __entry->tx_winsize) ); @@ -1015,11 +1015,9 @@ TRACE_EVENT(rxrpc_rx_data, ); TRACE_EVENT(rxrpc_rx_ack, - TP_PROTO(struct rxrpc_call *call, - rxrpc_serial_t serial, rxrpc_serial_t ack_serial, - rxrpc_seq_t first, rxrpc_seq_t prev, u8 reason, u8 n_acks), + TP_PROTO(struct rxrpc_call *call, struct rxrpc_skb_priv *sp), - TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks), + TP_ARGS(call, sp), TP_STRUCT__entry( __field(unsigned int, call) @@ -1032,13 +1030,13 @@ TRACE_EVENT(rxrpc_rx_ack, ), TP_fast_assign( - __entry->call = call->debug_id; - __entry->serial = serial; - __entry->ack_serial = ack_serial; - __entry->first = first; - __entry->prev = prev; - __entry->reason = reason; - __entry->n_acks = n_acks; + __entry->call = call->debug_id; + __entry->serial = sp->hdr.serial; + __entry->ack_serial = sp->ack.acked_serial; + __entry->first = sp->ack.first_ack; + __entry->prev = sp->ack.prev_ack; + __entry->reason = sp->ack.reason; + __entry->n_acks = sp->ack.nr_acks; ), TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u", @@ -1707,7 +1705,7 @@ TRACE_EVENT(rxrpc_congest, TP_fast_assign( __entry->call = call->debug_id; __entry->change = change; - __entry->hard_ack = call->acks_first_seq; + __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; @@ -1754,7 +1752,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->mode = call->cong_mode; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; - __entry->hard_ack = call->acks_first_seq; + __entry->hard_ack = call->acks_hard_ack; __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); __entry->has_data = call->tx_bottom != call->tx_top; @@ -1855,7 +1853,7 @@ TRACE_EVENT(rxrpc_resend, TP_fast_assign( struct rxrpc_skb_priv *sp = ack ? 
rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; - __entry->seq = call->acks_first_seq; + __entry->seq = call->acks_hard_ack; __entry->transmitted = call->tx_transmitted; __entry->ack_serial = sp ? sp->hdr.serial : 0; ), @@ -1944,7 +1942,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_id = call->call_id; __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->acks_first_seq; + __entry->tx_seq = call->acks_hard_ack; __entry->rx_seq = call->rx_highest_seq; ), @@ -1976,38 +1974,36 @@ TRACE_EVENT(rxrpc_notify_socket, ); TRACE_EVENT(rxrpc_rx_discard_ack, - TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, - rxrpc_seq_t first_soft_ack, rxrpc_seq_t call_ackr_first, - rxrpc_seq_t prev_pkt, rxrpc_seq_t call_ackr_prev), + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt), - TP_ARGS(debug_id, serial, first_soft_ack, call_ackr_first, - prev_pkt, call_ackr_prev), + TP_ARGS(call, serial, hard_ack, prev_pkt), TP_STRUCT__entry( __field(unsigned int, debug_id) __field(rxrpc_serial_t, serial) - __field(rxrpc_seq_t, first_soft_ack) - __field(rxrpc_seq_t, call_ackr_first) + __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, prev_pkt) - __field(rxrpc_seq_t, call_ackr_prev) + __field(rxrpc_seq_t, acks_hard_ack) + __field(rxrpc_seq_t, acks_prev_seq) ), TP_fast_assign( - __entry->debug_id = debug_id; + __entry->debug_id = call->debug_id; __entry->serial = serial; - __entry->first_soft_ack = first_soft_ack; - __entry->call_ackr_first = call_ackr_first; + __entry->hard_ack = hard_ack; __entry->prev_pkt = prev_pkt; - __entry->call_ackr_prev = call_ackr_prev; + __entry->acks_hard_ack = call->acks_hard_ack; + __entry->acks_prev_seq = call->acks_prev_seq; ), TP_printk("c=%08x r=%08x %08x<%08x %08x<%08x", __entry->debug_id, __entry->serial, - __entry->first_soft_ack, - __entry->call_ackr_first, + __entry->hard_ack, + __entry->acks_hard_ack, __entry->prev_pkt, - 
__entry->call_ackr_prev) + __entry->acks_prev_seq) ); TRACE_EVENT(rxrpc_req_ack, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6683043cee3f..3e57cef7385f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -757,7 +757,7 @@ struct rxrpc_call { /* Transmission-phase ACK management (ACKs we've received). */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ - rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_hard_ack; /* Highest sequence hard acked */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index afb87a3322da..b89fd0dee324 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -782,12 +782,12 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb */ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - rxrpc_seq_t seq) + rxrpc_seq_t hard_ack) { struct sk_buff *skb = call->cong_last_nack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t old_seq = sp->ack.first_ack; + rxrpc_seq_t seq = hard_ack + 1, old_seq = sp->ack.first_ack; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); if (after_eq(seq, old_seq + sp->ack.nr_acks)) { @@ -810,7 +810,7 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, summary->nr_retained_nacks = retained_nacks; } - return old_seq + sp->ack.nr_acks; + return old_seq + sp->ack.nr_acks - 1; } /* @@ -825,22 +825,23 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, struct sk_buff *skb, - rxrpc_seq_t seq, rxrpc_seq_t since) { 
struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, old_nacks = 0; - rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; + rxrpc_seq_t lowest_nak = call->acks_hard_ack + sp->ack.nr_acks + 1; + rxrpc_seq_t seq = call->acks_hard_ack; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); for (i = 0; i < sp->ack.nr_acks; i++) { + seq++; if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; - if (after_eq(seq, since)) + if (after(seq, since)) summary->nr_new_acks++; } else { summary->saw_nacks = true; - if (before(seq, since)) { + if (before_eq(seq, since)) { /* Overlap with previous ACK */ old_nacks++; } else { @@ -851,7 +852,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, if (before(seq, lowest_nak)) lowest_nak = seq; } - seq++; } if (lowest_nak != call->acks_lowest_nak) { @@ -874,21 +874,21 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * with respect to the ack state conveyed by preceding ACKs. */ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, - rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) + rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt) { - rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); + rxrpc_seq_t base = READ_ONCE(call->acks_hard_ack); - if (after(first_pkt, base)) + if (after(hard_ack, base)) return true; /* The window advanced */ - if (before(first_pkt, base)) + if (before(hard_ack, base)) return false; /* firstPacket regressed */ if (after_eq(prev_pkt, call->acks_prev_seq)) return true; /* previousPacket hasn't regressed. */ /* Some rx implementations put a serial number in previousPacket. 
*/ - if (after_eq(prev_pkt, base + call->tx_winsize)) + if (after(prev_pkt, base + call->tx_winsize)) return false; return true; } @@ -906,8 +906,8 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_acktrailer trailer; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; int nr_acks, offset, ioffset; @@ -925,9 +925,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? sp->ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, ack_serial, acked_serial, - first_soft_ack, prev_pkt, - summary.ack_reason, nr_acks); + trace_rxrpc_rx_ack(call, sp); rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); if (acked_serial != 0) { @@ -952,7 +950,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * lost the call because it switched to a different peer. */ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, @@ -965,7 +963,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * if we still have it buffered to the beginning. */ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && call->tx_bottom == 0 && rxrpc_is_client_call(call)) { @@ -975,10 +973,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). 
*/ - if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->acks_first_seq, - prev_pkt, call->acks_prev_seq); + if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call, ack_serial, hard_ack, prev_pkt); goto send_response; } @@ -992,17 +988,17 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) skb_condense(skb); if (call->cong_last_nack) { - since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); + since = rxrpc_input_check_prev_ack(call, &summary, hard_ack); rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); call->cong_last_nack = NULL; } else { - summary.nr_new_acks = first_soft_ack - call->acks_first_seq; - call->acks_lowest_nak = first_soft_ack + nr_acks; - since = first_soft_ack; + summary.nr_new_acks = hard_ack - call->acks_hard_ack; + call->acks_lowest_nak = hard_ack + nr_acks; + since = hard_ack; } call->acks_latest_ts = skb->tstamp; - call->acks_first_seq = first_soft_ack; + call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; switch (summary.ack_reason) { @@ -1018,7 +1014,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (trailer.maxMTU) rxrpc_input_ack_trailer(call, skb, &trailer); - if (first_soft_ack == 0) + if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. 
*/ @@ -1048,7 +1044,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); + rxrpc_input_soft_acks(call, &summary, skb, since); rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); call->cong_last_nack = skb; } From f003e4038f0e14b3b374f7dae76dfeef9591f006 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:49 +0000 Subject: [PATCH 0246/1386] rxrpc: Display stats about jumbo packets transmitted and received In /proc/net/rxrpc/stats, display statistics about the numbers of different sizes of jumbo packets transmitted and received, showing counts for 1 subpacket (ie. a non-jumbo packet), 2 subpackets, 3, ... to 8 and then 9+. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-22-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/input.c | 6 +++++- net/rxrpc/output.c | 5 ++++- net/rxrpc/proc.c | 26 ++++++++++++++++++++++++++ 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3e57cef7385f..840293f913a3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -111,6 +111,8 @@ struct rxrpc_net { atomic_t stat_tx_ack_skip; atomic_t stat_tx_acks[256]; atomic_t stat_rx_acks[256]; + atomic_t stat_tx_jumbo[10]; + atomic_t stat_rx_jumbo[10]; atomic_t stat_why_req_ack[8]; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index b89fd0dee324..8d7ab4b9d7d0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -568,7 +568,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len - offset; bool notify = false; - int ack_reason = 0; + int 
ack_reason = 0, count = 1, stat_ix; while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { if (len < RXRPC_JUMBO_SUBPKTLEN) @@ -597,12 +597,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb sp->hdr.serial++; offset += RXRPC_JUMBO_SUBPKTLEN; len -= RXRPC_JUMBO_SUBPKTLEN; + count++; } sp->offset = offset; sp->len = len; rxrpc_input_data_one(call, skb, &notify, &ack_serial, &ack_reason); + stat_ix = umin(count, ARRAY_SIZE(call->rxnet->stat_rx_jumbo)) - 1; + atomic_inc(&call->rxnet->stat_rx_jumbo[stat_ix]); + if (ack_reason > 0) { rxrpc_send_ACK(call, ack_reason, ack_serial, rxrpc_propose_ack_input_data); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index c2044d593237..3886777d1bb6 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -552,10 +552,13 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req rxrpc_seq_t seq = req->seq; size_t len; bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); - int ret; + int ret, stat_ix; _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); + stat_ix = umin(req->n, ARRAY_SIZE(call->rxnet->stat_tx_jumbo)) - 1; + atomic_inc(&call->rxnet->stat_tx_jumbo[stat_ix]); + len = rxrpc_prepare_data_packet(call, req); txb = tq->bufs[seq & RXRPC_TXQ_MASK]; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index a8325b8e33c2..5f974ec13d69 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -529,6 +529,30 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_retrans]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin])); + seq_printf(seq, + "Jumbo-Tx : %u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", + atomic_read(&rxnet->stat_tx_jumbo[0]), + atomic_read(&rxnet->stat_tx_jumbo[1]), + atomic_read(&rxnet->stat_tx_jumbo[2]), + atomic_read(&rxnet->stat_tx_jumbo[3]), + atomic_read(&rxnet->stat_tx_jumbo[4]), + atomic_read(&rxnet->stat_tx_jumbo[5]), + 
atomic_read(&rxnet->stat_tx_jumbo[6]), + atomic_read(&rxnet->stat_tx_jumbo[7]), + atomic_read(&rxnet->stat_tx_jumbo[8]), + atomic_read(&rxnet->stat_tx_jumbo[9])); + seq_printf(seq, + "Jumbo-Rx : %u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", + atomic_read(&rxnet->stat_rx_jumbo[0]), + atomic_read(&rxnet->stat_rx_jumbo[1]), + atomic_read(&rxnet->stat_rx_jumbo[2]), + atomic_read(&rxnet->stat_rx_jumbo[3]), + atomic_read(&rxnet->stat_rx_jumbo[4]), + atomic_read(&rxnet->stat_rx_jumbo[5]), + atomic_read(&rxnet->stat_rx_jumbo[6]), + atomic_read(&rxnet->stat_rx_jumbo[7]), + atomic_read(&rxnet->stat_rx_jumbo[8]), + atomic_read(&rxnet->stat_rx_jumbo[9])); seq_printf(seq, "Buffers : txb=%u rxb=%u\n", atomic_read(&rxrpc_nr_txbuf), @@ -566,6 +590,8 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size) atomic_set(&rxnet->stat_tx_ack_skip, 0); memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks)); memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks)); + memset(&rxnet->stat_tx_jumbo, 0, sizeof(rxnet->stat_tx_jumbo)); + memset(&rxnet->stat_rx_jumbo, 0, sizeof(rxnet->stat_rx_jumbo)); memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack)); From f7dd0dc9651326f609579fa81cbdda69b0467c2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:50 +0000 Subject: [PATCH 0247/1386] rxrpc: Adjust names and types of congestion-related fields Adjust some of the names of fields and constants to make them look a bit more like the TCP congestion symbol names, such as flight_size -> in_flight and congest_mode to ca_state. Move the persistent congestion-related fields from the rxrpc_ack_summary struct into the rxrpc_call struct rather than copying them out and back in again. The rxrpc_congest tracepoint can fetch them from the call struct. Rename the counters for soft acks and nacks to have an 's' on the front to reflect the softness, e.g. nr_acks -> nr_sacks. 
Make fields counting numbers of packets or numbers of acks u16 rather than u8 to allow for windows of up to 8192 DATA packets in flight in future. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-23-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 58 ++++++++------ net/rxrpc/ar-internal.h | 51 ++++++------ net/rxrpc/conn_client.c | 4 +- net/rxrpc/input.c | 151 ++++++++++++++++------------------- net/rxrpc/output.c | 2 +- 5 files changed, 132 insertions(+), 134 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 91108e0de3af..d47b8235fad3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -378,11 +378,11 @@ EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \ E_(rxrpc_propose_ack_terminal_ack, "ClTerm ") -#define rxrpc_congest_modes \ - EM(RXRPC_CALL_CONGEST_AVOIDANCE, "CongAvoid") \ - EM(RXRPC_CALL_FAST_RETRANSMIT, "FastReTx ") \ - EM(RXRPC_CALL_PACKET_LOSS, "PktLoss ") \ - E_(RXRPC_CALL_SLOW_START, "SlowStart") +#define rxrpc_ca_states \ + EM(RXRPC_CA_CONGEST_AVOIDANCE, "CongAvoid") \ + EM(RXRPC_CA_FAST_RETRANSMIT, "FastReTx ") \ + EM(RXRPC_CA_PACKET_LOSS, "PktLoss ") \ + E_(RXRPC_CA_SLOW_START, "SlowStart") #define rxrpc_congest_changes \ EM(rxrpc_cong_begin_retransmission, " Retrans") \ @@ -550,11 +550,11 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); rxrpc_abort_reasons; rxrpc_bundle_traces; +rxrpc_ca_states; rxrpc_call_poke_traces; rxrpc_call_traces; rxrpc_client_traces; rxrpc_congest_changes; -rxrpc_congest_modes; rxrpc_conn_traces; rxrpc_local_traces; rxrpc_pmtud_reduce_traces; @@ -1688,27 +1688,39 @@ TRACE_EVENT(rxrpc_retransmit, TRACE_EVENT(rxrpc_congest, TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + rxrpc_serial_t ack_serial), - TP_ARGS(call, summary, ack_serial, change), + 
TP_ARGS(call, summary, ack_serial), TP_STRUCT__entry( __field(unsigned int, call) - __field(enum rxrpc_congest_change, change) + __field(enum rxrpc_ca_state, ca_state) __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, top) __field(rxrpc_seq_t, lowest_nak) __field(rxrpc_serial_t, ack_serial) + __field(u16, nr_sacks) + __field(u16, nr_snacks) + __field(u16, cwnd) + __field(u16, ssthresh) + __field(u16, cumul_acks) + __field(u16, dup_acks) __field_struct(struct rxrpc_ack_summary, sum) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->change = change; + __entry->ca_state = call->cong_ca_state; __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; + __entry->nr_sacks = call->acks_nr_sacks; + __entry->nr_snacks = call->acks_nr_snacks; + __entry->cwnd = call->cong_cwnd; + __entry->ssthresh = call->cong_ssthresh; + __entry->cumul_acks = call->cong_cumul_acks; + __entry->dup_acks = call->cong_dup_acks; memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), @@ -1717,17 +1729,17 @@ TRACE_EVENT(rxrpc_congest, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), __entry->hard_ack, - __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), - __entry->sum.cwnd, - __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, - __entry->sum.nr_new_acks, - __entry->sum.nr_new_nacks, + __print_symbolic(__entry->ca_state, rxrpc_ca_states), + __entry->cwnd, + __entry->ssthresh, + __entry->nr_sacks, __entry->sum.nr_retained_snacks, + __entry->sum.nr_new_sacks, + __entry->sum.nr_new_snacks, __entry->top - __entry->hard_ack, - __entry->sum.cumulative_acks, - __entry->sum.dup_acks, - __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", - __print_symbolic(__entry->change, rxrpc_congest_changes), + __entry->cumul_acks, + __entry->dup_acks, + __entry->lowest_nak, __entry->sum.new_low_snack ? "!" 
: "", + __print_symbolic(__entry->sum.change, rxrpc_congest_changes), __entry->sum.retrans_timeo ? " rTxTo" : "") ); @@ -1738,7 +1750,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_STRUCT__entry( __field(unsigned int, call) - __field(enum rxrpc_congest_mode, mode) + __field(enum rxrpc_ca_state, ca_state) __field(unsigned short, cwnd) __field(unsigned short, extra) __field(rxrpc_seq_t, hard_ack) @@ -1749,7 +1761,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_fast_assign( __entry->call = call->debug_id; - __entry->mode = call->cong_mode; + __entry->ca_state = call->cong_ca_state; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; @@ -1761,7 +1773,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", __entry->call, __entry->hard_ack, - __print_symbolic(__entry->mode, rxrpc_congest_modes), + __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->extra, __entry->prepared, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 840293f913a3..f6e6b2ab6c2a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -623,13 +623,13 @@ enum rxrpc_call_state { /* * Call Tx congestion management modes. 
*/ -enum rxrpc_congest_mode { - RXRPC_CALL_SLOW_START, - RXRPC_CALL_CONGEST_AVOIDANCE, - RXRPC_CALL_PACKET_LOSS, - RXRPC_CALL_FAST_RETRANSMIT, - NR__RXRPC_CONGEST_MODES -}; +enum rxrpc_ca_state { + RXRPC_CA_SLOW_START, + RXRPC_CA_CONGEST_AVOIDANCE, + RXRPC_CA_PACKET_LOSS, + RXRPC_CA_FAST_RETRANSMIT, + NR__RXRPC_CA_STATES +} __mode(byte); /* * RxRPC call definition @@ -727,12 +727,12 @@ struct rxrpc_call { */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN #define RXRPC_MIN_CWND 4 - u8 cong_cwnd; /* Congestion window size */ + enum rxrpc_ca_state cong_ca_state; /* Congestion control state */ u8 cong_extra; /* Extra to send for congestion management */ - u8 cong_ssthresh; /* Slow-start threshold */ - enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ - u8 cong_dup_acks; /* Count of ACKs showing missing packets */ - u8 cong_cumul_acks; /* Cumulative ACK count */ + u16 cong_cwnd; /* Congestion window size */ + u16 cong_ssthresh; /* Slow-start threshold */ + u16 cong_dup_acks; /* Count of ACKs showing missing packets */ + u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ @@ -763,27 +763,24 @@ struct rxrpc_call { rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ + unsigned short acks_nr_sacks; /* Number of soft acks recorded */ + unsigned short acks_nr_snacks; /* Number of soft nacks recorded */ }; /* * Summary of a new ACK and the changes it made to the Tx buffer packet states. 
*/ struct rxrpc_ack_summary { - u16 nr_acks; /* Number of ACKs in packet */ - u16 nr_new_acks; /* Number of new ACKs in packet */ - u16 nr_new_nacks; /* Number of new nacks in packet */ - u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ - u8 ack_reason; - bool saw_nacks; /* Saw NACKs in packet */ - bool new_low_nack; /* T if new low NACK found */ - bool retrans_timeo; /* T if reTx due to timeout happened */ - u8 flight_size; /* Number of unreceived transmissions */ - /* Place to stash values for tracing */ - enum rxrpc_congest_mode mode:8; - u8 cwnd; - u8 ssthresh; - u8 dup_acks; - u8 cumulative_acks; + u16 in_flight; /* Number of unreceived transmissions */ + u16 nr_new_hacks; /* Number of rotated new ACKs */ + u16 nr_new_sacks; /* Number of new soft ACKs in packet */ + u16 nr_new_snacks; /* Number of new soft nacks in packet */ + u16 nr_retained_snacks; /* Number of nacks retained between ACKs */ + u8 ack_reason; + bool saw_snacks:1; /* T if we saw a soft NACK */ + bool new_low_snack:1; /* T if new low soft NACK found */ + bool retrans_timeo:1; /* T if reTx due to timeout happened */ + u8 /*enum rxrpc_congest_change*/ change; }; /* diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 706631e6ac2f..5f76bd90567c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -437,9 +437,9 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, call->dest_srx.srx_service = conn->service_id; call->cong_ssthresh = call->peer->cong_ssthresh; if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; else - call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ca_state = RXRPC_CA_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8d7ab4b9d7d0..c25d816aafee 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -34,49 +34,41 @@ static void 
rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, rxrpc_serial_t acked_serial) { - enum rxrpc_congest_change change = rxrpc_cong_no_change; - unsigned int cumulative_acks = call->cong_cumul_acks; - unsigned int cwnd = call->cong_cwnd; bool resend = false; - summary->flight_size = - (call->tx_top - call->tx_bottom) - summary->nr_acks; + summary->change = rxrpc_cong_no_change; + summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; - call->cong_ssthresh = umax(summary->flight_size / 2, 2); - cwnd = 1; - if (cwnd >= call->cong_ssthresh && - call->cong_mode == RXRPC_CALL_SLOW_START) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = 1; + if (call->cong_cwnd >= call->cong_ssthresh && + call->cong_ca_state == RXRPC_CA_SLOW_START) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; - cumulative_acks = 0; + call->cong_cumul_acks = 0; } } - cumulative_acks += summary->nr_new_acks; - if (cumulative_acks > 255) - cumulative_acks = 255; + call->cong_cumul_acks += summary->nr_new_sacks; + if (call->cong_cumul_acks > 255) + call->cong_cumul_acks = 255; - summary->cwnd = call->cong_cwnd; - summary->ssthresh = call->cong_ssthresh; - summary->cumulative_acks = cumulative_acks; - summary->dup_acks = call->cong_dup_acks; - - switch (call->cong_mode) { - case RXRPC_CALL_SLOW_START: - if (summary->saw_nacks) + switch (call->cong_ca_state) { + case RXRPC_CA_SLOW_START: + if (summary->saw_snacks) goto packet_loss_detected; - if (summary->cumulative_acks > 0) - cwnd += 1; - if (cwnd >= call->cong_ssthresh) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + if (call->cong_cumul_acks > 0) + call->cong_cwnd += 1; + if (call->cong_cwnd >= call->cong_ssthresh) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; 
call->cong_tstamp = skb->tstamp; } goto out; - case RXRPC_CALL_CONGEST_AVOIDANCE: - if (summary->saw_nacks) + case RXRPC_CA_CONGEST_AVOIDANCE: + if (summary->saw_snacks) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT @@ -88,18 +80,18 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, ktime_add_us(call->cong_tstamp, call->peer->srtt_us >> 3))) goto out_no_clear_ca; - change = rxrpc_cong_rtt_window_end; + summary->change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; - if (cumulative_acks >= cwnd) - cwnd++; + if (call->cong_cumul_acks >= call->cong_cwnd) + call->cong_cwnd++; goto out; - case RXRPC_CALL_PACKET_LOSS: - if (!summary->saw_nacks) + case RXRPC_CA_PACKET_LOSS: + if (!summary->saw_snacks) goto resume_normality; - if (summary->new_low_nack) { - change = rxrpc_cong_new_low_nack; + if (summary->new_low_snack) { + summary->change = rxrpc_cong_new_low_nack; call->cong_dup_acks = 1; if (call->cong_extra > 1) call->cong_extra = 1; @@ -110,29 +102,29 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_dup_acks < 3) goto send_extra_data; - change = rxrpc_cong_begin_retransmission; - call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; - call->cong_ssthresh = umax(summary->flight_size / 2, 2); - cwnd = call->cong_ssthresh + 3; + summary->change = rxrpc_cong_begin_retransmission; + call->cong_ca_state = RXRPC_CA_FAST_RETRANSMIT; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; resend = true; goto out; - case RXRPC_CALL_FAST_RETRANSMIT: - if (!summary->new_low_nack) { - if (summary->nr_new_acks == 0) - cwnd += 1; + case RXRPC_CA_FAST_RETRANSMIT: + if (!summary->new_low_snack) { + if (summary->nr_new_sacks == 0) + call->cong_cwnd += 1; call->cong_dup_acks++; if (call->cong_dup_acks == 2) { - change = rxrpc_cong_retransmit_again; + summary->change = rxrpc_cong_retransmit_again; 
call->cong_dup_acks = 0; resend = true; } } else { - change = rxrpc_cong_progress; - cwnd = call->cong_ssthresh; - if (!summary->saw_nacks) + summary->change = rxrpc_cong_progress; + call->cong_cwnd = call->cong_ssthresh; + if (!summary->saw_snacks) goto resume_normality; } goto out; @@ -143,30 +135,27 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } resume_normality: - change = rxrpc_cong_cleared_nacks; + summary->change = rxrpc_cong_cleared_nacks; call->cong_dup_acks = 0; call->cong_extra = 0; call->cong_tstamp = skb->tstamp; - if (cwnd < call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_SLOW_START; + if (call->cong_cwnd < call->cong_ssthresh) + call->cong_ca_state = RXRPC_CA_SLOW_START; else - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; out: - cumulative_acks = 0; + call->cong_cumul_acks = 0; out_no_clear_ca: - if (cwnd >= RXRPC_TX_MAX_WINDOW) - cwnd = RXRPC_TX_MAX_WINDOW; - call->cong_cwnd = cwnd; - call->cong_cumul_acks = cumulative_acks; - summary->mode = call->cong_mode; - trace_rxrpc_congest(call, summary, acked_serial, change); + if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) + call->cong_cwnd = RXRPC_TX_MAX_WINDOW; + trace_rxrpc_congest(call, summary, acked_serial); if (resend) rxrpc_resend(call, skb); return; packet_loss_detected: - change = rxrpc_cong_saw_nack; - call->cong_mode = RXRPC_CALL_PACKET_LOSS; + summary->change = rxrpc_cong_saw_nack; + call->cong_ca_state = RXRPC_CA_PACKET_LOSS; call->cong_dup_acks = 0; goto send_extra_data; @@ -175,7 +164,7 @@ send_extra_data: * state. 
*/ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || - summary->nr_acks != call->tx_top - call->tx_bottom) { + call->acks_nr_sacks != call->tx_top - call->tx_bottom) { call->cong_extra++; wake_up(&call->waitq); } @@ -189,8 +178,8 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) { ktime_t rtt, now; - if (call->cong_mode != RXRPC_CALL_SLOW_START && - call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) + if (call->cong_ca_state != RXRPC_CA_SLOW_START && + call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE) return; if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; @@ -203,7 +192,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) trace_rxrpc_reset_cwnd(call, now); rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; - call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ca_state = RXRPC_CA_SLOW_START; call->cong_ssthresh = umax(call->cong_ssthresh, call->cong_cwnd * 3 / 4); call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); } @@ -282,7 +271,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, if (call->acks_lowest_nak == call->tx_bottom) { call->acks_lowest_nak = to; } else if (after(to, call->acks_lowest_nak)) { - summary->new_low_nack = true; + summary->new_low_snack = true; call->acks_lowest_nak = to; } @@ -795,11 +784,11 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); if (after_eq(seq, old_seq + sp->ack.nr_acks)) { - summary->nr_new_acks += sp->ack.nr_nacks; - summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks); - summary->nr_retained_nacks = 0; + summary->nr_new_sacks += sp->ack.nr_nacks; + summary->nr_new_sacks += seq - (old_seq + sp->ack.nr_acks); + summary->nr_retained_snacks = 0; } else if (seq == old_seq) { - summary->nr_retained_nacks = sp->ack.nr_nacks; + summary->nr_retained_snacks = sp->ack.nr_nacks; } else { for (i = 0; i < 
sp->ack.nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_NACK) { @@ -810,8 +799,8 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, } } - summary->nr_new_acks += new_acks; - summary->nr_retained_nacks = retained_nacks; + summary->nr_new_sacks += new_acks; + summary->nr_retained_snacks = retained_nacks; } return old_seq + sp->ack.nr_acks - 1; @@ -840,16 +829,16 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, for (i = 0; i < sp->ack.nr_acks; i++) { seq++; if (acks[i] == RXRPC_ACK_TYPE_ACK) { - summary->nr_acks++; + call->acks_nr_sacks++; if (after(seq, since)) - summary->nr_new_acks++; + summary->nr_new_sacks++; } else { - summary->saw_nacks = true; + summary->saw_snacks = true; if (before_eq(seq, since)) { /* Overlap with previous ACK */ old_nacks++; } else { - summary->nr_new_nacks++; + summary->nr_new_snacks++; sp->ack.nr_nacks++; } @@ -860,7 +849,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, if (lowest_nak != call->acks_lowest_nak) { call->acks_lowest_nak = lowest_nak; - summary->new_low_nack = true; + summary->new_low_snack = true; } /* We *can* have more nacks than we did - the peer is permitted to drop @@ -868,9 +857,9 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * possible for the nack distribution to change whilst the number of * nacks stays the same or goes down. 
*/ - if (old_nacks < summary->nr_retained_nacks) - summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; - summary->nr_retained_nacks = old_nacks; + if (old_nacks < summary->nr_retained_snacks) + summary->nr_new_sacks += summary->nr_retained_snacks - old_nacks; + summary->nr_retained_snacks = old_nacks; } /* @@ -996,7 +985,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); call->cong_last_nack = NULL; } else { - summary.nr_new_acks = hard_ack - call->acks_hard_ack; + summary.nr_new_sacks = hard_ack - call->acks_hard_ack; call->acks_lowest_nak = hard_ack + nr_acks; since = hard_ack; } @@ -1054,7 +1043,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - summary.nr_acks == call->tx_top - hard_ack && + call->acks_nr_sacks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) rxrpc_propose_ping(call, ack_serial, rxrpc_propose_ack_ping_for_lost_reply); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3886777d1bb6..7ed928b6f0e1 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -419,7 +419,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_ack_lost; else if (txb->flags & RXRPC_TXBUF_RESENT) why = rxrpc_reqack_retrans; - else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2) + else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= 2) why = rxrpc_reqack_slow_start; else if (call->tx_winsize <= 2) why = rxrpc_reqack_small_txwin; From 9b052c6b92f9316d670bf50566f70e183d0d19cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:51 +0000 Subject: [PATCH 0248/1386] rxrpc: Use the new rxrpc_tx_queue struct to more efficiently process ACKs With the change in the structure of the transmission buffer to store buffers in bunches of 32 or 64 (BITS_PER_LONG) we can place sets of 
per-buffer flags into the rxrpc_tx_queue struct rather than storing them in rxrpc_tx_buf, thereby vastly increasing efficiency when assessing the SACK table in an ACK packet. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-24-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 88 ++++++++++-- net/rxrpc/ar-internal.h | 23 +++- net/rxrpc/call_event.c | 187 +++++++++++++------------- net/rxrpc/call_object.c | 1 - net/rxrpc/input.c | 252 ++++++++++++++++++++++------------- net/rxrpc/output.c | 10 +- net/rxrpc/sendmsg.c | 3 + 7 files changed, 356 insertions(+), 208 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d47b8235fad3..609522a5bd0f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -132,7 +132,6 @@ EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ - EM(rxrpc_skb_get_last_nack, "GET last-nack") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ @@ -147,7 +146,6 @@ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ - EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ @@ -499,6 +497,11 @@ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ E_(rxrpc_pmtud_reduce_route, "Route") +#define rxrpc_rotate_traces \ + EM(rxrpc_rotate_trace_hack, "hard-ack") \ + EM(rxrpc_rotate_trace_sack, "soft-ack") \ + E_(rxrpc_rotate_trace_snak, "soft-nack") + /* * Generate enums for tracing information. 
*/ @@ -525,6 +528,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte); enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte); +enum rxrpc_rotate_trace { rxrpc_rotate_traces } __mode(byte); enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte); enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); @@ -562,6 +566,7 @@ rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; rxrpc_req_ack_traces; +rxrpc_rotate_traces; rxrpc_rtt_rx_traces; rxrpc_rtt_tx_traces; rxrpc_sack_traces; @@ -1667,6 +1672,7 @@ TRACE_EVENT(rxrpc_retransmit, TP_STRUCT__entry( __field(unsigned int, call) + __field(unsigned int, qbase) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) __field(ktime_t, expiry) @@ -1674,13 +1680,15 @@ TRACE_EVENT(rxrpc_retransmit, TP_fast_assign( __entry->call = call->debug_id; + __entry->qbase = req->tq->qbase; __entry->seq = req->seq; __entry->serial = txb->serial; __entry->expiry = expiry; ), - TP_printk("c=%08x q=%x r=%x xp=%lld", + TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld", __entry->call, + __entry->qbase, __entry->seq, __entry->serial, ktime_to_us(__entry->expiry)) @@ -1724,7 +1732,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1732,9 +1740,9 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->ssthresh, - __entry->nr_sacks, __entry->sum.nr_retained_snacks, - __entry->sum.nr_new_sacks, - __entry->sum.nr_new_snacks, + 
__entry->nr_sacks, __entry->sum.nr_new_sacks, + __entry->nr_snacks, __entry->sum.nr_new_snacks, + __entry->sum.nr_new_hacks, __entry->top - __entry->hard_ack, __entry->cumul_acks, __entry->dup_acks, @@ -1850,10 +1858,36 @@ TRACE_EVENT(rxrpc_connect_call, &__entry->srx.transport) ); -TRACE_EVENT(rxrpc_resend, - TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack), +TRACE_EVENT(rxrpc_apply_acks, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq), - TP_ARGS(call, ack), + TP_ARGS(call, tq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(unsigned int, nr_rep) + __field(rxrpc_seq_t, qbase) + __field(unsigned long, acks) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->acks = tq->segment_acked; + __entry->nr_rep = tq->nr_reported_acks; + ), + + TP_printk("c=%08x tq=%x acks=%016lx rep=%u", + __entry->call, + __entry->qbase, + __entry->acks, + __entry->nr_rep) + ); + +TRACE_EVENT(rxrpc_resend, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t ack_serial), + + TP_ARGS(call, ack_serial), TP_STRUCT__entry( __field(unsigned int, call) @@ -1863,11 +1897,10 @@ TRACE_EVENT(rxrpc_resend, ), TP_fast_assign( - struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; __entry->seq = call->acks_hard_ack; __entry->transmitted = call->tx_transmitted; - __entry->ack_serial = sp ? 
sp->hdr.serial : 0; + __entry->ack_serial = ack_serial; ), TP_printk("c=%08x r=%x q=%x tq=%x", @@ -1877,6 +1910,37 @@ TRACE_EVENT(rxrpc_resend, __entry->transmitted) ); +TRACE_EVENT(rxrpc_rotate, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, + struct rxrpc_ack_summary *summary, rxrpc_seq_t seq, + enum rxrpc_rotate_trace trace), + + TP_ARGS(call, tq, summary, seq, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, seq) + __field(unsigned int, nr_rep) + __field(enum rxrpc_rotate_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->seq = seq; + __entry->nr_rep = tq->nr_reported_acks; + __entry->trace = trace; + ), + + TP_printk("c=%08x tq=%x q=%x nr=%x %s", + __entry->call, + __entry->qbase, + __entry->seq, + __entry->nr_rep, + __print_symbolic(__entry->trace, rxrpc_rotate_traces)) + ); + TRACE_EVENT(rxrpc_rx_icmp, TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee, struct sockaddr_rxrpc *srx), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f6e6b2ab6c2a..9a70f0b86570 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -214,9 +214,8 @@ struct rxrpc_skb_priv { rxrpc_seq_t first_ack; /* First packet in acks table */ rxrpc_seq_t prev_ack; /* Highest seq seen */ rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */ + u16 nr_acks; /* Number of acks+nacks */ u8 reason; /* Reason for ack */ - u8 nr_acks; /* Number of acks+nacks */ - u8 nr_nacks; /* Number of nacks */ } ack; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -734,7 +733,6 @@ struct rxrpc_call { u16 cong_dup_acks; /* Count of ACKs showing missing packets */ u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ - struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ /* Receive-phase ACK management (ACKs we send). 
*/ u8 ackr_reason; /* reason to ACK */ @@ -775,11 +773,10 @@ struct rxrpc_ack_summary { u16 nr_new_hacks; /* Number of rotated new ACKs */ u16 nr_new_sacks; /* Number of new soft ACKs in packet */ u16 nr_new_snacks; /* Number of new soft nacks in packet */ - u16 nr_retained_snacks; /* Number of nacks retained between ACKs */ u8 ack_reason; - bool saw_snacks:1; /* T if we saw a soft NACK */ bool new_low_snack:1; /* T if new low soft NACK found */ bool retrans_timeo:1; /* T if reTx due to timeout happened */ + bool need_retransmit:1; /* T if we need transmission */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -858,6 +855,10 @@ struct rxrpc_txqueue { struct rxrpc_txqueue *next; ktime_t xmit_ts_base; rxrpc_seq_t qbase; + u8 nr_reported_acks; /* Number of segments explicitly acked/nacked */ + unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ + unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ + unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ /* The arrays we want to pack into as few cache lines as possible. */ struct { @@ -935,7 +936,7 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); +void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response); bool rxrpc_input_call_event(struct rxrpc_call *call); @@ -1383,6 +1384,16 @@ static inline bool after_eq(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 earliest(u32 seq1, u32 seq2) +{ + return before(seq1, seq2) ? seq1 : seq2; +} + +static inline u32 latest(u32 seq1, u32 seq2) +{ + return after(seq1, seq2) ? 
seq1 : seq2; +} + static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2311e5c737e8..e25921d39d4d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -65,9 +65,9 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Retransmit one or more packets. */ -static void rxrpc_retransmit_data(struct rxrpc_call *call, +static bool rxrpc_retransmit_data(struct rxrpc_call *call, struct rxrpc_send_data_req *req, - ktime_t rto) + ktime_t rto, bool skip_too_young) { struct rxrpc_txqueue *tq = req->tq; unsigned int ix = req->seq & RXRPC_TXQ_MASK; @@ -78,9 +78,11 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call, xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); resend_at = ktime_add(xmit_ts, rto); - trace_rxrpc_retransmit(call, req, txb, - ktime_sub(resend_at, req->now)); + trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now)); + if (skip_too_young && ktime_after(resend_at, req->now)) + return false; + __set_bit(ix, &tq->segment_retransmitted); txb->flags |= RXRPC_TXBUF_RESENT; rxrpc_send_data_packet(call, req); rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); @@ -89,128 +91,119 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call, req->n = 0; req->did_send = true; req->now = ktime_get_real(); + return true; } /* * Perform retransmission of NAK'd and unack'd packets. 
*/ -void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) +void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response) { struct rxrpc_send_data_req req = { .now = ktime_get_real(), }; - struct rxrpc_ackpacket *ack = NULL; - struct rxrpc_skb_priv *sp; - struct rxrpc_txqueue *tq; - struct rxrpc_txbuf *txb; - rxrpc_seq_t transmitted = call->tx_transmitted, seq; - ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - ktime_t resend_at = KTIME_MAX, delay; - bool unacked = false, did_send = false; - unsigned int qix; + struct rxrpc_txqueue *tq = call->tx_queue; + ktime_t lowest_xmit_ts = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); + bool unacked = false; _enter("{%d,%d}", call->tx_bottom, call->tx_top); - if (call->tx_bottom == call->tx_top) - goto no_resend; + if (call->tx_bottom == call->tx_top) { + call->resend_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); + return; + } - trace_rxrpc_resend(call, ack_skb); - tq = call->tx_queue; - seq = call->tx_bottom; + trace_rxrpc_resend(call, ack_serial); - /* Scan the soft ACK table and resend any explicitly NAK'd packets. */ - if (ack_skb) { - sp = rxrpc_skb(ack_skb); - ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); + /* Scan the transmission queue, looking for explicitly NAK'd packets. 
*/ + do { + unsigned long naks = ~tq->segment_acked; + rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1; - for (int i = 0; i < sp->ack.nr_acks; i++) { - rxrpc_seq_t aseq; + if (after(tq->qbase, call->tx_transmitted)) + break; - if (ack->acks[i] & 1) - continue; - aseq = sp->ack.first_ack + i; - while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE)) - tq = tq->next; - seq = aseq; - qix = seq - tq->qbase; - txb = tq->bufs[qix]; - if (after(seq, transmitted)) - goto no_further_resend; + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) + naks &= (1UL << tq->nr_reported_acks) - 1; - resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); - resend_at = ktime_add(resend_at, rto); - if (after(txb->serial, call->acks_highest_serial)) { - if (ktime_after(resend_at, req.now) && - ktime_before(resend_at, next_resend)) - next_resend = resend_at; + _debug("retr %16lx %u c=%08x [%x]", + tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase); + _debug("nack %16lx", naks); + + while (naks) { + unsigned int ix = __ffs(naks); + struct rxrpc_txbuf *txb = tq->bufs[ix]; + + __clear_bit(ix, &naks); + if (after(txb->serial, call->acks_highest_serial)) continue; /* Ack point not yet reached */ - } rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); req.tq = tq; - req.seq = seq; + req.seq = tq->qbase + ix; req.n = 1; - rxrpc_retransmit_data(call, &req, rto); - - if (after_eq(seq, call->tx_top)) - goto no_further_resend; - } - } - - /* Fast-forward through the Tx queue to the point the peer says it has - * seen. Anything between the soft-ACK table and that point will get - * ACK'd or NACK'd in due course, so don't worry about it here; here we - * need to consider retransmitting anything beyond that point. 
- */ - seq = call->acks_prev_seq; - if (after_eq(seq, call->tx_transmitted)) - goto no_further_resend; - seq++; - - while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE)) - tq = tq->next; - - while (before_eq(seq, call->tx_transmitted)) { - qix = seq - tq->qbase; - if (qix >= RXRPC_NR_TXQUEUE) { - tq = tq->next; - continue; - } - txb = tq->bufs[qix]; - resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); - resend_at = ktime_add(resend_at, rto); - - if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && - before(txb->serial, ntohl(ack->serial))) - goto do_resend; /* Wasn't accounted for by a more recent ping. */ - - if (ktime_after(resend_at, req.now)) { - if (ktime_before(resend_at, next_resend)) - next_resend = resend_at; - seq++; - continue; + rxrpc_retransmit_data(call, &req, rto, false); } - do_resend: - unacked = true; + /* Anything after the soft-ACK table up to and including + * ack.previousPacket will get ACK'd or NACK'd in due course, + * so don't worry about those here. We do, however, need to + * consider retransmitting anything beyond that point. + */ + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE && + after(tq_top, call->acks_prev_seq)) { + rxrpc_seq_t start = latest(call->acks_prev_seq, + tq->qbase + tq->nr_reported_acks); + rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); - req.tq = tq; - req.seq = seq; - req.n = 1; - rxrpc_retransmit_data(call, &req, rto); - seq++; - } + _debug("unrep %x-%x", start, stop); + for (rxrpc_seq_t seq = start; before(seq, stop); seq++) { + struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; -no_further_resend: -no_resend: - if (resend_at < KTIME_MAX) { - delay = rxrpc_get_rto_backoff(call->peer, did_send); - resend_at = ktime_add(resend_at, delay); + if (ping_response && + before(txb->serial, call->acks_highest_serial)) + break; /* Wasn't accounted for by a more recent ping. 
*/ + req.tq = tq; + req.seq = seq; + req.n = 1; + if (rxrpc_retransmit_data(call, &req, rto, true)) + unacked = true; + } + } + + /* Work out the next retransmission timeout. */ + if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) { + unsigned int lowest_us = UINT_MAX; + + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + if (!test_bit(i, &tq->segment_acked) && + tq->segment_xmit_ts[i] < lowest_us) + lowest_us = tq->segment_xmit_ts[i]; + _debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us); + + if (lowest_us != UINT_MAX) { + ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us); + + if (ktime_before(lowest_ns, lowest_xmit_ts)) + lowest_xmit_ts = lowest_ns; + } + } + } while ((tq = tq->next)); + + if (lowest_xmit_ts < KTIME_MAX) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send); + ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); + + _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); + call->resend_at = resend_at; trace_rxrpc_timer_set(call, resend_at - req.now, rxrpc_timer_trace_resend_reset); + } else { + call->resend_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); } - call->resend_at = resend_at; if (unacked) rxrpc_congestion_timeout(call); @@ -494,7 +487,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) - rxrpc_resend(call, NULL); + rxrpc_resend(call, 0, false); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a9682b31a4f9..bba058055c97 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -691,7 +691,6 @@ static void rxrpc_destroy_call(struct work_struct *work) del_timer_sync(&call->timer); - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); rxrpc_cleanup_tx_buffers(call); rxrpc_cleanup_rx_buffers(call); 
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c25d816aafee..6e7ff133b5aa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -34,8 +34,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, rxrpc_serial_t acked_serial) { - bool resend = false; - summary->change = rxrpc_cong_no_change; summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; @@ -52,12 +50,13 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } call->cong_cumul_acks += summary->nr_new_sacks; + call->cong_cumul_acks += summary->nr_new_hacks; if (call->cong_cumul_acks > 255) call->cong_cumul_acks = 255; switch (call->cong_ca_state) { case RXRPC_CA_SLOW_START: - if (summary->saw_snacks) + if (call->acks_nr_snacks > 0) goto packet_loss_detected; if (call->cong_cumul_acks > 0) call->cong_cwnd += 1; @@ -68,7 +67,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CA_CONGEST_AVOIDANCE: - if (summary->saw_snacks) + if (call->acks_nr_snacks > 0) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT @@ -87,7 +86,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CA_PACKET_LOSS: - if (!summary->saw_snacks) + if (call->acks_nr_snacks == 0) goto resume_normality; if (summary->new_low_snack) { @@ -108,7 +107,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; goto out; case RXRPC_CA_FAST_RETRANSMIT: @@ -119,12 +118,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_dup_acks == 2) { summary->change = rxrpc_cong_retransmit_again; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; } } else { summary->change = rxrpc_cong_progress; 
call->cong_cwnd = call->cong_ssthresh; - if (!summary->saw_snacks) + if (call->acks_nr_snacks == 0) goto resume_normality; } goto out; @@ -149,8 +148,6 @@ out_no_clear_ca: if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) call->cong_cwnd = RXRPC_TX_MAX_WINDOW; trace_rxrpc_congest(call, summary, acked_serial); - if (resend) - rxrpc_resend(call, skb); return; packet_loss_detected: @@ -212,6 +209,13 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, trace_rxrpc_tx_rotate(call, seq, to); trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); + if (call->acks_lowest_nak == call->tx_bottom) { + call->acks_lowest_nak = to; + } else if (after(to, call->acks_lowest_nak)) { + summary->new_low_snack = true; + call->acks_lowest_nak = to; + } + /* We may have a left over fully-consumed buffer at the front that we * couldn't drop before (rotate_and_keep below). */ @@ -231,6 +235,25 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } + + if (ix == tq->nr_reported_acks) { + /* Packet directly hard ACK'd. */ + tq->nr_reported_acks++; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack); + } else if (test_bit(ix, &tq->segment_acked)) { + /* Soft ACK -> hard ACK. */ + call->acks_nr_sacks--; + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_sack); + } else { + /* Soft NAK -> hard ACK. 
*/ + call->acks_nr_snacks--; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak); + } + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); tq->bufs[ix] = NULL; @@ -268,13 +291,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last); - if (call->acks_lowest_nak == call->tx_bottom) { - call->acks_lowest_nak = to; - } else if (after(to, call->acks_lowest_nak)) { - summary->new_low_snack = true; - call->acks_lowest_nak = to; - } - wake_up(&call->waitq); return rot_last; } @@ -293,11 +309,6 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, call->resend_at = KTIME_MAX; trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - if (unlikely(call->cong_last_nack)) { - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } - switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: @@ -770,40 +781,92 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb wake_up(&call->waitq); } +#if defined(CONFIG_X86) && __GNUC__ && !defined(__clang__) +/* Clang doesn't support the %z constraint modifier */ +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + asm(" shr%z1 %1\n" \ + " inc %0\n" \ + " rcr%z2 %2\n" \ + : "+d"(shift_from), "+m"(*(shift_from)), "+rm"(rotate_into) \ + ); \ + }) +#else +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + typeof(rotate_into) __bit0 = *(shift_from) & 1; \ + *(shift_from) >>= 1; \ + shift_from++; \ + rotate_into >>= 1; \ + rotate_into |= __bit0 << (sizeof(rotate_into) * 8 - 1); \ + }) +#endif + /* - * Determine how many nacks from the previous ACK have now been satisfied. + * Process a batch of soft ACKs specific to a transmission queue segment. 
*/ -static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, - struct rxrpc_ack_summary *summary, - rxrpc_seq_t hard_ack) +static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long extracted_acks, + int nr_reported, + rxrpc_seq_t seq, + rxrpc_seq_t *lowest_nak) { - struct sk_buff *skb = call->cong_last_nack; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t seq = hard_ack + 1, old_seq = sp->ack.first_ack; - u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a; + int new, a, n; - if (after_eq(seq, old_seq + sp->ack.nr_acks)) { - summary->nr_new_sacks += sp->ack.nr_nacks; - summary->nr_new_sacks += seq - (old_seq + sp->ack.nr_acks); - summary->nr_retained_snacks = 0; - } else if (seq == old_seq) { - summary->nr_retained_snacks = sp->ack.nr_nacks; - } else { - for (i = 0; i < sp->ack.nr_acks; i++) { - if (acks[i] == RXRPC_ACK_TYPE_NACK) { - if (before(old_seq + i, seq)) - new_acks++; - else - retained_nacks++; - } - } + old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + _enter("{%x,%lx,%d},%lx,%d,%x", + tq->qbase, tq->segment_acked, tq->nr_reported_acks, + extracted_acks, nr_reported, seq); - summary->nr_new_sacks += new_acks; - summary->nr_retained_snacks = retained_nacks; + _debug("[%x]", tq->qbase); + _debug("tq %16lx %u", tq->segment_acked, tq->nr_reported_acks); + _debug("sack %16lx %u", extracted_acks, nr_reported); + + /* See how many previously logged ACKs/NAKs have flipped. 
*/ + flipped = (tq->segment_acked ^ extracted_acks) & old_reported; + if (flipped) { + n_to_a = ~tq->segment_acked & flipped; /* Old NAK -> ACK */ + a_to_n = tq->segment_acked & flipped; /* Old ACK -> NAK */ + a = hweight_long(n_to_a); + n = hweight_long(a_to_n); + _debug("flip %16lx", flipped); + _debug("ntoa %16lx %d", n_to_a, a); + _debug("aton %16lx %d", a_to_n, n); + call->acks_nr_sacks += a - n; + call->acks_nr_snacks += n - a; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; } - return old_seq + sp->ack.nr_acks - 1; + /* See how many new ACKs/NAKs have been acquired. */ + new = nr_reported - tq->nr_reported_acks; + if (new > 0) { + new_acks = extracted_acks & ~old_reported; + if (new_acks) { + a = hweight_long(new_acks); + n = new - a; + _debug("new_a %16lx new=%d a=%d n=%d", new_acks, new, a, n); + call->acks_nr_sacks += a; + call->acks_nr_snacks += n; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; + } else { + call->acks_nr_snacks += new; + summary->nr_new_snacks += new; + } + } + + tq->nr_reported_acks = nr_reported; + tq->segment_acked = extracted_acks; + trace_rxrpc_apply_acks(call, tq); + + if (extracted_acks != ~0UL) { + rxrpc_seq_t lowest = seq + ffz(extracted_acks); + + if (before(lowest, *lowest_nak)) + *lowest_nak = lowest; + } } /* @@ -817,39 +880,50 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, */ static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - struct sk_buff *skb, - rxrpc_seq_t since) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, old_nacks = 0; - rxrpc_seq_t lowest_nak = call->acks_hard_ack + sp->ack.nr_acks + 1; - rxrpc_seq_t seq = call->acks_hard_ack; + struct rxrpc_txqueue *tq = call->tx_queue; + unsigned long extracted = ~0UL; + unsigned int nr = 0; + rxrpc_seq_t seq = call->acks_hard_ack + 1; + rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + 
sizeof(struct rxrpc_ackpacket); - for (i = 0; i < sp->ack.nr_acks; i++) { - seq++; - if (acks[i] == RXRPC_ACK_TYPE_ACK) { - call->acks_nr_sacks++; - if (after(seq, since)) - summary->nr_new_sacks++; - } else { - summary->saw_snacks = true; - if (before_eq(seq, since)) { - /* Overlap with previous ACK */ - old_nacks++; - } else { - summary->nr_new_snacks++; - sp->ack.nr_nacks++; - } + _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks); - if (before(seq, lowest_nak)) - lowest_nak = seq; + while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1)) + tq = tq->next; + + for (unsigned int i = 0; i < sp->ack.nr_acks; i++) { + /* Decant ACKs until we hit a txqueue boundary. */ + shiftr_adv_rotr(acks, extracted); + if (i == 256) { + acks -= i; + i = 0; } + seq++; + nr++; + if ((seq & RXRPC_TXQ_MASK) != 0) + continue; + + _debug("bound %16lx %u", extracted, nr); + + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, + seq - RXRPC_NR_TXQUEUE, &lowest_nak); + extracted = ~0UL; + nr = 0; + tq = tq->next; + prefetch(tq); } - if (lowest_nak != call->acks_lowest_nak) { - call->acks_lowest_nak = lowest_nak; - summary->new_low_snack = true; + if (nr) { + unsigned int nr_reported = seq & RXRPC_TXQ_MASK; + + extracted >>= RXRPC_NR_TXQUEUE - nr_reported; + _debug("tail %16lx %u", extracted, nr_reported); + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, nr_reported, + seq & ~RXRPC_TXQ_MASK, &lowest_nak); } /* We *can* have more nacks than we did - the peer is permitted to drop @@ -857,9 +931,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * possible for the nack distribution to change whilst the number of * nacks stays the same or goes down. 
*/ - if (old_nacks < summary->nr_retained_snacks) - summary->nr_new_sacks += summary->nr_retained_snacks - old_nacks; - summary->nr_retained_snacks = old_nacks; + if (lowest_nak != call->acks_lowest_nak) { + call->acks_lowest_nak = lowest_nak; + summary->new_low_snack = true; + } + + _debug("summary A=%d+%d N=%d+%d", + call->acks_nr_sacks, summary->nr_new_sacks, + call->acks_nr_snacks, summary->nr_new_snacks); } /* @@ -902,7 +981,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_acktrailer trailer; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_serial_t ack_serial, acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -920,6 +999,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) trace_rxrpc_rx_ack(call, sp); rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); + prefetch(call->tx_queue); if (acked_serial != 0) { switch (summary.ack_reason) { @@ -980,16 +1060,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); - if (call->cong_last_nack) { - since = rxrpc_input_check_prev_ack(call, &summary, hard_ack); - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } else { - summary.nr_new_sacks = hard_ack - call->acks_hard_ack; - call->acks_lowest_nak = hard_ack + nr_acks; - since = hard_ack; - } - call->acks_latest_ts = skb->tstamp; call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; @@ -1037,9 +1107,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, &summary, skb, since); - rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); - call->cong_last_nack = skb; + rxrpc_input_soft_acks(call, &summary, 
skb); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && @@ -1049,6 +1117,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); + if (summary.need_retransmit) + rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); send_response: if (summary.ack_reason == RXRPC_ACK_PING) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 7ed928b6f0e1..978c2dc6a7d4 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -461,7 +461,7 @@ dont_set_request_ack: len += sizeof(*jumbo); } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags | flags, false); kv->iov_len = len; return len; } @@ -522,6 +522,13 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ + if (call->peer->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + + call->ack_lost_at = ktime_add(req->now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + } + if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); @@ -596,6 +603,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req ret = 0; trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, true); + conn->peer->last_tx_at = ktime_get_seconds(); goto done; } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index dfbf9f4b24b6..381b25597f4e 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -299,6 +299,9 @@ static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call) kfree(tq); return -ENOMEM; } else { + /* We start at seq 1, so pretend seq 0 is hard-acked. 
*/ + tq->nr_reported_acks = 1; + tq->segment_acked = 1UL; tq->qbase = 0; call->tx_qbase = 0; call->send_queue = tq; From dcdff0d8e3b61033b28c72926997d458949fcc05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:52 +0000 Subject: [PATCH 0249/1386] rxrpc: Store the DATA serial in the txqueue and use this in RTT calc Store the serial number set on a DATA packet at the point of transmission in the rxrpc_txqueue struct and when an ACK is received, match the reference number in the ACK by trawling the txqueue rather than sharing an RTT table with ACK RTT. This can be done as part of Tx queue rotation. This means we have a lot more RTT samples available and is faster to search with all the serial numbers packed together into a few cachelines rather than being hung off different txbufs. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-25-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 14 ++---- net/rxrpc/ar-internal.h | 4 ++ net/rxrpc/call_event.c | 8 +-- net/rxrpc/input.c | 94 +++++++++++++++++++++++------------- net/rxrpc/output.c | 6 ++- 5 files changed, 79 insertions(+), 47 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 609522a5bd0f..798bea0853c4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -337,11 +337,10 @@ E_(rxrpc_rtt_tx_ping, "PING") #define rxrpc_rtt_rx_traces \ - EM(rxrpc_rtt_rx_other_ack, "OACK") \ + EM(rxrpc_rtt_rx_data_ack, "DACK") \ EM(rxrpc_rtt_rx_obsolete, "OBSL") \ EM(rxrpc_rtt_rx_lost, "LOST") \ - EM(rxrpc_rtt_rx_ping_response, "PONG") \ - E_(rxrpc_rtt_rx_requested_ack, "RACK") + E_(rxrpc_rtt_rx_ping_response, "PONG") #define rxrpc_timer_traces \ EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \ @@ -1695,10 +1694,9 @@ TRACE_EVENT(rxrpc_retransmit, ); TRACE_EVENT(rxrpc_congest, - TP_PROTO(struct rxrpc_call *call, struct 
rxrpc_ack_summary *summary, - rxrpc_serial_t ack_serial), + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), - TP_ARGS(call, summary, ack_serial), + TP_ARGS(call, summary), TP_STRUCT__entry( __field(unsigned int, call) @@ -1706,7 +1704,6 @@ TRACE_EVENT(rxrpc_congest, __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, top) __field(rxrpc_seq_t, lowest_nak) - __field(rxrpc_serial_t, ack_serial) __field(u16, nr_sacks) __field(u16, nr_snacks) __field(u16, cwnd) @@ -1722,7 +1719,6 @@ TRACE_EVENT(rxrpc_congest, __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; - __entry->ack_serial = ack_serial; __entry->nr_sacks = call->acks_nr_sacks; __entry->nr_snacks = call->acks_nr_snacks; __entry->cwnd = call->cong_cwnd; @@ -1734,7 +1730,7 @@ TRACE_EVENT(rxrpc_congest, TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, - __entry->ack_serial, + __entry->sum.acked_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), __entry->hard_ack, __print_symbolic(__entry->ca_state, rxrpc_ca_states), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9a70f0b86570..297be421639c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -769,6 +769,7 @@ struct rxrpc_call { * Summary of a new ACK and the changes it made to the Tx buffer packet states. 
*/ struct rxrpc_ack_summary { + rxrpc_serial_t acked_serial; /* Serial number ACK'd */ u16 in_flight; /* Number of unreceived transmissions */ u16 nr_new_hacks; /* Number of rotated new ACKs */ u16 nr_new_sacks; /* Number of new soft ACKs in packet */ @@ -777,6 +778,7 @@ struct rxrpc_ack_summary { bool new_low_snack:1; /* T if new low soft NACK found */ bool retrans_timeo:1; /* T if reTx due to timeout happened */ bool need_retransmit:1; /* T if we need transmission */ + bool rtt_sample_avail:1; /* T if RTT sample available */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -859,12 +861,14 @@ struct rxrpc_txqueue { unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ + unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ /* The arrays we want to pack into as few cache lines as possible. */ struct { #define RXRPC_NR_TXQUEUE BITS_PER_LONG #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; + unsigned int segment_serial[RXRPC_NR_TXQUEUE]; unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; } ____cacheline_aligned; }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e25921d39d4d..f71773b18e22 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -159,11 +159,11 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); _debug("unrep %x-%x", start, stop); - for (rxrpc_seq_t seq = start; before(seq, stop); seq++) { - struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; + for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) { + rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK]; if (ping_response && - before(txb->serial, call->acks_highest_serial)) + before(serial, call->acks_highest_serial)) break; /* Wasn't accounted for by a more 
recent ping. */ req.tq = tq; req.seq = seq; @@ -198,7 +198,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); call->resend_at = resend_at; - trace_rxrpc_timer_set(call, resend_at - req.now, + trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now), rxrpc_timer_trace_resend_reset); } else { call->resend_at = KTIME_MAX; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6e7ff133b5aa..41b4fb56f96c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -30,9 +30,7 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, * Do TCP-style congestion management [RFC 5681]. */ static void rxrpc_congestion_management(struct rxrpc_call *call, - struct sk_buff *skb, - struct rxrpc_ack_summary *summary, - rxrpc_serial_t acked_serial) + struct rxrpc_ack_summary *summary) { summary->change = rxrpc_cong_no_change; summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; @@ -44,7 +42,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_cwnd >= call->cong_ssthresh && call->cong_ca_state == RXRPC_CA_SLOW_START) { call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; call->cong_cumul_acks = 0; } } @@ -62,7 +60,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_cwnd += 1; if (call->cong_cwnd >= call->cong_ssthresh) { call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; } goto out; @@ -75,12 +73,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, */ if (call->peer->rtt_count == 0) goto out; - if (ktime_before(skb->tstamp, + if (ktime_before(call->acks_latest_ts, ktime_add_us(call->cong_tstamp, call->peer->srtt_us >> 3))) goto out_no_clear_ca; summary->change = rxrpc_cong_rtt_window_end; - call->cong_tstamp = 
skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; if (call->cong_cumul_acks >= call->cong_cwnd) call->cong_cwnd++; goto out; @@ -137,7 +135,7 @@ resume_normality: summary->change = rxrpc_cong_cleared_nacks; call->cong_dup_acks = 0; call->cong_extra = 0; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; if (call->cong_cwnd < call->cong_ssthresh) call->cong_ca_state = RXRPC_CA_SLOW_START; else @@ -147,7 +145,7 @@ out: out_no_clear_ca: if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) call->cong_cwnd = RXRPC_TX_MAX_WINDOW; - trace_rxrpc_congest(call, summary, acked_serial); + trace_rxrpc_congest(call, summary); return; packet_loss_detected: @@ -194,11 +192,29 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); } +/* + * Add an RTT sample derived from an ACK'd DATA packet. + */ +static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + int ix, + rxrpc_serial_t ack_serial) +{ + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, + summary->acked_serial, ack_serial, + ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), + call->acks_latest_ts); + summary->rtt_sample_avail = false; + __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ +} + /* * Apply a hard ACK by advancing the Tx window. 
*/ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, - struct rxrpc_ack_summary *summary) + struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial) { struct rxrpc_txqueue *tq = call->tx_queue; rxrpc_seq_t seq = call->tx_bottom + 1; @@ -236,6 +252,11 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rot_last = true; } + if (summary->rtt_sample_avail && + summary->acked_serial == tq->segment_serial[ix] && + test_bit(ix, &tq->rtt_samples)) + rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); + if (ix == tq->nr_reported_acks) { /* Packet directly hard ACK'd. */ tq->nr_reported_acks++; @@ -348,7 +369,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - if (!rxrpc_rotate_tx_window(call, top, &summary)) { + if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) { rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } @@ -800,6 +821,19 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb }) #endif +/* + * Deal with RTT samples from soft ACKs. + */ +static void rxrpc_input_soft_rtt(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + rxrpc_serial_t ack_serial) +{ + for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++) + if (summary->acked_serial == tq->segment_serial[ix]) + return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); +} + /* * Process a batch of soft ACKs specific to a transmission queue segment. 
*/ @@ -909,6 +943,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, _debug("bound %16lx %u", extracted, nr); + if (summary->rtt_sample_avail) + rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial); rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, seq - RXRPC_NR_TXQUEUE, &lowest_nak); extracted = ~0UL; @@ -980,7 +1016,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_acktrailer trailer; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_serial_t ack_serial, acked_serial; + rxrpc_serial_t ack_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -989,11 +1025,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); ack_serial = sp->hdr.serial; - acked_serial = sp->ack.acked_serial; first_soft_ack = sp->ack.first_ack; prev_pkt = sp->ack.prev_ack; nr_acks = sp->ack.nr_acks; hard_ack = first_soft_ack - 1; + summary.acked_serial = sp->ack.acked_serial; summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? 
sp->ack.reason : RXRPC_ACK__INVALID); @@ -1001,21 +1037,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); prefetch(call->tx_queue); - if (acked_serial != 0) { - switch (summary.ack_reason) { - case RXRPC_ACK_PING_RESPONSE: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_ping_response); - break; - case RXRPC_ACK_REQUESTED: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_requested_ack); - break; - default: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_other_ack); - break; - } + if (summary.acked_serial != 0) { + if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial, + ack_serial, rxrpc_rtt_rx_ping_response); + else + summary.rtt_sample_avail = true; } /* If we get an EXCEEDS_WINDOW ACK from the server, it probably @@ -1068,8 +1095,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_ACK_PING: break; default: - if (acked_serial && after(acked_serial, call->acks_highest_serial)) - call->acks_highest_serial = acked_serial; + if (summary.acked_serial && + after(summary.acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = summary.acked_serial; break; } @@ -1098,7 +1126,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->tx_bottom)) { - if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; } @@ -1116,7 +1144,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ping(call, ack_serial, rxrpc_propose_ack_ping_for_lost_reply); - 
rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, &summary); if (summary.need_retransmit) rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); @@ -1136,7 +1164,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0)) rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 978c2dc6a7d4..20bf45317264 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -436,7 +436,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; - rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); call->peer->rtt_last_req = req->now; } dont_set_request_ack: @@ -508,6 +508,10 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); tq->segment_xmit_ts[ix] = xmit_ts; + tq->segment_serial[ix] = serial; + if (i + 1 == req->n) + /* Only sample the last subpacket in a jumbo. */ + __set_bit(ix, &tq->rtt_samples); len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); serial++; seq++; From 7903d4438b3f50b2f44af1ce4560631e0e0a9779 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:53 +0000 Subject: [PATCH 0250/1386] rxrpc: Don't use received skbuff timestamps Don't use received skbuff timestamps, but rather set a timestamp when an ack is processed so that the time taken to get to rxrpc_input_ack() is included in the RTT. The timestamp of the latest ACK received is tracked in call->acks_latest_ts. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-26-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- net/rxrpc/input.c | 19 ++++++++++--------- net/rxrpc/local_object.c | 3 --- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 41b4fb56f96c..c682e95e15dc 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1037,14 +1037,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); prefetch(call->tx_queue); - if (summary.acked_serial != 0) { - if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial, - ack_serial, rxrpc_rtt_rx_ping_response); - else - summary.rtt_sample_avail = true; - } - /* If we get an EXCEEDS_WINDOW ACK from the server, it probably * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. @@ -1087,7 +1079,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); - call->acks_latest_ts = skb->tstamp; + call->acks_latest_ts = ktime_get_real(); call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; @@ -1108,6 +1100,15 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); + if (summary.acked_serial != 0) { + if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_complete_rtt_probe(call, call->acks_latest_ts, + summary.acked_serial, ack_serial, + rxrpc_rtt_rx_ping_response); + else + summary.rtt_sample_avail = true; + } + /* Ignore ACKs unless we are or have just been transmitting. 
*/ switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 2792d2304605..a74a4b43904f 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -215,9 +215,6 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) /* we want to set the don't fragment bit */ rxrpc_local_dont_fragment(local, true); - - /* We want receive timestamps. */ - sock_enable_timestamps(usk); break; default: From c637bd066841de6d0a204898a62f1d9bb8fa1b7f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:54 +0000 Subject: [PATCH 0251/1386] rxrpc: Generate rtt_min Generate rtt_min as this is required by RACK-TLP. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-27-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- lib/win_minmax.c | 1 + net/rxrpc/ar-internal.h | 2 ++ net/rxrpc/rtt.c | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/lib/win_minmax.c b/lib/win_minmax.c index ec10506834b6..1682e614309c 100644 --- a/lib/win_minmax.c +++ b/lib/win_minmax.c @@ -97,3 +97,4 @@ u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) return minmax_subwin_update(m, win, &val); } +EXPORT_SYMBOL(minmax_running_min); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 297be421639c..d0d0ab453909 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -371,6 +371,8 @@ struct rxrpc_peer { spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ unsigned int rtt_count; /* Number of samples we've got */ + unsigned int rtt_taken; /* Number of samples taken (wrapping) */ + struct minmax min_rtt; /* Estimated minimum RTT */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ diff --git a/net/rxrpc/rtt.c 
b/net/rxrpc/rtt.c index 6dc51486b5a6..8048467f4bee 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -127,16 +127,27 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) peer->rto_us = rxrpc_bound_rto(rto); } -static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +static void rxrpc_update_rtt_min(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) +{ + /* Window size 5mins in approx usec (ipv4.sysctl_tcp_min_rtt_wlen) */ + u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024; + + minmax_running_min(&peer->min_rtt, wlen_us, resp_time / 1024, + (u32)rtt_us ? : jiffies_to_usecs(1)); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) { if (rtt_us < 0) return; - //rxrpc_update_rtt_min(peer, rtt_us); + /* Update RACK min RTT [RFC8985 6.1 Step 1]. */ + rxrpc_update_rtt_min(peer, resp_time, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); rxrpc_set_rto(peer); - /* RFC6298: only reset backoff on valid RTT measurement. */ + /* Only reset backoff on valid RTT measurement [RFC6298]. */ peer->backoff = 0; } @@ -157,9 +168,10 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, return; spin_lock(&peer->rtt_input_lock); - rxrpc_ack_update_rtt(peer, rtt_us); + rxrpc_ack_update_rtt(peer, resp_time, rtt_us); if (peer->rtt_count < 3) peer->rtt_count++; + peer->rtt_taken++; spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, From 93dfca65a1df42a3c8b1094299dc42ab8f18e5c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:55 +0000 Subject: [PATCH 0252/1386] rxrpc: Adjust the rxrpc_rtt_rx tracepoint Adjust the rxrpc_rtt_rx tracepoint in the following ways: (1) Display the collected RTT sample in the rxrpc_rtt_rx trace. (2) Move the division of srtt by 8 to the TP_printk() rather than doing it before invoking the trace point. (3) Display the min_rtt value.
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 14 ++++++++++---- net/rxrpc/input.c | 4 ++-- net/rxrpc/rtt.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 798bea0853c4..6e929f4448ac 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1415,9 +1415,9 @@ TRACE_EVENT(rxrpc_rtt_rx, TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - u32 rtt, u32 rto), + u32 rtt, u32 srtt, u32 rto), - TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, rto), + TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, srtt, rto), TP_STRUCT__entry( __field(unsigned int, call) @@ -1426,7 +1426,9 @@ TRACE_EVENT(rxrpc_rtt_rx, __field(rxrpc_serial_t, send_serial) __field(rxrpc_serial_t, resp_serial) __field(u32, rtt) + __field(u32, srtt) __field(u32, rto) + __field(u32, min_rtt) ), TP_fast_assign( @@ -1436,17 +1438,21 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->send_serial = send_serial; __entry->resp_serial = resp_serial; __entry->rtt = rtt; + __entry->srtt = srtt; __entry->rto = rto; + __entry->min_rtt = minmax_get(&call->peer->min_rtt) ), - TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u rto=%u", + TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u", __entry->call, __entry->slot, __print_symbolic(__entry->why, rxrpc_rtt_rx_traces), __entry->send_serial, __entry->resp_serial, __entry->rtt, - __entry->rto) + __entry->srtt / 8, + __entry->rto, + __entry->min_rtt) ); TRACE_EVENT(rxrpc_timer_set, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c682e95e15dc..1eb9c22aba51 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -740,7 +740,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, */ if (after(acked_serial, orig_serial)) { trace_rxrpc_rtt_rx(call, 
rxrpc_rtt_rx_obsolete, i, - orig_serial, acked_serial, 0, 0); + orig_serial, acked_serial, 0, 0, 0); clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_wmb(); set_bit(i, &call->rtt_avail); @@ -748,7 +748,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, } if (!matched) - trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0); + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0, 0); } /* diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 8048467f4bee..e0b7d99854b4 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -175,7 +175,7 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, - peer->srtt_us >> 3, peer->rto_us); + rtt_us, peer->srtt_us, peer->rto_us); } /* From a3d7f46d983fb2ed528b9cceb457c067fe4277a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:56 +0000 Subject: [PATCH 0253/1386] rxrpc: Display userStatus in rxrpc_rx_ack trace Display the userStatus field from the Rx packet header in the rxrpc_rx_ack trace line. This is used for flow control purposes by FS.StoreData-type kafs RPC calls. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6e929f4448ac..7681c67f7d65 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1031,11 +1031,13 @@ TRACE_EVENT(rxrpc_rx_ack, __field(rxrpc_seq_t, prev) __field(u8, reason) __field(u8, n_acks) + __field(u8, user_status) ), TP_fast_assign( __entry->call = call->debug_id; __entry->serial = sp->hdr.serial; + __entry->user_status = sp->hdr.userStatus; __entry->ack_serial = sp->ack.acked_serial; __entry->first = sp->ack.first_ack; __entry->prev = sp->ack.prev_ack; @@ -1043,11 +1045,12 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks = sp->ack.nr_acks; ), - TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u", + TP_printk("c=%08x %08x %s r=%08x us=%02x f=%08x p=%08x n=%u", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), __entry->ack_serial, + __entry->user_status, __entry->first, __entry->prev, __entry->n_acks) From 5c0ceba23bb47085d6c9c53bff08a29634ee4e7e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:57 +0000 Subject: [PATCH 0254/1386] rxrpc: Fix the calculation and use of RTO Make the following changes to the calculation and use of RTO: (1) Fix rxrpc_resend() to use the backed-off RTO value obtained by calling rxrpc_get_rto_backoff() rather than extracting the value itself. Without this, it may retransmit packets too early. (2) The RTO value being similar to the RTT causes a lot of extraneous resends because the RTT doesn't end up taking account of clearing out of the receive queue on the server. Worse, responses to PING-ACKs are made as fast as possible and so are less than the DATA-requested-ACK RTT and so skew the RTT down. 
Fix this by putting a lower bound on the RTO by adding 100ms to it and limiting the lower end to 200ms. Fixes: c410bf01933e ("rxrpc: Fix the excessive initial retransmission timeout") Fixes: 37473e416234 ("rxrpc: Clean up the resend algorithm") Signed-off-by: David Howells Suggested-by: Simon Wilkinson cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/call_event.c | 3 ++- net/rxrpc/rtt.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f71773b18e22..4390c97e3ba6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -103,7 +103,8 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ .now = ktime_get_real(), }; struct rxrpc_txqueue *tq = call->tx_queue; - ktime_t lowest_xmit_ts = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); + ktime_t lowest_xmit_ts = KTIME_MAX; + ktime_t rto = rxrpc_get_rto_backoff(call->peer, false); bool unacked = false; _enter("{%d,%d}", call->tx_bottom, call->tx_top); diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index e0b7d99854b4..3f1ec8e420a6 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -27,7 +27,7 @@ static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) static u32 rxrpc_bound_rto(u32 rto) { - return umin(rto, RXRPC_RTO_MAX); + return clamp(200000, rto + 100000, RXRPC_RTO_MAX); } /* From 0130eff911b13e0ad5fc2eebd44833cacd5a8b0b Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:58 +0000 Subject: [PATCH 0255/1386] rxrpc: Fix initial resend timeout The constant for the initial resend timeout is in milliseconds, but the variable it's assigned to is in microseconds. Fix the constant to be in microseconds. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/rtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 3f1ec8e420a6..aff75e168de8 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -12,7 +12,7 @@ #include "ar-internal.h" #define RXRPC_RTO_MAX (120 * USEC_PER_SEC) -#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ +#define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) From fe24a5494390d22ff645fd201d2bf1669fa3aab1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:59 +0000 Subject: [PATCH 0256/1386] rxrpc: Send jumbo DATA packets Send jumbo DATA packets if the path-MTU probing using padded PING ACK packets shows up sufficient capacity to do so. This allows larger chunks of data to be sent without reducing the retryability as the subpackets in a jumbo packet can also be retransmitted individually. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 2 +- net/rxrpc/call_object.c | 1 + net/rxrpc/input.c | 3 +++ 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d0d0ab453909..1307749a1a74 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -710,6 +710,7 @@ struct rxrpc_call { u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ u8 tx_winsize; /* Maximum size of Tx window */ #define RXRPC_TX_MAX_WINDOW 128 + u8 tx_jumbo_max; /* Maximum subpkts peer will accept */ ktime_t tx_last_sent; /* Last time a transmission occurred */ /* Received data tracking */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 4390c97e3ba6..39772459426b 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -288,7 +288,7 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) struct rxrpc_txqueue *tq; struct rxrpc_txbuf *txb; rxrpc_seq_t send_top, seq; - int limit = min(space, 1); + int limit = min(space, max(call->peer->pmtud_jumbo, 1)); /* Order send_top before the contents of the new txbufs and * txqueue pointers diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index bba058055c97..e0644e9a8d21 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -155,6 +155,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; + call->tx_jumbo_max = 1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; call->ackr_window = 1; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 1eb9c22aba51..a7a249872a54 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -796,8 +796,11 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb peer->ackr_adv_pmtud = true; } else { peer->ackr_adv_pmtud 
= false; + capacity = clamp(capacity, 1, jumbo_max); } + call->tx_jumbo_max = capacity; + if (wake) wake_up(&call->waitq); } From 08d55d7cf3f33c730ce2694393efe16b7983a9c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:00 +0000 Subject: [PATCH 0257/1386] rxrpc: Don't allocate a txbuf for an ACK transmission Don't allocate an rxrpc_txbuf struct for an ACK transmission. There's now no need as the memory to hold the ACK content is allocated with a page frag allocator. The allocation and freeing of a txbuf is just unnecessary overhead. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 - net/rxrpc/ar-internal.h | 5 +- net/rxrpc/output.c | 260 +++++++++++++++++++++-------------- net/rxrpc/txbuf.c | 76 ---------- 4 files changed, 156 insertions(+), 187 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 7681c67f7d65..326a4c257aea 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -462,13 +462,11 @@ /* ---- Must update size of stat_why_req_ack[] if more are added! 
*/ #define rxrpc_txbuf_traces \ - EM(rxrpc_txbuf_alloc_ack, "ALLOC ACK ") \ EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \ EM(rxrpc_txbuf_free, "FREE ") \ EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \ EM(rxrpc_txbuf_get_trans, "GET TRANS ") \ EM(rxrpc_txbuf_get_retrans, "GET RETRANS") \ - EM(rxrpc_txbuf_put_ack_tx, "PUT ACK TX ") \ EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \ EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1307749a1a74..db93d7f78902 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -834,11 +834,9 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ - unsigned short ack_rwind; /* ACK receive window */ - u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ bool jumboable; /* Can be non-terminal jumbo subpacket */ u8 nr_kvec; /* Amount of kvec[] used */ - struct kvec kvec[3]; + struct kvec kvec[1]; }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) @@ -1364,7 +1362,6 @@ static inline void rxrpc_sysctl_exit(void) {} extern atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp); -struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size); void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 20bf45317264..a7de8a02f419 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -72,81 +72,50 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now) } /* - * Fill out an ACK packet. 
+ * Allocate transmission buffers for an ACK and attach them to local->kv[]. */ -static void rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_txbuf *txb, - u8 ack_reason, - rxrpc_serial_t serial) +static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3; - struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); - unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; - rxrpc_seq_t window, wtop; - int rsize; - u8 *filler = txb->kvec[2].iov_base; - u8 *sackp = txb->kvec[1].iov_base; + struct rxrpc_wire_header *whdr; + struct rxrpc_acktrailer *trailer; + struct rxrpc_ackpacket *ack; + struct kvec *kv = call->local->kvec; + gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; + void *buf, *buf2 = NULL; + u8 *filler; - rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); + buf = page_frag_alloc(&call->local->tx_alloc, + sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); + if (!buf) + return -ENOMEM; - window = call->ackr_window; - wtop = call->ackr_wtop; - sack = call->ackr_sack_base % RXRPC_SACK_SIZE; - - whdr->seq = 0; - whdr->type = RXRPC_PACKET_TYPE_ACK; - txb->flags |= RXRPC_SLOW_START_OK; - ack->bufferSpace = 0; - ack->maxSkew = 0; - ack->firstPacket = htonl(window); - ack->previousPacket = htonl(call->rx_highest_seq); - ack->serial = htonl(serial); - ack->reason = ack_reason; - ack->nAcks = wtop - window; - filler[0] = 0; - filler[1] = 0; - filler[2] = 0; - - if (ack_reason == RXRPC_ACK_PING) - txb->flags |= RXRPC_REQUEST_ACK; - - if (after(wtop, window)) { - txb->len += ack->nAcks; - txb->kvec[1].iov_base = sackp; - txb->kvec[1].iov_len = ack->nAcks; - - wrap = RXRPC_SACK_SIZE - sack; - to = umin(ack->nAcks, RXRPC_SACK_SIZE); - - if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { - memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); - } else { - memcpy(sackp, call->ackr_sack_table + 
sack, wrap); - memcpy(sackp + wrap, call->ackr_sack_table, to - wrap); + if (sack_size) { + buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); + if (!buf2) { + page_frag_free(buf); + return -ENOMEM; } - } else if (before(wtop, window)) { - pr_warn("ack window backward %x %x", window, wtop); - } else if (ack->reason == RXRPC_ACK_DELAY) { - ack->reason = RXRPC_ACK_IDLE; } - qsize = (window - 1) - call->rx_consumed; - rsize = max_t(int, call->rx_winsize - qsize, 0); - txb->ack_rwind = rsize; + whdr = buf; + ack = buf + sizeof(*whdr); + filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; + trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; - if_mtu = call->peer->if_mtu - call->peer->hdrsize; - if (call->peer->ackr_adv_pmtud) { - max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu); - } else { - if_mtu = umin(if_mtu, 1444); - max_mtu = if_mtu; - } + kv[0].iov_base = whdr; + kv[0].iov_len = sizeof(*whdr) + sizeof(*ack); + kv[1].iov_base = buf2; + kv[1].iov_len = sack_size; + kv[2].iov_base = filler; + kv[2].iov_len = 3 + sizeof(*trailer); + return 3; /* Number of kvec[] used. */ +} - trailer->maxMTU = htonl(max_mtu); - trailer->ifMTU = htonl(if_mtu); - trailer->rwind = htonl(rsize); - trailer->jumbo_max = 0; /* Advertise pmtu discovery */ +static void rxrpc_free_ack(struct rxrpc_call *call) +{ + page_frag_free(call->local->kvec[0].iov_base); + if (call->local->kvec[1].iov_base) + page_frag_free(call->local->kvec[1].iov_base); } /* @@ -178,17 +147,110 @@ no_slot: } /* - * Transmit an ACK packet. + * Fill out an ACK packet. 
*/ -static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - int nr_kv, enum rxrpc_propose_ack_trace why) +static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason, + rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial) { struct kvec *kv = call->local->kvec; struct rxrpc_wire_header *whdr = kv[0].iov_base; + struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; + struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); + unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; + rxrpc_seq_t window, wtop; + ktime_t now = ktime_get_real(); + int rsize; + u8 *filler = kv[2].iov_base; + u8 *sackp = kv[1].iov_base; + + rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); + + window = call->ackr_window; + wtop = call->ackr_wtop; + sack = call->ackr_sack_base % RXRPC_SACK_SIZE; + + *_ack_serial = rxrpc_get_next_serial(call->conn); + + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->serial = htonl(*_ack_serial); + whdr->seq = 0; + whdr->type = RXRPC_PACKET_TYPE_ACK; + whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->dest_srx.srx_service); + + ack->bufferSpace = 0; + ack->maxSkew = 0; + ack->firstPacket = htonl(window); + ack->previousPacket = htonl(call->rx_highest_seq); + ack->serial = htonl(serial_to_ack); + ack->reason = ack_reason; + ack->nAcks = wtop - window; + filler[0] = 0; + filler[1] = 0; + filler[2] = 0; + + if (ack_reason == RXRPC_ACK_PING) + whdr->flags |= RXRPC_REQUEST_ACK; + + if (after(wtop, window)) { + kv[1].iov_len = ack->nAcks; + + wrap = RXRPC_SACK_SIZE - sack; + to = umin(ack->nAcks, RXRPC_SACK_SIZE); + + if (sack + ack->nAcks <= RXRPC_SACK_SIZE) { + memcpy(sackp, call->ackr_sack_table + sack, ack->nAcks); + } else { + memcpy(sackp, call->ackr_sack_table + sack, wrap); + 
memcpy(sackp + wrap, call->ackr_sack_table, to - wrap); + } + } else if (before(wtop, window)) { + pr_warn("ack window backward %x %x", window, wtop); + } else if (ack->reason == RXRPC_ACK_DELAY) { + ack->reason = RXRPC_ACK_IDLE; + } + + qsize = (window - 1) - call->rx_consumed; + rsize = max_t(int, call->rx_winsize - qsize, 0); + + if_mtu = call->peer->if_mtu - call->peer->hdrsize; + if (call->peer->ackr_adv_pmtud) { + max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu); + } else { + if_mtu = umin(if_mtu, 1444); + max_mtu = if_mtu; + } + + trailer->maxMTU = htonl(max_mtu); + trailer->ifMTU = htonl(if_mtu); + trailer->rwind = htonl(rsize); + trailer->jumbo_max = 0; /* Advertise pmtu discovery */ + + if (ack_reason == RXRPC_ACK_PING) + rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping); + if (whdr->flags & RXRPC_REQUEST_ACK) + call->peer->rtt_last_req = now; + rxrpc_set_keepalive(call, now); + return nr_kv; +} + +/* + * Transmit an ACK packet. + */ +static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len, + rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) +{ + struct kvec *kv = call->local->kvec; + struct rxrpc_wire_header *whdr = kv[0].iov_base; + struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; struct rxrpc_connection *conn; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct msghdr msg; - ktime_t now; int ret; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -202,41 +264,31 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t msg.msg_controllen = 0; msg.msg_flags = MSG_SPLICE_PAGES; - whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; - - txb->serial = rxrpc_get_next_serial(conn); - whdr->serial = htonl(txb->serial); - trace_rxrpc_tx_ack(call->debug_id, txb->serial, + trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(ack->firstPacket), ntohl(ack->serial), ack->reason, ack->nAcks, - txb->ack_rwind); + ntohl(trailer->rwind)); rxrpc_inc_stat(call->rxnet, 
stat_tx_ack_send); - iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len); + iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len); rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); - ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { - trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, + trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); if (why == rxrpc_propose_ack_ping_for_mtu_probe && ret == -EMSGSIZE) - rxrpc_input_probe_for_pmtud(conn, txb->serial, true); + rxrpc_input_probe_for_pmtud(conn, serial, true); } else { trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); - now = ktime_get_real(); - if (ack->reason == RXRPC_ACK_PING) - rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping); - if (txb->flags & RXRPC_REQUEST_ACK) - call->peer->rtt_last_req = now; - rxrpc_set_keepalive(call, now); if (why == rxrpc_propose_ack_ping_for_mtu_probe) { call->peer->pmtud_pending = false; call->peer->pmtud_probing = true; - call->conn->pmtud_probe = txb->serial; + call->conn->pmtud_probe = serial; call->conn->pmtud_call = call->debug_id; trace_rxrpc_pmtud_tx(call); } @@ -248,10 +300,11 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t * Queue an ACK for immediate transmission. 
*/ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, - rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) + rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why) { - struct rxrpc_txbuf *txb; struct kvec *kv = call->local->kvec; + rxrpc_serial_t ack_serial; + size_t len; int nr_kv; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -259,32 +312,29 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); - txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window); - if (!txb) { + nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window); + if (nr_kv < 0) { kleave(" = -ENOMEM"); return; } - txb->ack_why = why; - - rxrpc_fill_out_ack(call, txb, ack_reason, serial); + nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial); + len = kv[0].iov_len; + len += kv[1].iov_len; + len += kv[2].iov_len; /* Extend a path MTU probe ACK. */ - nr_kv = txb->nr_kvec; - kv[0] = txb->kvec[0]; - kv[1] = txb->kvec[1]; - kv[2] = txb->kvec[2]; if (why == rxrpc_propose_ack_ping_for_mtu_probe) { size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header); - if (txb->len > probe_mtu) + if (len > probe_mtu) goto skip; - while (txb->len < probe_mtu) { - size_t part = umin(probe_mtu - txb->len, PAGE_SIZE); + while (len < probe_mtu) { + size_t part = umin(probe_mtu - len, PAGE_SIZE); kv[nr_kv].iov_base = page_address(ZERO_PAGE(0)); kv[nr_kv].iov_len = part; - txb->len += part; + len += part; nr_kv++; } } @@ -293,10 +343,10 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, atomic_set(&call->ackr_nr_consumed, 0); clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); - trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb, nr_kv, why); + trace_rxrpc_send_ack(call, why, ack_reason, ack_serial); + rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why); skip: - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); + 
rxrpc_free_ack(call); } /* diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 067223c8c35f..131d9e55c8e9 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -73,82 +73,6 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return txb; } -/* - * Allocate and partially initialise an ACK packet. - */ -struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size) -{ - struct rxrpc_wire_header *whdr; - struct rxrpc_acktrailer *trailer; - struct rxrpc_ackpacket *ack; - struct rxrpc_txbuf *txb; - gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; - void *buf, *buf2 = NULL; - u8 *filler; - - txb = kmalloc(sizeof(*txb), gfp); - if (!txb) - return NULL; - - buf = page_frag_alloc(&call->local->tx_alloc, - sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); - if (!buf) { - kfree(txb); - return NULL; - } - - if (sack_size) { - buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); - if (!buf2) { - page_frag_free(buf); - kfree(txb); - return NULL; - } - } - - whdr = buf; - ack = buf + sizeof(*whdr); - filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; - trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; - - refcount_set(&txb->ref, 1); - txb->call_debug_id = call->debug_id; - txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); - txb->space = 0; - txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer); - txb->offset = 0; - txb->flags = call->conn->out_clientflag; - txb->ack_rwind = 0; - txb->seq = 0; - txb->serial = 0; - txb->cksum = 0; - txb->nr_kvec = 3; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack); - txb->kvec[1].iov_base = buf2; - txb->kvec[1].iov_len = sack_size; - txb->kvec[2].iov_base = filler; - txb->kvec[2].iov_len = 3 + sizeof(*trailer); - - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = 0; - whdr->type = 
RXRPC_PACKET_TYPE_ACK; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); - - get_page(virt_to_head_page(trailer)); - - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, - rxrpc_txbuf_alloc_ack); - atomic_inc(&rxrpc_nr_txbuf); - return txb; -} - void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { int r; From a2ea9a9072607c2fd6442bd1ffb4dbdbf882aed7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:01 +0000 Subject: [PATCH 0258/1386] rxrpc: Use irq-disabling spinlocks between app and I/O thread Where a spinlock is used by both the application thread and the I/O thread, use irq-disabling locking so that an interrupt taken on the app thread doesn't also slow down the I/O thread. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/af_rxrpc.c | 4 ++-- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_accept.c | 20 ++++++++++---------- net/rxrpc/call_object.c | 15 +++++++-------- net/rxrpc/conn_client.c | 12 ++++++------ net/rxrpc/conn_event.c | 8 ++++---- net/rxrpc/conn_object.c | 8 ++++---- net/rxrpc/input.c | 5 +---- net/rxrpc/io_thread.c | 8 ++++---- net/rxrpc/peer_event.c | 8 ++++---- net/rxrpc/peer_object.c | 1 + net/rxrpc/recvmsg.c | 18 +++++++++--------- net/rxrpc/security.c | 4 ++-- net/rxrpc/sendmsg.c | 2 -- 14 files changed, 54 insertions(+), 60 deletions(-) diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 9d8bd0b37e41..86873399f7d5 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -408,9 +408,9 @@ void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call) /* Make sure we're not going to call back into a kernel service */ if (call->notify_rx) { - spin_lock(&call->notify_lock); + spin_lock_irq(&call->notify_lock); call->notify_rx = rxrpc_dummy_notify_rx; - 
spin_unlock(&call->notify_lock); + spin_unlock_irq(&call->notify_lock); } } mutex_unlock(&call->user_mutex); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index db93d7f78902..ffd80dc88f40 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -700,7 +700,6 @@ struct rxrpc_call { struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */ /* Transmitted data tracking. */ - spinlock_t tx_lock; /* Transmit queue lock */ struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */ struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */ rxrpc_seq_t tx_qbase; /* First slot in tx_queue */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index a6776b1604ba..e685034ce4f7 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -188,8 +188,8 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. */ - spin_lock(&rx->incoming_lock); - spin_unlock(&rx->incoming_lock); + spin_lock_irq(&rx->incoming_lock); + spin_unlock_irq(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; @@ -343,7 +343,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA) return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call); - read_lock(&local->services_lock); + read_lock_irq(&local->services_lock); /* Weed out packets to services we're not offering. 
Packets that would * begin a call are explicitly rejected and the rest are just @@ -399,12 +399,12 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, spin_unlock(&conn->state_lock); spin_unlock(&rx->incoming_lock); - read_unlock(&local->services_lock); + read_unlock_irq(&local->services_lock); if (hlist_unhashed(&call->error_link)) { - spin_lock(&call->peer->lock); + spin_lock_irq(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); - spin_unlock(&call->peer->lock); + spin_unlock_irq(&call->peer->lock); } _leave(" = %p{%d}", call, call->debug_id); @@ -413,20 +413,20 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, return true; unsupported_service: - read_unlock(&local->services_lock); + read_unlock_irq(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EOPNOTSUPP); unsupported_security: - read_unlock(&local->services_lock); + read_unlock_irq(&local->services_lock); return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered, RX_INVALID_OPERATION, -EKEYREJECTED); no_call: spin_unlock(&rx->incoming_lock); - read_unlock(&local->services_lock); + read_unlock_irq(&local->services_lock); _leave(" = f [%u]", skb->mark); return false; discard: - read_unlock(&local->services_lock); + read_unlock_irq(&local->services_lock); return true; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index e0644e9a8d21..75cd0b06e14c 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -49,7 +49,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) bool busy; if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) { - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke)) @@ -57,7 +57,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) if (!busy) { 
list_add_tail(&call->attend_link, &local->call_attend_q); } - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); if (!busy) rxrpc_wake_up_io_thread(local); } @@ -151,7 +151,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); - spin_lock_init(&call->tx_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; call->tx_total_len = -1; @@ -302,9 +301,9 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp) trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call); rxrpc_get_call(call, rxrpc_call_get_io_thread); - spin_lock(&local->client_call_lock); + spin_lock_irq(&local->client_call_lock); list_add_tail(&call->wait_link, &local->new_client_calls); - spin_unlock(&local->client_call_lock); + spin_unlock_irq(&local->client_call_lock); rxrpc_wake_up_io_thread(local); return 0; @@ -434,7 +433,7 @@ error_attached_to_socket: /* * Set up an incoming call. call->conn points to the connection. - * This is called in BH context and isn't allowed to fail. + * This is called with interrupts disabled and isn't allowed to fail. 
*/ void rxrpc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_call *call, @@ -576,7 +575,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_put_call_slot(call); /* Make sure we don't get any more notifications */ - spin_lock(&rx->recvmsg_lock); + spin_lock_irq(&rx->recvmsg_lock); if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", @@ -589,7 +588,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->recvmsg_link.next = NULL; call->recvmsg_link.prev = NULL; - spin_unlock(&rx->recvmsg_lock); + spin_unlock_irq(&rx->recvmsg_lock); if (put) rxrpc_put_call(call, rxrpc_call_put_unnotify); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 5f76bd90567c..db0099197890 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -510,9 +510,9 @@ void rxrpc_connect_client_calls(struct rxrpc_local *local) struct rxrpc_call *call; LIST_HEAD(new_client_calls); - spin_lock(&local->client_call_lock); + spin_lock_irq(&local->client_call_lock); list_splice_tail_init(&local->new_client_calls, &new_client_calls); - spin_unlock(&local->client_call_lock); + spin_unlock_irq(&local->client_call_lock); while ((call = list_first_entry_or_null(&new_client_calls, struct rxrpc_call, wait_link))) { @@ -547,9 +547,9 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); trace_rxrpc_client(conn, channel, rxrpc_client_exposed); - spin_lock(&call->peer->lock); + spin_lock_irq(&call->peer->lock); hlist_add_head(&call->error_link, &call->peer->error_targets); - spin_unlock(&call->peer->lock); + spin_unlock_irq(&call->peer->lock); } } @@ -590,9 +590,9 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call ASSERTCMP(call->call_id, ==, 0); ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); /* May still be on ->new_client_calls. 
*/ - spin_lock(&local->client_call_lock); + spin_lock_irq(&local->client_call_lock); list_del_init(&call->wait_link); - spin_unlock(&local->client_call_lock); + spin_unlock_irq(&local->client_call_lock); return; } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index f6c02cc44d98..6b29a294ee07 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -26,7 +26,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff bool aborted = false; if (conn->state != RXRPC_CONN_ABORTED) { - spin_lock(&conn->state_lock); + spin_lock_irq(&conn->state_lock); if (conn->state != RXRPC_CONN_ABORTED) { conn->abort_code = abort_code; conn->error = err; @@ -37,7 +37,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events); aborted = true; } - spin_unlock(&conn->state_lock); + spin_unlock_irq(&conn->state_lock); } return aborted; @@ -261,10 +261,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (ret < 0) return ret; - spin_lock(&conn->state_lock); + spin_lock_irq(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) conn->state = RXRPC_CONN_SERVICE; - spin_unlock(&conn->state_lock); + spin_unlock_irq(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE) { /* Offload call state flipping to the I/O thread. 
As diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index b0627398311b..7eba4d7d9a38 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -31,13 +31,13 @@ void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) if (WARN_ON_ONCE(!local)) return; - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); busy = !list_empty(&conn->attend_link); if (!busy) { rxrpc_get_connection(conn, why); list_add_tail(&conn->attend_link, &local->conn_attend_q); } - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); rxrpc_wake_up_io_thread(local); } @@ -196,9 +196,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { - spin_lock(&call->peer->lock); + spin_lock_irq(&call->peer->lock); hlist_del_init(&call->error_link); - spin_unlock(&call->peer->lock); + spin_unlock_irq(&call->peer->lock); } if (rxrpc_is_client_call(call)) { diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index a7a249872a54..821e10c03086 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -424,7 +424,7 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - __skb_queue_tail(&call->recvmsg_queue, skb); + skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? 
why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) @@ -501,7 +501,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg); - spin_lock(&call->recvmsg_queue.lock); rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue); *_notify = true; @@ -523,8 +522,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_receive_queue_oos); } - spin_unlock(&call->recvmsg_queue.lock); - call->ackr_sack_base = sack; } else { unsigned int slot; diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index bc678a299bd8..fbacf2056f64 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -500,9 +500,9 @@ int rxrpc_io_thread(void *data) } /* Deal with connections that want immediate attention. */ - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); while ((conn = list_first_entry_or_null(&conn_attend_q, struct rxrpc_connection, @@ -519,9 +519,9 @@ int rxrpc_io_thread(void *data) rxrpc_discard_expired_client_conns(local); /* Deal with calls that want immediate attention. 
*/ - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); list_splice_tail_init(&local->call_attend_q, &call_attend_q); - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); while ((call = list_first_entry_or_null(&call_attend_q, struct rxrpc_call, diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index ff30e0c05507..d82e44a3901b 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -213,23 +213,23 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, struct rxrpc_call *call; HLIST_HEAD(error_targets); - spin_lock(&peer->lock); + spin_lock_irq(&peer->lock); hlist_move_list(&peer->error_targets, &error_targets); while (!hlist_empty(&error_targets)) { call = hlist_entry(error_targets.first, struct rxrpc_call, error_link); hlist_del_init(&call->error_link); - spin_unlock(&peer->lock); + spin_unlock_irq(&peer->lock); rxrpc_see_call(call, rxrpc_call_see_distribute_error); rxrpc_set_call_completion(call, compl, 0, -err); rxrpc_input_call_event(call); - spin_lock(&peer->lock); + spin_lock_irq(&peer->lock); } - spin_unlock(&peer->lock); + spin_unlock_irq(&peer->lock); } /* diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 80ef6f06d512..27b34ed4d76a 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -320,6 +320,7 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer) * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context * (the data_ready handler) that is the only place we can add new peers. + * Called with interrupts disabled. 
*/ void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer) { diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a482f88c5fc5..32cd5f1d541d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { - spin_lock(&call->notify_lock); + spin_lock_irq(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); - spin_unlock(&call->notify_lock); + spin_unlock_irq(&call->notify_lock); } else { - spin_lock(&rx->recvmsg_lock); + spin_lock_irq(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { rxrpc_get_call(call, rxrpc_call_get_notify_socket); list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); } - spin_unlock(&rx->recvmsg_lock); + spin_unlock_irq(&rx->recvmsg_lock); if (!sock_flag(sk, SOCK_DEAD)) { _debug("call %ps", sk->sk_data_ready); @@ -337,14 +337,14 @@ try_again: * We also want to weed out calls that got requeued whilst we were * shovelling data out. 
*/ - spin_lock(&rx->recvmsg_lock); + spin_lock_irq(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!rxrpc_call_is_complete(call) && skb_queue_empty(&call->recvmsg_queue)) { list_del_init(&call->recvmsg_link); - spin_unlock(&rx->recvmsg_lock); + spin_unlock_irq(&rx->recvmsg_lock); release_sock(&rx->sk); trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); rxrpc_put_call(call, rxrpc_call_put_recvmsg); @@ -355,7 +355,7 @@ try_again: list_del_init(&call->recvmsg_link); else rxrpc_get_call(call, rxrpc_call_get_recvmsg); - spin_unlock(&rx->recvmsg_lock); + spin_unlock_irq(&rx->recvmsg_lock); call_debug_id = call->debug_id; trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0); @@ -445,9 +445,9 @@ error_unlock_call: error_requeue_call: if (!(flags & MSG_PEEK)) { - spin_lock(&rx->recvmsg_lock); + spin_lock_irq(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); - spin_unlock(&rx->recvmsg_lock); + spin_unlock_irq(&rx->recvmsg_lock); trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0); } else { rxrpc_put_call(call, rxrpc_call_put_recvmsg); diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index cb8dd1d3b1d4..9784adc8f275 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -114,10 +114,10 @@ found: if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) { ret = conn->security->init_connection_security(conn, token); if (ret == 0) { - spin_lock(&conn->state_lock); + spin_lock_irq(&conn->state_lock); if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) conn->state = RXRPC_CONN_CLIENT; - spin_unlock(&conn->state_lock); + spin_unlock_irq(&conn->state_lock); } } mutex_unlock(&conn->security_lock); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 381b25597f4e..df501a7c92fa 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -261,7 +261,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, trace_rxrpc_tq(call, sq, seq, 
rxrpc_tq_queue); /* Add the packet to the call's output buffer */ - spin_lock(&call->tx_lock); poke = (READ_ONCE(call->tx_bottom) == call->send_top); sq->bufs[ix] = txb; /* Order send_top after the queue->next pointer and txb content. */ @@ -270,7 +269,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, rxrpc_notify_end_tx(rx, call, notify_end_tx); call->send_queue = NULL; } - spin_unlock(&call->tx_lock); if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); From 547a9acd4c5e95190c6c93a6d8628c5b8b74a4d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:02 +0000 Subject: [PATCH 0259/1386] rxrpc: Tidy up the ACK parsing a bit Tidy up the ACK parsing in the following ways: (1) Put the serial number of the ACK packet into the rxrpc_ack_summary struct and access it from there whilst parsing an ACK. (2) Be consistent about using "if (summary.acked_serial)" rather than "if (summary.acked_serial != 0)". Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 55 +++++++++++++++++++---------------------- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ffd80dc88f40..aa240b4b4bec 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -771,6 +771,7 @@ struct rxrpc_call { * Summary of a new ACK and the changes it made to the Tx buffer packet states. 
*/ struct rxrpc_ack_summary { + rxrpc_serial_t ack_serial; /* Serial number of ACK */ rxrpc_serial_t acked_serial; /* Serial number ACK'd */ u16 in_flight; /* Number of unreceived transmissions */ u16 nr_new_hacks; /* Number of rotated new ACKs */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 821e10c03086..036cf440b63b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -198,11 +198,10 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, struct rxrpc_txqueue *tq, - int ix, - rxrpc_serial_t ack_serial) + int ix) { rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, - summary->acked_serial, ack_serial, + summary->acked_serial, summary->ack_serial, ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), call->acks_latest_ts); summary->rtt_sample_avail = false; @@ -213,8 +212,7 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, * Apply a hard ACK by advancing the Tx window. */ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, - struct rxrpc_ack_summary *summary, - rxrpc_serial_t ack_serial) + struct rxrpc_ack_summary *summary) { struct rxrpc_txqueue *tq = call->tx_queue; rxrpc_seq_t seq = call->tx_bottom + 1; @@ -255,7 +253,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, if (summary->rtt_sample_avail && summary->acked_serial == tq->segment_serial[ix] && test_bit(ix, &tq->rtt_samples)) - rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); + rxrpc_add_data_rtt_sample(call, summary, tq, ix); if (ix == tq->nr_reported_acks) { /* Packet directly hard ACK'd. 
*/ @@ -369,7 +367,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) { + if (!rxrpc_rotate_tx_window(call, top, &summary)) { rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } @@ -826,12 +824,11 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb */ static void rxrpc_input_soft_rtt(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - struct rxrpc_txqueue *tq, - rxrpc_serial_t ack_serial) + struct rxrpc_txqueue *tq) { for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++) if (summary->acked_serial == tq->segment_serial[ix]) - return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); + return rxrpc_add_data_rtt_sample(call, summary, tq, ix); } /* @@ -944,7 +941,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, _debug("bound %16lx %u", extracted, nr); if (summary->rtt_sample_avail) - rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial); + rxrpc_input_soft_rtt(call, summary, tq); rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, seq - RXRPC_NR_TXQUEUE, &lowest_nak); extracted = ~0UL; @@ -1016,7 +1013,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_acktrailer trailer; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_serial_t ack_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -1024,14 +1020,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - ack_serial = sp->hdr.serial; - first_soft_ack = sp->ack.first_ack; - prev_pkt = sp->ack.prev_ack; - nr_acks = sp->ack.nr_acks; - hard_ack = first_soft_ack - 1; - summary.acked_serial = sp->ack.acked_serial; - summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? 
- sp->ack.reason : RXRPC_ACK__INVALID); + summary.ack_serial = sp->hdr.serial; + first_soft_ack = sp->ack.first_ack; + prev_pkt = sp->ack.prev_ack; + nr_acks = sp->ack.nr_acks; + hard_ack = first_soft_ack - 1; + summary.acked_serial = sp->ack.acked_serial; + summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? + sp->ack.reason : RXRPC_ACK__INVALID); trace_rxrpc_rx_ack(call, sp); rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); @@ -1066,7 +1062,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call, ack_serial, hard_ack, prev_pkt); + trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt); goto send_response; } @@ -1100,10 +1096,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); - if (summary.acked_serial != 0) { + if (summary.acked_serial) { if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) rxrpc_complete_rtt_probe(call, call->acks_latest_ts, - summary.acked_serial, ack_serial, + summary.acked_serial, summary.ack_serial, rxrpc_rtt_rx_ping_response); else summary.rtt_sample_avail = true; @@ -1127,7 +1123,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->tx_bottom)) { - if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; } @@ -1142,19 +1138,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && call->acks_nr_sacks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ping(call, 
ack_serial, + rxrpc_propose_ping(call, summary.ack_serial, rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, &summary); if (summary.need_retransmit) - rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); + rxrpc_resend(call, summary.ack_serial, + summary.ack_reason == RXRPC_ACK_PING_RESPONSE); send_response: if (summary.ack_reason == RXRPC_ACK_PING) - rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, + rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, summary.ack_serial, rxrpc_propose_ack_respond_to_ping); else if (sp->hdr.flags & RXRPC_REQUEST_ACK) - rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, + rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, summary.ack_serial, rxrpc_propose_ack_respond_to_ack); } @@ -1165,7 +1162,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0)) + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } From 372d12d191cb80720319e224d401fd82c602e9e4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:03 +0000 Subject: [PATCH 0260/1386] rxrpc: Add a reason indicator to the tx_data tracepoint Add an indicator to the rxrpc_tx_data tracepoint to indicate what triggered the transmission of a particular packet. At this point, it's only normal transmission and retransmission, plus the tracepoint is also used to record loss injection, but in a future patch, TLP-induced (re-)transmission will also be a thing. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 21 ++++++++++++++------- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 12 ++++++++---- net/rxrpc/output.c | 6 +++--- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 326a4c257aea..d79623fff746 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -302,6 +302,11 @@ EM(rxrpc_txqueue_rotate_last, "RLS") \ E_(rxrpc_txqueue_wait, "WAI") +#define rxrpc_txdata_traces \ + EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \ + EM(rxrpc_txdata_new_data, " ") \ + E_(rxrpc_txdata_retransmit, " *RETRANS*") + #define rxrpc_receive_traces \ EM(rxrpc_receive_end, "END") \ EM(rxrpc_receive_front, "FRN") \ @@ -534,6 +539,7 @@ enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); +enum rxrpc_txdata_trace { rxrpc_txdata_traces } __mode(byte); enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ @@ -572,6 +578,7 @@ rxrpc_timer_traces; rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; +rxrpc_txdata_traces; rxrpc_txqueue_traces; /* @@ -1222,9 +1229,10 @@ TRACE_EVENT(rxrpc_tx_packet, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, unsigned int flags, bool lose), + rxrpc_serial_t serial, unsigned int flags, + enum rxrpc_txdata_trace trace), - TP_ARGS(call, seq, serial, flags, lose), + TP_ARGS(call, seq, serial, flags, trace), TP_STRUCT__entry( __field(unsigned int, call) @@ -1233,7 +1241,7 @@ TRACE_EVENT(rxrpc_tx_data, __field(u32, cid) __field(u32, call_id) __field(u16, flags) - __field(bool, lose) + __field(enum rxrpc_txdata_trace, trace) ), 
TP_fast_assign( @@ -1243,18 +1251,17 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; - __entry->lose = lose; + __entry->trace = trace; ), - TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s%s", + TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s", __entry->call, __entry->cid, __entry->call_id, __entry->serial, __entry->seq, __entry->flags & RXRPC_TXBUF_WIRE_FLAGS, - __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "", - __entry->lose ? " *LOSE*" : "") + __print_symbolic(__entry->trace, rxrpc_txdata_traces)) ); TRACE_EVENT(rxrpc_tx_ack, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index aa240b4b4bec..139575032ae2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -883,6 +883,7 @@ struct rxrpc_send_data_req { rxrpc_seq_t seq; /* Sequence of first data */ int n; /* Number of DATA packets to glue into jumbo */ bool did_send; /* T if did actually send */ + int /* enum rxrpc_txdata_trace */ trace; }; #include diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 39772459426b..99d9502564cc 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -101,6 +101,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ { struct rxrpc_send_data_req req = { .now = ktime_get_real(), + .trace = rxrpc_txdata_retransmit, }; struct rxrpc_txqueue *tq = call->tx_queue; ktime_t lowest_xmit_ts = KTIME_MAX; @@ -269,7 +270,8 @@ static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) /* * Transmit some as-yet untransmitted data. 
*/ -static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, + enum rxrpc_txdata_trace trace) { int space = rxrpc_tx_window_space(call); @@ -284,6 +286,7 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) .now = ktime_get_real(), .seq = call->tx_transmitted + 1, .n = 0, + .trace = trace, }; struct rxrpc_txqueue *tq; struct rxrpc_txbuf *txb; @@ -332,7 +335,8 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) } } -static void rxrpc_transmit_some_data(struct rxrpc_call *call) +static void rxrpc_transmit_some_data(struct rxrpc_call *call, + enum rxrpc_txdata_trace trace) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -349,7 +353,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_transmit_fresh_data(call); + rxrpc_transmit_fresh_data(call, trace); break; default: return; @@ -463,7 +467,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) resend = true; } - rxrpc_transmit_some_data(call); + rxrpc_transmit_some_data(call, rxrpc_txdata_new_data); now = ktime_get_real(); t = ktime_sub(call->keepalive_at, now); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a7de8a02f419..2633f955d1d0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -511,7 +511,7 @@ dont_set_request_ack: len += sizeof(*jumbo); } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags | flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace); kv->iov_len = len; return len; } @@ -655,8 +655,8 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req if ((lose++ & 7) == 7) { ret = 0; - trace_rxrpc_tx_data(call, txb->seq, txb->serial, - txb->flags, true); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, + rxrpc_txdata_inject_loss); conn->peer->last_tx_at = ktime_get_seconds(); goto done; } From 
b509934094fd52ac3a49ee2a2c144e885517069f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:04 +0000 Subject: [PATCH 0261/1386] rxrpc: Add a reason indicator to the tx_ack tracepoint Record the reason for the transmission of an ACK in the rxrpc_tx_ack tracepoint, and not just in the rxrpc_propose_ack tracepoint. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 13 +++++++++---- net/rxrpc/conn_event.c | 3 ++- net/rxrpc/output.c | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d79623fff746..0cfc8e1baf1f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -375,6 +375,7 @@ EM(rxrpc_propose_ack_processing_op, "ProcOp ") \ EM(rxrpc_propose_ack_respond_to_ack, "Rsp2Ack") \ EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \ + EM(rxrpc_propose_ack_retransmit, "Retrans") \ EM(rxrpc_propose_ack_retry_tx, "RetryTx") \ EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \ EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \ @@ -1267,9 +1268,10 @@ TRACE_EVENT(rxrpc_tx_data, TRACE_EVENT(rxrpc_tx_ack, TP_PROTO(unsigned int call, rxrpc_serial_t serial, rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, - u8 reason, u8 n_acks, u16 rwind), + u8 reason, u8 n_acks, u16 rwind, + enum rxrpc_propose_ack_trace trace), - TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind), + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind, trace), TP_STRUCT__entry( __field(unsigned int, call) @@ -1279,6 +1281,7 @@ TRACE_EVENT(rxrpc_tx_ack, __field(u8, reason) __field(u8, n_acks) __field(u16, rwind) + __field(enum rxrpc_propose_ack_trace, trace) ), TP_fast_assign( @@ -1289,16 +1292,18 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->reason = reason; __entry->n_acks = n_acks; __entry->rwind = rwind; + __entry->trace = trace; ), - TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u 
rw=%u", + TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u %s", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), __entry->ack_first, __entry->ack_serial, __entry->n_acks, - __entry->rwind) + __entry->rwind, + __print_symbolic(__entry->trace, rxrpc_propose_ack_traces)) ); TRACE_EVENT(rxrpc_receive, diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 6b29a294ee07..713e04394ceb 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -177,7 +177,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), - pkt.ack.reason, 0, rxrpc_rx_window_size); + pkt.ack.reason, 0, rxrpc_rx_window_size, + rxrpc_propose_ack_retransmit); break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2633f955d1d0..74c3ff55b482 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -267,7 +267,7 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(ack->firstPacket), ntohl(ack->serial), ack->reason, ack->nAcks, - ntohl(trailer->rwind)); + ntohl(trailer->rwind), why); rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); From b40ef2b85a7d117dd323b5910e504899e0a3e7dc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:05 +0000 Subject: [PATCH 0262/1386] rxrpc: Manage RTT per-call rather than per-peer Manage the determination of RTT on a per-call (i.e. per-RPC op) basis rather than on a per-peer basis, averaging across all calls going to that peer. The problem is that the RTT measurements from the initial packets on a call may be off because the server may do some setting up (such as getting a lock on a file) before accepting the rest of the data in the RPC and, further, the RTT may be affected by server-side file operations, for instance if a large amount of data is being written or read.
Note: When handling the FS.StoreData-type RPCs, for example, the server uses the userStatus field in the header of ACK packets as supplementary flow control to aid in managing this. AF_RXRPC does not yet support this, but it should be added. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/ar-internal.h | 39 +++++++++------- net/rxrpc/call_event.c | 18 +++---- net/rxrpc/call_object.c | 2 + net/rxrpc/input.c | 10 ++-- net/rxrpc/output.c | 14 +++--- net/rxrpc/peer_object.c | 9 +--- net/rxrpc/proc.c | 6 +-- net/rxrpc/rtt.c | 91 ++++++++++++++++++------------------ net/rxrpc/sendmsg.c | 2 +- 10 files changed, 97 insertions(+), 96 deletions(-) diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0cfc8e1baf1f..71df5c48a413 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1453,7 +1453,7 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->rtt = rtt; __entry->srtt = srtt; __entry->rto = rto; - __entry->min_rtt = minmax_get(&call->peer->min_rtt) + __entry->min_rtt = minmax_get(&call->min_rtt) ), TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u", diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 139575032ae2..a9d732ba6df0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -366,20 +366,9 @@ struct rxrpc_peer { unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ unsigned short tx_seg_max; /* Maximum number of transmissable segments */ - /* calculated RTT cache */ -#define RXRPC_RTT_CACHE_SIZE 32 - spinlock_t rtt_input_lock; /* RTT lock for input routine */ - ktime_t rtt_last_req; /* Time of last RTT request */ - unsigned int rtt_count; /* Number of samples we've got */ - unsigned int rtt_taken; /* Number of samples taken (wrapping) */ - struct minmax min_rtt; /* Estimated minimum RTT */ - - u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - u32 
mdev_us; /* medium deviation */ - u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u32 rttvar_us; /* smoothed mdev_max */ - u32 rto_us; /* Retransmission timeout in usec */ - u8 backoff; /* Backoff timeout (as shift) */ + /* Calculated RTT cache */ + unsigned int recent_srtt_us; + unsigned int recent_rto_us; u8 cong_ssthresh; /* Congestion slow-start threshold */ }; @@ -765,6 +754,18 @@ struct rxrpc_call { rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ unsigned short acks_nr_sacks; /* Number of soft acks recorded */ unsigned short acks_nr_snacks; /* Number of soft nacks recorded */ + + /* Calculated RTT cache */ + ktime_t rtt_last_req; /* Time of last RTT request */ + unsigned int rtt_count; /* Number of samples we've got */ + unsigned int rtt_taken; /* Number of samples taken (wrapping) */ + struct minmax min_rtt; /* Estimated minimum RTT */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_us; /* Retransmission timeout in usec */ + u8 backoff; /* Backoff timeout (as shift) */ }; /* @@ -1287,10 +1288,12 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call, /* * rtt.c */ -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); -ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans); -void rxrpc_peer_init_rtt(struct rxrpc_peer *); +void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + int rtt_slot, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time); +ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans); +void rxrpc_call_init_rtt(struct rxrpc_call *call); /* * rxkad.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 99d9502564cc..7af275544251 100644 --- 
a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -44,8 +44,8 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); - if (call->peer->srtt_us) - delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC; + if (call->srtt_us) + delay = (call->srtt_us >> 3) * NSEC_PER_USEC; else delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay)); ktime_add_ms(delay, call->tx_backoff); @@ -105,7 +105,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ }; struct rxrpc_txqueue *tq = call->tx_queue; ktime_t lowest_xmit_ts = KTIME_MAX; - ktime_t rto = rxrpc_get_rto_backoff(call->peer, false); + ktime_t rto = rxrpc_get_rto_backoff(call, false); bool unacked = false; _enter("{%d,%d}", call->tx_bottom, call->tx_top); @@ -195,7 +195,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ } while ((tq = tq->next)); if (lowest_xmit_ts < KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send); + ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send); ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); @@ -216,7 +216,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ */ if (!req.did_send) { ktime_t next_ping = ktime_add_us(call->acks_latest_ts, - call->peer->srtt_us >> 3); + call->srtt_us >> 3); if (ktime_sub(next_ping, req.now) <= 0) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, @@ -366,8 +366,8 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call, */ static void rxrpc_send_initial_ping(struct rxrpc_call *call) { - if (call->peer->rtt_count < 3 || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + if (call->rtt_count < 3 || + ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_params); @@ -499,10 +499,10 @@ bool 
rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_rx_idle); if (call->ackr_nr_unacked > 2) { - if (call->peer->rtt_count < 3) + if (call->rtt_count < 3) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_rtt); - else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_old_rtt); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 75cd0b06e14c..fb4ee0d2e9e1 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -176,6 +176,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->cong_cwnd = RXRPC_MIN_CWND; call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; + rxrpc_call_init_rtt(call); + call->rxnet = rxnet; call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; atomic_inc(&rxnet->nr_calls); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 036cf440b63b..9f308bd512e9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -71,11 +71,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. 
*/ - if (call->peer->rtt_count == 0) + if (call->rtt_count == 0) goto out; if (ktime_before(call->acks_latest_ts, ktime_add_us(call->cong_tstamp, - call->peer->srtt_us >> 3))) + call->srtt_us >> 3))) goto out_no_clear_ca; summary->change = rxrpc_cong_rtt_window_end; call->cong_tstamp = call->acks_latest_ts; @@ -179,7 +179,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; - rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); + rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8)); now = ktime_get_real(); if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) return; @@ -200,7 +200,7 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, struct rxrpc_txqueue *tq, int ix) { - rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, + rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, summary->acked_serial, summary->ack_serial, ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), call->acks_latest_ts); @@ -725,7 +725,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_mb(); /* Read data before setting avail bit */ set_bit(i, &call->rtt_avail); - rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + rxrpc_call_add_rtt(call, type, i, acked_serial, ack_serial, sent_at, resp_time); matched = true; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 74c3ff55b482..ecaf3becee40 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -234,7 +234,7 @@ static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason, if (ack_reason == RXRPC_ACK_PING) rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping); if (whdr->flags & RXRPC_REQUEST_ACK) - call->peer->rtt_last_req = now; + call->rtt_last_req = now; rxrpc_set_keepalive(call, now); return nr_kv; } @@ -473,9 +473,9 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = 
rxrpc_reqack_slow_start; else if (call->tx_winsize <= 2) why = rxrpc_reqack_small_txwin; - else if (call->peer->rtt_count < 3 && txb->seq & 1) + else if (call->rtt_count < 3) why = rxrpc_reqack_more_rtt; - else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) + else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; @@ -487,7 +487,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); - call->peer->rtt_last_req = req->now; + call->rtt_last_req = req->now; } dont_set_request_ack: @@ -576,8 +576,8 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + if (call->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call, false); call->ack_lost_at = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); @@ -590,7 +590,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } if (call->resend_at == KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + ktime_t delay = rxrpc_get_rto_backoff(call, false); call->resend_at = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 27b34ed4d76a..e1c63129586b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -235,12 +235,9 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); 
spin_lock_init(&peer->lock); - spin_lock_init(&peer->rtt_input_lock); seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); - - rxrpc_peer_init_rtt(peer); - + peer->recent_srtt_us = UINT_MAX; peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; trace_rxrpc_peer(peer->debug_id, 1, why); } @@ -283,8 +280,6 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, peer->max_data = peer->if_mtu - peer->hdrsize; rxrpc_assess_MTU_size(local, peer); - - peer->rtt_last_req = ktime_get_real(); } /* @@ -496,7 +491,7 @@ EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); */ unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { - return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX; + return READ_ONCE(peer->recent_srtt_us); } EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 5f974ec13d69..d803562ca0ac 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -296,15 +296,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, - "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8u %8u\n", + "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8d %8d\n", lbuff, rbuff, refcount_read(&peer->ref), peer->cong_ssthresh, peer->max_data, now - peer->last_tx_at, - peer->srtt_us >> 3, - peer->rto_us); + READ_ONCE(peer->recent_srtt_us), + READ_ONCE(peer->recent_rto_us)); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index aff75e168de8..7474f88d7b18 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -15,14 +15,14 @@ #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ -static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +static u32 rxrpc_rto_min_us(struct rxrpc_call *call) { return 200; } -static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +static u32 __rxrpc_set_rto(const struct rxrpc_call *call) { - return (peer->srtt_us >> 
3) + peer->rttvar_us; + return (call->srtt_us >> 3) + call->rttvar_us; } static u32 rxrpc_bound_rto(u32 rto) @@ -40,10 +40,10 @@ static u32 rxrpc_bound_rto(u32 rto) * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us) { long m = sample_rtt_us; /* RTT */ - u32 srtt = peer->srtt_us; + u32 srtt = call->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -66,7 +66,7 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (peer->mdev_us >> 2); /* similar update on mdev */ + m -= (call->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain @@ -78,31 +78,31 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) if (m > 0) m >>= 3; } else { - m -= (peer->mdev_us >> 2); /* similar update on mdev */ + m -= (call->mdev_us >> 2); /* similar update on mdev */ } - peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ - if (peer->mdev_us > peer->mdev_max_us) { - peer->mdev_max_us = peer->mdev_us; - if (peer->mdev_max_us > peer->rttvar_us) - peer->rttvar_us = peer->mdev_max_us; + call->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (call->mdev_us > call->mdev_max_us) { + call->mdev_max_us = call->mdev_us; + if (call->mdev_max_us > call->rttvar_us) + call->rttvar_us = call->mdev_max_us; } } else { /* no previous measure. 
*/ srtt = m << 3; /* take the measured time to be rtt */ - peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ - peer->rttvar_us = umax(peer->mdev_us, rxrpc_rto_min_us(peer)); - peer->mdev_max_us = peer->rttvar_us; + call->mdev_us = m << 1; /* make sure rto = 3*rtt */ + call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call)); + call->mdev_max_us = call->rttvar_us; } - peer->srtt_us = umax(srtt, 1); + call->srtt_us = umax(srtt, 1); } /* * Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static void rxrpc_set_rto(struct rxrpc_peer *peer) +static void rxrpc_set_rto(struct rxrpc_call *call) { u32 rto; @@ -113,7 +113,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ - rto = __rxrpc_set_rto(peer); + rto = __rxrpc_set_rto(call); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -124,73 +124,73 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo * guarantees that rto is higher. */ - peer->rto_us = rxrpc_bound_rto(rto); + call->rto_us = rxrpc_bound_rto(rto); } -static void rxrpc_update_rtt_min(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) +static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us) { /* Window size 5mins in approx usec (ipv4.sysctl_tcp_min_rtt_wlen) */ u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024; - minmax_running_min(&peer->min_rtt, wlen_us, resp_time / 1024, + minmax_running_min(&call->min_rtt, wlen_us, resp_time / 1024, (u32)rtt_us ? : jiffies_to_usecs(1)); } -static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) +static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us) { if (rtt_us < 0) return; /* Update RACK min RTT [RFC8985 6.1 Step 1]. 
*/ - rxrpc_update_rtt_min(peer, resp_time, rtt_us); + rxrpc_update_rtt_min(call, resp_time, rtt_us); - rxrpc_rtt_estimator(peer, rtt_us); - rxrpc_set_rto(peer); + rxrpc_rtt_estimator(call, rtt_us); + rxrpc_set_rto(call); /* Only reset backoff on valid RTT measurement [RFC6298]. */ - peer->backoff = 0; + call->backoff = 0; } /* * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. + * exclusive access to the call RTT data. */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, +void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time) { - struct rxrpc_peer *peer = call->peer; s64 rtt_us; rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); if (rtt_us < 0) return; - spin_lock(&peer->rtt_input_lock); - rxrpc_ack_update_rtt(peer, resp_time, rtt_us); - if (peer->rtt_count < 3) - peer->rtt_count++; - peer->rtt_taken++; - spin_unlock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(call, resp_time, rtt_us); + if (call->rtt_count < 3) + call->rtt_count++; + call->rtt_taken++; + + WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8); + WRITE_ONCE(call->peer->recent_rto_us, call->rto_us); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, - rtt_us, peer->srtt_us, peer->rto_us); + rtt_us, call->srtt_us, call->rto_us); } /* * Get the retransmission timeout to set in nanoseconds, backing it off each * time we retransmit. 
*/ -ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans) { u64 timo_us; - u32 backoff = READ_ONCE(peer->backoff); + u32 backoff = READ_ONCE(call->backoff); - timo_us = peer->rto_us; + timo_us = call->rto_us; timo_us <<= backoff; if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) - WRITE_ONCE(peer->backoff, backoff + 1); + WRITE_ONCE(call->backoff, backoff + 1); if (timo_us < 1) timo_us = 1; @@ -198,10 +198,11 @@ ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) return ns_to_ktime(timo_us * NSEC_PER_USEC); } -void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +void rxrpc_call_init_rtt(struct rxrpc_call *call) { - peer->rto_us = RXRPC_TIMEOUT_INIT; - peer->mdev_us = RXRPC_TIMEOUT_INIT; - peer->backoff = 0; - //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); + call->rtt_last_req = KTIME_MIN; + call->rto_us = RXRPC_TIMEOUT_INIT; + call->mdev_us = RXRPC_TIMEOUT_INIT; + call->backoff = 0; + //minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index df501a7c92fa..c4c8b718cafa 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -134,7 +134,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rxrpc_seq_t tx_start, tx_win; signed long rtt, timeout; - rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = READ_ONCE(call->srtt_us) >> 3; rtt = usecs_to_jiffies(rtt) * 2; if (rtt < 2) rtt = 2; From 4ee4c2f82b81c088d1514b04c28c84c15e98ba1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:06 +0000 Subject: [PATCH 0263/1386] rxrpc: Fix request for an ACK when cwnd is minimum rxrpc_prepare_data_subpacket() sets the REQUEST-ACK flag on the outgoing DATA packet under a number of circumstances, including, theoretically, when the cwnd is at minimum (or less). 
However, the minimum in this function is hard-coded as 2, but the actual minimum is RXRPC_MIN_CWND (which is currently 4) and so this never occurs. Without this, we will miss the request of some ACKs, potentially leading to a transmission stall until a timeout occurs on one side or the other that leads to an ACK being generated. Fix the function to use RXRPC_MIN_CWND rather than a hard-coded number. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index ecaf3becee40..f934551a9b1c 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -469,7 +469,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_ack_lost; else if (txb->flags & RXRPC_TXBUF_RESENT) why = rxrpc_reqack_retrans; - else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= 2) + else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= RXRPC_MIN_CWND) why = rxrpc_reqack_slow_start; else if (call->tx_winsize <= 2) why = rxrpc_reqack_small_txwin; From 7c482665931b6ce7bc72fa5feae6c35567070296 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:07 +0000 Subject: [PATCH 0264/1386] rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985] When an rxrpc call is in its transmission phase and is sending a lot of packets, stalls occasionally occur that cause severe performance degradation (eg. increasing the transmission time for a 256MiB payload from 0.7s to 2.5s over a 10G link). rxrpc already implements TCP-style congestion control [RFC5681] and this helps mitigate the effects, but occasionally we're missing a time event that deals with a missing ACK, leading to a stall until the RTO expires. Fix this by implementing RACK/TLP in rxrpc. 
Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 342 ++++++++++++++++++++++++++-- net/rxrpc/Makefile | 1 + net/rxrpc/ar-internal.h | 107 ++++++++- net/rxrpc/call_event.c | 253 ++++++++------------- net/rxrpc/call_object.c | 3 +- net/rxrpc/input.c | 117 ++++++---- net/rxrpc/input_rack.c | 418 +++++++++++++++++++++++++++++++++++ net/rxrpc/io_thread.c | 1 + net/rxrpc/output.c | 41 +++- 9 files changed, 1044 insertions(+), 239 deletions(-) create mode 100644 net/rxrpc/input_rack.c diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 71df5c48a413..2f119d18a061 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -305,7 +305,9 @@ #define rxrpc_txdata_traces \ EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \ EM(rxrpc_txdata_new_data, " ") \ - E_(rxrpc_txdata_retransmit, " *RETRANS*") + EM(rxrpc_txdata_retransmit, " *RETRANS*") \ + EM(rxrpc_txdata_tlp_new_data, " *TLP-NEW*") \ + E_(rxrpc_txdata_tlp_retransmit, " *TLP-RETRANS*") #define rxrpc_receive_traces \ EM(rxrpc_receive_end, "END") \ @@ -353,11 +355,12 @@ EM(rxrpc_timer_trace_hard, "HardLimit") \ EM(rxrpc_timer_trace_idle, "IdleLimit") \ EM(rxrpc_timer_trace_keepalive, "KeepAlive") \ - EM(rxrpc_timer_trace_lost_ack, "LostAck ") \ EM(rxrpc_timer_trace_ping, "DelayPing") \ - EM(rxrpc_timer_trace_resend, "Resend ") \ - EM(rxrpc_timer_trace_resend_reset, "ResendRst") \ - E_(rxrpc_timer_trace_resend_tx, "ResendTx ") + EM(rxrpc_timer_trace_rack_off, "RACK-OFF ") \ + EM(rxrpc_timer_trace_rack_zwp, "RACK-ZWP ") \ + EM(rxrpc_timer_trace_rack_reo, "RACK-Reo ") \ + EM(rxrpc_timer_trace_rack_tlp_pto, "TLP-PTO ") \ + E_(rxrpc_timer_trace_rack_rto, "RTO ") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -478,9 +481,9 @@ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ EM(rxrpc_txbuf_put_trans, "PUT TRANS 
") \ + EM(rxrpc_txbuf_see_lost, "SEE LOST ") \ EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \ - EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ - E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") + E_(rxrpc_txbuf_see_send_more, "SEE SEND+ ") #define rxrpc_tq_traces \ EM(rxrpc_tq_alloc, "ALLOC") \ @@ -505,6 +508,24 @@ EM(rxrpc_rotate_trace_sack, "soft-ack") \ E_(rxrpc_rotate_trace_snak, "soft-nack") +#define rxrpc_rack_timer_modes \ + EM(RXRPC_CALL_RACKTIMER_OFF, "---") \ + EM(RXRPC_CALL_RACKTIMER_RACK_REORDER, "REO") \ + EM(RXRPC_CALL_RACKTIMER_TLP_PTO, "TLP") \ + E_(RXRPC_CALL_RACKTIMER_RTO, "RTO") + +#define rxrpc_tlp_probe_traces \ + EM(rxrpc_tlp_probe_trace_busy, "busy") \ + EM(rxrpc_tlp_probe_trace_transmit_new, "transmit-new") \ + E_(rxrpc_tlp_probe_trace_retransmit, "retransmit") + +#define rxrpc_tlp_ack_traces \ + EM(rxrpc_tlp_ack_trace_acked, "acked") \ + EM(rxrpc_tlp_ack_trace_dup_acked, "dup-acked") \ + EM(rxrpc_tlp_ack_trace_hard_beyond, "hard-beyond") \ + EM(rxrpc_tlp_ack_trace_incomplete, "incomplete") \ + E_(rxrpc_tlp_ack_trace_new_data, "new-data") + /* * Generate enums for tracing information. 
*/ @@ -537,6 +558,8 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); +enum rxrpc_tlp_ack_trace { rxrpc_tlp_ack_traces } __mode(byte); +enum rxrpc_tlp_probe_trace { rxrpc_tlp_probe_traces } __mode(byte); enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); @@ -567,6 +590,7 @@ rxrpc_conn_traces; rxrpc_local_traces; rxrpc_pmtud_reduce_traces; rxrpc_propose_ack_traces; +rxrpc_rack_timer_modes; rxrpc_receive_traces; rxrpc_recvmsg_traces; rxrpc_req_ack_traces; @@ -576,6 +600,8 @@ rxrpc_rtt_tx_traces; rxrpc_sack_traces; rxrpc_skb_traces; rxrpc_timer_traces; +rxrpc_tlp_ack_traces; +rxrpc_tlp_probe_traces; rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; @@ -618,6 +644,20 @@ TRACE_EVENT(rxrpc_local, __entry->usage) ); +TRACE_EVENT(rxrpc_iothread_rx, + TP_PROTO(struct rxrpc_local *local, unsigned int nr_rx), + TP_ARGS(local, nr_rx), + TP_STRUCT__entry( + __field(unsigned int, local) + __field(unsigned int, nr_rx) + ), + TP_fast_assign( + __entry->local = local->debug_id; + __entry->nr_rx = nr_rx; + ), + TP_printk("L=%08x nrx=%u", __entry->local, __entry->nr_rx) + ); + TRACE_EVENT(rxrpc_peer, TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why), @@ -1684,16 +1724,15 @@ TRACE_EVENT(rxrpc_drop_ack, TRACE_EVENT(rxrpc_retransmit, TP_PROTO(struct rxrpc_call *call, struct rxrpc_send_data_req *req, - struct rxrpc_txbuf *txb, ktime_t expiry), + struct rxrpc_txbuf *txb), - TP_ARGS(call, req, txb, expiry), + TP_ARGS(call, req, txb), TP_STRUCT__entry( __field(unsigned int, call) __field(unsigned int, qbase) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) - __field(ktime_t, expiry) ), TP_fast_assign( @@ -1701,15 +1740,13 @@ TRACE_EVENT(rxrpc_retransmit, 
__entry->qbase = req->tq->qbase; __entry->seq = req->seq; __entry->serial = txb->serial; - __entry->expiry = expiry; ), - TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld", + TP_printk("c=%08x tq=%x q=%x r=%x", __entry->call, __entry->qbase, __entry->seq, - __entry->serial, - ktime_to_us(__entry->expiry)) + __entry->serial) ); TRACE_EVENT(rxrpc_congest, @@ -1767,9 +1804,9 @@ TRACE_EVENT(rxrpc_congest, ); TRACE_EVENT(rxrpc_reset_cwnd, - TP_PROTO(struct rxrpc_call *call, ktime_t now), + TP_PROTO(struct rxrpc_call *call, ktime_t since_last_tx, ktime_t rtt), - TP_ARGS(call, now), + TP_ARGS(call, since_last_tx, rtt), TP_STRUCT__entry( __field(unsigned int, call) @@ -1779,6 +1816,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, prepared) __field(ktime_t, since_last_tx) + __field(ktime_t, rtt) __field(bool, has_data) ), @@ -1789,18 +1827,20 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; __entry->prepared = call->send_top - call->tx_bottom; - __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); + __entry->since_last_tx = since_last_tx; + __entry->rtt = rtt; __entry->has_data = call->tx_bottom != call->tx_top; ), - TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", + TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu/%llu d=%u", __entry->call, __entry->hard_ack, __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->extra, __entry->prepared, - ktime_to_ns(__entry->since_last_tx), + ktime_to_us(__entry->since_last_tx), + ktime_to_us(__entry->rtt), __entry->has_data) ); @@ -1925,6 +1965,32 @@ TRACE_EVENT(rxrpc_resend, __entry->transmitted) ); +TRACE_EVENT(rxrpc_resend_lost, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, unsigned long lost), + + TP_ARGS(call, tq, lost), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(u8, nr_rep) + __field(unsigned long, lost) + ), + + TP_fast_assign( + 
__entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->nr_rep = tq->nr_reported_acks; + __entry->lost = lost; + ), + + TP_printk("c=%08x tq=%x lost=%016lx nr=%u", + __entry->call, + __entry->qbase, + __entry->lost, + __entry->nr_rep) + ); + TRACE_EVENT(rxrpc_rotate, TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, struct rxrpc_ack_summary *summary, rxrpc_seq_t seq, @@ -2363,6 +2429,244 @@ TRACE_EVENT(rxrpc_pmtud_reduce, __entry->serial, __entry->max_data) ); +TRACE_EVENT(rxrpc_rack, + TP_PROTO(struct rxrpc_call *call, ktime_t timo), + + TP_ARGS(call, timo), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_rack_timer_mode, mode) + __field(unsigned short, nr_sent) + __field(unsigned short, nr_lost) + __field(unsigned short, nr_resent) + __field(unsigned short, nr_sacked) + __field(ktime_t, timo) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = call->rack_end_seq; + __entry->mode = call->rack_timer_mode; + __entry->nr_sent = call->tx_nr_sent; + __entry->nr_lost = call->tx_nr_lost; + __entry->nr_resent = call->tx_nr_resent; + __entry->nr_sacked = call->acks_nr_sacks; + __entry->timo = timo; + ), + + TP_printk("c=%08x r=%08x q=%08x %s slrs=%u,%u,%u,%u t=%lld", + __entry->call, __entry->ack_serial, __entry->seq, + __print_symbolic(__entry->mode, rxrpc_rack_timer_modes), + __entry->nr_sent, __entry->nr_lost, + __entry->nr_resent, __entry->nr_sacked, + ktime_to_us(__entry->timo)) + ); + +TRACE_EVENT(rxrpc_rack_update, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), + + TP_ARGS(call, summary), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + __field(int, xmit_ts) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = call->rack_end_seq; 
+ __entry->xmit_ts = ktime_sub(call->acks_latest_ts, call->rack_xmit_ts); + ), + + TP_printk("c=%08x r=%08x q=%08x xt=%lld", + __entry->call, __entry->ack_serial, __entry->seq, + ktime_to_us(__entry->xmit_ts)) + ); + +TRACE_EVENT(rxrpc_rack_scan_loss, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(ktime_t, rack_rtt) + __field(ktime_t, rack_reo_wnd) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->rack_rtt = call->rack_rtt; + __entry->rack_reo_wnd = call->rack_reo_wnd; + ), + + TP_printk("c=%08x rtt=%lld reow=%lld", + __entry->call, ktime_to_us(__entry->rack_rtt), + ktime_to_us(__entry->rack_reo_wnd)) + ); + +TRACE_EVENT(rxrpc_rack_scan_loss_tq, + TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq, + unsigned long nacks), + + TP_ARGS(call, tq, nacks), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(unsigned long, nacks) + __field(unsigned long, lost) + __field(unsigned long, retrans) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->nacks = nacks; + __entry->lost = tq->segment_lost; + __entry->retrans = tq->segment_retransmitted; + ), + + TP_printk("c=%08x q=%08x n=%lx l=%lx r=%lx", + __entry->call, __entry->qbase, + __entry->nacks, __entry->lost, __entry->retrans) + ); + +TRACE_EVENT(rxrpc_rack_detect_loss, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_seq_t seq), + + TP_ARGS(call, summary, seq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = seq; + ), + + TP_printk("c=%08x r=%08x q=%08x", + __entry->call, __entry->ack_serial, __entry->seq) + ); + +TRACE_EVENT(rxrpc_rack_mark_loss_tq, + TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq), 
+ + TP_ARGS(call, tq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, trans) + __field(unsigned long, acked) + __field(unsigned long, lost) + __field(unsigned long, retrans) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->trans = call->tx_transmitted; + __entry->acked = tq->segment_acked; + __entry->lost = tq->segment_lost; + __entry->retrans = tq->segment_retransmitted; + ), + + TP_printk("c=%08x tq=%08x txq=%08x a=%lx l=%lx r=%lx", + __entry->call, __entry->qbase, __entry->trans, + __entry->acked, __entry->lost, __entry->retrans) + ); + +TRACE_EVENT(rxrpc_tlp_probe, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_tlp_probe_trace trace), + + TP_ARGS(call, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, serial) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_tlp_probe_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->serial = call->tlp_serial; + __entry->seq = call->tlp_seq; + __entry->trace = trace; + ), + + TP_printk("c=%08x r=%08x pq=%08x %s", + __entry->call, __entry->serial, __entry->seq, + __print_symbolic(__entry->trace, rxrpc_tlp_probe_traces)) + ); + +TRACE_EVENT(rxrpc_tlp_ack, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + enum rxrpc_tlp_ack_trace trace), + + TP_ARGS(call, summary, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, serial) + __field(rxrpc_seq_t, tlp_seq) + __field(rxrpc_seq_t, hard_ack) + __field(enum rxrpc_tlp_ack_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->serial = call->tlp_serial; + __entry->tlp_seq = call->tlp_seq; + __entry->hard_ack = call->acks_hard_ack; + __entry->trace = trace; + ), + + TP_printk("c=%08x r=%08x pq=%08x hq=%08x %s", + __entry->call, __entry->serial, + __entry->tlp_seq, __entry->hard_ack, + __print_symbolic(__entry->trace, 
rxrpc_tlp_ack_traces)) + ); + +TRACE_EVENT(rxrpc_rack_timer, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, bool exp), + + TP_ARGS(call, delay, exp), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(bool, exp) + __field(enum rxrpc_rack_timer_mode, mode) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->exp = exp; + __entry->mode = call->rack_timer_mode; + __entry->delay = delay; + ), + + TP_printk("c=%08x %s %s to=%lld", + __entry->call, + __entry->exp ? "Exp" : "Set", + __print_symbolic(__entry->mode, rxrpc_rack_timer_modes), + ktime_to_us(__entry->delay)) + ); + #undef EM #undef E_ diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index ac5caf5a48e1..210b75e3179e 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -16,6 +16,7 @@ rxrpc-y := \ conn_object.o \ conn_service.o \ input.o \ + input_rack.o \ insecure.o \ io_thread.o \ key.o \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a9d732ba6df0..0c0a3c89dba3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -621,6 +621,18 @@ enum rxrpc_ca_state { NR__RXRPC_CA_STATES } __mode(byte); +/* + * Current purpose of call RACK timer. According to the RACK-TLP protocol + * [RFC8985], the transmission timer (call->rack_timo_at) may only be used for + * one of these at once. 
+ */ +enum rxrpc_rack_timer_mode { + RXRPC_CALL_RACKTIMER_OFF, /* Timer not running */ + RXRPC_CALL_RACKTIMER_RACK_REORDER, /* RACK reordering timer */ + RXRPC_CALL_RACKTIMER_TLP_PTO, /* TLP timeout */ + RXRPC_CALL_RACKTIMER_RTO, /* Retransmission timeout */ +} __mode(byte); + /* * RxRPC call definition * - matched by { connection, call_id } @@ -638,8 +650,7 @@ struct rxrpc_call { struct mutex user_mutex; /* User access mutex */ struct sockaddr_rxrpc dest_srx; /* Destination address */ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */ - ktime_t ack_lost_at; /* When ACK is figured as lost */ - ktime_t resend_at; /* When next resend needs to happen */ + ktime_t rack_timo_at; /* When the RACK-TLP timer expires */ ktime_t ping_at; /* When next to send a ping */ ktime_t keepalive_at; /* When next to send a keepalive ping */ ktime_t expect_rx_by; /* When we expect to get a packet by */ @@ -695,8 +706,12 @@ struct rxrpc_call { rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + rxrpc_serial_t tx_last_serial; /* Serial of last DATA transmitted */ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ - u8 tx_winsize; /* Maximum size of Tx window */ + u16 tx_nr_sent; /* Number of packets sent, but unacked */ + u16 tx_nr_lost; /* Number of packets marked lost */ + u16 tx_nr_resent; /* Number of packets resent, but unacked */ + u16 tx_winsize; /* Maximum size of Tx window */ #define RXRPC_TX_MAX_WINDOW 128 u8 tx_jumbo_max; /* Maximum subpkts peer will accept */ ktime_t tx_last_sent; /* Last time a transmission occurred */ @@ -725,6 +740,25 @@ struct rxrpc_call { u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ + /* RACK-TLP [RFC8985] state. 
*/ + ktime_t rack_xmit_ts; /* Latest transmission timestamp */ + ktime_t rack_rtt; /* RTT of most recently ACK'd segment */ + ktime_t rack_rtt_ts; /* Timestamp of rack_rtt */ + ktime_t rack_reo_wnd; /* Reordering window */ + unsigned int rack_reo_wnd_mult; /* Multiplier applied to rack_reo_wnd */ + int rack_reo_wnd_persist; /* Num loss recoveries before reset reo_wnd */ + rxrpc_seq_t rack_fack; /* Highest sequence so far ACK'd */ + rxrpc_seq_t rack_end_seq; /* Highest sequence seen */ + rxrpc_seq_t rack_dsack_round; /* DSACK opt recv'd in latest roundtrip */ + bool rack_dsack_round_none; /* T if dsack_round is "None" */ + bool rack_reordering_seen; /* T if detected reordering event */ + enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */ + bool tlp_is_retrans; /* T if unacked TLP retransmission */ + rxrpc_serial_t tlp_serial; /* Serial of TLP probe (or 0 if none in progress) */ + rxrpc_seq_t tlp_seq; /* Sequence of TLP probe */ + unsigned int tlp_rtt_taken; /* Last time RTT taken */ + ktime_t tlp_max_ack_delay; /* Sender budget for max delayed ACK interval */ + /* Receive-phase ACK management (ACKs we send). 
*/ u8 ackr_reason; /* reason to ACK */ u16 ackr_sack_base; /* Starting slot in SACK table ring */ @@ -783,6 +817,9 @@ struct rxrpc_ack_summary { bool retrans_timeo:1; /* T if reTx due to timeout happened */ bool need_retransmit:1; /* T if we need transmission */ bool rtt_sample_avail:1; /* T if RTT sample available */ + bool in_fast_or_rto_recovery:1; + bool exiting_fast_or_rto_recovery:1; + bool tlp_probe_acked:1; /* T if the TLP probe seq was acked */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -864,6 +901,7 @@ struct rxrpc_txqueue { unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ + unsigned long ever_retransmitted; /* Bit-per-buf: Set if ever retransmitted */ /* The arrays we want to pack into as few cache lines as possible. */ struct { @@ -883,7 +921,9 @@ struct rxrpc_send_data_req { struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ rxrpc_seq_t seq; /* Sequence of first data */ int n; /* Number of DATA packets to glue into jumbo */ + bool retrans; /* T if this is a retransmission */ bool did_send; /* T if did actually send */ + bool tlp_probe; /* T if this is a TLP probe */ int /* enum rxrpc_txdata_trace */ trace; }; @@ -943,8 +983,9 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response); - +void rxrpc_resend_tlp(struct rxrpc_call *call); +void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit, + enum rxrpc_txdata_trace trace); bool rxrpc_input_call_event(struct rxrpc_call *call); /* @@ -1123,6 +1164,32 @@ void rxrpc_congestion_degrade(struct rxrpc_call *); void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *); void 
rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); +/* + * input_rack.c + */ +void rxrpc_input_rack_one(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix); +void rxrpc_input_rack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long new_acks); +void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary); +ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now); +void rxrpc_tlp_send_probe(struct rxrpc_call *call); +void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary); +void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by); + +/* Initialise TLP state [RFC8985 7.1]. */ +static inline void rxrpc_tlp_init(struct rxrpc_call *call) +{ + call->tlp_serial = 0; + call->tlp_seq = call->acks_hard_ack; + call->tlp_is_retrans = false; +} + /* * io_thread.c */ @@ -1402,6 +1469,11 @@ static inline u32 latest(u32 seq1, u32 seq2) return after(seq1, seq2) ? seq1 : seq2; } +static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq) +{ + return (seq & (RXRPC_NR_TXQUEUE - 1)) == tq->qbase; +} + static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); @@ -1409,6 +1481,31 @@ static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk rxrpc_poke_call(call, rxrpc_call_poke_rx_packet); } +/* + * Calculate how much space there is for transmitting more DATA packets. 
+ */ +static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call) +{ + int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); + int transmitted = call->tx_top - call->tx_bottom; + + return max(winsize - transmitted, 0); +} + +static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call) +{ + return call->acks_nr_sacks + call->tx_nr_lost; +} + +/* + * Calculate the number of transmitted DATA packets assumed to be in flight + * [approx RFC6675]. + */ +static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call) +{ + return call->tx_nr_sent - rxrpc_left_out(call) + call->tx_nr_resent; +} + /* * debug tracing */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7af275544251..8e477f7f8850 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -54,35 +54,21 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack); } -/* - * Handle congestion being detected by the retransmit timeout. - */ -static void rxrpc_congestion_timeout(struct rxrpc_call *call) -{ - set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); -} - /* * Retransmit one or more packets. 
*/ static bool rxrpc_retransmit_data(struct rxrpc_call *call, - struct rxrpc_send_data_req *req, - ktime_t rto, bool skip_too_young) + struct rxrpc_send_data_req *req) { struct rxrpc_txqueue *tq = req->tq; unsigned int ix = req->seq & RXRPC_TXQ_MASK; struct rxrpc_txbuf *txb = tq->bufs[ix]; - ktime_t xmit_ts, resend_at; _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id); - xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); - resend_at = ktime_add(xmit_ts, rto); - trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now)); - if (skip_too_young && ktime_after(resend_at, req->now)) - return false; + req->retrans = true; + trace_rxrpc_retransmit(call, req, txb); - __set_bit(ix, &tq->segment_retransmitted); txb->flags |= RXRPC_TXBUF_RESENT; rxrpc_send_data_packet(call, req); rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); @@ -97,135 +83,78 @@ static bool rxrpc_retransmit_data(struct rxrpc_call *call, /* * Perform retransmission of NAK'd and unack'd packets. */ -void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response) +static void rxrpc_resend(struct rxrpc_call *call) { struct rxrpc_send_data_req req = { .now = ktime_get_real(), .trace = rxrpc_txdata_retransmit, }; - struct rxrpc_txqueue *tq = call->tx_queue; - ktime_t lowest_xmit_ts = KTIME_MAX; - ktime_t rto = rxrpc_get_rto_backoff(call, false); - bool unacked = false; + struct rxrpc_txqueue *tq; _enter("{%d,%d}", call->tx_bottom, call->tx_top); - if (call->tx_bottom == call->tx_top) { - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - return; - } + trace_rxrpc_resend(call, call->acks_highest_serial); - trace_rxrpc_resend(call, ack_serial); - - /* Scan the transmission queue, looking for explicitly NAK'd packets. */ - do { - unsigned long naks = ~tq->segment_acked; - rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1; + /* Scan the transmission queue, looking for lost packets. 
*/ + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long lost = tq->segment_lost; if (after(tq->qbase, call->tx_transmitted)) break; - if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) - naks &= (1UL << tq->nr_reported_acks) - 1; - _debug("retr %16lx %u c=%08x [%x]", tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase); - _debug("nack %16lx", naks); + _debug("lost %16lx", lost); - while (naks) { - unsigned int ix = __ffs(naks); + trace_rxrpc_resend_lost(call, tq, lost); + while (lost) { + unsigned int ix = __ffs(lost); struct rxrpc_txbuf *txb = tq->bufs[ix]; - __clear_bit(ix, &naks); - if (after(txb->serial, call->acks_highest_serial)) - continue; /* Ack point not yet reached */ - - rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); + __clear_bit(ix, &lost); + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_lost); req.tq = tq; req.seq = tq->qbase + ix; req.n = 1; - rxrpc_retransmit_data(call, &req, rto, false); + rxrpc_retransmit_data(call, &req); } - - /* Anything after the soft-ACK table up to and including - * ack.previousPacket will get ACK'd or NACK'd in due course, - * so don't worry about those here. We do, however, need to - * consider retransmitting anything beyond that point. - */ - if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE && - after(tq_top, call->acks_prev_seq)) { - rxrpc_seq_t start = latest(call->acks_prev_seq, - tq->qbase + tq->nr_reported_acks); - rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); - - _debug("unrep %x-%x", start, stop); - for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) { - rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK]; - - if (ping_response && - before(serial, call->acks_highest_serial)) - break; /* Wasn't accounted for by a more recent ping. */ - req.tq = tq; - req.seq = seq; - req.n = 1; - if (rxrpc_retransmit_data(call, &req, rto, true)) - unacked = true; - } - } - - /* Work out the next retransmission timeout. 
*/ - if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) { - unsigned int lowest_us = UINT_MAX; - - for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) - if (!test_bit(i, &tq->segment_acked) && - tq->segment_xmit_ts[i] < lowest_us) - lowest_us = tq->segment_xmit_ts[i]; - _debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us); - - if (lowest_us != UINT_MAX) { - ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us); - - if (ktime_before(lowest_ns, lowest_xmit_ts)) - lowest_xmit_ts = lowest_ns; - } - } - } while ((tq = tq->next)); - - if (lowest_xmit_ts < KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send); - ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); - - _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); - call->resend_at = resend_at; - trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now), - rxrpc_timer_trace_resend_reset); - } else { - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - } - - if (unacked) - rxrpc_congestion_timeout(call); - - /* If there was nothing that needed retransmission then it's likely - * that an ACK got lost somewhere. Send a ping to find out instead of - * retransmitting data. - */ - if (!req.did_send) { - ktime_t next_ping = ktime_add_us(call->acks_latest_ts, - call->srtt_us >> 3); - - if (ktime_sub(next_ping, req.now) <= 0) - rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, - rxrpc_propose_ack_ping_for_0_retrans); } + rxrpc_get_rto_backoff(call, req.did_send); _leave(""); } +/* + * Resend the highest-seq DATA packet so far transmitted for RACK-TLP [RFC8985 7.3]. + */ +void rxrpc_resend_tlp(struct rxrpc_call *call) +{ + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + .seq = call->tx_transmitted, + .n = 1, + .tlp_probe = true, + .trace = rxrpc_txdata_tlp_retransmit, + }; + + /* There's a chance it'll be on the tail segment of the queue. 
*/ + req.tq = READ_ONCE(call->tx_qtail); + if (req.tq && + before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) { + rxrpc_retransmit_data(call, &req); + return; + } + + for (req.tq = call->tx_queue; req.tq; req.tq = req.tq->next) { + if (after_eq(call->tx_transmitted, req.tq->qbase) && + before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) { + rxrpc_retransmit_data(call, &req); + return; + } + } +} + /* * Start transmitting the reply to a service. This cancels the need to ACK the * request if we haven't yet done so. @@ -259,18 +188,10 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) } } -static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) -{ - int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); - int in_flight = call->tx_top - call->tx_bottom; - - return max(winsize - in_flight, 0); -} - /* - * Transmit some as-yet untransmitted data. + * Transmit some as-yet untransmitted data, to a maximum of the supplied limit. */ -static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, unsigned int limit, enum rxrpc_txdata_trace trace) { int space = rxrpc_tx_window_space(call); @@ -335,8 +256,8 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, } } -static void rxrpc_transmit_some_data(struct rxrpc_call *call, - enum rxrpc_txdata_trace trace) +void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit, + enum rxrpc_txdata_trace trace) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -353,7 +274,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call, rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_transmit_fresh_data(call, trace); + rxrpc_transmit_fresh_data(call, limit, trace); break; default: return; @@ -380,7 +301,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) { struct sk_buff *skb; ktime_t now, t; - bool resend = false, did_receive = 
false, saw_ack = false; + bool did_receive = false, saw_ack = false; s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -398,21 +319,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) goto out; } - while ((skb = __skb_dequeue(&call->rx_queue))) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + do { + skb = __skb_dequeue(&call->rx_queue); + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - if (__rxrpc_call_is_complete(call) || - skb->mark == RXRPC_SKB_MARK_ERROR) { + if (__rxrpc_call_is_complete(call) || + skb->mark == RXRPC_SKB_MARK_ERROR) { + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + goto out; + } + + saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; + + rxrpc_input_call_packet(call, skb); rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); - goto out; + did_receive = true; } - saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; + t = ktime_sub(call->rack_timo_at, ktime_get_real()); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, + rxrpc_timer_trace_rack_off + call->rack_timer_mode); + call->rack_timo_at = KTIME_MAX; + rxrpc_rack_timer_expired(call, t); + } - rxrpc_input_call_packet(call, skb); - rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); - did_receive = true; - } + } while (!skb_queue_empty(&call->rx_queue)); /* If we see our async-event poke, check for timeout trippage. 
*/ now = ktime_get_real(); @@ -445,13 +378,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_delayed_ack); } - t = ktime_sub(call->ack_lost_at, now); - if (t <= 0) { - trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack); - call->ack_lost_at = KTIME_MAX; - set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); - } - t = ktime_sub(call->ping_at, now); if (t <= 0) { trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping); @@ -460,15 +386,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_ping_for_keepalive); } - t = ktime_sub(call->resend_at, now); - if (t <= 0) { - trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend); - call->resend_at = KTIME_MAX; - resend = true; - } - - rxrpc_transmit_some_data(call, rxrpc_txdata_new_data); - now = ktime_get_real(); t = ktime_sub(call->keepalive_at, now); if (t <= 0) { @@ -478,21 +395,30 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_ping_for_keepalive); } + if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) + rxrpc_send_initial_ping(call); + + rxrpc_transmit_some_data(call, UINT_MAX, rxrpc_txdata_new_data); + if (saw_ack) rxrpc_congestion_degrade(call); - if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) - rxrpc_send_initial_ping(call); + if (did_receive && + (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_SEND_REQUEST || + __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SEND_REPLY)) { + t = ktime_sub(call->rack_timo_at, ktime_get_real()); + trace_rxrpc_rack(call, t); + } /* Process events */ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && + if (call->tx_nr_lost > 0 && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) - rxrpc_resend(call, 0, false); + rxrpc_resend(call); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_send_ACK(call, 
RXRPC_ACK_IDLE, 0, @@ -520,8 +446,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) set(call->expect_req_by); set(call->expect_rx_by); set(call->delay_ack_at); - set(call->ack_lost_at); - set(call->resend_at); + set(call->rack_timo_at); set(call->keepalive_at); set(call->ping_at); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fb4ee0d2e9e1..5a543c3f6fb0 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -160,8 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->ackr_window = 1; call->ackr_wtop = 1; call->delay_ack_at = KTIME_MAX; - call->ack_lost_at = KTIME_MAX; - call->resend_at = KTIME_MAX; + call->rack_timo_at = KTIME_MAX; call->ping_at = KTIME_MAX; call->keepalive_at = KTIME_MAX; call->expect_rx_by = KTIME_MAX; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9f308bd512e9..4974b5accafa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -27,13 +27,13 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, } /* - * Do TCP-style congestion management [RFC 5681]. + * Do TCP-style congestion management [RFC5681]. 
*/ static void rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary) { summary->change = rxrpc_cong_no_change; - summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; + summary->in_flight = rxrpc_tx_in_flight(call); if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; @@ -106,9 +106,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_extra = 0; call->cong_dup_acks = 0; summary->need_retransmit = true; + summary->in_fast_or_rto_recovery = true; goto out; case RXRPC_CA_FAST_RETRANSMIT: + rxrpc_tlp_init(call); + summary->in_fast_or_rto_recovery = true; if (!summary->new_low_snack) { if (summary->nr_new_sacks == 0) call->cong_cwnd += 1; @@ -121,8 +124,10 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } else { summary->change = rxrpc_cong_progress; call->cong_cwnd = call->cong_ssthresh; - if (call->acks_nr_snacks == 0) + if (call->acks_nr_snacks == 0) { + summary->exiting_fast_or_rto_recovery = true; goto resume_normality; + } } goto out; @@ -171,7 +176,7 @@ send_extra_data: */ void rxrpc_congestion_degrade(struct rxrpc_call *call) { - ktime_t rtt, now; + ktime_t rtt, now, time_since; if (call->cong_ca_state != RXRPC_CA_SLOW_START && call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE) @@ -181,10 +186,11 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8)); now = ktime_get_real(); - if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) + time_since = ktime_sub(now, call->tx_last_sent); + if (ktime_before(time_since, rtt)) return; - trace_rxrpc_reset_cwnd(call, now); + trace_rxrpc_reset_cwnd(call, time_since, rtt); rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; call->cong_ca_state = RXRPC_CA_SLOW_START; @@ -200,11 +206,11 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, struct rxrpc_txqueue *tq, int ix) { + 
ktime_t xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); + rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, summary->acked_serial, summary->ack_serial, - ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), - call->acks_latest_ts); - summary->rtt_sample_avail = false; + xmit_ts, call->acks_latest_ts); __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ } @@ -216,7 +222,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, { struct rxrpc_txqueue *tq = call->tx_queue; rxrpc_seq_t seq = call->tx_bottom + 1; - bool rot_last = false; + bool rot_last = false, trace = false; _enter("%x,%x", call->tx_bottom, to); @@ -250,14 +256,16 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rot_last = true; } - if (summary->rtt_sample_avail && - summary->acked_serial == tq->segment_serial[ix] && + if (summary->acked_serial == tq->segment_serial[ix] && test_bit(ix, &tq->rtt_samples)) rxrpc_add_data_rtt_sample(call, summary, tq, ix); if (ix == tq->nr_reported_acks) { /* Packet directly hard ACK'd. */ tq->nr_reported_acks++; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; summary->nr_new_hacks++; __set_bit(ix, &tq->segment_acked); trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack); @@ -268,11 +276,21 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } else { /* Soft NAK -> hard ACK. 
*/ call->acks_nr_snacks--; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; summary->nr_new_hacks++; __set_bit(ix, &tq->segment_acked); trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak); } + call->tx_nr_sent--; + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + if (__test_and_clear_bit(ix, &tq->segment_retransmitted)) + call->tx_nr_resent--; + __clear_bit(ix, &tq->ever_retransmitted); + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); tq->bufs[ix] = NULL; @@ -282,7 +300,10 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rxrpc_txqueue_rotate)); seq++; + trace = true; if (!(seq & RXRPC_TXQ_MASK)) { + trace_rxrpc_rack_update(call, summary); + trace = false; prefetch(tq->next); if (tq != call->tx_qtail) { call->tx_qbase += RXRPC_NR_TXQUEUE; @@ -299,6 +320,9 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } while (before_eq(seq, to)); + if (trace) + trace_rxrpc_rack_update(call, summary); + if (rot_last) { set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); if (tq) { @@ -325,8 +349,10 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; + call->rack_timo_at = KTIME_MAX; + trace_rxrpc_rack_timer(call, 0, false); + trace_rxrpc_timer_can(call, rxrpc_timer_trace_rack_off + call->rack_timer_mode); switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -842,10 +868,13 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t *lowest_nak) { - unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a; + unsigned long old_reported = 0, flipped, new_acks = 0; + unsigned long a_to_n, n_to_a = 0; int new, a, n; - old_reported = ~0UL >> 
(RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + if (tq->nr_reported_acks > 0) + old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + _enter("{%x,%lx,%d},%lx,%d,%x", tq->qbase, tq->segment_acked, tq->nr_reported_acks, extracted_acks, nr_reported, seq); @@ -898,6 +927,18 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, if (before(lowest, *lowest_nak)) *lowest_nak = lowest; } + + if (summary->acked_serial) + rxrpc_input_soft_rtt(call, summary, tq); + + new_acks |= n_to_a; + if (new_acks) + rxrpc_input_rack(call, summary, tq, new_acks); + + if (call->tlp_serial && + rxrpc_seq_in_txq(tq, call->tlp_seq) && + test_bit(call->tlp_seq - tq->qbase, &new_acks)) + summary->tlp_probe_acked = true; } /* @@ -940,8 +981,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, _debug("bound %16lx %u", extracted, nr); - if (summary->rtt_sample_avail) - rxrpc_input_soft_rtt(call, summary, tq); rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, seq - RXRPC_NR_TXQUEUE, &lowest_nak); extracted = ~0UL; @@ -1063,7 +1102,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt); - goto send_response; + goto send_response; /* Still respond if requested. 
*/ } trailer.maxMTU = 0; @@ -1079,14 +1118,19 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; - switch (summary.ack_reason) { - case RXRPC_ACK_PING: - break; - default: - if (summary.acked_serial && - after(summary.acked_serial, call->acks_highest_serial)) - call->acks_highest_serial = summary.acked_serial; - break; + if (summary.acked_serial) { + switch (summary.ack_reason) { + case RXRPC_ACK_PING_RESPONSE: + rxrpc_complete_rtt_probe(call, call->acks_latest_ts, + summary.acked_serial, summary.ack_serial, + rxrpc_rtt_rx_ping_response); + break; + default: + if (after(summary.acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = summary.acked_serial; + summary.rtt_sample_avail = true; + break; + } } /* Parse rwind and mtu sizes if provided. */ @@ -1096,15 +1140,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); - if (summary.acked_serial) { - if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_complete_rtt_probe(call, call->acks_latest_ts, - summary.acked_serial, summary.ack_serial, - rxrpc_rtt_rx_ping_response); - else - summary.rtt_sample_avail = true; - } - /* Ignore ACKs unless we are or have just been transmitting. */ switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -1141,10 +1176,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ping(call, summary.ack_serial, rxrpc_propose_ack_ping_for_lost_reply); + /* Drive the congestion management algorithm first and then RACK-TLP as + * the latter depends on the state/change in state in the former. 
+ */ rxrpc_congestion_management(call, &summary); - if (summary.need_retransmit) - rxrpc_resend(call, summary.ack_serial, - summary.ack_reason == RXRPC_ACK_PING_RESPONSE); + rxrpc_rack_detect_loss_and_arm_timer(call, &summary); + rxrpc_tlp_process_ack(call, &summary); + if (call->tlp_serial && after_eq(summary.acked_serial, call->tlp_serial)) + call->tlp_serial = 0; send_response: if (summary.ack_reason == RXRPC_ACK_PING) diff --git a/net/rxrpc/input_rack.c b/net/rxrpc/input_rack.c new file mode 100644 index 000000000000..13c371261e0a --- /dev/null +++ b/net/rxrpc/input_rack.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RACK-TLP [RFC8985] Implementation + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ar-internal.h" + +static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1, + ktime_t t2, rxrpc_seq_t seq2) +{ + if (ktime_after(t1, t2)) + return true; + return t1 == t2 && after(seq1, seq2); +} + +/* + * Mark a packet lost. + */ +static void rxrpc_rack_mark_lost(struct rxrpc_call *call, + struct rxrpc_txqueue *tq, unsigned int ix) +{ + if (__test_and_set_bit(ix, &tq->segment_lost)) { + if (__test_and_clear_bit(ix, &tq->segment_retransmitted)) + call->tx_nr_resent--; + } else { + call->tx_nr_lost++; + } + tq->segment_xmit_ts[ix] = UINT_MAX; +} + +/* + * Get the transmission time of a packet in the Tx queue. + */ +static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix) +{ + if (tq->segment_xmit_ts[ix] == UINT_MAX) + return KTIME_MAX; + return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); +} + +/* + * Get a bitmask of nack bits for a queue segment and mask off any that aren't + * yet reported. 
+ */ +static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq) +{ + unsigned long nacks = ~tq->segment_acked; + + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) + nacks &= (1UL << tq->nr_reported_acks) - 1; + return nacks; +} + +/* + * Update the RACK state for the most recently sent packet that has been + * delivered [RFC8985 6.2 Step 2]. + */ +static void rxrpc_rack_update(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts); + + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + + if (test_bit(ix, &tq->segment_retransmitted)) { + /* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */ + if (before(call->acks_highest_serial, tq->segment_serial[ix])) + return; + if (rtt < minmax_get(&call->min_rtt)) + return; + } + + /* The RACK algorithm requires the segment ACKs to be traversed in + * order of segment transmission - but the only thing this seems to + * matter for is that RACK.rtt is set to the rtt of the most recently + * transmitted segment. We should be able to achieve the same by only + * setting RACK.rtt if the xmit time is greater. + */ + if (ktime_after(xmit_ts, call->rack_rtt_ts)) { + call->rack_rtt = rtt; + call->rack_rtt_ts = xmit_ts; + } + + if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) { + call->rack_rtt = rtt; + call->rack_xmit_ts = xmit_ts; + call->rack_end_seq = seq; + } +} + +/* + * Detect data segment reordering [RFC8985 6.2 Step 3]. + */ +static void rxrpc_rack_detect_reordering(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_seq_t seq = tq->qbase + ix; + + /* Track the highest sequence number so far ACK'd. 
This is not + * necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer + * could put a NACK in the last SACK slot. + */ + if (after(seq, call->rack_fack)) + call->rack_fack = seq; + else if (before(seq, call->rack_fack) && + test_bit(ix, &tq->segment_retransmitted)) + call->rack_reordering_seen = true; +} + +void rxrpc_input_rack_one(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_rack_update(call, summary, tq, ix); + rxrpc_rack_detect_reordering(call, summary, tq, ix); +} + +void rxrpc_input_rack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long new_acks) +{ + while (new_acks) { + unsigned int ix = __ffs(new_acks); + + __clear_bit(ix, &new_acks); + rxrpc_input_rack_one(call, summary, tq, ix); + } + + trace_rxrpc_rack_update(call, summary); +} + +/* + * Update the reordering window [RFC8985 6.2 Step 4]. Returns the updated + * duration of the reordering window. + * + * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can + * be given a 'DUPLICATE' reason with the serial number referring to the + * duplicated DATA packet. Rx does not inform as to whether this was a + * reception of the same packet twice or of a retransmission of a packet we + * already received (though this could be determined by the transmitter based + * on the serial number).
+ */ +static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */ + rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */ + bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE; + int dup_thresh = 3; + + /* DSACK-based reordering window adaptation */ + if (!call->rack_dsack_round_none && + after_eq(snd_una, call->rack_dsack_round)) + call->rack_dsack_round_none = true; + + /* Grow the reordering window per round that sees DSACK. Reset the + * window after 16 DSACK-free recoveries. + */ + if (call->rack_dsack_round_none && have_dsack_option) { + call->rack_dsack_round_none = false; + call->rack_dsack_round = snd_nxt; + call->rack_reo_wnd_mult++; + call->rack_reo_wnd_persist = 16; + } else if (summary->exiting_fast_or_rto_recovery) { + call->rack_reo_wnd_persist--; + if (call->rack_reo_wnd_persist <= 0) + call->rack_reo_wnd_mult = 1; + } + + if (!call->rack_reordering_seen) { + if (summary->in_fast_or_rto_recovery) + return 0; + if (call->acks_nr_sacks >= dup_thresh) + return 0; + } + + return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4, + call->srtt_us >> 3)); +} + +/* + * Detect losses [RFC8985 6.2 Step 5].
+ */ +static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + struct rxrpc_txqueue *tq; + ktime_t timeout = 0, lost_after, now = ktime_get_real(); + + call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary); + lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd); + trace_rxrpc_rack_scan_loss(call); + + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long nacks = rxrpc_tq_nacks(tq); + + if (after(tq->qbase, call->tx_transmitted)) + break; + trace_rxrpc_rack_scan_loss_tq(call, tq, nacks); + + /* Skip ones marked lost but not yet retransmitted */ + nacks &= ~tq->segment_lost | tq->segment_retransmitted; + + while (nacks) { + unsigned int ix = __ffs(nacks); + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t remaining; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + + __clear_bit(ix, &nacks); + + if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq, + xmit_ts, seq)) { + remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now); + if (remaining <= 0) { + rxrpc_rack_mark_lost(call, tq, ix); + trace_rxrpc_rack_detect_loss(call, summary, seq); + } else { + timeout = max(remaining, timeout); + } + } + } + } + + return timeout; +} + +/* + * Detect losses and set a timer to retry the detection [RFC8985 6.2 Step 5]. + */ +void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + ktime_t timeout = rxrpc_rack_detect_loss(call, summary); + + if (timeout) { + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER; + call->rack_timo_at = ktime_add(ktime_get_real(), timeout); + trace_rxrpc_rack_timer(call, timeout, false); + trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo); + } +} + +/* + * Handle RACK-TLP RTO expiration [RFC8985 6.3].
+ */ +static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq; + rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */ + ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd); + ktime_t deadline = ktime_sub(ktime_get_real(), lost_after); + + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long unacked = ~tq->segment_acked; + + trace_rxrpc_rack_mark_loss_tq(call, tq); + while (unacked) { + unsigned int ix = __ffs(unacked); + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + + if (after(seq, call->tx_transmitted)) + return; + __clear_bit(ix, &unacked); + + if (seq == snd_una || + ktime_before(xmit_ts, deadline)) + rxrpc_rack_mark_lost(call, tq, ix); + } + } +} + +/* + * Calculate the TLP loss probe timeout (PTO) [RFC8985 7.2]. + */ +ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now) +{ + unsigned int flight_size = rxrpc_tx_in_flight(call); + ktime_t rto_at = ktime_add(call->tx_last_sent, + rxrpc_get_rto_backoff(call, false)); + ktime_t pto; + + if (call->rtt_count > 0) { + /* Use 2*SRTT as the timeout. */ + pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4); + if (flight_size) + pto = ktime_add(pto, call->tlp_max_ack_delay); + } else { + pto = NSEC_PER_SEC; + } + + if (ktime_after(ktime_add(now, pto), rto_at)) + pto = ktime_sub(rto_at, now); + return pto; +} + +/* + * Send a TLP loss probe on PTO expiration [RFC8985 7.3]. + */ +void rxrpc_tlp_send_probe(struct rxrpc_call *call) +{ + unsigned int in_flight = rxrpc_tx_in_flight(call); + + if (after_eq(call->acks_hard_ack, call->tx_transmitted)) + return; /* Everything we transmitted has been acked. */ + + /* There must be no other loss probe still in flight and we need to + * have taken a new RTT sample since last probe or the start of + * connection.
+ */ + if (!call->tlp_serial && + call->tlp_rtt_taken != call->rtt_taken) { + call->tlp_is_retrans = false; + if (after(call->send_top, call->tx_transmitted) && + rxrpc_tx_window_space(call) > 0) { + /* Transmit the lowest-sequence unsent DATA */ + call->tx_last_serial = 0; + rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data); + call->tlp_serial = call->tx_last_serial; + call->tlp_seq = call->tx_transmitted; + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new); + in_flight = rxrpc_tx_in_flight(call); + } else { + /* Retransmit the highest-sequence DATA sent */ + call->tx_last_serial = 0; + rxrpc_resend_tlp(call); + call->tlp_is_retrans = true; + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit); + } + } else { + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy); + } + + if (in_flight != 0) { + ktime_t rto = rxrpc_get_rto_backoff(call, false); + + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO; + call->rack_timo_at = ktime_add(ktime_get_real(), rto); + trace_rxrpc_rack_timer(call, rto, false); + trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto); + } +} + +/* + * Detect losses using the ACK of a TLP loss probe [RFC8985 7.4].
+ */ +void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary) +{ + if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack)) + return; + + if (!call->tlp_is_retrans) { + /* TLP of new data delivered */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data); + call->tlp_serial = 0; + } else if (summary->ack_reason == RXRPC_ACK_DUPLICATE && + summary->acked_serial == call->tlp_serial) { + /* General Case: Detected packet losses using RACK [7.4.1] */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked); + call->tlp_serial = 0; + } else if (after(call->acks_hard_ack, call->tlp_seq)) { + /* Repaired the single loss */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond); + call->tlp_serial = 0; + // TODO: Invoke congestion control to react to the loss + // event the probe has repaired + } else if (summary->tlp_probe_acked) { + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked); + /* Special Case: Detected a single loss repaired by the loss + * probe [7.4.2] + */ + call->tlp_serial = 0; + } else { + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete); + } +} + +/* + * Handle RACK timer expiration, dispatching on the call's current timer mode.
+ */ +void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by) +{ + struct rxrpc_ack_summary summary = {}; + enum rxrpc_rack_timer_mode mode = call->rack_timer_mode; + + trace_rxrpc_rack_timer(call, overran_by, true); + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; + + switch (mode) { + case RXRPC_CALL_RACKTIMER_RACK_REORDER: + rxrpc_rack_detect_loss_and_arm_timer(call, &summary); + break; + case RXRPC_CALL_RACKTIMER_TLP_PTO: + rxrpc_tlp_send_probe(call); + break; + case RXRPC_CALL_RACKTIMER_RTO: + // Might need to poke the congestion algo in some way + rxrpc_rack_mark_losses_on_rto(call); + break; + //case RXRPC_CALL_RACKTIMER_ZEROWIN: + default: + pr_warn("Unexpected rack timer %u\n", mode); + } +} diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index fbacf2056f64..2925c7fc82cf 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -470,6 +470,7 @@ int rxrpc_io_thread(void *data) spin_lock_irq(&local->rx_queue.lock); skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); spin_unlock_irq(&local->rx_queue.lock); + trace_rxrpc_iothread_rx(local, skb_queue_len(&rx_queue)); } /* Distribute packets and errors.
*/ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f934551a9b1c..6f7a125d6e90 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -542,12 +542,14 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se unsigned int xmit_ts; rxrpc_seq_t seq = req->seq; size_t len = 0; + bool start_tlp = false; trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, req->n); + call->tx_last_serial = serial + req->n - 1; call->tx_last_sent = req->now; xmit_ts = rxrpc_prepare_txqueue(tq, req); prefetch(tq->next); @@ -557,6 +559,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); + + /* Record (re-)transmission for RACK [RFC8985 6.1]. */ + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + if (req->retrans) { + __set_bit(ix, &tq->ever_retransmitted); + __set_bit(ix, &tq->segment_retransmitted); + call->tx_nr_resent++; + } else { + call->tx_nr_sent++; + start_tlp = true; + } tq->segment_xmit_ts[ix] = xmit_ts; tq->segment_serial[ix] = serial; if (i + 1 == req->n) @@ -576,11 +590,24 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ - if (call->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call, false); + if (req->tlp_probe) { + /* Sending TLP loss probe [RFC8985 7.3]. */ + call->tlp_serial = serial - 1; + call->tlp_seq = seq - 1; + } else if (start_tlp) { + /* Schedule TLP loss probe [RFC8985 7.2]. */ + ktime_t pto; - call->ack_lost_at = ktime_add(req->now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) + /* The first packet may take longer to elicit a response. 
*/ + pto = NSEC_PER_SEC; + else + pto = rxrpc_tlp_calc_pto(call, req->now); + + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO; + call->rack_timo_at = ktime_add(req->now, pto); + trace_rxrpc_rack_timer(call, pto, false); + trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto); } if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { @@ -589,12 +616,6 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se call->expect_rx_by = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } - if (call->resend_at == KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call, false); - - call->resend_at = ktime_add(req->now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); - } rxrpc_set_keepalive(call, req->now); return len; From 2d20773aec14996b6cc4db92d885028319be683d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Dec 2024 22:38:11 +0000 Subject: [PATCH 0265/1386] mctp: no longer rely on net->dev_index_head[] mctp_dump_addrinfo() is one of the last users of net->dev_index_head[] in the control path. Switch to for_each_netdev_dump() for better scalability. 
Use C99 for mctp_device_rtnl_msg_handlers[] to prepare future RTNL removal from mctp_dump_addrinfo() (mdev->addrs is not yet RCU protected) Signed-off-by: Eric Dumazet Cc: Matt Johnston Reviewed-by: Kuniyuki Iwashima Acked-by: Jeremy Kerr Link: https://patch.msgid.link/20241206223811.1343076-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/mctp/device.c | 50 ++++++++++++++++++----------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/net/mctp/device.c b/net/mctp/device.c index 26ce34b7e88e..8e0724c56723 100644 --- a/net/mctp/device.c +++ b/net/mctp/device.c @@ -20,8 +20,7 @@ #include struct mctp_dump_cb { - int h; - int idx; + unsigned long ifindex; size_t a_idx; }; @@ -115,43 +114,29 @@ static int mctp_dump_addrinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct mctp_dump_cb *mcb = (void *)cb->ctx; struct net *net = sock_net(skb->sk); - struct hlist_head *head; struct net_device *dev; struct ifaddrmsg *hdr; struct mctp_dev *mdev; - int ifindex; - int idx = 0, rc; + int ifindex, rc; hdr = nlmsg_data(cb->nlh); // filter by ifindex if requested ifindex = hdr->ifa_index; rcu_read_lock(); - for (; mcb->h < NETDEV_HASHENTRIES; mcb->h++, mcb->idx = 0) { - idx = 0; - head = &net->dev_index_head[mcb->h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx >= mcb->idx && - (ifindex == 0 || ifindex == dev->ifindex)) { - mdev = __mctp_dev_get(dev); - if (mdev) { - rc = mctp_dump_dev_addrinfo(mdev, - skb, cb); - mctp_dev_put(mdev); - // Error indicates full buffer, this - // callback will get retried. 
- if (rc < 0) - goto out; - } - } - idx++; - // reset for next iteration - mcb->a_idx = 0; - } + for_each_netdev_dump(net, dev, mcb->ifindex) { + if (ifindex && ifindex != dev->ifindex) + continue; + mdev = __mctp_dev_get(dev); + if (!mdev) + continue; + rc = mctp_dump_dev_addrinfo(mdev, skb, cb); + mctp_dev_put(mdev); + if (rc < 0) + break; + mcb->a_idx = 0; } -out: rcu_read_unlock(); - mcb->idx = idx; return skb->len; } @@ -531,9 +516,12 @@ static struct notifier_block mctp_dev_nb = { }; static const struct rtnl_msg_handler mctp_device_rtnl_msg_handlers[] = { - {THIS_MODULE, PF_MCTP, RTM_NEWADDR, mctp_rtm_newaddr, NULL, 0}, - {THIS_MODULE, PF_MCTP, RTM_DELADDR, mctp_rtm_deladdr, NULL, 0}, - {THIS_MODULE, PF_MCTP, RTM_GETADDR, NULL, mctp_dump_addrinfo, 0}, + {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_NEWADDR, + .doit = mctp_rtm_newaddr}, + {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_DELADDR, + .doit = mctp_rtm_deladdr}, + {.owner = THIS_MODULE, .protocol = PF_MCTP, .msgtype = RTM_GETADDR, + .dumpit = mctp_dump_addrinfo}, }; int __init mctp_device_init(void) From ca7858880590d4f1dfe73b2cbf372b8ed80a6d81 Mon Sep 17 00:00:00 2001 From: Jesse Van Gavere Date: Fri, 6 Dec 2024 21:42:02 +0100 Subject: [PATCH 0266/1386] net: dsa: microchip: Make MDIO bus name unique In configurations with 2 or more DSA clusters it will fail to allocate unique MDIO bus names as only the switch ID is used, fix this by using a combination of the tree ID and switch ID when needed Signed-off-by: Jesse Van Gavere Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241206204202.649912-1-jesse.vangavere@scioteq.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 920443ee8ffd..f5822c57be32 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ 
b/drivers/net/dsa/microchip/ksz_common.c @@ -2550,7 +2550,11 @@ static int ksz_mdio_register(struct ksz_device *dev) bus->read = ksz_sw_mdio_read; bus->write = ksz_sw_mdio_write; bus->name = "ksz user smi"; - snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index); + if (ds->dst->index != 0) { + snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d-%d", ds->dst->index, ds->index); + } else { + snprintf(bus->id, MII_BUS_ID_SIZE, "SMI-%d", ds->index); + } } ret = ksz_parse_dt_phy_config(dev, bus, mdio_np); From 3f330db30638b6489d548084a7e8843374d41ad0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 5 Dec 2024 08:59:14 -0800 Subject: [PATCH 0267/1386] net: reformat kdoc return statements kernel-doc -Wall warns about missing Return: statement for non-void functions. We have a number of kdocs in our headers which are missing the colon, IOW they use * Return some value or * Returns some value Having the colon makes some sense, it should help kdoc parser avoid false positives. So add them. This is mostly done with a sed script, and removing the unnecessary cases (mostly the comments which aren't kdoc). 
Acked-by: Johannes Berg Acked-by: Richard Cochran Acked-by: Sergey Ryazanov Reviewed-by: Edward Cree Acked-by: Alexandra Winter Acked-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20241205165914.1071102-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 18 ++++++------ include/linux/ethtool.h | 6 ++-- include/linux/if_vlan.h | 28 +++++++++---------- include/linux/netdevice.h | 14 ++++++---- include/linux/netfilter/x_tables.h | 2 +- include/linux/netfilter_netdev.h | 3 +- include/linux/ptp_clock_kernel.h | 4 +-- include/linux/rfkill.h | 2 +- include/linux/rtnetlink.h | 2 +- include/linux/skbuff.h | 16 +++++------ include/linux/wwan.h | 2 +- include/net/cfg80211.h | 2 +- include/net/dst.h | 2 +- include/net/genetlink.h | 6 ++-- include/net/ipv6.h | 2 +- include/net/iucv/iucv.h | 30 ++++++++++---------- include/net/netfilter/nf_tproxy.h | 4 +-- include/net/netlink.h | 44 +++++++++++++++--------------- include/net/page_pool/helpers.h | 9 ++---- include/net/pkt_cls.h | 4 +-- include/net/tcp.h | 2 +- 21 files changed, 101 insertions(+), 101 deletions(-) diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index ecf203f01034..9a1eacf35d37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -81,7 +81,7 @@ static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) = * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per + * Return: true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. @@ -104,7 +104,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr) * is_zero_ether_addr - Determine if give Ethernet address is all zeros. 
* @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is all zeroes. + * Return: true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ @@ -123,7 +123,7 @@ static inline bool is_zero_ether_addr(const u8 *addr) * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a multicast address. + * Return: true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) @@ -157,7 +157,7 @@ static inline bool is_multicast_ether_addr_64bits(const u8 *addr) * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a local address. + * Return: true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { @@ -168,7 +168,7 @@ static inline bool is_local_ether_addr(const u8 *addr) * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is the broadcast address. + * Return: true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ @@ -183,7 +183,7 @@ static inline bool is_broadcast_ether_addr(const u8 *addr) * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a unicast address. + * Return: true if the address is a unicast address. 
*/ static inline bool is_unicast_ether_addr(const u8 *addr) { @@ -197,7 +197,7 @@ static inline bool is_unicast_ether_addr(const u8 *addr) * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * - * Return true if the address is valid. + * Return: true if the address is valid. * * Please note: addr must be aligned to u16. */ @@ -214,7 +214,7 @@ static inline bool is_valid_ether_addr(const u8 *addr) * * Check that the value from the Ethertype/length field is a valid Ethertype. * - * Return true if the valid is an 802.3 supported Ethertype. + * Return: true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { @@ -458,7 +458,7 @@ static inline bool ether_addr_is_ip_mcast(const u8 *addr) * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return a u64 value of the address + * Return: a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b8b935b52603..e217c6321ed0 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -257,7 +257,7 @@ struct ethtool_link_ksettings { * @mode : one of the ETHTOOL_LINK_MODE_*_BIT * (not atomic, no bound checking) * - * Returns true/false. + * Returns: true/false. 
*/ #define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \ test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) @@ -1199,7 +1199,7 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, * @dev: pointer to net_device structure * @vclock_index: pointer to pointer of vclock index * - * Return number of phc vclocks + * Return: number of phc vclocks */ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); @@ -1253,7 +1253,7 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. * @dev: pointer to net_device structure * @info: buffer to hold the result - * Returns zero on success, non-zero otherwise. + * Returns: zero on success, non-zero otherwise. */ int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c1645c86eed9..d6326b53e336 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -310,7 +310,7 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * eth_type_vlan - check for valid vlan ether type. * @ethertype: ether type to check * - * Returns true if the ether type is a vlan ether type. + * Returns: true if the ether type is a vlan ether type. */ static inline bool eth_type_vlan(__be16 ethertype) { @@ -341,9 +341,9 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. 
*/ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, @@ -390,9 +390,9 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -533,7 +533,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not of VLAN type + * Returns: error if the skb is not of VLAN type */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -551,7 +551,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if @skb->vlan_tci is not set correctly + * Returns: error if @skb->vlan_tci is not set correctly */ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) @@ -570,7 +570,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not VLAN tagged + * Returns: error if the skb is not VLAN tagged */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -587,7 +587,7 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @type: first vlan protocol * @depth: buffer to store length of eth and vlan tags in bytes * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. 
*/ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, @@ -629,7 +629,7 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) @@ -710,7 +710,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. * - * Returns a new pointer to skb->data, or NULL on failure to pull. + * Returns: a new pointer to skb->data, or NULL on failure to pull. */ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { @@ -727,7 +727,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) * skb_vlan_tagged - check if skb is vlan tagged. * @skb: skbuff to query * - * Returns true if the skb is tagged, regardless of whether it is hardware + * Returns: true if the skb is tagged, regardless of whether it is hardware * accelerated or not. */ static inline bool skb_vlan_tagged(const struct sk_buff *skb) @@ -743,7 +743,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. * @skb: skbuff to query * - * Returns true if the skb is tagged with multiple vlan headers, regardless + * Returns: true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. 
*/ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) @@ -774,7 +774,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) * @skb: skbuff to query * @features: features to be checked * - * Returns features without unsafe ones if the skb has multiple tags. + * Returns: features without unsafe ones if the skb has multiple tags. */ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 135105441681..d917949bba03 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -509,7 +509,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * - * Return True if NAPI is scheduled, False otherwise. + * Return: True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { @@ -524,7 +524,7 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. - * Return true if we schedule a NAPI or false if not. + * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. */ @@ -558,7 +558,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). - * Return false if device should avoid rearming interrupts. + * Return: false if device should avoid rearming interrupts. */ bool napi_complete_done(struct napi_struct *n, int work_done); @@ -3851,7 +3851,7 @@ static inline bool netif_attr_test_mask(unsigned long j, * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * - * Returns true if a CPU/Rx queue is online. 
+ * Returns: true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, @@ -3871,7 +3871,8 @@ static inline bool netif_attr_test_online(unsigned long j, * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set. + * Returns: next (after n) CPU/Rx queue index in the mask; + * >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) @@ -3893,7 +3894,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set in both. + * Returns: next (after n) CPU/Rx queue index set in both masks; + * >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5897f3dbaf7c..f39f688d7285 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -357,7 +357,7 @@ extern struct static_key xt_tee_enabled; * Begin packet processing : all readers must wait the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) - * Returns : + * Returns: * 1 if no recursion on this cpu * 0 if recursion detected */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 8676316547cc..3175073a66ba 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -66,7 +66,6 @@ static inline bool nf_hook_egress_active(void) * @rc: result code which shall be returned by __dev_queue_xmit() on failure * @dev: netdev whose egress hooks shall be applied to 
@skb * - * Returns @skb on success or %NULL if the packet was consumed or filtered. * Caller must hold rcu_read_lock. * * On ingress, packets are classified first by tc, then by netfilter. @@ -81,6 +80,8 @@ static inline bool nf_hook_egress_active(void) * called recursively by tunnel drivers such as vxlan, the flag is reverted to * false after sch_handle_egress(). This ensures that netfilter is applied * both on the overlay and underlying network. + * + * Returns: @skb on success or %NULL if the packet was consumed or filtered. */ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, struct net_device *dev) diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c892d22ce0a7..0d68d09bedd1 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -307,7 +307,7 @@ static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. * - * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * Returns: a valid pointer on success or PTR_ERR on failure. If PHC * support is missing at the configuration level, this function * returns NULL, and drivers are expected to gracefully handle that * case separately. @@ -445,7 +445,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * - * Returns converted timestamp, or 0 on error. + * Returns: converted timestamp, or 0 on error. 
*/ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 997b34197385..6816e4c5f3f0 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -241,7 +241,7 @@ bool rfkill_soft_blocked(struct rfkill *rfkill); * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * - * Returns enum rfkill_type that corresponds to the name. + * Returns: enum rfkill_type that corresponds to the name. */ enum rfkill_type rfkill_find_type(const char *name); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 14b88f551920..811ce44113f6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -78,7 +78,7 @@ static inline bool lockdep_rtnl_is_held(void) * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit + * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95452d1a07fc..69624b394cd9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1134,7 +1134,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) * skb_dst - returns skb dst_entry * @skb: buffer * - * Returns skb dst_entry, regardless of reference taken or not. + * Returns: skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { @@ -1222,7 +1222,7 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) * skb_unref - decrement the skb's reference count * @skb: buffer * - * Returns true if we can free the skb. + * Returns: true if we can free the skb. 
*/ static inline bool skb_unref(struct sk_buff *skb) { @@ -1344,7 +1344,7 @@ struct sk_buff_fclones { * @sk: socket * @skb: buffer * - * Returns true if skb is a fast clone, and its clone is not freed. + * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. */ @@ -3516,7 +3516,7 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * - * Returns false if this page should be returned to page allocator, true + * Returns: false if this page should be returned to page allocator, true * otherwise. */ static inline bool dev_page_is_reusable(const struct page *page) @@ -3633,7 +3633,7 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. The page must already + * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) @@ -3648,7 +3648,7 @@ static inline void *skb_frag_address(const skb_frag_t *frag) * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. Checks that the page + * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. 
*/ static inline void *skb_frag_address_safe(const skb_frag_t *frag) @@ -3890,7 +3890,7 @@ static inline int skb_linearize(struct sk_buff *skb) * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * - * Return true if the skb has at least one frag that might be modified + * Return: true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) @@ -4612,7 +4612,7 @@ static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) /* Check if we need to perform checksum complete validation. * - * Returns true if checksum complete is needed, false otherwise + * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 79c781875c09..a4d6cc0c9f68 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -97,7 +97,7 @@ struct wwan_port_caps { * * This function must be balanced with a call to wwan_remove_port(). * - * Returns a valid pointer to wwan_port on success or PTR_ERR on failure + * Returns: a valid pointer to wwan_port on success or PTR_ERR on failure */ struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 27acf1292a5c..182f7965048f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5957,7 +5957,7 @@ int wiphy_register(struct wiphy *wiphy); * @wiphy: the wiphy to check the locking on * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the + * Return: the value of the specified RCU-protected pointer, but omit the * READ_ONCE(), because caller holds the wiphy mutex used for updates. 
*/ #define wiphy_dereference(wiphy, p) \ diff --git a/include/net/dst.h b/include/net/dst.h index 08647c99d79c..78c78cdce0e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -307,7 +307,7 @@ static inline bool dst_hold_safe(struct dst_entry *dst) * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. - * Returns true if dst is refcounted. + * Returns: true if dst is refcounted. */ static inline bool skb_dst_force(struct sk_buff *skb) { diff --git a/include/net/genetlink.h b/include/net/genetlink.h index d096cc6352de..a03d56765832 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -354,7 +354,7 @@ __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags) * such requests) or a struct initialized by genl_info_init_ntf() * when constructing notifications. * - * Returns pointer to new genetlink header. + * Returns: pointer to new genetlink header. */ static inline void * genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) @@ -366,7 +366,7 @@ genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() * - * Returns pointer to netlink header. + * Returns: pointer to netlink header. */ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { @@ -435,7 +435,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, * @flags: netlink message flags * @cmd: generic netlink command * - * Returns pointer to user specific header + * Returns: pointer to user specific header */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 248bfb26e2af..f5c43ad1565e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -471,7 +471,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, /* This helper is specialized for BIG TCP needs. 
* It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. * It assumes headers are already in skb->head. - * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. + * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. */ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) { diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index dd9e93c12260..9804fa5d9c67 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -202,7 +202,7 @@ struct iucv_handler { * * Registers a driver with IUCV. * - * Returns 0 on success, -ENOMEM if the memory allocation for the pathid + * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. */ int iucv_register(struct iucv_handler *handler, int smp); @@ -224,7 +224,7 @@ void iucv_unregister(struct iucv_handler *handle, int smp); * * Allocate a new path structure for use with iucv_connect. * - * Returns NULL if the memory allocation failed or a pointer to the + * Returns: NULL if the memory allocation failed or a pointer to the * path structure. */ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) @@ -260,7 +260,7 @@ static inline void iucv_path_free(struct iucv_path *path) * This function is issued after the user received a connection pending * external interrupt and now wishes to complete the IUCV communication path. * - * Returns the result of the CP IUCV call. + * Returns: the result of the CP IUCV call. */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, u8 *userdata, void *private); @@ -278,7 +278,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, * successfully, you are not able to use the path until you receive an IUCV * Connection Complete external interrupt. * - * Returns the result of the CP IUCV call. + * Returns: the result of the CP IUCV call. 
*/ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, u8 *userid, u8 *system, u8 *userdata, @@ -292,7 +292,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, * This function temporarily suspends incoming messages on an IUCV path. * You can later reactivate the path by invoking the iucv_resume function. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); @@ -304,7 +304,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); * This function resumes incoming messages on an IUCV path that has * been stopped with iucv_path_quiesce. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_resume(struct iucv_path *path, u8 *userdata); @@ -315,7 +315,7 @@ int iucv_path_resume(struct iucv_path *path, u8 *userdata); * * This function terminates an IUCV path. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_sever(struct iucv_path *path, u8 *userdata); @@ -327,7 +327,7 @@ int iucv_path_sever(struct iucv_path *path, u8 *userdata); * * Cancels a message you have sent. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, u32 srccls); @@ -347,7 +347,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, * * Locking: local_bh_enable/local_bh_disable * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); @@ -367,7 +367,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, * * Locking: no locking. * - * Returns the result from the CP IUCV call. 
+ * Returns: the result from the CP IUCV call. */ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, @@ -382,7 +382,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, * are notified of a message and the time that you complete the message, * the message may be rejected. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); @@ -399,7 +399,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into * the parameter list. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *reply, size_t size); @@ -419,7 +419,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, * * Locking: local_bh_enable/local_bh_disable * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); @@ -439,7 +439,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, * * Locking: no locking. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); @@ -461,7 +461,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, * reply to the message and a buffer is provided into which IUCV moves * the reply to this message. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. 
*/ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size, diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 5adf6fda11e8..06985530517b 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -49,7 +49,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); * * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. * - * Returns the listener socket if there's one, the TIME_WAIT socket if + * Returns: the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * @@ -108,7 +108,7 @@ nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, * * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. * - * Returns the listener socket if there's one, the TIME_WAIT socket if + * Returns: the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * diff --git a/include/net/netlink.h b/include/net/netlink.h index 39eaa6be6ca8..e015ffbed819 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -649,7 +649,7 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * - * Returns the next netlink message in the message stream and + * Returns: the next netlink message in the message stream and * decrements remaining by the size of the current message. */ static inline struct nlmsghdr * @@ -676,7 +676,7 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) * exceeding maxtype will be rejected, policy must be specified, attributes * will be validated in the strictest way possible. * - * Returns 0 on success or a negative error code. 
+ * Returns: 0 on success or a negative error code. */ static inline int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, @@ -701,7 +701,7 @@ static inline int nla_parse(struct nlattr **tb, int maxtype, * exceeding maxtype will be ignored and attributes from the policy are not * always strictly validated (only for new attributes). * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, @@ -726,7 +726,7 @@ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, * exceeding maxtype will be rejected as well as trailing data, but the * policy is not completely strictly validated (only for new attributes). * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, @@ -833,7 +833,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * - * Returns the first attribute which matches the specified type. + * Returns: the first attribute which matches the specified type. */ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, int hdrlen, int attrtype) @@ -854,7 +854,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * specified policy. Validation is done in liberal mode. * See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_validate_deprecated(const struct nlattr *head, int len, int maxtype, @@ -877,7 +877,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * specified policy. 
Validation is done in strict mode. * See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, @@ -914,7 +914,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, * nlmsg_report - need to report back to application? * @nlh: netlink message header * - * Returns 1 if a report back to the application is requested. + * Returns: 1 if a report back to the application is requested. */ static inline int nlmsg_report(const struct nlmsghdr *nlh) { @@ -925,7 +925,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) * nlmsg_seq - return the seq number of netlink message * @nlh: netlink message header * - * Returns 0 if netlink message is NULL + * Returns: 0 if netlink message is NULL */ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) { @@ -952,7 +952,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) * @payload: length of message payload * @flags: message flags * - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, @@ -971,7 +971,7 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se * * Append data to an existing nlmsg, used when constructing a message * with multiple fixed-format headers (which is rare). - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the extra payload. 
*/ static inline void *nlmsg_append(struct sk_buff *skb, u32 size) @@ -993,7 +993,7 @@ static inline void *nlmsg_append(struct sk_buff *skb, u32 size) * @payload: length of message payload * @flags: message flags * - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, @@ -1050,7 +1050,7 @@ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) * nlmsg_get_pos - return current position in netlink message * @skb: socket buffer the message is stored in * - * Returns a pointer to the current tail of the message. + * Returns: a pointer to the current tail of the message. */ static inline void *nlmsg_get_pos(struct sk_buff *skb) { @@ -1276,7 +1276,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining) * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream * - * Returns the next netlink attribute in the attribute stream and + * Returns: the next netlink attribute in the attribute stream and * decrements remaining by the size of the current attribute. */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) @@ -1292,7 +1292,7 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) * @nla: attribute containing the nested attributes * @attrtype: type of attribute to look for * - * Returns the first attribute which matches the specified type. + * Returns: the first attribute which matches the specified type. */ static inline struct nlattr * nla_find_nested(const struct nlattr *nla, int attrtype) @@ -2091,7 +2091,7 @@ static inline int nla_get_flag(const struct nlattr *nla) * nla_get_msecs - return payload of msecs attribute * @nla: msecs netlink attribute * - * Returns the number of milliseconds in jiffies. + * Returns: the number of milliseconds in jiffies. 
*/ static inline unsigned long nla_get_msecs(const struct nlattr *nla) { @@ -2183,7 +2183,7 @@ static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp) * marked their nest attributes with NLA_F_NESTED flag. New APIs should use * nla_nest_start() which sets the flag. * - * Returns the container attribute or NULL on error + * Returns: the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, int attrtype) @@ -2204,7 +2204,7 @@ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED * flag. This is the preferred function to use in new code. * - * Returns the container attribute or NULL on error + * Returns: the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) { @@ -2219,7 +2219,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) * Corrects the container attribute header to include the all * appended attributes. * - * Returns the total data length of the skb. + * Returns: the total data length of the skb. */ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) { @@ -2252,7 +2252,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * specified policy. Attributes with a type exceeding maxtype will be * ignored. See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. 
*/ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, const struct nla_policy *policy, @@ -2285,7 +2285,7 @@ nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, * nla_need_padding_for_64bit - test 64-bit alignment of the next attribute * @skb: socket buffer the message is stored in * - * Return true if padding is needed to align the next attribute (nla_data()) to + * Return: true if padding is needed to align the next attribute (nla_data()) to * a 64-bit aligned area. */ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) @@ -2312,7 +2312,7 @@ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) * This will only be done in architectures which do not have * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined. * - * Returns zero on success or a negative error code. + * Returns: zero on success or a negative error code. */ static inline int nla_align_64bit(struct sk_buff *skb, int padattr) { diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 793e6fd78bc5..26caa2c20912 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -104,8 +104,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) * * Get a page fragment from the page allocator or page_pool caches. * - * Return: - * Return allocated page fragment, otherwise return NULL. + * Return: allocated page fragment, otherwise return NULL. */ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, @@ -155,8 +154,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool, * depending on the requested size in order to allocate memory with least memory * utilization and performance penalty. * - * Return: - * Return allocated page or page fragment, otherwise return NULL. + * Return: allocated page or page fragment, otherwise return NULL. 
*/ static inline struct page *page_pool_dev_alloc(struct page_pool *pool, unsigned int *offset, @@ -190,8 +188,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool, * This is just a thin wrapper around the page_pool_alloc() API, and * it returns va of the allocated page or page fragment. * - * Return: - * Return the va for the allocated page or page fragment, otherwise return NULL. + * Return: the va for the allocated page or page fragment, otherwise return NULL. */ static inline void *page_pool_dev_alloc_va(struct page_pool *pool, unsigned int *size) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cf199af85c52..22c5ab4269d7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -319,7 +319,7 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts, * tcf_exts_has_actions - check if at least one action is present * @exts: tc filter extensions handle * - * Returns true if at least one action is present. + * Returns: true if at least one action is present. */ static inline bool tcf_exts_has_actions(struct tcf_exts *exts) { @@ -501,7 +501,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, * through all ematches respecting their logic relations returning * as soon as the result is obvious. * - * Returns 1 if the ematch tree as-one matches, no ematches are configured + * Returns: 1 if the ematch tree as-one matches, no ematches are configured * or ematch is not enabled in the kernel, otherwise 0 is returned. */ static inline int tcf_em_tree_match(struct sk_buff *skb, diff --git a/include/net/tcp.h b/include/net/tcp.h index e9b37b76e894..5b2b04835688 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1817,7 +1817,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() * @c: returned tcp_sigpool for usage (uninitialized on failure) * - * Returns 0 on success, error otherwise. + * Returns: 0 on success, error otherwise. 
*/ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); /** From 9234a37a495dc34cece943bec495ab541e4143ab Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:50 +0100 Subject: [PATCH 0268/1386] vxlan: In vxlan_rcv(), access flags through the vxlan netdevice vxlan_sock.flags is constructed from vxlan_dev.cfg.flags, as the subset of flags (named VXLAN_F_RCV_FLAGS) that is important from the point of view of socket sharing. Attempts to reconfigure these flags during the vxlan netdev lifetime are also bounced. It is therefore immaterial whether we access the flags through the vxlan_dev or through the socket. Convert the socket accesses to netdevice accesses in this separate patch to make the conversions that take place in the following patches more obvious. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/5d237ffd731055e524d7b7c436de43358d8743d2.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index b46a799bd390..1ac2dcdd493e 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1717,7 +1717,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. 
*/ - if (vs->flags & VXLAN_F_GPE) { + if (vxlan->cfg.flags & VXLAN_F_GPE) { if (!vxlan_parse_gpe_proto(&unparsed, &protocol)) goto drop; unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS; @@ -1730,8 +1730,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } - if (vs->flags & VXLAN_F_REMCSUM_RX) { - reason = vxlan_remcsum(&unparsed, skb, vs->flags); + if (vxlan->cfg.flags & VXLAN_F_REMCSUM_RX) { + reason = vxlan_remcsum(&unparsed, skb, vxlan->cfg.flags); if (unlikely(reason)) goto drop; } @@ -1756,8 +1756,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) memset(md, 0, sizeof(*md)); } - if (vs->flags & VXLAN_F_GBP) - vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md); + if (vxlan->cfg.flags & VXLAN_F_GBP) + vxlan_parse_gbp_hdr(&unparsed, skb, vxlan->cfg.flags, md); /* Note that GBP and GPE can never be active together. This is * ensured in vxlan_dev_configure. */ From 0f09ae907818d593e55c4b058d286a0914a43c3f Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:51 +0100 Subject: [PATCH 0269/1386] vxlan: vxlan_rcv() callees: Move clearing of unparsed flags out In order to migrate away from the use of unparsed to detect invalid flags, move all the code that actually clears the flags from callees directly to vxlan_rcv(). 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/2857871d929375c881b9defe378473c8200ead9b.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 1ac2dcdd493e..c2254b0ac964 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1562,7 +1562,7 @@ static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed, size_t start, offset; if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) - goto out; + return SKB_NOT_DROPPED_YET; start = vxlan_rco_start(unparsed->vx_vni); offset = start + vxlan_rco_offset(unparsed->vx_vni); @@ -1573,10 +1573,6 @@ static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed, skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL)); -out: - unparsed->vx_flags &= ~VXLAN_HF_RCO; - unparsed->vx_vni &= VXLAN_VNI_MASK; - return SKB_NOT_DROPPED_YET; } @@ -1588,7 +1584,7 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, struct metadata_dst *tun_dst; if (!(unparsed->vx_flags & VXLAN_HF_GBP)) - goto out; + return; md->gbp = ntohs(gbp->policy_id); @@ -1607,8 +1603,6 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, /* In flow-based mode, GBP is carried in dst_metadata */ if (!(vxflags & VXLAN_F_COLLECT_METADATA)) skb->mark = md->gbp; -out: - unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } static enum skb_drop_reason vxlan_set_mac(struct vxlan_dev *vxlan, @@ -1734,6 +1728,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) reason = vxlan_remcsum(&unparsed, skb, vxlan->cfg.flags); if (unlikely(reason)) goto drop; + unparsed.vx_flags &= ~VXLAN_HF_RCO; + unparsed.vx_vni &= VXLAN_VNI_MASK; } if (vxlan_collect_metadata(vs)) { @@ -1756,8 +1752,10 @@ static 
int vxlan_rcv(struct sock *sk, struct sk_buff *skb) memset(md, 0, sizeof(*md)); } - if (vxlan->cfg.flags & VXLAN_F_GBP) + if (vxlan->cfg.flags & VXLAN_F_GBP) { vxlan_parse_gbp_hdr(&unparsed, skb, vxlan->cfg.flags, md); + unparsed.vx_flags &= ~VXLAN_GBP_USED_BITS; + } /* Note that GBP and GPE can never be active together. This is * ensured in vxlan_dev_configure. */ From fe3dcbcfae522fae9c62954488398562ff6b5ece Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:52 +0100 Subject: [PATCH 0270/1386] vxlan: vxlan_rcv() callees: Drop the unparsed argument The functions vxlan_remcsum() and vxlan_parse_gbp_hdr() take both the SKB and the unparsed VXLAN header. Now that unparsed adjustment is handled directly by vxlan_rcv(), drop this argument, and have the function derive it from the SKB on its own. vxlan_parse_gpe_proto() does not take SKB, so keep the header parameter. However const it so that it's clear that the intention is that it does not get changed. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/5ea651f4e06485ba1a84a8eb556a457c39f0dfd4.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index c2254b0ac964..37c20cf93f92 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -622,9 +622,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, return 1; } -static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol) +static bool vxlan_parse_gpe_proto(const struct vxlanhdr *hdr, __be16 *protocol) { - struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr; + const struct vxlanhdr_gpe *gpe = (const struct vxlanhdr_gpe *)hdr; /* Need to have Next Protocol set for interfaces in GPE mode. 
*/ if (!gpe->np_applied) @@ -1554,18 +1554,17 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan) #endif } -static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed, - struct sk_buff *skb, - u32 vxflags) +static enum skb_drop_reason vxlan_remcsum(struct sk_buff *skb, u32 vxflags) { + const struct vxlanhdr *vh = vxlan_hdr(skb); enum skb_drop_reason reason; size_t start, offset; - if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) + if (!(vh->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) return SKB_NOT_DROPPED_YET; - start = vxlan_rco_start(unparsed->vx_vni); - offset = start + vxlan_rco_offset(unparsed->vx_vni); + start = vxlan_rco_start(vh->vx_vni); + offset = start + vxlan_rco_offset(vh->vx_vni); reason = pskb_may_pull_reason(skb, offset + sizeof(u16)); if (reason) @@ -1576,14 +1575,16 @@ static enum skb_drop_reason vxlan_remcsum(struct vxlanhdr *unparsed, return SKB_NOT_DROPPED_YET; } -static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, - struct sk_buff *skb, u32 vxflags, +static void vxlan_parse_gbp_hdr(struct sk_buff *skb, u32 vxflags, struct vxlan_metadata *md) { - struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed; + const struct vxlanhdr *vh = vxlan_hdr(skb); + const struct vxlanhdr_gbp *gbp; struct metadata_dst *tun_dst; - if (!(unparsed->vx_flags & VXLAN_HF_GBP)) + gbp = (const struct vxlanhdr_gbp *)vh; + + if (!(vh->vx_flags & VXLAN_HF_GBP)) return; md->gbp = ntohs(gbp->policy_id); @@ -1712,7 +1713,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) * used by VXLAN extensions if explicitly requested. 
*/ if (vxlan->cfg.flags & VXLAN_F_GPE) { - if (!vxlan_parse_gpe_proto(&unparsed, &protocol)) + if (!vxlan_parse_gpe_proto(vxlan_hdr(skb), &protocol)) goto drop; unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS; raw_proto = true; @@ -1725,7 +1726,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) } if (vxlan->cfg.flags & VXLAN_F_REMCSUM_RX) { - reason = vxlan_remcsum(&unparsed, skb, vxlan->cfg.flags); + reason = vxlan_remcsum(skb, vxlan->cfg.flags); if (unlikely(reason)) goto drop; unparsed.vx_flags &= ~VXLAN_HF_RCO; @@ -1753,7 +1754,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) } if (vxlan->cfg.flags & VXLAN_F_GBP) { - vxlan_parse_gbp_hdr(&unparsed, skb, vxlan->cfg.flags, md); + vxlan_parse_gbp_hdr(skb, vxlan->cfg.flags, md); unparsed.vx_flags &= ~VXLAN_GBP_USED_BITS; } /* Note that GBP and GPE can never be active together. This is From e713130dfb4d6b5a2cd42f33a94b6ac983d2989d Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:53 +0100 Subject: [PATCH 0271/1386] vxlan: vxlan_rcv(): Extract vxlan_hdr(skb) to a named variable Having a named reference to the VXLAN header is more handy than having to conjure it anew through vxlan_hdr() on every use. Add a new variable and convert several open-coded sites. Additionally, convert one "unparsed" use to the new variable as well. Thus the only "unparsed" uses that remain are the flag-clearing and the header validity check at the end. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Mateusz Polchlopek Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/2a0a940e883c435a0fdbcdc1d03c4858957ad00e.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 37c20cf93f92..f114568b67a3 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1667,6 +1667,7 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { struct vxlan_vni_node *vninode = NULL; + const struct vxlanhdr *vh; struct vxlan_dev *vxlan; struct vxlan_sock *vs; struct vxlanhdr unparsed; @@ -1685,11 +1686,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; unparsed = *vxlan_hdr(skb); + vh = vxlan_hdr(skb); /* VNI flag always required to be set */ - if (!(unparsed.vx_flags & VXLAN_HF_VNI)) { + if (!(vh->vx_flags & VXLAN_HF_VNI)) { netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxlan_hdr(skb)->vx_flags), - ntohl(vxlan_hdr(skb)->vx_vni)); + ntohl(vh->vx_flags), ntohl(vh->vx_vni)); reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; /* Return non vxlan pkt */ goto drop; @@ -1701,7 +1702,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); + vni = vxlan_vni(vh->vx_vni); vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni, &vninode); if (!vxlan) { @@ -1713,7 +1714,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) * used by VXLAN extensions if explicitly requested. 
*/ if (vxlan->cfg.flags & VXLAN_F_GPE) { - if (!vxlan_parse_gpe_proto(vxlan_hdr(skb), &protocol)) + if (!vxlan_parse_gpe_proto(vh, &protocol)) goto drop; unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS; raw_proto = true; From e4f8647767cfac0291def86ddfac23b925294701 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:54 +0100 Subject: [PATCH 0272/1386] vxlan: Track reserved bits explicitly as part of the configuration In order to make it possible to configure which bits in VXLAN header should be considered reserved, introduce a new field vxlan_config::reserved_bits. Have it cover the whole header, except for the VNI-present bit and the bits for VNI itself, and have individual enabled features clear more bits off reserved_bits. (This is expressed as first constructing a used_bits set, and then inverting it to get the reserved_bits. The set of used_bits will be useful on its own for validation of user-set reserved_bits in a following patch.) The patch also moves a comment relevant to the validation from the unparsed validation site up to the new site. Logically this patch should add the new comment, and a later patch that removes the unparsed bits would remove the old comment. But keeping both legs in the same patch is better from the history spelunking point of view. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/984dbf98d5940d3900268dbffaf70961f731d4a4.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 41 +++++++++++++++++++++++++--------- include/net/vxlan.h | 1 + 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index f114568b67a3..e50f4cb70193 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1710,9 +1710,20 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. - */ + if (vh->vx_flags & vxlan->cfg.reserved_bits.vx_flags || + vh->vx_vni & vxlan->cfg.reserved_bits.vx_vni) { + /* If the header uses bits besides those enabled by the + * netdevice configuration, treat this as a malformed packet. + * This behavior diverges from VXLAN RFC (RFC7348) which + * stipulates that bits in reserved in reserved fields are to be + * ignored. The approach here maintains compatibility with + * previous stack code, and also is more robust and provides a + * little more security in adding extensions to VXLAN. + */ + reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; + goto drop; + } + if (vxlan->cfg.flags & VXLAN_F_GPE) { if (!vxlan_parse_gpe_proto(vh, &protocol)) goto drop; @@ -1763,14 +1774,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) */ if (unparsed.vx_flags || unparsed.vx_vni) { - /* If there are any unprocessed flags remaining treat - * this as a malformed packet. This behavior diverges from - * VXLAN RFC (RFC7348) which stipulates that bits in reserved - * in reserved fields are to be ignored. 
The approach here - * maintains compatibility with previous stack code, and also - * is more robust and provides a little more security in - * adding extensions to VXLAN. - */ reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; goto drop; } @@ -4070,6 +4073,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], struct net_device *dev, struct vxlan_config *conf, bool changelink, struct netlink_ext_ack *extack) { + struct vxlanhdr used_bits = { + .vx_flags = VXLAN_HF_VNI, + .vx_vni = VXLAN_VNI_MASK, + }; struct vxlan_dev *vxlan = netdev_priv(dev); int err = 0; @@ -4296,6 +4303,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], extack); if (err) return err; + used_bits.vx_flags |= VXLAN_HF_RCO; + used_bits.vx_vni |= ~VXLAN_VNI_MASK; } if (data[IFLA_VXLAN_GBP]) { @@ -4303,6 +4312,7 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], VXLAN_F_GBP, changelink, false, extack); if (err) return err; + used_bits.vx_flags |= VXLAN_GBP_USED_BITS; } if (data[IFLA_VXLAN_GPE]) { @@ -4311,8 +4321,17 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], extack); if (err) return err; + + used_bits.vx_flags |= VXLAN_GPE_USED_BITS; } + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. 
+ */ + conf->reserved_bits = (struct vxlanhdr) { + .vx_flags = ~used_bits.vx_flags, + .vx_vni = ~used_bits.vx_vni, + }; if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, VXLAN_F_REMCSUM_NOPARTIAL, changelink, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 33ba6fc151cf..2dd23ee2bacd 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -227,6 +227,7 @@ struct vxlan_config { unsigned int addrmax; bool no_share; enum ifla_vxlan_df df; + struct vxlanhdr reserved_bits; }; enum { From 752b1c8d8b409f2b03e61e153696689ee081bf07 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:55 +0100 Subject: [PATCH 0273/1386] vxlan: Bump error counters for header mismatches The VXLAN driver so far has not increased the error counters for packets that set reserved bits. It does so for other packet errors, so do it for this case as well. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/d096084167d56706d620afe5136cf37a2d34d1b9.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index e50f4cb70193..92832a396ab7 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1721,6 +1721,10 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) * little more security in adding extensions to VXLAN. 
*/ reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; + DEV_STATS_INC(vxlan->dev, rx_frame_errors); + DEV_STATS_INC(vxlan->dev, rx_errors); + vxlan_vnifilter_count(vxlan, vni, vninode, + VXLAN_VNI_STATS_RX_ERRORS, 0); goto drop; } From bb16786ed6fdff3a67ba33ed928ae138fd4254b5 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:56 +0100 Subject: [PATCH 0274/1386] vxlan: vxlan_rcv(): Drop unparsed The code currently validates the VXLAN header in two ways: first by comparing it with the set of reserved bits, constructed ahead of time during the netdevice construction; and second by gradually clearing the bits off a separate copy of VXLAN header, "unparsed". Drop the latter validation method. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/4559f16c5664c189b3a4ee6f5da91f552ad4821c.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 92832a396ab7..ff5684a2103a 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1670,7 +1670,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) const struct vxlanhdr *vh; struct vxlan_dev *vxlan; struct vxlan_sock *vs; - struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 protocol = htons(ETH_P_TEB); @@ -1685,7 +1684,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (reason) goto drop; - unparsed = *vxlan_hdr(skb); vh = vxlan_hdr(skb); /* VNI flag always required to be set */ if (!(vh->vx_flags & VXLAN_HF_VNI)) { @@ -1695,8 +1693,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) /* Return non vxlan pkt */ goto drop; } - unparsed.vx_flags &= ~VXLAN_HF_VNI; - unparsed.vx_vni &= ~VXLAN_VNI_MASK; vs = rcu_dereference_sk_user_data(sk); if (!vs) @@ -1731,7 
+1727,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (vxlan->cfg.flags & VXLAN_F_GPE) { if (!vxlan_parse_gpe_proto(vh, &protocol)) goto drop; - unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS; raw_proto = true; } @@ -1745,8 +1740,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) reason = vxlan_remcsum(skb, vxlan->cfg.flags); if (unlikely(reason)) goto drop; - unparsed.vx_flags &= ~VXLAN_HF_RCO; - unparsed.vx_vni &= VXLAN_VNI_MASK; } if (vxlan_collect_metadata(vs)) { @@ -1769,19 +1762,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) memset(md, 0, sizeof(*md)); } - if (vxlan->cfg.flags & VXLAN_F_GBP) { + if (vxlan->cfg.flags & VXLAN_F_GBP) vxlan_parse_gbp_hdr(skb, vxlan->cfg.flags, md); - unparsed.vx_flags &= ~VXLAN_GBP_USED_BITS; - } /* Note that GBP and GPE can never be active together. This is * ensured in vxlan_dev_configure. */ - if (unparsed.vx_flags || unparsed.vx_vni) { - reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; - goto drop; - } - if (!raw_proto) { reason = vxlan_set_mac(vxlan, vs, skb, vni); if (reason) From 6c11379b104e3718135fd7fc37bb254b41e4cf65 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:57 +0100 Subject: [PATCH 0275/1386] vxlan: Add an attribute to make VXLAN header validation configurable The set of bits that the VXLAN netdevice currently considers reserved is defined by the features enabled at the netdevice construction. In order to make this configurable, add an attribute, IFLA_VXLAN_RESERVED_BITS. The payload is a pair of big-endian u32's covering the VXLAN header. This is validated against the set of flags used by the various enabled VXLAN features, and attempts to override bits used by an enabled feature are bounced. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/c657275e5ceed301e62c69fe8e559e32909442e2.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 53 +++++++++++++++++++++++++++++----- include/uapi/linux/if_link.h | 1 + 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index ff5684a2103a..43cf672b7b9f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3428,6 +3428,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), + [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -4315,13 +4316,44 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], used_bits.vx_flags |= VXLAN_GPE_USED_BITS; } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. 
- */ - conf->reserved_bits = (struct vxlanhdr) { - .vx_flags = ~used_bits.vx_flags, - .vx_vni = ~used_bits.vx_vni, - }; + if (data[IFLA_VXLAN_RESERVED_BITS]) { + struct vxlanhdr reserved_bits; + + if (changelink) { + NL_SET_ERR_MSG_ATTR(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Cannot change reserved_bits"); + return -EOPNOTSUPP; + } + + nla_memcpy(&reserved_bits, data[IFLA_VXLAN_RESERVED_BITS], + sizeof(reserved_bits)); + if (used_bits.vx_flags & reserved_bits.vx_flags || + used_bits.vx_vni & reserved_bits.vx_vni) { + __be64 ub_be64, rb_be64; + + memcpy(&ub_be64, &used_bits, sizeof(ub_be64)); + memcpy(&rb_be64, &reserved_bits, sizeof(rb_be64)); + + NL_SET_ERR_MSG_ATTR_FMT(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Used bits %#018llx cannot overlap reserved bits %#018llx", + be64_to_cpu(ub_be64), + be64_to_cpu(rb_be64)); + return -EINVAL; + } + + conf->reserved_bits = reserved_bits; + } else { + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. 
+ */ + conf->reserved_bits = (struct vxlanhdr) { + .vx_flags = ~used_bits.vx_flags, + .vx_vni = ~used_bits.vx_vni, + }; + } + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, VXLAN_F_REMCSUM_NOPARTIAL, changelink, @@ -4506,6 +4538,8 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(0) + /* IFLA_VXLAN_GPE */ nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */ + /* IFLA_VXLAN_RESERVED_BITS */ + nla_total_size(sizeof(struct vxlanhdr)) + 0; } @@ -4608,6 +4642,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))) goto nla_put_failure; + if (nla_put(skb, IFLA_VXLAN_RESERVED_BITS, + sizeof(vxlan->cfg.reserved_bits), + &vxlan->cfg.reserved_bits)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2575e0cd9b48..77730c340c8f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1394,6 +1394,7 @@ enum { IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ + IFLA_VXLAN_RESERVED_BITS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) From 8653eb21d68c6882ce5716b04379431817310b85 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:58 +0100 Subject: [PATCH 0276/1386] selftests: net: lib: Rename ip_link_master() to ip_link_set_master() Let's have a verb in that function name to make it clearer what's going on. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/fbf7c53a429b340b9cff5831280ea8c305a224f9.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/fdb_notify.sh | 6 +++--- tools/testing/selftests/net/lib.sh | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh index c03151e7791c..c159230c9b62 100755 --- a/tools/testing/selftests/net/fdb_notify.sh +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -49,7 +49,7 @@ test_dup_vxlan_self() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan" dev vx self dst 192.0.2.1 do_test_dup del "vxlan" dev vx self dst 192.0.2.1 @@ -59,7 +59,7 @@ test_dup_vxlan_master() { ip_link_add br up type bridge vlan_filtering 1 ip_link_add vx up type vxlan id 2000 dstport 4789 - ip_link_master vx br + ip_link_set_master vx br do_test_dup add "vxlan master" dev vx master do_test_dup del "vxlan master" dev vx master @@ -79,7 +79,7 @@ test_dup_macvlan_master() ip_link_add br up type bridge vlan_filtering 1 ip_link_add dd up type dummy ip_link_add mv up link dd type macvlan mode passthru - ip_link_master mv br + ip_link_set_master mv br do_test_dup add "macvlan master" dev mv self do_test_dup del "macvlan master" dev mv self diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 8994fec1c38f..5ea6537acd2b 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -451,7 +451,7 @@ ip_link_add() defer ip link del dev "$name" } -ip_link_master() +ip_link_set_master() { local member=$1; shift local master=$1; shift From d76ccb2ec368c8a44f64839140cd253c19f6a79a Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:59 +0100 Subject: [PATCH 
0277/1386] selftests: net: lib: Add several autodefer helpers Add ip_link_set_addr(), ip_link_set_up(), ip_addr_add() and ip_route_add() to the suite of helpers that automatically schedule a corresponding cleanup. When setting a new MAC, one needs to remember the old address first. Move mac_get() from forwarding/ to that end. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/add6bcbe30828fd01363266df20c338cf13aaf25.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/lib.sh | 7 ---- tools/testing/selftests/net/lib.sh | 39 +++++++++++++++++++ 2 files changed, 39 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 7337f398f9cc..1fd40bada694 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -932,13 +932,6 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() -{ - local if_name=$1 - - ip -j link show dev $if_name | jq -r '.[]["address"]' -} - ether_addr_to_u64() { local addr="$1" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 5ea6537acd2b..2cd5c743b2d9 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -435,6 +435,13 @@ xfail_on_veth() fi } +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + kill_process() { local pid=$1; shift @@ -459,3 +466,35 @@ ip_link_set_master() ip link set dev "$member" master "$master" defer ip link set dev "$member" nomaster } + +ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_set_up() +{ + local name=$1; shift + + ip link set dev "$name" up + defer ip link set dev "$name" 
down +} + +ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" + defer ip addr del dev "$name" "$@" +} + +ip_route_add() +{ + ip route add "$@" + defer ip route del "$@" +} From d84b5dccf3ebdeeabef910d1c19b931c84f67884 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:41:00 +0100 Subject: [PATCH 0278/1386] selftests: forwarding: Add a selftest for the new reserved_bits UAPI Run VXLAN packets through a gateway. Flip individual bits of the packet and/or reserved bits of the gateway, and check that the gateway treats the packets as expected. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/388bef3c30ebc887d4e64cd86a362e2df2f2d2e1.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- .../testing/selftests/net/forwarding/Makefile | 1 + .../net/forwarding/vxlan_reserved.sh | 352 ++++++++++++++++++ 2 files changed, 353 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/vxlan_reserved.sh diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 7d885cff8d79..00bde7b6f39e 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ + vxlan_reserved.sh \ vxlan_symmetric_ipv6.sh \ vxlan_symmetric.sh diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..46c31794b91b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,352 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | 
SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. + ip_link_set_addr br1 $(mac_get $swp1) + ip_link_set_up br1 + + ip_link_set_up $rp1 + ip_addr_add $rp1 192.0.2.17/28 + ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18 + + ip_link_set_master $swp1 br1 + ip_link_set_up $swp1 +} + +vrp2_create() +{ + simple_if_init $rp2 192.0.2.18/28 + defer simple_if_fini $rp2 192.0.2.18/28 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + rp1=${NETIFS[p3]} + rp2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + h1_create + switch_create + + vrp2_create +} + +vxlan_header_bytes() +{ + local vni=$1; shift + local -a extra_bits=("$@") + local -a bits + local i + + for ((i=0; i < 64; i++)); do + bits[i]=0 + done + + # Bit 4 is the I flag and is always on. 
+ bits[4]=1 + + for i in ${extra_bits[@]}; do + bits[i]=1 + done + + # Bits 32..55 carry the VNI + local mask=0x800000 + for ((i=0; i < 24; i++)); do + bits[$((i + 32))]=$(((vni & mask) != 0)) + ((mask >>= 1)) + done + + local bytes + for ((i=0; i < 8; i++)); do + local byte=0 + local j + for ((j=0; j < 8; j++)); do + local bit=${bits[8 * i + j]} + ((byte += bit << (7 - j))) + done + bytes+=$(printf %02x $byte): + done + + echo ${bytes%:} +} + +neg_bytes() +{ + local bytes=$1; shift + + local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8 + [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:) + local out + local i + + for ((i=0; i < ${#bytes}; i++)); do + local c=${bytes:$i:1} + out+=${neg[$c]} + done + echo $out +} + +vxlan_ping_do() +{ + local count=$1; shift + local dev=$1; shift + local next_hop_mac=$1; shift + local dest_ip=$1; shift + local dest_mac=$1; shift + local vni=$1; shift + local reserved_bits=$1; shift + + local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits) + + $MZ $dev -c $count -d 100msec -q \ + -b $next_hop_mac -B $dest_ip \ + -t udp sp=23456,dp=$VXPORT,p=$(: + )"$vxlan_header:"$( : VXLAN + )"$dest_mac:"$( : ETH daddr + )"00:11:22:33:44:55:"$( : ETH saddr + )"08:00:"$( : ETH type + )"45:"$( : IP version + IHL + )"00:"$( : IP TOS + )"00:54:"$( : IP total length + )"99:83:"$( : IP identification + )"40:00:"$( : IP flags + frag off + )"40:"$( : IP TTL + )"01:"$( : IP proto + )"00:00:"$( : IP header csum + )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr + )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr + )"08:"$( : ICMP type + )"00:"$( : ICMP code + )"8b:f2:"$( : ICMP csum + )"1f:6a:"$( : ICMP request identifier + )"00:01:"$( : ICMP request seq. 
number + )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload + )"6d:74:0b:00:00:00:00:00:"$( : + )"10:11:12:13:14:15:16:17:"$( : + )"18:19:1a:1b:1c:1d:1e:1f:"$( : + )"20:21:22:23:24:25:26:27:"$( : + )"28:29:2a:2b:2c:2d:2e:2f:"$( : + )"30:31:32:33:34:35:36:37" +} + +vxlan_device_add() +{ + ip_link_add vx1 up type vxlan id 1000 \ + local 192.0.2.17 dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 100 "$@" + ip_link_set_master vx1 br1 +} + +vxlan_all_reserved_bits() +{ + local i + + for ((i=0; i < 64; i++)); do + if ((i == 4 || i >= 32 && i < 56)); then + continue + fi + echo $i + done +} + +vxlan_ping_vanilla() +{ + vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000 +} + +vxlan_ping_reserved() +{ + for bit in $(vxlan_all_reserved_bits); do + vxlan_ping_do 1 $rp2 $(mac_get $rp1) \ + 192.0.2.17 $(mac_get $h1) 1000 "$bit" + ((n++)) + done +} + +vxlan_ping_test() +{ + local what=$1; shift + local get_stat=$1; shift + local expect=$1; shift + + RET=0 + + local t0=$($get_stat) + + "$@" + check_err $? "Failure when running $@" + + local t1=$($get_stat) + local delta=$((t1 - t0)) + + ((expect == delta)) + check_err $? "Expected to capture $expect packets, got $delta." + + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." 
+ log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS From 8a241ef9b9b86afc087e5b3e4a95cb60f9ee796c Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Thu, 5 Dec 2024 22:41:34 -0800 Subject: [PATCH 0279/1386] octeon_ep: add ndo ops for VFs in PF driver These APIs are needed to support applications that use netlink to get VF information from a PF driver. 
Signed-off-by: Shinas Rasheed Link: https://patch.msgid.link/20241206064135.2331790-1-srasheed@marvell.com Signed-off-by: Jakub Kicinski --- .../ethernet/marvell/octeon_ep/octep_main.c | 39 +++++++++++++++++++ .../ethernet/marvell/octeon_ep/octep_main.h | 1 + .../marvell/octeon_ep/octep_pfvf_mbox.c | 23 ++++++++++- .../marvell/octeon_ep/octep_pfvf_mbox.h | 6 ++- 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 549436efc204..3a9825883d79 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1137,6 +1137,43 @@ static int octep_set_features(struct net_device *dev, netdev_features_t features return err; } +static int octep_get_vf_config(struct net_device *dev, int vf, + struct ifla_vf_info *ivi) +{ + struct octep_device *oct = netdev_priv(dev); + + ivi->vf = vf; + ether_addr_copy(ivi->mac, oct->vf_info[vf].mac_addr); + ivi->spoofchk = true; + ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; + ivi->trusted = false; + + return 0; +} + +static int octep_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct octep_device *oct = netdev_priv(dev); + int err; + + if (!is_valid_ether_addr(mac)) { + dev_err(&oct->pdev->dev, "Invalid MAC Address %pM\n", mac); + return -EADDRNOTAVAIL; + } + + dev_dbg(&oct->pdev->dev, "set vf-%d mac to %pM\n", vf, mac); + ether_addr_copy(oct->vf_info[vf].mac_addr, mac); + oct->vf_info[vf].flags |= OCTEON_PFVF_FLAG_MAC_SET_BY_PF; + + err = octep_ctrl_net_set_mac_addr(oct, vf, mac, true); + if (err) + dev_err(&oct->pdev->dev, + "Set VF%d MAC address failed via host control Mbox\n", + vf); + + return err; +} + static const struct net_device_ops octep_netdev_ops = { .ndo_open = octep_open, .ndo_stop = octep_stop, @@ -1146,6 +1183,8 @@ static const struct net_device_ops octep_netdev_ops = { .ndo_set_mac_address = octep_set_mac, .ndo_change_mtu = 
octep_change_mtu, .ndo_set_features = octep_set_features, + .ndo_get_vf_config = octep_get_vf_config, + .ndo_set_vf_mac = octep_set_vf_mac }; /** diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index fee59e0e0138..3b56916af468 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -220,6 +220,7 @@ struct octep_iface_link_info { /* The Octeon VF device specific info data structure.*/ struct octep_pfvf_info { u8 mac_addr[ETH_ALEN]; + u32 flags; u32 mbox_version; }; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c index e6eb98d70f3c..ebecdd29f3bd 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c @@ -156,12 +156,23 @@ static void octep_pfvf_set_mac_addr(struct octep_device *oct, u32 vf_id, { int err; + if (oct->vf_info[vf_id].flags & OCTEON_PFVF_FLAG_MAC_SET_BY_PF) { + dev_err(&oct->pdev->dev, + "VF%d attempted to override administrative set MAC address\n", + vf_id); + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; + return; + } + err = octep_ctrl_net_set_mac_addr(oct, vf_id, cmd.s_set_mac.mac_addr, true); if (err) { rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; - dev_err(&oct->pdev->dev, "Set VF MAC address failed via host control Mbox\n"); + dev_err(&oct->pdev->dev, "Set VF%d MAC address failed via host control Mbox\n", + vf_id); return; } + + ether_addr_copy(oct->vf_info[vf_id].mac_addr, cmd.s_set_mac.mac_addr); rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; } @@ -171,10 +182,18 @@ static void octep_pfvf_get_mac_addr(struct octep_device *oct, u32 vf_id, { int err; + if (oct->vf_info[vf_id].flags & OCTEON_PFVF_FLAG_MAC_SET_BY_PF) { + dev_dbg(&oct->pdev->dev, "VF%d MAC address set by PF\n", vf_id); + ether_addr_copy(rsp->s_set_mac.mac_addr, + 
oct->vf_info[vf_id].mac_addr); + rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; + return; + } err = octep_ctrl_net_get_mac_addr(oct, vf_id, rsp->s_set_mac.mac_addr); if (err) { rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; - dev_err(&oct->pdev->dev, "Get VF MAC address failed via host control Mbox\n"); + dev_err(&oct->pdev->dev, "Get VF%d MAC address failed via host control Mbox\n", + vf_id); return; } rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h index 0dc6eead292a..386a095a99bc 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.h @@ -8,8 +8,6 @@ #ifndef _OCTEP_PFVF_MBOX_H_ #define _OCTEP_PFVF_MBOX_H_ -/* VF flags */ -#define OCTEON_PFVF_FLAG_MAC_SET_BY_PF BIT_ULL(0) /* PF has set VF MAC address */ #define OCTEON_SDP_16K_HW_FRS 16380UL #define OCTEON_SDP_64K_HW_FRS 65531UL @@ -23,6 +21,10 @@ enum octep_pfvf_mbox_version { #define OCTEP_PFVF_MBOX_VERSION_CURRENT OCTEP_PFVF_MBOX_VERSION_V2 +/* VF flags */ +/* PF has set VF MAC address */ +#define OCTEON_PFVF_FLAG_MAC_SET_BY_PF BIT(0) + enum octep_pfvf_mbox_opcode { OCTEP_PFVF_MBOX_CMD_VERSION, OCTEP_PFVF_MBOX_CMD_SET_MTU, From 070927427d82debf5797e60c96165f2666c0bc42 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Sat, 7 Dec 2024 00:21:39 +0500 Subject: [PATCH 0280/1386] net: renesas: rswitch: do not deinit disabled ports In rswitch_ether_port_init_all(), only enabled ports are initialized. Then, rswitch_ether_port_deinit_all() shall also only deinitialize enabled ports. 
Signed-off-by: Nikita Yushchenko Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20241206192140.1714-1-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 8d18dae4d8fb..3ba4a605f7cf 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1527,7 +1527,7 @@ static void rswitch_ether_port_deinit_all(struct rswitch_private *priv) { unsigned int i; - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { phy_exit(priv->rdev[i]->serdes); rswitch_ether_port_deinit_one(priv->rdev[i]); } From 32fd46f5b69e9a2e1206d576359e533e5b7c4694 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Sat, 7 Dec 2024 00:21:40 +0500 Subject: [PATCH 0281/1386] net: renesas: rswitch: remove speed from gwca structure This field is set but never used. GWCA is rswitch CPU interface module which connects rswitch to the host over AXI bus. Speed of the switch ports is not anyhow related to GWCA operation. 
Signed-off-by: Nikita Yushchenko Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20241206192140.1714-2-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 3 --- drivers/net/ethernet/renesas/rswitch.h | 1 - 2 files changed, 4 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 3ba4a605f7cf..b754cc96e268 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1901,9 +1901,6 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index if (err < 0) goto out_get_params; - if (rdev->priv->gwca.speed < rdev->etha->speed) - rdev->priv->gwca.speed = rdev->etha->speed; - err = rswitch_rxdmac_alloc(ndev); if (err < 0) goto out_rxdmac; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 72e3ff596d31..303883369b94 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -993,7 +993,6 @@ struct rswitch_gwca { DECLARE_BITMAP(used, RSWITCH_MAX_NUM_QUEUES); u32 tx_irq_bits[RSWITCH_NUM_IRQ_REGS]; u32 rx_irq_bits[RSWITCH_NUM_IRQ_REGS]; - int speed; }; #define NUM_QUEUES_PER_NDEV 2 From 31cdd8418234e70043abd26894b57eb201489cba Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:42:58 +0100 Subject: [PATCH 0282/1386] net: stmmac: Fix CSR divider comment The comment in declaration of STMMAC_CSR_250_300M incorrectly describes the constant as '/* MDC = clk_scr_i/122 */' but the DWC Ether QOS Handbook version 5.20a says it is CSR clock/124. 
Signed-off-by: Jan Petrous (OSS) Reviewed-by: Jacob Keller Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-1-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d79ff252cfdc..75cbfb576358 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -33,7 +33,7 @@ #define STMMAC_CSR_20_35M 0x2 /* MDC = clk_scr_i/16 */ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ -#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 From c8fab05d021dfc04401102f9fa1de07fc8f75d8d Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:42:59 +0100 Subject: [PATCH 0283/1386] net: stmmac: Extend CSR calc support Add support for CSR clock range up to 800 MHz. 
Reviewed-by: Jacob Keller Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-2-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++++ include/linux/stmmac.h | 2 ++ 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 1367fa5c9b8e..70d601f45481 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -257,6 +257,8 @@ struct stmmac_safety_stats { #define CSR_F_150M 150000000 #define CSR_F_250M 250000000 #define CSR_F_300M 300000000 +#define CSR_F_500M 500000000 +#define CSR_F_800M 800000000 #define MAC_CSR_H_FRQ_MASK 0x20 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9b262cdad60b..3cb7ad6ccc4e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -325,6 +325,10 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_150_250M; else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; + else if ((clk_rate >= CSR_F_300M) && (clk_rate < CSR_F_500M)) + priv->clk_csr = STMMAC_CSR_300_500M; + else if ((clk_rate >= CSR_F_500M) && (clk_rate < CSR_F_800M)) + priv->clk_csr = STMMAC_CSR_500_800M; } if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 75cbfb576358..865d0fe26f98 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -34,6 +34,8 @@ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ #define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ +#define STMMAC_CSR_300_500M 0x6 /* MDC = 
clk_scr_i/204 */ +#define STMMAC_CSR_500_800M 0x7 /* MDC = clk_scr_i/324 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 From cb09f61a9ab84369c62f2ef7f8a2b797f596f6d1 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:00 +0100 Subject: [PATCH 0284/1386] net: stmmac: Fix clock rate variables size The clock API clk_get_rate() returns unsigned long value. Expand affected members of stmmac platform data and convert the stmmac_clk_csr_set() and dwmac4_core_init() methods to defining the unsigned long clk_rate local variables. Reviewed-by: Andrew Lunn Reviewed-by: Serge Semin Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-3-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- include/linux/stmmac.h | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 901a3c1959fa..2a5b38723635 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -777,7 +777,7 @@ static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv) netdev_err(priv->dev, "Failed to max out clk_ptp_ref: %d\n", err); plat_dat->clk_ptp_rate = clk_get_rate(plat_dat->clk_ptp_ref); - netdev_dbg(priv->dev, "PTP rate %d\n", plat_dat->clk_ptp_rate); + netdev_dbg(priv->dev, "PTP rate %lu\n", plat_dat->clk_ptp_rate); } static int qcom_ethqos_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25781874aa7..c36f90a782c5 100644 --- 
a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -27,7 +27,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); - u32 clk_rate; + unsigned long clk_rate; value |= GMAC_CORE_INIT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3cb7ad6ccc4e..d45fd7a3acd5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -301,7 +301,7 @@ static void stmmac_global_err(struct stmmac_priv *priv) */ static void stmmac_clk_csr_set(struct stmmac_priv *priv) { - u32 clk_rate; + unsigned long clk_rate; clk_rate = clk_get_rate(priv->plat->stmmac_clk); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3ac32444e492..06e07e6e180b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -640,7 +640,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dev_info(&pdev->dev, "PTP uses main clock\n"); } else { plat->clk_ptp_rate = clk_get_rate(plat->clk_ptp_ref); - dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate); + dev_dbg(&pdev->dev, "PTP rate %lu\n", plat->clk_ptp_rate); } plat->stmmac_rst = devm_reset_control_get_optional(&pdev->dev, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 865d0fe26f98..c9878a612e53 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -252,8 +252,8 @@ struct plat_stmmacenet_data { struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - unsigned int clk_ptp_rate; - unsigned int clk_ref_rate; + unsigned long clk_ptp_rate; + unsigned long clk_ref_rate; unsigned int mult_fact_100ns; s32 ptp_max_adj; u32 cdc_error_adj; @@ -265,7 
+265,7 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned int eee_usecs_rate; + unsigned long eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; From 386aa60abdb600a4e5ad818e6dba171685942e54 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:01 +0100 Subject: [PATCH 0285/1386] net: phy: Add helper for mapping RGMII link speed to clock rate The RGMII interface supports three data rates: 10/100 Mbps and 1 Gbps. These speeds correspond to clock frequencies of 2.5/25 MHz and 125 MHz, respectively. Many Ethernet drivers, including glues in stmmac, follow a similar pattern of converting RGMII speed to clock frequency. To simplify code, define the helper rgmii_clock(speed) to convert connection speed to clock frequency. Suggested-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-4-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/linux/phy.h b/include/linux/phy.h index bb157136351e..e597a32cc787 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -298,6 +298,29 @@ static inline const char *phy_modes(phy_interface_t interface) } } +/** + * rgmii_clock - map link speed to the clock rate + * @speed: link speed value + * + * Description: maps RGMII supported link speeds + * into the clock rates. 
+ * + * Returns: clock rate or negative errno + */ +static inline long rgmii_clock(int speed) +{ + switch (speed) { + case SPEED_10: + return 2500000; + case SPEED_100: + return 25000000; + case SPEED_1000: + return 125000000; + default: + return -EINVAL; + } +} + #define PHY_INIT_TIMEOUT 100000 #define PHY_FORCE_TIMEOUT 10 From 37b66c483e4c8a72cd1fd22f8ced05cc40f9e128 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:02 +0100 Subject: [PATCH 0286/1386] net: dwmac-dwc-qos-eth: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-5-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 83290e707df5..bd4eb187f8c6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -181,24 +181,19 @@ static void dwc_qos_remove(struct platform_device *pdev) static void tegra_eqos_fix_speed(void *priv, unsigned int speed, unsigned int mode) { struct tegra_eqos *eqos = priv; - unsigned long rate = 125000000; bool needs_calibration = false; + long rate = 125000000; u32 value; int err; switch (speed) { case SPEED_1000: - needs_calibration = true; - rate = 125000000; - break; - case SPEED_100: needs_calibration = true; - rate = 25000000; - break; + fallthrough; case SPEED_10: - rate = 2500000; + rate = rgmii_clock(speed); break; default: From 839b75ea4d940f810650a2ce11c91d94c5f01aa3 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:03 +0100 Subject: [PATCH 0287/1386] net: dwmac-imx: Use helper rgmii_clock Utilize a new helper function 
rgmii_clock(). Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-6-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 641f3cd019a3..43e0fbba4f77 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -186,7 +186,7 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod { struct plat_stmmacenet_data *plat_dat; struct imx_priv_data *dwmac = priv; - unsigned long rate; + long rate; int err; plat_dat = dwmac->plat_dat; @@ -196,17 +196,8 @@ static void imx_dwmac_fix_speed(void *priv, unsigned int speed, unsigned int mod (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII)) return; - switch (speed) { - case SPEED_1000: - rate = 125000000; - break; - case SPEED_100: - rate = 25000000; - break; - case SPEED_10: - rate = 2500000; - break; - default: + rate = rgmii_clock(speed); + if (rate < 0) { dev_err(dwmac->dev, "invalid speed %u\n", speed); return; } From 8470bfc835154a80774e5ab0e46969f196c0dba1 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:04 +0100 Subject: [PATCH 0288/1386] net: dwmac-intel-plat: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). When in, remove dead code in kmb_eth_fix_mac_speed(). 
Reviewed-by: Andrew Lunn Signed-off-by: Jan Petrous (OSS) Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-7-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../stmicro/stmmac/dwmac-intel-plat.c | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c index d94f0a150e93..ddee6154d40b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel-plat.c @@ -31,27 +31,13 @@ struct intel_dwmac_data { static void kmb_eth_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) { struct intel_dwmac *dwmac = priv; - unsigned long rate; + long rate; int ret; - rate = clk_get_rate(dwmac->tx_clk); - - switch (speed) { - case SPEED_1000: - rate = 125000000; - break; - - case SPEED_100: - rate = 25000000; - break; - - case SPEED_10: - rate = 2500000; - break; - - default: + rate = rgmii_clock(speed); + if (rate < 0) { dev_err(dwmac->dev, "Invalid speed\n"); - break; + return; } ret = clk_set_rate(dwmac->tx_clk, rate); From 30b4a9b5c335b32a8c8234662b180876a2db173e Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:05 +0100 Subject: [PATCH 0289/1386] net: dwmac-rk: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). 
Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-8-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-rk.c | 30 ++++--------------- 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 8cb374668b74..a4dc89e23a68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1079,20 +1079,11 @@ static void rk3568_set_gmac_speed(struct rk_priv_data *bsp_priv, int speed) { struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk; struct device *dev = &bsp_priv->pdev->dev; - unsigned long rate; + long rate; int ret; - switch (speed) { - case 10: - rate = 2500000; - break; - case 100: - rate = 25000000; - break; - case 1000: - rate = 125000000; - break; - default: + rate = rgmii_clock(speed); + if (rate < 0) { dev_err(dev, "unknown speed value for GMAC speed=%d", speed); return; } @@ -1540,20 +1531,11 @@ static void rv1126_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) { struct clk *clk_mac_speed = bsp_priv->clks[RK_CLK_MAC_SPEED].clk; struct device *dev = &bsp_priv->pdev->dev; - unsigned long rate; + long rate; int ret; - switch (speed) { - case 10: - rate = 2500000; - break; - case 100: - rate = 25000000; - break; - case 1000: - rate = 125000000; - break; - default: + rate = rgmii_clock(speed); + if (rate < 0) { dev_err(dev, "unknown speed value for RGMII speed=%d", speed); return; } From b561d717a799241d260a7e5667e8d35de6fac874 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:06 +0100 Subject: [PATCH 0290/1386] net: dwmac-starfive: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). 
Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Reviewed-by: Emil Renner Berthing Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-9-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../ethernet/stmicro/stmmac/dwmac-starfive.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 421666279dd3..0a0a363d3730 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -34,24 +34,13 @@ struct starfive_dwmac { static void starfive_dwmac_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) { struct starfive_dwmac *dwmac = priv; - unsigned long rate; + long rate; int err; - rate = clk_get_rate(dwmac->clk_tx); - - switch (speed) { - case SPEED_1000: - rate = 125000000; - break; - case SPEED_100: - rate = 25000000; - break; - case SPEED_10: - rate = 2500000; - break; - default: + rate = rgmii_clock(speed); + if (rate < 0) { dev_err(dwmac->dev, "invalid speed %u\n", speed); - break; + return; } err = clk_set_rate(dwmac->clk_tx, rate); From 04207d28f46870df113112a4afc42458495837d6 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:07 +0100 Subject: [PATCH 0291/1386] net: macb: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). 
Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Acked-by: Nicolas Ferre Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-10-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/cadence/macb_main.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index daa416fb1724..640f500f989d 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -530,19 +530,9 @@ static void macb_set_tx_clk(struct macb *bp, int speed) if (bp->phy_interface == PHY_INTERFACE_MODE_MII) return; - switch (speed) { - case SPEED_10: - rate = 2500000; - break; - case SPEED_100: - rate = 25000000; - break; - case SPEED_1000: - rate = 125000000; - break; - default: + rate = rgmii_clock(speed); + if (rate < 0) return; - } rate_rounded = clk_round_rate(bp->tx_clk, rate); if (rate_rounded < 0) From fd59bca4d5eaba6cadf78e74b5e72fd8852a7529 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:08 +0100 Subject: [PATCH 0292/1386] net: xgene_enet: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). 
Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-11-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index e641dbbea1e2..b854b6b42d77 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -421,18 +421,12 @@ static void xgene_enet_configure_clock(struct xgene_enet_pdata *pdata) if (dev->of_node) { struct clk *parent = clk_get_parent(pdata->clk); + long rate = rgmii_clock(pdata->phy_speed); - switch (pdata->phy_speed) { - case SPEED_10: - clk_set_rate(parent, 2500000); - break; - case SPEED_100: - clk_set_rate(parent, 25000000); - break; - default: - clk_set_rate(parent, 125000000); - break; - } + if (rate < 0) + rate = 125000000; + + clk_set_rate(parent, rate); } #ifdef CONFIG_ACPI else { From 1ead5777550717f77fa70d6342fc467bebc18519 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:09 +0100 Subject: [PATCH 0293/1386] net: dwmac-sti: Use helper rgmii_clock Utilize a new helper function rgmii_clock(). 
Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-12-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/dwmac-sti.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c index a6ff02d905a9..eabc4da9e1a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c @@ -21,10 +21,7 @@ #include "stmmac_platform.h" -#define DWMAC_125MHZ 125000000 #define DWMAC_50MHZ 50000000 -#define DWMAC_25MHZ 25000000 -#define DWMAC_2_5MHZ 2500000 #define IS_PHY_IF_MODE_RGMII(iface) (iface == PHY_INTERFACE_MODE_RGMII || \ iface == PHY_INTERFACE_MODE_RGMII_ID || \ @@ -140,7 +137,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode) struct sti_dwmac *dwmac = priv; u32 src = dwmac->tx_retime_src; u32 reg = dwmac->ctrl_reg; - u32 freq = 0; + long freq = 0; if (dwmac->interface == PHY_INTERFACE_MODE_MII) { src = TX_RETIME_SRC_TXCLK; @@ -153,19 +150,14 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd, unsigned int mode) } } else if (IS_PHY_IF_MODE_RGMII(dwmac->interface)) { /* On GiGa clk source can be either ext or from clkgen */ - if (spd == SPEED_1000) { - freq = DWMAC_125MHZ; - } else { + freq = rgmii_clock(spd); + + if (spd != SPEED_1000 && freq > 0) /* Switch to clkgen for these speeds */ src = TX_RETIME_SRC_CLKGEN; - if (spd == SPEED_100) - freq = DWMAC_25MHZ; - else if (spd == SPEED_10) - freq = DWMAC_2_5MHZ; - } } - if (src == TX_RETIME_SRC_CLKGEN && freq) + if (src == TX_RETIME_SRC_CLKGEN && freq > 0) clk_set_rate(dwmac->clk, freq); regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK, From 91f10e5895209c855edc0f993410f5d82b54e049 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:10 
+0100 Subject: [PATCH 0294/1386] dt-bindings: net: Add DT bindings for DWMAC on NXP S32G/R SoCs Add basic description for DWMAC ethernet IP on NXP S32G2xx, S32G3xx and S32R45 automotive series SoCs. Reviewed-by: Rob Herring (Arm) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-13-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- .../bindings/net/nxp,s32-dwmac.yaml | 105 ++++++++++++++++++ .../devicetree/bindings/net/snps,dwmac.yaml | 1 + 2 files changed, 106 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml diff --git a/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml new file mode 100644 index 000000000000..2b8b74c5feec --- /dev/null +++ b/Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml @@ -0,0 +1,105 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright 2021-2024 NXP +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/nxp,s32-dwmac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP S32G2xx/S32G3xx/S32R45 GMAC ethernet controller + +maintainers: + - Jan Petrous (OSS) + +description: + This device is a Synopsys DWC IP, integrated on NXP S32G/R SoCs. + The SoC series S32G2xx and S32G3xx feature one DWMAC instance, + the SoC S32R45 has two instances. The devices can use RGMII/RMII/MII + interface over Pinctrl device or the output can be routed + to the embedded SerDes for SGMII connectivity. 
+ +properties: + compatible: + oneOf: + - const: nxp,s32g2-dwmac + - items: + - enum: + - nxp,s32g3-dwmac + - nxp,s32r45-dwmac + - const: nxp,s32g2-dwmac + + reg: + items: + - description: Main GMAC registers + - description: GMAC PHY mode control register + + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + + clocks: + items: + - description: Main GMAC clock + - description: Transmit clock + - description: Receive clock + - description: PTP reference clock + + clock-names: + items: + - const: stmmaceth + - const: tx + - const: rx + - const: ptp_ref + +required: + - clocks + - clock-names + +allOf: + - $ref: snps,dwmac.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + bus { + #address-cells = <2>; + #size-cells = <2>; + + ethernet@4033c000 { + compatible = "nxp,s32g2-dwmac"; + reg = <0x0 0x4033c000 0x0 0x2000>, /* gmac IP */ + <0x0 0x4007c004 0x0 0x4>; /* GMAC_0_CTRL_STS */ + interrupt-parent = <&gic>; + interrupts = ; + interrupt-names = "macirq"; + snps,mtl-rx-config = <&mtl_rx_setup>; + snps,mtl-tx-config = <&mtl_tx_setup>; + clocks = <&clks 24>, <&clks 17>, <&clks 16>, <&clks 15>; + clock-names = "stmmaceth", "tx", "rx", "ptp_ref"; + phy-mode = "rgmii-id"; + phy-handle = <&phy0>; + + mtl_rx_setup: rx-queues-config { + snps,rx-queues-to-use = <5>; + }; + + mtl_tx_setup: tx-queues-config { + snps,tx-queues-to-use = <5>; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + compatible = "snps,dwmac-mdio"; + + phy0: ethernet-phy@0 { + reg = <0>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index eb1f3ae41ab9..91e75eb3f329 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -67,6 +67,7 @@ properties: - ingenic,x2000-mac - loongson,ls2k-dwmac - loongson,ls7a-dwmac + - nxp,s32g2-dwmac - qcom,qcs404-ethqos - qcom,sa8775p-ethqos - 
qcom,sc8280xp-ethqos From cd197ac5d661ee2ab36f1578164e276ad947506c Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:11 +0100 Subject: [PATCH 0295/1386] net: stmmac: dwmac-s32: add basic NXP S32G/S32R glue driver NXP S32G2xx/S32G3xx and S32R45 are automotive grade SoCs that integrate one or two Synopsys DWMAC 5.10/5.20 IPs. The basic driver supports only RGMII interface. Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-14-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/Kconfig | 12 ++ drivers/net/ethernet/stmicro/stmmac/Makefile | 1 + .../net/ethernet/stmicro/stmmac/dwmac-s32.c | 202 ++++++++++++++++++ 3 files changed, 215 insertions(+) create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 6658536a4e17..4cc85a36a1ab 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -154,6 +154,18 @@ config DWMAC_RZN1 the stmmac device driver. This support can make use of a custom MII converter PCS device. +config DWMAC_S32 + tristate "NXP S32G/S32R GMAC support" + default ARCH_S32 + depends on OF && (ARCH_S32 || COMPILE_TEST) + help + Support for ethernet controller on NXP S32CC SOCs. + + This selects NXP SoC glue layer support for the stmmac + device driver. This driver is used for the S32CC series + SOCs GMAC ethernet controller, ie. S32G2xx, S32G3xx and + S32R45. 
+ config DWMAC_SOCFPGA tristate "SOCFPGA dwmac support" default ARCH_INTEL_SOCFPGA diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 2389fd261344..b26f0e79c2b3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o dwmac-meson8b.o obj-$(CONFIG_DWMAC_QCOM_ETHQOS) += dwmac-qcom-ethqos.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_RZN1) += dwmac-rzn1.o +obj-$(CONFIG_DWMAC_S32) += dwmac-s32.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STARFIVE) += dwmac-starfive.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c new file mode 100644 index 000000000000..9cc0e5817416 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NXP S32G/R GMAC glue layer + * + * Copyright 2019-2024 NXP + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +#define GMAC_INTF_RATE_125M 125000000 /* 125MHz */ + +/* SoC PHY interface control register */ +#define PHY_INTF_SEL_MII 0x00 +#define PHY_INTF_SEL_SGMII 0x01 +#define PHY_INTF_SEL_RGMII 0x02 +#define PHY_INTF_SEL_RMII 0x08 + +struct s32_priv_data { + void __iomem *ioaddr; + void __iomem *ctrl_sts; + struct device *dev; + phy_interface_t *intf_mode; + struct clk *tx_clk; + struct clk *rx_clk; +}; + +static int s32_gmac_write_phy_intf_select(struct s32_priv_data *gmac) +{ + writel(PHY_INTF_SEL_RGMII, gmac->ctrl_sts); + + dev_dbg(gmac->dev, "PHY mode set to %s\n", phy_modes(*gmac->intf_mode)); + + return 0; +} + +static int s32_gmac_init(struct platform_device *pdev, void *priv) +{ + struct s32_priv_data *gmac = priv; + int ret; + + /* Set 
initial TX interface clock */ + ret = clk_prepare_enable(gmac->tx_clk); + if (ret) { + dev_err(&pdev->dev, "Can't enable tx clock\n"); + return ret; + } + ret = clk_set_rate(gmac->tx_clk, GMAC_INTF_RATE_125M); + if (ret) { + dev_err(&pdev->dev, "Can't set tx clock\n"); + goto err_tx_disable; + } + + /* Set initial RX interface clock */ + ret = clk_prepare_enable(gmac->rx_clk); + if (ret) { + dev_err(&pdev->dev, "Can't enable rx clock\n"); + goto err_tx_disable; + } + ret = clk_set_rate(gmac->rx_clk, GMAC_INTF_RATE_125M); + if (ret) { + dev_err(&pdev->dev, "Can't set rx clock\n"); + goto err_txrx_disable; + } + + /* Set interface mode */ + ret = s32_gmac_write_phy_intf_select(gmac); + if (ret) { + dev_err(&pdev->dev, "Can't set PHY interface mode\n"); + goto err_txrx_disable; + } + + return 0; + +err_txrx_disable: + clk_disable_unprepare(gmac->rx_clk); +err_tx_disable: + clk_disable_unprepare(gmac->tx_clk); + return ret; +} + +static void s32_gmac_exit(struct platform_device *pdev, void *priv) +{ + struct s32_priv_data *gmac = priv; + + clk_disable_unprepare(gmac->tx_clk); + clk_disable_unprepare(gmac->rx_clk); +} + +static void s32_fix_mac_speed(void *priv, unsigned int speed, unsigned int mode) +{ + struct s32_priv_data *gmac = priv; + long tx_clk_rate; + int ret; + + tx_clk_rate = rgmii_clock(speed); + if (tx_clk_rate < 0) { + dev_err(gmac->dev, "Unsupported/Invalid speed: %d\n", speed); + return; + } + + dev_dbg(gmac->dev, "Set tx clock to %ld Hz\n", tx_clk_rate); + ret = clk_set_rate(gmac->tx_clk, tx_clk_rate); + if (ret) + dev_err(gmac->dev, "Can't set tx clock\n"); +} + +static int s32_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat; + struct device *dev = &pdev->dev; + struct stmmac_resources res; + struct s32_priv_data *gmac; + int ret; + + gmac = devm_kzalloc(&pdev->dev, sizeof(*gmac), GFP_KERNEL); + if (!gmac) + return -ENOMEM; + + gmac->dev = &pdev->dev; + + ret = stmmac_get_platform_resources(pdev, &res); + if (ret) + 
return dev_err_probe(dev, ret, + "Failed to get platform resources\n"); + + plat = devm_stmmac_probe_config_dt(pdev, res.mac); + if (IS_ERR(plat)) + return dev_err_probe(dev, PTR_ERR(plat), + "dt configuration failed\n"); + + /* PHY interface mode control reg */ + gmac->ctrl_sts = devm_platform_get_and_ioremap_resource(pdev, 1, NULL); + if (IS_ERR(gmac->ctrl_sts)) + return dev_err_probe(dev, PTR_ERR(gmac->ctrl_sts), + "S32CC config region is missing\n"); + + /* tx clock */ + gmac->tx_clk = devm_clk_get(&pdev->dev, "tx"); + if (IS_ERR(gmac->tx_clk)) + return dev_err_probe(dev, PTR_ERR(gmac->tx_clk), + "tx clock not found\n"); + + /* rx clock */ + gmac->rx_clk = devm_clk_get(&pdev->dev, "rx"); + if (IS_ERR(gmac->rx_clk)) + return dev_err_probe(dev, PTR_ERR(gmac->rx_clk), + "rx clock not found\n"); + + gmac->intf_mode = &plat->phy_interface; + gmac->ioaddr = res.addr; + + /* S32CC core feature set */ + plat->has_gmac4 = true; + plat->pmt = 1; + plat->flags |= STMMAC_FLAG_SPH_DISABLE; + plat->rx_fifo_size = 20480; + plat->tx_fifo_size = 20480; + + plat->init = s32_gmac_init; + plat->exit = s32_gmac_exit; + plat->fix_mac_speed = s32_fix_mac_speed; + + plat->bsp_priv = gmac; + + return stmmac_pltfr_probe(pdev, plat, &res); +} + +static const struct of_device_id s32_dwmac_match[] = { + { .compatible = "nxp,s32g2-dwmac" }, + { } +}; +MODULE_DEVICE_TABLE(of, s32_dwmac_match); + +static struct platform_driver s32_dwmac_driver = { + .probe = s32_dwmac_probe, + .remove = stmmac_pltfr_remove, + .driver = { + .name = "s32-dwmac", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = s32_dwmac_match, + }, +}; +module_platform_driver(s32_dwmac_driver); + +MODULE_AUTHOR("Jan Petrous (OSS) "); +MODULE_DESCRIPTION("NXP S32G/R common chassis GMAC driver"); +MODULE_LICENSE("GPL"); + From 6bc6234cbd5e9f7a4d8a20aa4d5f0c891e099649 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:12 +0100 Subject: [PATCH 0296/1386] MAINTAINERS: Add Jan Petrous as the NXP S32G/R 
DWMAC driver maintainer Add myself as NXP S32G/R DWMAC Ethernet driver maintainer. Reviewed-by: Andrew Lunn Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-15-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2d75560e64ac..af35519be320 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2836,6 +2836,13 @@ S: Maintained F: arch/arm64/boot/dts/freescale/s32g*.dts* F: drivers/pinctrl/nxp/ +ARM/NXP S32G/S32R DWMAC ETHERNET DRIVER +M: Jan Petrous +L: NXP S32 Linux Team +S: Maintained +F: Documentation/devicetree/bindings/net/nxp,s32-dwmac.yaml +F: drivers/net/ethernet/stmicro/stmmac/dwmac-s32.c + ARM/Orion SoC/Technologic Systems TS-78xx platform support M: Alexander Clouter L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) From 5595e3613ea768b81f600d37ea6880983339f21a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 7 Dec 2024 15:18:44 -0600 Subject: [PATCH 0297/1386] dsa: mv88e6xxx: Move available stats into info structure Different families of switches have different statistics available. This information is currently hard-coded into functions; however, it will also soon be needed when getting statistics from the RMU. Move it into the info structure.
Signed-off-by: Andrew Lunn Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241207-v6-13-rc1-net-next-mv88e6xxx-stats-refactor-v1-1-b9960f839846@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 42 +++++++++++++++++++++++++++++--- drivers/net/dsa/mv88e6xxx/chip.h | 1 + 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 3a792f79270d..794653b53bb5 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1289,7 +1289,7 @@ static size_t mv88e6095_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_PORT))) + if (!(stat->type & chip->info->stats_type)) return 0; *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0, @@ -1301,7 +1301,7 @@ static size_t mv88e6250_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & STATS_TYPE_BANK0)) + if (!(stat->type & chip->info->stats_type)) return 0; *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0, @@ -1313,7 +1313,7 @@ static size_t mv88e6320_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_BANK1))) + if (!(stat->type & chip->info->stats_type)) return 0; *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, @@ -1326,7 +1326,7 @@ static size_t mv88e6390_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & (STATS_TYPE_BANK0 | STATS_TYPE_BANK1))) + if (!(stat->type & chip->info->stats_type)) return 0; *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, @@ -5645,6 +5645,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 5, + .stats_type = 
STATS_TYPE_BANK0, .atu_move_port_mask = 0xf, .dual_chip = true, .ops = &mv88e6250_ops, @@ -5665,6 +5666,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 5, + .stats_type = STATS_TYPE_BANK0, .atu_move_port_mask = 0xf, .dual_chip = true, .ops = &mv88e6250_ops, @@ -5687,6 +5689,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 8, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5708,6 +5711,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .multi_chip = true, .ops = &mv88e6095_ops, @@ -5730,6 +5734,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 8, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5754,6 +5759,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5776,6 +5782,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 9, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .multi_chip = true, .ops = &mv88e6131_ops, @@ -5800,6 +5807,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .atu_move_port_mask = 0x1f, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .pvt = true, .multi_chip = true, .edsa_support = MV88E6XXX_EDSA_SUPPORTED, @@ -5823,6 +5831,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = 
STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5848,6 +5857,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5872,6 +5882,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5897,6 +5908,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5921,6 +5933,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5946,6 +5959,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -5968,6 +5982,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .global2_addr = 0x1c, .age_time_coeff = 15000, .g1_irqs = 8, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .multi_chip = true, .edsa_support = MV88E6XXX_EDSA_SUPPORTED, @@ -5992,6 +6007,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .pvt = true, .multi_chip = true, .atu_move_port_mask = 0x1f, @@ -6016,6 +6032,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = 
STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6039,6 +6056,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6063,6 +6081,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6087,6 +6106,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6114,6 +6134,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0, .atu_move_port_mask = 0xf, .dual_chip = true, .ptp_support = true, @@ -6138,6 +6159,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -6161,6 +6183,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0, .atu_move_port_mask = 0xf, .dual_chip = true, .ptp_support = true, @@ -6184,6 +6207,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6208,6 +6232,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 8, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, 
.atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -6233,6 +6258,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 8, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0xf, .multi_chip = true, .edsa_support = MV88E6XXX_EDSA_SUPPORTED, @@ -6259,6 +6285,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .atu_move_port_mask = 0x1f, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .pvt = true, .multi_chip = true, .edsa_support = MV88E6XXX_EDSA_SUPPORTED, @@ -6283,6 +6310,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -6307,6 +6335,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -6332,6 +6361,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 15000, .g1_irqs = 9, .g2_irqs = 10, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_PORT, .atu_move_port_mask = 0xf, .pvt = true, .multi_chip = true, @@ -6359,6 +6389,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6383,6 +6414,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6408,6 +6440,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 9, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | 
STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, @@ -6433,6 +6466,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .age_time_coeff = 3750, .g1_irqs = 10, .g2_irqs = 14, + .stats_type = STATS_TYPE_BANK0 | STATS_TYPE_BANK1, .atu_move_port_mask = 0x1f, .pvt = true, .multi_chip = true, diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 9fe8e8a7856b..86bf113c9bfa 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -144,6 +144,7 @@ struct mv88e6xxx_info { unsigned int age_time_coeff; unsigned int g1_irqs; unsigned int g2_irqs; + int stats_type; bool pvt; /* Mark certain ports as invalid. This is required for example for the From 9a4eef6bf2bee375e94a3647cc11906ed3ee58f7 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 7 Dec 2024 15:18:45 -0600 Subject: [PATCH 0298/1386] dsa: mv88e6xxx: Centralise common statistics check With moving information about available statistics into the info structure, the test becomes identical. Consolidate them into a single test. 
Signed-off-by: Andrew Lunn Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241207-v6-13-rc1-net-next-mv88e6xxx-stats-refactor-v1-2-b9960f839846@lunn.ch Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 794653b53bb5..34708c739b04 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -1289,9 +1289,6 @@ static size_t mv88e6095_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & chip->info->stats_type)) - return 0; - *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0, MV88E6XXX_G1_STATS_OP_HIST_RX); return 1; @@ -1301,9 +1298,6 @@ static size_t mv88e6250_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & chip->info->stats_type)) - return 0; - *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, 0, MV88E6XXX_G1_STATS_OP_HIST_RX); return 1; @@ -1313,9 +1307,6 @@ static size_t mv88e6320_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & chip->info->stats_type)) - return 0; - *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, MV88E6XXX_G1_STATS_OP_BANK_1_BIT_9, MV88E6XXX_G1_STATS_OP_HIST_RX); @@ -1326,9 +1317,6 @@ static size_t mv88e6390_stats_get_stat(struct mv88e6xxx_chip *chip, int port, const struct mv88e6xxx_hw_stat *stat, uint64_t *data) { - if (!(stat->type & chip->info->stats_type)) - return 0; - *data = _mv88e6xxx_get_ethtool_stat(chip, stat, port, MV88E6XXX_G1_STATS_OP_BANK_1_BIT_10, 0); @@ -1341,6 +1329,9 @@ static size_t mv88e6xxx_stats_get_stat(struct mv88e6xxx_chip *chip, int port, { int ret = 0; + if (!(stat->type & chip->info->stats_type)) + return 0; + if (chip->info->ops->stats_get_stat) { 
mv88e6xxx_reg_lock(chip); ret = chip->info->ops->stats_get_stat(chip, port, stat, data); From 46afe345ff181e3b72830cb12f1e11cc837cc58e Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Sun, 8 Dec 2024 15:02:02 +0800 Subject: [PATCH 0299/1386] net: stmmac: Relocate extern declarations in common.h and hwif.h The extern declarations should be in a header file that corresponds to their definition; move each of these extern declarations to its corresponding header file. Some of them have nowhere to go, so move them to hwif.h since they are referenced in hwif.c only. dwmac100_* dwmac1000_* dwmac4_* dwmac410_* dwmac510_* stay in hwif.h, otherwise you will be flooded with name conflicts from dwmac100.h, dwmac1000.h and dwmac4.h if hwif.c tries to #include these .h files. Compile tested only. No functional change intended. Suggested-by: Russell King (Oracle) Signed-off-by: Furong Xu <0x1207@gmail.com> Link: https://patch.msgid.link/20241208070202.203931-1-0x1207@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 14 -------------- .../net/ethernet/stmicro/stmmac/dwmac4_descs.h | 3 +++ drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 5 +++++ drivers/net/ethernet/stmicro/stmmac/hwif.c | 2 ++ drivers/net/ethernet/stmicro/stmmac/hwif.h | 16 +++++++++------- drivers/net/ethernet/stmicro/stmmac/mmc.h | 3 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_est.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h | 3 +++ 8 files changed, 27 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 70d601f45481..e25db747a81a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -545,18 +545,8 @@ struct dma_features { #define STMMAC_VLAN_INSERT 0x2 #define STMMAC_VLAN_REPLACE 0x3 -extern const struct stmmac_desc_ops enh_desc_ops; -extern const struct stmmac_desc_ops ndesc_ops; - struct mac_device_info; -extern
const struct stmmac_hwtimestamp stmmac_ptp; -extern const struct stmmac_hwtimestamp dwmac1000_ptp; -extern const struct stmmac_mode_ops dwmac4_ring_mode_ops; - -extern const struct ptp_clock_info stmmac_ptp_clock_ops; -extern const struct ptp_clock_info dwmac1000_ptp_clock_ops; - struct mac_link { u32 caps; u32 speed_mask; @@ -643,8 +633,4 @@ void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable); void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr); -extern const struct stmmac_mode_ops ring_mode_ops; -extern const struct stmmac_mode_ops chain_mode_ops; -extern const struct stmmac_desc_ops dwmac4_desc_ops; - #endif /* __COMMON_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h index 1ce6f43d545a..806555976496 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.h @@ -144,4 +144,7 @@ /* TDS3 use for both format (read and write back) */ #define RDES3_OWN BIT(31) +extern const struct stmmac_mode_ops dwmac4_ring_mode_ops; +extern const struct stmmac_desc_ops dwmac4_desc_ops; + #endif /* __DWMAC4_DESCS_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index a04a79003692..20027d3c25a7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -493,4 +493,9 @@ #define XGMAC_RDES3_TSD BIT(6) #define XGMAC_RDES3_TSA BIT(4) +extern const struct stmmac_ops dwxgmac210_ops; +extern const struct stmmac_ops dwxlgmac2_ops; +extern const struct stmmac_dma_ops dwxgmac210_dma_ops; +extern const struct stmmac_desc_ops dwxgmac210_desc_ops; + #endif /* __STMMAC_DWXGMAC2_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index a72d336a8350..4bd79de2e222 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -9,6 +9,8 
@@ #include "stmmac_fpe.h" #include "stmmac_ptp.h" #include "stmmac_est.h" +#include "dwmac4_descs.h" +#include "dwxgmac2.h" static u32 stmmac_get_id(struct stmmac_priv *priv, u32 id_reg) { diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index 64f8ed67dcc4..e428c82b7d31 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -665,6 +665,15 @@ struct stmmac_regs_off { u32 est_off; }; +extern const struct stmmac_desc_ops enh_desc_ops; +extern const struct stmmac_desc_ops ndesc_ops; + +extern const struct stmmac_hwtimestamp stmmac_ptp; +extern const struct stmmac_hwtimestamp dwmac1000_ptp; + +extern const struct stmmac_mode_ops ring_mode_ops; +extern const struct stmmac_mode_ops chain_mode_ops; + extern const struct stmmac_ops dwmac100_ops; extern const struct stmmac_dma_ops dwmac100_dma_ops; extern const struct stmmac_ops dwmac1000_ops; @@ -677,13 +686,6 @@ extern const struct stmmac_ops dwmac510_ops; extern const struct stmmac_tc_ops dwmac4_tc_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; extern const struct stmmac_tc_ops dwxgmac_tc_ops; -extern const struct stmmac_ops dwxgmac210_ops; -extern const struct stmmac_ops dwxlgmac2_ops; -extern const struct stmmac_dma_ops dwxgmac210_dma_ops; -extern const struct stmmac_desc_ops dwxgmac210_desc_ops; -extern const struct stmmac_mmc_ops dwmac_mmc_ops; -extern const struct stmmac_mmc_ops dwxgmac_mmc_ops; -extern const struct stmmac_est_ops dwmac510_est_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc.h b/drivers/net/ethernet/stmicro/stmmac/mmc.h index 5d1ea3e07459..1cba39fb2c44 100644 --- a/drivers/net/ethernet/stmicro/stmmac/mmc.h +++ b/drivers/net/ethernet/stmicro/stmmac/mmc.h @@ -139,4 +139,7 @@ struct stmmac_counters { unsigned int mmc_rx_fpe_fragment_cntr; }; +extern const struct 
stmmac_mmc_ops dwmac_mmc_ops; +extern const struct stmmac_mmc_ops dwxgmac_mmc_ops; + #endif /* __MMC_H__ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h index 7a858c566e7e..d247fa383a6e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h @@ -62,3 +62,5 @@ #define EST_SRWO BIT(0) #define EST_GCL_DATA 0x00000034 + +extern const struct stmmac_est_ops dwmac510_est_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h index 4cc70480ce0f..3fe0e3a80e80 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h @@ -104,4 +104,7 @@ int dwmac1000_ptp_enable(struct ptp_clock_info *ptp, void dwmac1000_get_ptptime(void __iomem *ptpaddr, u64 *ptp_time); void dwmac1000_timestamp_interrupt(struct stmmac_priv *priv); +extern const struct ptp_clock_info stmmac_ptp_clock_ops; +extern const struct ptp_clock_info dwmac1000_ptp_clock_ops; + #endif /* __STMMAC_PTP_H__ */ From 33035977b464fc15fa1028606a05316f91f14a23 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Dec 2024 00:49:55 +0100 Subject: [PATCH 0300/1386] net: pktgen: Use kthread_create_on_cpu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the proper API instead of open coding it. 
Signed-off-by: Frederic Weisbecker Reviewed-by: Eric Dumazet Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241208234955.31910-1-frederic@kernel.org Signed-off-by: Jakub Kicinski --- net/core/pktgen.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e23cacbe66e..ee95dbb0539a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3883,17 +3883,14 @@ static int __net_init pktgen_create_thread(int cpu, struct pktgen_net *pn) list_add_tail(&t->th_list, &pn->pktgen_threads); init_completion(&t->start_done); - p = kthread_create_on_node(pktgen_thread_worker, - t, - cpu_to_node(cpu), - "kpktgend_%d", cpu); + p = kthread_create_on_cpu(pktgen_thread_worker, t, cpu, "kpktgend_%d"); if (IS_ERR(p)) { pr_err("kthread_create_on_node() failed for cpu %d\n", t->cpu); list_del(&t->th_list); kfree(t); return PTR_ERR(p); } - kthread_bind(p, cpu); + t->tsk = p; pe = proc_create_data(t->tsk->comm, 0600, pn->proc_dir, From 6bb6ab852c19442285c50874954da454ae60c6c3 Mon Sep 17 00:00:00 2001 From: Andrew Kreimer Date: Mon, 9 Dec 2024 14:47:30 +0200 Subject: [PATCH 0301/1386] net: hinic: Fix typo in dev_err message There is a typo in dev_err message: fliter -> filter. Fix it via codespell. 
Signed-off-by: Andrew Kreimer Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241209124804.9789-1-algonell@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/huawei/hinic/hinic_port.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c index f81a43d2cdfc..486fb0e20bef 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c @@ -469,7 +469,7 @@ int hinic_set_vlan_fliter(struct hinic_dev *nic_dev, u32 en) err = HINIC_MGMT_CMD_UNSUPPORTED; } else if (err || !out_size || vlan_filter.status) { dev_err(&pdev->dev, - "Failed to set vlan fliter, err: %d, status: 0x%x, out size: 0x%x\n", + "Failed to set vlan filter, err: %d, status: 0x%x, out size: 0x%x\n", err, vlan_filter.status, out_size); err = -EINVAL; } From 4eb0308d78d3891d7d691f719e262cf908bdcb35 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Mon, 9 Dec 2024 18:50:42 +0100 Subject: [PATCH 0302/1386] net: phy: dp83822: Replace DP83822_DEVADDR with MDIO_MMD_VEND2 Instead of using DP83822_DEVADDR which is locally defined use MDIO_MMD_VEND2. 
Signed-off-by: Dimitri Fedrau Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209-dp83822-mdio-mmd-vend2-v1-1-4473c7284b94@liebherr.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/dp83822.c | 58 +++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index cf8b6d0bfaa9..25ee09c48027 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -22,8 +22,6 @@ #define DP83826C_PHY_ID 0x2000a130 #define DP83826NC_PHY_ID 0x2000a110 -#define DP83822_DEVADDR 0x1f - #define MII_DP83822_CTRL_2 0x0a #define MII_DP83822_PHYSTS 0x10 #define MII_DP83822_PHYSCR 0x11 @@ -159,14 +157,14 @@ static int dp83822_config_wol(struct phy_device *phydev, /* MAC addresses start with byte 5, but stored in mac[0]. * 822 PHYs store bytes 4|5, 2|3, 0|1 */ - phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA1, + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA1, (mac[1] << 8) | mac[0]); - phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA2, + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA2, (mac[3] << 8) | mac[2]); - phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_DA3, + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_DA3, (mac[5] << 8) | mac[4]); - value = phy_read_mmd(phydev, DP83822_DEVADDR, + value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG); if (wol->wolopts & WAKE_MAGIC) value |= DP83822_WOL_MAGIC_EN; @@ -174,13 +172,13 @@ static int dp83822_config_wol(struct phy_device *phydev, value &= ~DP83822_WOL_MAGIC_EN; if (wol->wolopts & WAKE_MAGICSECURE) { - phy_write_mmd(phydev, DP83822_DEVADDR, + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RXSOP1, (wol->sopass[1] << 8) | wol->sopass[0]); - phy_write_mmd(phydev, DP83822_DEVADDR, + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RXSOP2, (wol->sopass[3] << 8) | wol->sopass[2]); - phy_write_mmd(phydev, DP83822_DEVADDR, + phy_write_mmd(phydev, 
MDIO_MMD_VEND2, MII_DP83822_RXSOP3, (wol->sopass[5] << 8) | wol->sopass[4]); value |= DP83822_WOL_SECURE_ON; @@ -194,10 +192,10 @@ static int dp83822_config_wol(struct phy_device *phydev, value |= DP83822_WOL_EN | DP83822_WOL_INDICATION_SEL | DP83822_WOL_CLR_INDICATION; - return phy_write_mmd(phydev, DP83822_DEVADDR, + return phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG, value); } else { - return phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + return phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG, DP83822_WOL_EN | DP83822_WOL_MAGIC_EN | @@ -226,23 +224,23 @@ static void dp83822_get_wol(struct phy_device *phydev, wol->supported = (WAKE_MAGIC | WAKE_MAGICSECURE); wol->wolopts = 0; - value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG); if (value & DP83822_WOL_MAGIC_EN) wol->wolopts |= WAKE_MAGIC; if (value & DP83822_WOL_SECURE_ON) { - sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RXSOP1); wol->sopass[0] = (sopass_val & 0xff); wol->sopass[1] = (sopass_val >> 8); - sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RXSOP2); wol->sopass[2] = (sopass_val & 0xff); wol->sopass[3] = (sopass_val >> 8); - sopass_val = phy_read_mmd(phydev, DP83822_DEVADDR, + sopass_val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RXSOP3); wol->sopass[4] = (sopass_val & 0xff); wol->sopass[5] = (sopass_val >> 8); @@ -430,18 +428,18 @@ static int dp83822_config_init(struct phy_device *phydev) if (tx_int_delay <= 0) rgmii_delay |= DP83822_TX_CLK_SHIFT; - err = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + err = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RX_CLK_SHIFT | DP83822_TX_CLK_SHIFT, rgmii_delay); if (err) return err; - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + err = phy_set_bits_mmd(phydev, 
MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); if (err) return err; } else { - err = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, + err = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RGMII_MODE_EN); if (err) @@ -496,7 +494,7 @@ static int dp83822_config_init(struct phy_device *phydev) return err; if (dp83822->fx_signal_det_low) { - err = phy_set_bits_mmd(phydev, DP83822_DEVADDR, + err = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_GENCFG, DP83822_SIG_DET_LOW); if (err) @@ -514,10 +512,10 @@ static int dp8382x_config_rmii_mode(struct phy_device *phydev) if (!device_property_read_string(dev, "ti,rmii-mode", &of_val)) { if (strcmp(of_val, "master") == 0) { - ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RMII_MODE_SEL); } else if (strcmp(of_val, "slave") == 0) { - ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RMII_MODE_SEL); } else { phydev_err(phydev, "Invalid value for ti,rmii-mode property (%s)\n", @@ -539,7 +537,7 @@ static int dp83826_config_init(struct phy_device *phydev) int ret; if (phydev->interface == PHY_INTERFACE_MODE_RMII) { - ret = phy_set_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + ret = phy_set_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RMII_MODE_EN); if (ret) return ret; @@ -548,7 +546,7 @@ static int dp83826_config_init(struct phy_device *phydev) if (ret) return ret; } else { - ret = phy_clear_bits_mmd(phydev, DP83822_DEVADDR, MII_DP83822_RCSR, + ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_RCSR, DP83822_RMII_MODE_EN); if (ret) return ret; @@ -560,7 +558,7 @@ static int dp83826_config_init(struct phy_device *phydev) FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_5_TO_4, dp83822->cfg_dac_minus)); mask = DP83826_VOD_CFG1_MINUS_MDIX_MASK | DP83826_VOD_CFG1_MINUS_MDI_MASK; - ret = 
phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG1, mask, val); + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG1, mask, val); if (ret) return ret; @@ -568,7 +566,7 @@ static int dp83826_config_init(struct phy_device *phydev) FIELD_GET(DP83826_CFG_DAC_MINUS_MDIX_3_TO_0, dp83822->cfg_dac_minus)); mask = DP83826_VOD_CFG2_MINUS_MDIX_MASK; - ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val); + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG2, mask, val); if (ret) return ret; } @@ -577,7 +575,7 @@ static int dp83826_config_init(struct phy_device *phydev) val = FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDIX_MASK, dp83822->cfg_dac_plus) | FIELD_PREP(DP83826_VOD_CFG2_PLUS_MDI_MASK, dp83822->cfg_dac_plus); mask = DP83826_VOD_CFG2_PLUS_MDIX_MASK | DP83826_VOD_CFG2_PLUS_MDI_MASK; - ret = phy_modify_mmd(phydev, DP83822_DEVADDR, MII_DP83826_VOD_CFG2, mask, val); + ret = phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83826_VOD_CFG2, mask, val); if (ret) return ret; } @@ -673,7 +671,7 @@ static int dp83822_read_straps(struct phy_device *phydev) int fx_enabled, fx_sd_enable; int val; - val = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_SOR1); + val = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_SOR1); if (val < 0) return val; @@ -748,7 +746,7 @@ static int dp83822_suspend(struct phy_device *phydev) { int value; - value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG); if (!(value & DP83822_WOL_EN)) genphy_suspend(phydev); @@ -762,9 +760,9 @@ static int dp83822_resume(struct phy_device *phydev) genphy_resume(phydev); - value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG); + value = phy_read_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG); - phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value | + phy_write_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_WOL_CFG, value | DP83822_WOL_CLR_INDICATION); return 0; 
From be325f08c432ae5ac6d6594d163e1899cdf202df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:45 +0000 Subject: [PATCH 0303/1386] rtnetlink: add ndo_fdb_dump_context rtnl_fdb_dump() and various ndo_fdb_dump() helpers share a hidden layout of cb->ctx. Before switching rtnl_fdb_dump() to for_each_netdev_dump() in the following patch, make this more explicit. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../ethernet/freescale/dpaa2/dpaa2-switch.c | 3 ++- drivers/net/ethernet/mscc/ocelot_net.c | 3 ++- drivers/net/vxlan/vxlan_core.c | 5 ++-- include/linux/rtnetlink.h | 7 +++++ net/bridge/br_fdb.c | 3 ++- net/core/rtnetlink.c | 26 ++++++++++--------- net/dsa/user.c | 3 ++- 7 files changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a293b08f36d4..147a93bf9fa9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -780,13 +780,14 @@ struct ethsw_dump_ctx { static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry, struct ethsw_dump_ctx *dump) { + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 558e03301aa8..8d48468cddd7 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -758,12 +758,13 @@ static int ocelot_port_fdb_do_dump(const unsigned char *addr, 
u16 vid, bool is_static, void *data) { struct ocelot_dump_ctx *dump = data; + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 43cf672b7b9f..0c356e0a61ef 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1352,6 +1352,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; int err = 0; @@ -1364,7 +1365,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct vxlan_rdst *rd; if (rcu_access_pointer(f->nh)) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip_nh; err = vxlan_fdb_info(skb, vxlan, f, NETLINK_CB(cb->skb).portid, @@ -1381,7 +1382,7 @@ skip_nh: } list_for_each_entry_rcu(rd, &f->remotes, list) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; err = vxlan_fdb_info(skb, vxlan, f, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 811ce44113f6..c43cffb014a7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -178,6 +178,13 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); +/* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. 
*/ +struct ndo_fdb_dump_context { + unsigned long s_h; + unsigned long s_idx; + unsigned long fdb_idx; +}; + extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 82bac2426631..902694c0ce64 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -955,6 +955,7 @@ int br_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int *idx) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_bridge *br = netdev_priv(dev); struct net_bridge_fdb_entry *f; int err = 0; @@ -970,7 +971,7 @@ int br_fdb_dump(struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab5f201bf0ab..453cc8bf18fb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4762,15 +4762,16 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, int *idx, struct netdev_hw_addr_list *list) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct netdev_hw_addr *ha; - int err; u32 portid, seq; + int err; portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -4909,10 +4910,9 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net_device *dev; - struct net_device *br_dev = NULL; - const struct net_device_ops *ops = NULL; - const struct net_device_ops *cops = NULL; + const struct net_device_ops *ops = NULL, *cops = NULL; + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; + struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); struct 
hlist_head *head; int brport_idx = 0; @@ -4922,6 +4922,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; + NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); + if (cb->strict_check) err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx, cb->extack); @@ -4939,8 +4941,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - s_h = cb->args[0]; - s_idx = cb->args[1]; + s_h = ctx->s_h; + s_idx = ctx->s_idx; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; @@ -4992,7 +4994,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cops = NULL; /* reset fdb offset to 0 for rest of the interfaces */ - cb->args[2] = 0; + ctx->fdb_idx = 0; fidx = 0; cont: idx++; @@ -5000,9 +5002,9 @@ cont: } out: - cb->args[0] = h; - cb->args[1] = idx; - cb->args[2] = fidx; + ctx->s_h = h; + ctx->s_idx = idx; + ctx->fdb_idx = fidx; return skb->len; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 06c30a9e29ff..c736c019e2af 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -515,12 +515,13 @@ dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data) { struct dsa_user_dump_ctx *dump = data; + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, From 53970a05f799087e2dd2005973609188504e7fcc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:46 +0000 Subject: [PATCH 0304/1386] rtnetlink: switch rtnl_fdb_dump() to for_each_netdev_dump() This is the last netdev iterator still using net->dev_index_head[]. Convert to modern for_each_netdev_dump() for better scalability, and use common patterns in our stack. 
Following patch in this series removes the pad field in struct ndo_fdb_dump_context. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/rtnetlink.h | 4 +- net/core/rtnetlink.c | 100 +++++++++++++++----------------------- 2 files changed, 41 insertions(+), 63 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c43cffb014a7..5546571c2553 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -180,8 +180,8 @@ void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ struct ndo_fdb_dump_context { - unsigned long s_h; - unsigned long s_idx; + unsigned long ifindex; + unsigned long pad; unsigned long fdb_idx; }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 453cc8bf18fb..8fe252c298a2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4914,13 +4914,10 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); - struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int h, s_h; - int idx = 0, s_idx; - int err = 0; int fidx = 0; + int err; NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); @@ -4941,69 +4938,50 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - s_h = ctx->s_h; - s_idx = ctx->s_idx; + for_each_netdev_dump(net, dev, ctx->ifindex) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - - if (brport_idx && (dev->ifindex != brport_idx)) + if (!br_idx) { /* user did not specify a 
specific bridge */ + if (netif_is_bridge_port(dev)) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + } else { + if (dev != br_dev && + !netif_is_bridge_port(dev)) continue; - if (!br_idx) { /* user did not specify a specific bridge */ - if (netif_is_bridge_port(dev)) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; - } - } else { - if (dev != br_dev && - !netif_is_bridge_port(dev)) - continue; - - if (br_dev != netdev_master_upper_dev_get(dev) && - !netif_is_bridge_master(dev)) - continue; - cops = ops; - } - - if (idx < s_idx) - goto cont; - - if (netif_is_bridge_port(dev)) { - if (cops && cops->ndo_fdb_dump) { - err = cops->ndo_fdb_dump(skb, cb, - br_dev, dev, - &fidx); - if (err == -EMSGSIZE) - goto out; - } - } - - if (dev->netdev_ops->ndo_fdb_dump) - err = dev->netdev_ops->ndo_fdb_dump(skb, cb, - dev, NULL, - &fidx); - else - err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, - &fidx); - if (err == -EMSGSIZE) - goto out; - - cops = NULL; - - /* reset fdb offset to 0 for rest of the interfaces */ - ctx->fdb_idx = 0; - fidx = 0; -cont: - idx++; + if (br_dev != netdev_master_upper_dev_get(dev) && + !netif_is_bridge_master(dev)) + continue; + cops = ops; } + + if (netif_is_bridge_port(dev)) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + break; + } + } + + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx); + if (err == -EMSGSIZE) + break; + + cops = NULL; + + /* reset fdb offset to 0 for rest of the interfaces */ + ctx->fdb_idx = 0; + fidx = 0; } -out: - ctx->s_h = h; - ctx->s_idx = idx; ctx->fdb_idx = fidx; return skb->len; From 53a6d8912372fc23ea82cc7a49eb59047aa0a650 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:47 +0000 Subject: [PATCH 0305/1386] rtnetlink: remove pad field in ndo_fdb_dump_context 
I chose to remove this field in a separate patch to ease potential bisection, in case one ndo_fdb_dump() is still using the old way (cb->args[2] instead of ctx->fdb_idx). Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5546571c2553..3b9d132cbc9e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -181,7 +181,6 @@ void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ struct ndo_fdb_dump_context { unsigned long ifindex; - unsigned long pad; unsigned long fdb_idx; }; From ce864c76ccd69470205f5cb22181bffe23563730 Mon Sep 17 00:00:00 2001 From: Philipp Stanner Date: Fri, 6 Dec 2024 20:57:13 +0100 Subject: [PATCH 0306/1386] net: wwan: t7xx: Replace deprecated PCI functions pcim_iomap_regions() and pcim_iomap_table() have been deprecated by the PCI subsystem. Replace them with pcim_iomap_region(). Additionally, pass the actual driver name to that function to improve debug output.
Signed-off-by: Philipp Stanner Reviewed-by: Simon Horman Reviewed-by: Sergey Ryazanov Link: https://patch.msgid.link/20241206195712.182282-2-pstanner@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/wwan/t7xx/t7xx_pci.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/net/wwan/t7xx/t7xx_pci.c b/drivers/net/wwan/t7xx/t7xx_pci.c index 8381b0dc7acb..02f2ec7cf4ce 100644 --- a/drivers/net/wwan/t7xx/t7xx_pci.c +++ b/drivers/net/wwan/t7xx/t7xx_pci.c @@ -43,6 +43,8 @@ #include "t7xx_state_monitor.h" #include "t7xx_port_proxy.h" +#define DRIVER_NAME "mtk_t7xx" + #define T7XX_PCI_IREG_BASE 0 #define T7XX_PCI_EREG_BASE 2 @@ -833,6 +835,7 @@ static void t7xx_pci_infracfg_ao_calc(struct t7xx_pci_dev *t7xx_dev) static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct t7xx_pci_dev *t7xx_dev; + void __iomem *iomem; int ret; t7xx_dev = devm_kzalloc(&pdev->dev, sizeof(*t7xx_dev), GFP_KERNEL); @@ -848,12 +851,21 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_master(pdev); - ret = pcim_iomap_regions(pdev, BIT(T7XX_PCI_IREG_BASE) | BIT(T7XX_PCI_EREG_BASE), - pci_name(pdev)); + iomem = pcim_iomap_region(pdev, T7XX_PCI_IREG_BASE, DRIVER_NAME); + ret = PTR_ERR_OR_ZERO(iomem); if (ret) { - dev_err(&pdev->dev, "Could not request BARs: %d\n", ret); + dev_err(&pdev->dev, "Could not request IREG BAR: %d\n", ret); return -ENOMEM; } + IREG_BASE(t7xx_dev) = iomem; + + iomem = pcim_iomap_region(pdev, T7XX_PCI_EREG_BASE, DRIVER_NAME); + ret = PTR_ERR_OR_ZERO(iomem); + if (ret) { + dev_err(&pdev->dev, "Could not request EREG BAR: %d\n", ret); + return -ENOMEM; + } + t7xx_dev->base_addr.pcie_ext_reg_base = iomem; ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)); if (ret) { @@ -867,9 +879,6 @@ static int t7xx_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } - IREG_BASE(t7xx_dev) = pcim_iomap_table(pdev)[T7XX_PCI_IREG_BASE]; - 
t7xx_dev->base_addr.pcie_ext_reg_base = pcim_iomap_table(pdev)[T7XX_PCI_EREG_BASE]; - ret = t7xx_pci_pm_init(t7xx_dev); if (ret) return ret; @@ -937,7 +946,7 @@ static const struct pci_device_id t7xx_pci_table[] = { MODULE_DEVICE_TABLE(pci, t7xx_pci_table); static struct pci_driver t7xx_pci_driver = { - .name = "mtk_t7xx", + .name = DRIVER_NAME, .id_table = t7xx_pci_table, .probe = t7xx_pci_probe, .remove = t7xx_pci_remove, From d354d008255ffdde6f3d4549dd6a06a14ee76619 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:41 +0100 Subject: [PATCH 0307/1386] net: usb: lan78xx: Add error handling to lan78xx_setup_irq_domain Update `lan78xx_setup_irq_domain` to handle errors in `lan78xx_read_reg`. Return the error code immediately if the read operation fails, ensuring proper error propagation during IRQ domain setup. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index d5f6367d3714..070b21baffaf 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2445,7 +2445,10 @@ static int lan78xx_setup_irq_domain(struct lan78xx_net *dev) mutex_init(&dev->domain_data.irq_lock); - lan78xx_read_reg(dev, INT_EP_CTL, &buf); + ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); + if (ret < 0) + return ret; + dev->domain_data.irqenable = buf; dev->domain_data.irqchip = &lan78xx_irqchip; From 6f31135894ec96481e2bda93a1da70712f5e57c1 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:42 +0100 Subject: [PATCH 0308/1386] net: usb: lan78xx: Add error handling to lan78xx_init_mac_address Convert `lan78xx_init_mac_address` to return error codes and handle failures in register read and write operations. 
Update `lan78xx_reset` to check for errors during MAC address initialization and propagate them appropriately. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 070b21baffaf..26dc43bac84b 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2153,13 +2153,19 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { .get_regs = lan78xx_get_regs, }; -static void lan78xx_init_mac_address(struct lan78xx_net *dev) +static int lan78xx_init_mac_address(struct lan78xx_net *dev) { u32 addr_lo, addr_hi; u8 addr[6]; + int ret; - lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); - lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); + ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); + if (ret < 0) + return ret; + + ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); + if (ret < 0) + return ret; addr[0] = addr_lo & 0xFF; addr[1] = (addr_lo >> 8) & 0xFF; @@ -2192,14 +2198,26 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) (addr[2] << 16) | (addr[3] << 24); addr_hi = addr[4] | (addr[5] << 8); - lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (ret < 0) + return ret; } - lan78xx_write_reg(dev, MAF_LO(0), addr_lo); - lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + if (ret < 0) + return ret; eth_hw_addr_set(dev->net, addr); + + return 0; } /* MDIO read and write wrappers for phylib */ @@ -2990,7 +3008,9 @@ static int 
lan78xx_reset(struct lan78xx_net *dev) } } while (buf & HW_CFG_LRST_); - lan78xx_init_mac_address(dev); + ret = lan78xx_init_mac_address(dev); + if (ret < 0) + return ret; /* save DEVID for later usage */ ret = lan78xx_read_reg(dev, ID_REV, &buf); From 9a46956c72cbc5c1725b3ee7de5586e6e8b1b0b7 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:43 +0100 Subject: [PATCH 0309/1386] net: usb: lan78xx: Add error handling to lan78xx_set_mac_addr Update `lan78xx_set_mac_addr` to handle errors during MAC address register write operations. Ensure that errors are properly propagated to the caller, improving the robustness of MAC address updates. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-4-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 26dc43bac84b..5d318ff8b33d 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2763,6 +2763,7 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) struct lan78xx_net *dev = netdev_priv(netdev); struct sockaddr *addr = p; u32 addr_lo, addr_hi; + int ret; if (netif_running(netdev)) return -EBUSY; @@ -2779,14 +2780,20 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) addr_hi = netdev->dev_addr[4] | netdev->dev_addr[5] << 8; - lan78xx_write_reg(dev, RX_ADDRL, addr_lo); - lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); + if (ret < 0) + return ret; + + ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + if (ret < 0) + return ret; /* Added to support MAC address changes */ - lan78xx_write_reg(dev, MAF_LO(0), addr_lo); - lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); + if (ret < 0) + return ret; - return 0; + return 
lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); } /* Enable or disable Rx checksum offload engine */ From 41b774e4f3279a3b3149a36fe27557ecdc72c29c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:45 +0100 Subject: [PATCH 0310/1386] net: usb: lan78xx: Simplify lan78xx_update_reg Simplify `lan78xx_update_reg` by directly returning the result of `lan78xx_write_reg`. This eliminates unnecessary checks and improves code readability. Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-6-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 5d318ff8b33d..442b6ee2dd46 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -674,11 +674,7 @@ static int lan78xx_update_reg(struct lan78xx_net *dev, u32 reg, u32 mask, buf &= ~mask; buf |= (mask & data); - ret = lan78xx_write_reg(dev, reg, buf); - if (ret < 0) - return ret; - - return 0; + return lan78xx_write_reg(dev, reg, buf); } static int lan78xx_read_stats(struct lan78xx_net *dev, From bf361b18d91e96dee50c5794097a80ff3594725c Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:46 +0100 Subject: [PATCH 0311/1386] net: usb: lan78xx: Fix return value handling in lan78xx_set_features Update `lan78xx_set_features` to correctly return the result of `lan78xx_write_reg`. This ensures that errors during register writes are propagated to the caller. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-7-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 442b6ee2dd46..71f8176210c9 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2822,9 +2822,7 @@ static int lan78xx_set_features(struct net_device *netdev, spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags); - lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); - - return 0; + return lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); } static void lan78xx_deferred_vlan_write(struct work_struct *param) From 21fff45a6cc1b5fceeedc5f6e2ccb118d4c19063 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:49 +0100 Subject: [PATCH 0312/1386] net: usb: lan78xx: Improve error handling in lan78xx_phy_wait_not_busy Update `lan78xx_phy_wait_not_busy` to forward errors from `lan78xx_read_reg` instead of overwriting them with `-EIO`. Replace `-EIO` with `-ETIMEDOUT` for timeout cases, providing more specific and appropriate error codes. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-10-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 71f8176210c9..7e7407748426 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -963,14 +963,14 @@ static int lan78xx_phy_wait_not_busy(struct lan78xx_net *dev) do { ret = lan78xx_read_reg(dev, MII_ACC, &val); - if (unlikely(ret < 0)) - return -EIO; + if (ret < 0) + return ret; if (!(val & MII_ACC_MII_BUSY_)) return 0; } while (!time_after(jiffies, start_time + HZ)); - return -EIO; + return -ETIMEDOUT; } static inline u32 mii_access(int id, int index, int read) From 530f17e6cb3bac67723dd90b289a381cc04a52e8 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 9 Dec 2024 14:07:50 +0100 Subject: [PATCH 0313/1386] net: usb: lan78xx: Rename lan78xx_phy_wait_not_busy to lan78xx_mdiobus_wait_not_busy Rename `lan78xx_phy_wait_not_busy` to `lan78xx_mdiobus_wait_not_busy` for clarity and accuracy, as the function operates on the MII bus rather than a specific PHY. Update all references to reflect the new name. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241209130751.703182-11-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 7e7407748426..4661d131b190 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -955,7 +955,7 @@ static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev) } /* Loop until the read is completed with timeout called with phy_mutex held */ -static int lan78xx_phy_wait_not_busy(struct lan78xx_net *dev) +static int lan78xx_mdiobus_wait_not_busy(struct lan78xx_net *dev) { unsigned long start_time = jiffies; u32 val; @@ -1604,7 +1604,7 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev) * bus can result in the MAC interface locking up and not * completing register access transactions. */ - ret = lan78xx_phy_wait_not_busy(dev); + ret = lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) goto done; @@ -2230,7 +2230,7 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) mutex_lock(&dev->phy_mutex); /* confirm MII not busy */ - ret = lan78xx_phy_wait_not_busy(dev); + ret = lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) goto done; @@ -2240,7 +2240,7 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) if (ret < 0) goto done; - ret = lan78xx_phy_wait_not_busy(dev); + ret = lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) goto done; @@ -2271,7 +2271,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, mutex_lock(&dev->phy_mutex); /* confirm MII not busy */ - ret = lan78xx_phy_wait_not_busy(dev); + ret = lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) goto done; @@ -2286,7 +2286,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, if (ret < 0) goto done; - ret = lan78xx_phy_wait_not_busy(dev); + ret = 
lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) goto done; From 54d3970548bd9de40f921c95f8c31e1b1b2382cb Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 9 Dec 2024 11:24:11 +0500 Subject: [PATCH 0314/1386] net: renesas: rswitch: enable only used MFWD features Currently, rswitch driver does not utilize most of MFWD forwarding and processing features. It only uses port-based forwarding for ETHA ports, and direct descriptor forwarding for GWCA port. Update rswitch_fwd_init() to enable exactly that, and keep everything else disabled. Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/rswitch.c | 30 +++++++++++++++++--------- drivers/net/ethernet/renesas/rswitch.h | 14 ++++++------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index b754cc96e268..ec52f0416389 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -111,25 +111,35 @@ static void rswitch_top_init(struct rswitch_private *priv) /* Forwarding engine block (MFWD) */ static void rswitch_fwd_init(struct rswitch_private *priv) { + u32 all_ports_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0); unsigned int i; - /* For ETHA */ - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { - iowrite32(FWPC0_DEFAULT, priv->addr + FWPC0(i)); + /* Start with empty configuration */ + for (i = 0; i < RSWITCH_NUM_AGENTS; i++) { + /* Disable all port features */ + iowrite32(0, priv->addr + FWPC0(i)); + /* Disallow L3 forwarding and direct descriptor forwarding */ + iowrite32(FIELD_PREP(FWCP1_LTHFW, all_ports_mask), + priv->addr + FWPC1(i)); + /* Disallow L2 forwarding */ + iowrite32(FIELD_PREP(FWCP2_LTWFW, all_ports_mask), + priv->addr + FWPC2(i)); + /* Disallow port based forwarding */ iowrite32(0, priv->addr + FWPBFC(i)); } - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + /* For enabled ETHA 
ports, setup port based forwarding */ + rswitch_for_each_enabled_port(priv, i) { + /* Port based forwarding from port i to GWCA port */ + rswitch_modify(priv->addr, FWPBFC(i), FWPBFC_PBDV, + FIELD_PREP(FWPBFC_PBDV, BIT(priv->gwca.index))); + /* Within GWCA port, forward to Rx queue for port i */ iowrite32(priv->rdev[i]->rx_queue->index, priv->addr + FWPBFCSDC(GWCA_INDEX, i)); - iowrite32(BIT(priv->gwca.index), priv->addr + FWPBFC(i)); } - /* For GWCA */ - iowrite32(FWPC0_DEFAULT, priv->addr + FWPC0(priv->gwca.index)); - iowrite32(FWPC1_DDE, priv->addr + FWPC1(priv->gwca.index)); - iowrite32(0, priv->addr + FWPBFC(priv->gwca.index)); - iowrite32(GENMASK(RSWITCH_NUM_PORTS - 1, 0), priv->addr + FWPBFC(priv->gwca.index)); + /* For GWCA port, allow direct descriptor forwarding */ + rswitch_modify(priv->addr, FWPC1(priv->gwca.index), FWPC1_DDE, FWPC1_DDE); } /* Gateway CPU agent block (GWCA) */ diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 303883369b94..0f06f34db821 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -12,6 +12,7 @@ #define RSWITCH_MAX_NUM_QUEUES 128 +#define RSWITCH_NUM_AGENTS 5 #define RSWITCH_NUM_PORTS 3 #define rswitch_for_each_enabled_port(priv, i) \ for (i = 0; i < RSWITCH_NUM_PORTS; i++) \ @@ -806,6 +807,7 @@ enum rswitch_gwca_mode { #define CABPPFLC_INIT_VALUE 0x00800080 /* MFWD */ +#define FWPC0(i) (FWPC00 + (i) * 0x10) #define FWPC0_LTHTA BIT(0) #define FWPC0_IP4UE BIT(3) #define FWPC0_IP4TE BIT(4) @@ -819,15 +821,15 @@ enum rswitch_gwca_mode { #define FWPC0_MACHMA BIT(27) #define FWPC0_VLANSA BIT(28) -#define FWPC0(i) (FWPC00 + (i) * 0x10) -#define FWPC0_DEFAULT (FWPC0_LTHTA | FWPC0_IP4UE | FWPC0_IP4TE | \ - FWPC0_IP4OE | FWPC0_L2SE | FWPC0_IP4EA | \ - FWPC0_IPDSA | FWPC0_IPHLA | FWPC0_MACSDA | \ - FWPC0_MACHLA | FWPC0_MACHMA | FWPC0_VLANSA) #define FWPC1(i) (FWPC10 + (i) * 0x10) +#define FWCP1_LTHFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) 
#define FWPC1_DDE BIT(0) -#define FWPBFC(i) (FWPBFC0 + (i) * 0x10) +#define FWPC2(i) (FWPC20 + (i) * 0x10) +#define FWCP2_LTWFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) + +#define FWPBFC(i) (FWPBFC0 + (i) * 0x10) +#define FWPBFC_PBDV GENMASK(RSWITCH_NUM_AGENTS - 1, 0) #define FWPBFCSDC(j, i) (FWPBFCSDC00 + (i) * 0x10 + (j) * 0x04) From c0b8980e6041afa363361e41fcafd7862721c3ee Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 9 Dec 2024 11:46:07 +0000 Subject: [PATCH 0315/1386] l2tp: Handle eth stats using NETDEV_PCPU_STAT_DSTATS. l2tp_eth uses the TSTATS infrastructure (dev_sw_netstats_*()) for RX and TX packet counters and DEV_STATS_INC for dropped counters. Consolidate that using the DSTATS infrastructure, which can handle both packet counters and packet drops. Statistics that don't fit DSTATS are still updated atomically with DEV_STATS_INC(). This change is inspired by the introduction of DSTATS helpers and their use in other udp tunnel drivers: Link: https://lore.kernel.org/all/cover.1733313925.git.gnault@redhat.com/ Signed-off-by: James Chapman Reviewed-by: Guillaume Nault Signed-off-by: David S. 
Miller --- net/l2tp/l2tp_eth.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index d692b902e120..e83691073496 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -73,9 +73,9 @@ static netdev_tx_t l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev int ret = l2tp_xmit_skb(session, skb); if (likely(ret == NET_XMIT_SUCCESS)) - dev_sw_netstats_tx_add(dev, 1, len); + dev_dstats_tx_add(dev, len); else - DEV_STATS_INC(dev, tx_dropped); + dev_dstats_tx_dropped(dev); return NETDEV_TX_OK; } @@ -84,7 +84,6 @@ static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_init = l2tp_eth_dev_init, .ndo_uninit = l2tp_eth_dev_uninit, .ndo_start_xmit = l2tp_eth_dev_xmit, - .ndo_get_stats64 = dev_get_tstats64, .ndo_set_mac_address = eth_mac_addr, }; @@ -100,7 +99,7 @@ static void l2tp_eth_dev_setup(struct net_device *dev) dev->lltx = true; dev->netdev_ops = &l2tp_eth_netdev_ops; dev->needs_free_netdev = true; - dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; + dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) @@ -128,7 +127,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, goto error_rcu; if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) - dev_sw_netstats_rx_add(dev, data_len); + dev_dstats_rx_add(dev, data_len); else DEV_STATS_INC(dev, rx_errors); From 02f41c8aa643b0d329ee9fa3f3341919bf86b759 Mon Sep 17 00:00:00 2001 From: Baochen Qiang Date: Fri, 6 Dec 2024 13:45:52 +0800 Subject: [PATCH 0316/1386] wifi: ath12k: fix leaking michael_mic for non-primary links In ath12k_dp_rx_peer_frag_setup(), commit ea4192553850 ("wifi: ath12k: add primary link for data path operations") checks whether a link is the primary link, and returns directly if it isn't. 
In ML scenario where we have non-primary links created, this results in leaking the michael_mic info since it is allocated by default but could never be freed for a non-primary link. Note that we can not move the might-sleep allocation after primary link check since there we are in atomic context (due to spin lock). So keep the default allocation, and then free it before return to fix this issue. Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0-03427-QCAHMTSWPL_V1.0_V2.0_SILICONZ-1.15378.4 Fixes: ea4192553850 ("wifi: ath12k: add primary link for data path operations") Signed-off-by: Baochen Qiang Acked-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241206054552.177424-1-quic_bqiang@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index b24d1de4aabb..f8e79eff2089 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2830,6 +2830,7 @@ int ath12k_dp_rx_peer_frag_setup(struct ath12k *ar, const u8 *peer_mac, int vdev if (!peer->primary_link) { spin_unlock_bh(&ab->base_lock); + crypto_free_shash(tfm); return 0; } From 58fa8109fa8dae2947567e8f56dbd55ad81bc35c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 6 Dec 2024 15:52:29 +0300 Subject: [PATCH 0317/1386] wifi: ath12k: Off by one in ath12k_wmi_process_csa_switch_count_event() The ahvif->vif->link_conf[] array has IEEE80211_MLD_MAX_NUM_LINKS elements so this should be >= instead of > to avoid an out of bounds access. 
Fixes: 3952657848c0 ("wifi: ath12k: Use mac80211 vif's link_conf instead of bss_conf") Signed-off-by: Dan Carpenter Acked-by: Kalle Valo Link: https://patch.msgid.link/755becb1-819b-484d-8fac-9a2db53ced1b@stanley.mountain Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 402ae477da61..46c5027e4f1c 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6873,7 +6873,7 @@ ath12k_wmi_process_csa_switch_count_event(struct ath12k_base *ab, } ahvif = arvif->ahvif; - if (arvif->link_id > IEEE80211_MLD_MAX_NUM_LINKS) { + if (arvif->link_id >= IEEE80211_MLD_MAX_NUM_LINKS) { ath12k_warn(ab, "Invalid CSA switch count even link id: %d\n", arvif->link_id); continue; From 1015d61570802c7e0794585934d3bc5e09de743e Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 6 Dec 2024 13:57:10 +0800 Subject: [PATCH 0318/1386] wifi: rtw89: ps: refactor PS flow to support MLO Firmware can only support PS on single one VIF operating in station mode, so argument of PS entry rtw89_enter_lps() should be rtwvif instead of rtwvif_link. To enter PS under MLO, for each rtwvif, driver sends H2C command to tell firmware which mac_id will enter PS one by one, and afterward asks firmware to enter deep PS. 
Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 26 ++++++------------ drivers/net/wireless/realtek/rtw89/ps.c | 33 ++++++++++++----------- drivers/net/wireless/realtek/rtw89/ps.h | 4 +-- drivers/net/wireless/realtek/rtw89/wow.c | 6 ++--- 4 files changed, 29 insertions(+), 40 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 29d0ac502bab..0519b0826281 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2729,10 +2729,6 @@ static enum rtw89_ps_mode rtw89_update_ps_mode(struct rtw89_dev *rtwdev) { const struct rtw89_chip_info *chip = rtwdev->chip; - /* FIXME: Fix __rtw89_enter_ps_mode() to consider MLO cases. */ - if (rtwdev->support_mlo) - return RTW89_PS_MODE_NONE; - if (rtw89_disable_ps_mode || !chip->ps_mode_supported || RTW89_CHK_FW_FEATURE(NO_DEEP_PS, &rtwdev->fw)) return RTW89_PS_MODE_NONE; @@ -3469,21 +3465,10 @@ static bool rtw89_traffic_stats_track(struct rtw89_dev *rtwdev) return tfc_changed; } -static void rtw89_vif_enter_lps(struct rtw89_dev *rtwdev, - struct rtw89_vif_link *rtwvif_link) -{ - if (rtwvif_link->wifi_role != RTW89_WIFI_ROLE_STATION && - rtwvif_link->wifi_role != RTW89_WIFI_ROLE_P2P_CLIENT) - return; - - rtw89_enter_lps(rtwdev, rtwvif_link, true); -} - static void rtw89_enter_lps_track(struct rtw89_dev *rtwdev) { - struct rtw89_vif_link *rtwvif_link; + struct ieee80211_vif *vif; struct rtw89_vif *rtwvif; - unsigned int link_id; rtw89_for_each_rtwvif(rtwdev, rtwvif) { if (rtwvif->tdls_peer) @@ -3495,8 +3480,13 @@ static void rtw89_enter_lps_track(struct rtw89_dev *rtwdev) rtwvif->stats.rx_tfc_lv != RTW89_TFC_IDLE) continue; - rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) - rtw89_vif_enter_lps(rtwdev, rtwvif_link); + vif = rtwvif_to_vif(rtwvif); + + if (!(vif->type == NL80211_IFTYPE_STATION || + vif->type == 
NL80211_IFTYPE_P2P_CLIENT)) + continue; + + rtw89_enter_lps(rtwdev, rtwvif, true); } } diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c index c1c12abc2ea9..5e3a5e3c9776 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.c +++ b/drivers/net/wireless/realtek/rtw89/ps.c @@ -62,11 +62,8 @@ static void rtw89_ps_power_mode_change(struct rtw89_dev *rtwdev, bool enter) rtw89_mac_power_mode_change(rtwdev, enter); } -void __rtw89_enter_ps_mode(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link) +void __rtw89_enter_ps_mode(struct rtw89_dev *rtwdev) { - if (rtwvif_link->wifi_role == RTW89_WIFI_ROLE_P2P_CLIENT) - return; - if (!rtwdev->ps_mode) return; @@ -85,8 +82,8 @@ void __rtw89_leave_ps_mode(struct rtw89_dev *rtwdev) rtw89_ps_power_mode_change(rtwdev, false); } -static void __rtw89_enter_lps(struct rtw89_dev *rtwdev, - struct rtw89_vif_link *rtwvif_link) +static void __rtw89_enter_lps_link(struct rtw89_dev *rtwdev, + struct rtw89_vif_link *rtwvif_link) { struct rtw89_lps_parm lps_param = { .macid = rtwvif_link->mac_id, @@ -121,17 +118,27 @@ void rtw89_leave_ps_mode(struct rtw89_dev *rtwdev) __rtw89_leave_ps_mode(rtwdev); } -void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, +void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, bool ps_mode) { + struct rtw89_vif_link *rtwvif_link; + bool can_ps_mode = true; + unsigned int link_id; + lockdep_assert_held(&rtwdev->mutex); if (test_and_set_bit(RTW89_FLAG_LEISURE_PS, rtwdev->flags)) return; - __rtw89_enter_lps(rtwdev, rtwvif_link); - if (ps_mode) - __rtw89_enter_ps_mode(rtwdev, rtwvif_link); + rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) { + __rtw89_enter_lps_link(rtwdev, rtwvif_link); + + if (rtwvif_link->wifi_role == RTW89_WIFI_ROLE_P2P_CLIENT) + can_ps_mode = false; + } + + if (ps_mode && can_ps_mode) + __rtw89_enter_ps_mode(rtwdev); } static void rtw89_leave_lps_vif(struct rtw89_dev *rtwdev, @@ -282,12 +289,6 
@@ void rtw89_recalc_lps(struct rtw89_dev *rtwdev) enum rtw89_entity_mode mode; int count = 0; - /* FIXME: Fix rtw89_enter_lps() and __rtw89_enter_ps_mode() - * to take MLO cases into account before doing the following. - */ - if (rtwdev->support_mlo) - goto disable_lps; - mode = rtw89_get_entity_mode(rtwdev); if (mode == RTW89_ENTITY_MODE_MCC) goto disable_lps; diff --git a/drivers/net/wireless/realtek/rtw89/ps.h b/drivers/net/wireless/realtek/rtw89/ps.h index cdd712966b09..2b88f254a32d 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.h +++ b/drivers/net/wireless/realtek/rtw89/ps.h @@ -5,11 +5,11 @@ #ifndef __RTW89_PS_H_ #define __RTW89_PS_H_ -void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, +void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, bool ps_mode); void rtw89_leave_lps(struct rtw89_dev *rtwdev); void __rtw89_leave_ps_mode(struct rtw89_dev *rtwdev); -void __rtw89_enter_ps_mode(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link); +void __rtw89_enter_ps_mode(struct rtw89_dev *rtwdev); void rtw89_leave_ps_mode(struct rtw89_dev *rtwdev); void rtw89_enter_ips(struct rtw89_dev *rtwdev); void rtw89_leave_ips(struct rtw89_dev *rtwdev); diff --git a/drivers/net/wireless/realtek/rtw89/wow.c b/drivers/net/wireless/realtek/rtw89/wow.c index 1e1dbb20d47a..01754d031bb4 100644 --- a/drivers/net/wireless/realtek/rtw89/wow.c +++ b/drivers/net/wireless/realtek/rtw89/wow.c @@ -694,9 +694,7 @@ static void rtw89_wow_leave_deep_ps(struct rtw89_dev *rtwdev) static void rtw89_wow_enter_deep_ps(struct rtw89_dev *rtwdev) { - struct rtw89_vif_link *rtwvif_link = rtwdev->wow.rtwvif_link; - - __rtw89_enter_ps_mode(rtwdev, rtwvif_link); + __rtw89_enter_ps_mode(rtwdev); } static void rtw89_wow_enter_ps(struct rtw89_dev *rtwdev) @@ -704,7 +702,7 @@ static void rtw89_wow_enter_ps(struct rtw89_dev *rtwdev) struct rtw89_vif_link *rtwvif_link = rtwdev->wow.rtwvif_link; if (rtw89_wow_mgd_linked(rtwdev)) - 
rtw89_enter_lps(rtwdev, rtwvif_link, false); + rtw89_enter_lps(rtwdev, rtwvif_link->rtwvif, false); else if (rtw89_wow_no_link(rtwdev)) rtw89_fw_h2c_fwips(rtwdev, rtwvif_link, true); } From 8c86036693a3c7e24008734f01109f14807e7347 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 6 Dec 2024 13:57:11 +0800 Subject: [PATCH 0319/1386] wifi: rtw89: ps: refactor channel info to firmware before entering PS In PS mode, firmware needs hardware parameters related to channel info to configure hardware itself. Before entering PS, driver prepares these info to firmware via firmware H2C command. Since firmware only consider PS for single one vif, change the argument of entry function to rtwvif, and only consider first link for this old H2C command that only support legacy. Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-3-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/fw.c | 35 +++++++++++++++++++------ drivers/net/wireless/realtek/rtw89/fw.h | 3 +-- drivers/net/wireless/realtek/rtw89/ps.c | 3 ++- 3 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index c604ea1d39f1..d17c6037c9a6 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -2594,14 +2594,17 @@ fail: return ret; } -int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link) +int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif) { - const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, - rtwvif_link->chanctx_idx); const struct rtw89_chip_info *chip = rtwdev->chip; + const struct rtw89_chan *chan; + struct rtw89_vif_link *rtwvif_link; struct rtw89_h2c_lps_ch_info *h2c; u32 len = sizeof(*h2c); + unsigned int link_id; struct sk_buff *skb; + bool no_chan = true; + u8 phy_idx; u32 done; int ret; @@ -2616,11 +2619,27 @@ int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct 
rtw89_vif_link *rt skb_put(skb, len); h2c = (struct rtw89_h2c_lps_ch_info *)skb->data; - h2c->info[0].central_ch = chan->channel; - h2c->info[0].pri_ch = chan->primary_channel; - h2c->info[0].band = chan->band_type; - h2c->info[0].bw = chan->band_width; - h2c->mlo_dbcc_mode_lps = cpu_to_le32(MLO_2_PLUS_0_1RF); + rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) { + phy_idx = rtwvif_link->phy_idx; + if (phy_idx >= ARRAY_SIZE(h2c->info)) + continue; + + chan = rtw89_chan_get(rtwdev, rtwvif_link->chanctx_idx); + no_chan = false; + + h2c->info[phy_idx].central_ch = chan->channel; + h2c->info[phy_idx].pri_ch = chan->primary_channel; + h2c->info[phy_idx].band = chan->band_type; + h2c->info[phy_idx].bw = chan->band_width; + } + + if (no_chan) { + rtw89_err(rtwdev, "no chan for h2c lps_ch_info\n"); + ret = -ENOENT; + goto fail; + } + + h2c->mlo_dbcc_mode_lps = cpu_to_le32(rtwdev->mlo_dbcc_mode); rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, H2C_CAT_OUTSRC, H2C_CL_OUTSRC_DM, diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 95681c390bb8..b38705868caa 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -4638,8 +4638,7 @@ int rtw89_fw_h2c_init_ba_cam_users(struct rtw89_dev *rtwdev, u8 users, int rtw89_fw_h2c_lps_parm(struct rtw89_dev *rtwdev, struct rtw89_lps_parm *lps_param); -int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, - struct rtw89_vif_link *rtwvif_link); +int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif); int rtw89_fw_h2c_fwips(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, bool enable); struct sk_buff *rtw89_fw_h2c_alloc_skb_with_hdr(struct rtw89_dev *rtwdev, u32 len); diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c index 5e3a5e3c9776..a8b4b9095dc8 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.c +++ b/drivers/net/wireless/realtek/rtw89/ps.c @@ -93,7 +93,6 
@@ static void __rtw89_enter_lps_link(struct rtw89_dev *rtwdev, rtw89_btc_ntfy_radio_state(rtwdev, BTC_RFCTRL_FW_CTRL); rtw89_fw_h2c_lps_parm(rtwdev, &lps_param); - rtw89_fw_h2c_lps_ch_info(rtwdev, rtwvif_link); } static void __rtw89_leave_lps(struct rtw89_dev *rtwdev, @@ -137,6 +136,8 @@ void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, can_ps_mode = false; } + rtw89_fw_h2c_lps_ch_info(rtwdev, rtwvif); + if (ps_mode && can_ps_mode) __rtw89_enter_ps_mode(rtwdev); } From 5b4ca804792a3128ee56ac74a390358fabba6fa3 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Fri, 6 Dec 2024 13:57:12 +0800 Subject: [PATCH 0320/1386] wifi: rtw89: ps: update data for firmware and settings for hardware before/after PS For MLO supported IC, send H2C command to firmware before PS with link information for each PHY for MLO to work properly. And re-init hardware settings regarding to RX descriptor information after PS. Signed-off-by: Eric Huang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-4-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 2 + drivers/net/wireless/realtek/rtw89/core.h | 2 + drivers/net/wireless/realtek/rtw89/fw.c | 82 +++++++++++++++++++++++ drivers/net/wireless/realtek/rtw89/fw.h | 18 +++++ drivers/net/wireless/realtek/rtw89/phy.c | 6 ++ drivers/net/wireless/realtek/rtw89/phy.h | 1 + drivers/net/wireless/realtek/rtw89/ps.c | 8 ++- 7 files changed, 118 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index 0519b0826281..ee6ad185135c 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -2189,6 +2189,8 @@ static void rtw89_vif_rx_stats_iter(void *data, u8 *mac, if (phy_ppdu) ewma_rssi_add(&rtwdev->phystat.bcn_rssi, phy_ppdu->rssi_avg); + + pkt_stat->beacon_rate = desc_info->data_rate; } if (!ether_addr_equal(bss_conf->addr, hdr->addr1)) diff --git 
a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 82844e470d1b..ec2a80af04bb 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4466,6 +4466,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_NO_WOW_CPU_IO_RX, RTW89_FW_FEATURE_NOTIFY_AP_INFO, RTW89_FW_FEATURE_CH_INFO_BE_V0, + RTW89_FW_FEATURE_LPS_CH_INFO, }; struct rtw89_fw_suit { @@ -4835,6 +4836,7 @@ struct rtw89_pkt_drop_params { struct rtw89_pkt_stat { u16 beacon_nr; + u8 beacon_rate; u32 rx_rate_cnt[RTW89_HW_RATE_NR]; }; diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index d17c6037c9a6..e5f3efe3a7e6 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -729,6 +729,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 12, 0, BEACON_FILTER), __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 22, 0, WOW_REASON_V1), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 31, 0, RFK_PRE_NOTIFY_V0), + __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 31, 0, LPS_CH_INFO), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 42, 0, RFK_RXDCK_V0), __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 46, 0, NOTIFY_AP_INFO), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 47, 0, CH_INFO_BE_V0), @@ -2664,6 +2665,87 @@ fail: return ret; } +int rtw89_fw_h2c_lps_ml_cmn_info(struct rtw89_dev *rtwdev, + struct rtw89_vif *rtwvif) +{ + const struct rtw89_phy_bb_gain_info_be *gain = &rtwdev->bb_gain.be; + struct rtw89_pkt_stat *pkt_stat = &rtwdev->phystat.cur_pkt_stat; + const struct rtw89_chip_info *chip = rtwdev->chip; + struct rtw89_h2c_lps_ml_cmn_info *h2c; + struct rtw89_vif_link *rtwvif_link; + const struct rtw89_chan *chan; + u8 bw_idx = RTW89_BB_BW_20_40; + u32 len = sizeof(*h2c); + unsigned int link_id; + struct sk_buff *skb; + u8 gain_band; + u32 done; + u8 path; + int ret; + int i; + + if (chip->chip_gen != RTW89_CHIP_BE) + return 0; + + skb = rtw89_fw_h2c_alloc_skb_with_hdr(rtwdev, len); + if 
(!skb) { + rtw89_err(rtwdev, "failed to alloc skb for h2c lps_ml_cmn_info\n"); + return -ENOMEM; + } + skb_put(skb, len); + h2c = (struct rtw89_h2c_lps_ml_cmn_info *)skb->data; + + h2c->fmt_id = 0x1; + + h2c->mlo_dbcc_mode = cpu_to_le32(rtwdev->mlo_dbcc_mode); + + rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) { + path = rtwvif_link->phy_idx == RTW89_PHY_1 ? RF_PATH_B : RF_PATH_A; + chan = rtw89_chan_get(rtwdev, rtwvif_link->chanctx_idx); + gain_band = rtw89_subband_to_gain_band_be(chan->subband_type); + + h2c->central_ch[rtwvif_link->phy_idx] = chan->channel; + h2c->pri_ch[rtwvif_link->phy_idx] = chan->primary_channel; + h2c->band[rtwvif_link->phy_idx] = chan->band_type; + h2c->bw[rtwvif_link->phy_idx] = chan->band_width; + if (pkt_stat->beacon_rate < RTW89_HW_RATE_OFDM6) + h2c->bcn_rate_type[rtwvif_link->phy_idx] = 0x1; + else + h2c->bcn_rate_type[rtwvif_link->phy_idx] = 0x2; + + /* Fill BW20 RX gain table for beacon mode */ + for (i = 0; i < TIA_GAIN_NUM; i++) { + h2c->tia_gain[rtwvif_link->phy_idx][i] = + cpu_to_le16(gain->tia_gain[gain_band][bw_idx][path][i]); + } + memcpy(h2c->lna_gain[rtwvif_link->phy_idx], + gain->lna_gain[gain_band][bw_idx][path], + LNA_GAIN_NUM); + } + + rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, + H2C_CAT_OUTSRC, H2C_CL_OUTSRC_DM, + H2C_FUNC_FW_LPS_ML_CMN_INFO, 0, 0, len); + + rtw89_phy_write32_mask(rtwdev, R_CHK_LPS_STAT, B_CHK_LPS_STAT, 0); + ret = rtw89_h2c_tx(rtwdev, skb, false); + if (ret) { + rtw89_err(rtwdev, "failed to send h2c\n"); + goto fail; + } + + ret = read_poll_timeout(rtw89_phy_read32_mask, done, done, 50, 5000, + true, rtwdev, R_CHK_LPS_STAT, B_CHK_LPS_STAT); + if (ret) + rtw89_warn(rtwdev, "h2c_lps_ml_cmn_info done polling timeout\n"); + + return 0; +fail: + dev_kfree_skb_any(skb); + + return ret; +} + #define H2C_P2P_ACT_LEN 20 int rtw89_fw_h2c_p2p_act(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, diff --git a/drivers/net/wireless/realtek/rtw89/fw.h 
b/drivers/net/wireless/realtek/rtw89/fw.h index b38705868caa..2dfc584da7d6 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -1783,6 +1783,21 @@ struct rtw89_h2c_lps_ch_info { __le32 mlo_dbcc_mode_lps; } __packed; +struct rtw89_h2c_lps_ml_cmn_info { + u8 fmt_id; + u8 rsvd0[3]; + __le32 mlo_dbcc_mode; + u8 central_ch[RTW89_PHY_MAX]; + u8 pri_ch[RTW89_PHY_MAX]; + u8 bw[RTW89_PHY_MAX]; + u8 band[RTW89_PHY_MAX]; + u8 bcn_rate_type[RTW89_PHY_MAX]; + u8 rsvd1[2]; + __le16 tia_gain[RTW89_PHY_MAX][TIA_GAIN_NUM]; + u8 lna_gain[RTW89_PHY_MAX][LNA_GAIN_NUM]; + u8 rsvd2[2]; +} __packed; + static inline void RTW89_SET_FWCMD_CPU_EXCEPTION_TYPE(void *cmd, u32 val) { le32p_replace_bits((__le32 *)cmd, val, GENMASK(31, 0)); @@ -4211,6 +4226,7 @@ enum rtw89_mrc_h2c_func { #define H2C_CL_OUTSRC_DM 0x2 #define H2C_FUNC_FW_LPS_CH_INFO 0xb +#define H2C_FUNC_FW_LPS_ML_CMN_INFO 0xe #define H2C_CL_OUTSRC_RF_REG_A 0x8 #define H2C_CL_OUTSRC_RF_REG_B 0x9 @@ -4639,6 +4655,8 @@ int rtw89_fw_h2c_init_ba_cam_users(struct rtw89_dev *rtwdev, u8 users, int rtw89_fw_h2c_lps_parm(struct rtw89_dev *rtwdev, struct rtw89_lps_parm *lps_param); int rtw89_fw_h2c_lps_ch_info(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif); +int rtw89_fw_h2c_lps_ml_cmn_info(struct rtw89_dev *rtwdev, + struct rtw89_vif *rtwvif); int rtw89_fw_h2c_fwips(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, bool enable); struct sk_buff *rtw89_fw_h2c_alloc_skb_with_hdr(struct rtw89_dev *rtwdev, u32 len); diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 8d36bf962732..1d4d3dcce060 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -6519,6 +6519,12 @@ void rtw89_phy_dm_init(struct rtw89_dev *rtwdev) rtw89_chip_cfg_txrx_path(rtwdev); } +void rtw89_phy_dm_reinit(struct rtw89_dev *rtwdev) +{ + rtw89_phy_env_monitor_init(rtwdev); + rtw89_physts_parsing_init(rtwdev); +} + 
void rtw89_phy_set_bss_color(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link) { diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index e6d06f0a6c09..cf33c1655b7a 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -813,6 +813,7 @@ void rtw89_phy_config_rf_reg_v1(struct rtw89_dev *rtwdev, enum rtw89_rf_path rf_path, void *extra_data); void rtw89_phy_dm_init(struct rtw89_dev *rtwdev); +void rtw89_phy_dm_reinit(struct rtw89_dev *rtwdev); void rtw89_phy_write32_idx(struct rtw89_dev *rtwdev, u32 addr, u32 mask, u32 data, enum rtw89_phy_idx phy_idx); void rtw89_phy_write32_idx_set(struct rtw89_dev *rtwdev, u32 addr, u32 bits, diff --git a/drivers/net/wireless/realtek/rtw89/ps.c b/drivers/net/wireless/realtek/rtw89/ps.c index a8b4b9095dc8..96ea04d90cd3 100644 --- a/drivers/net/wireless/realtek/rtw89/ps.c +++ b/drivers/net/wireless/realtek/rtw89/ps.c @@ -8,6 +8,7 @@ #include "debug.h" #include "fw.h" #include "mac.h" +#include "phy.h" #include "ps.h" #include "reg.h" #include "util.h" @@ -136,7 +137,10 @@ void rtw89_enter_lps(struct rtw89_dev *rtwdev, struct rtw89_vif *rtwvif, can_ps_mode = false; } - rtw89_fw_h2c_lps_ch_info(rtwdev, rtwvif); + if (RTW89_CHK_FW_FEATURE(LPS_CH_INFO, &rtwdev->fw)) + rtw89_fw_h2c_lps_ch_info(rtwdev, rtwvif); + else + rtw89_fw_h2c_lps_ml_cmn_info(rtwdev, rtwvif); if (ps_mode && can_ps_mode) __rtw89_enter_ps_mode(rtwdev); @@ -165,6 +169,8 @@ void rtw89_leave_lps(struct rtw89_dev *rtwdev) __rtw89_leave_ps_mode(rtwdev); + rtw89_phy_dm_reinit(rtwdev); + rtw89_for_each_rtwvif(rtwdev, rtwvif) rtw89_vif_for_each_link(rtwvif, rtwvif_link, link_id) rtw89_leave_lps_vif(rtwdev, rtwvif_link); From f0441c540fe808570c275a5700adc42b2cfd914b Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Fri, 6 Dec 2024 13:57:13 +0800 Subject: [PATCH 0321/1386] wifi: rtw89: disable firmware training HE GI and LTF Given the performance trade-off associated 
with firmware training HE GI/LTF, especially in high attenuation environments, we have decided to utilize a constant value instead. Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-5-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 10 ++++++++++ drivers/net/wireless/realtek/rtw89/phy.c | 24 +++++++++++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index ec2a80af04bb..15967978bf4a 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -6875,6 +6875,16 @@ bool rtw89_sta_has_beamformer_cap(struct ieee80211_link_sta *link_sta) return false; } +static inline +bool rtw89_sta_link_has_su_mu_4xhe08(struct ieee80211_link_sta *link_sta) +{ + if (link_sta->he_cap.he_cap_elem.phy_cap_info[7] & + IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI) + return true; + + return false; +} + static inline struct rtw89_fw_suit *rtw89_fw_suit_get(struct rtw89_dev *rtwdev, enum rtw89_fw_type type) { diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 1d4d3dcce060..604ea048c3ab 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -264,16 +264,26 @@ rtw89_ra_mask_eht_rates[4] = {RA_MASK_EHT_1SS_RATES, RA_MASK_EHT_2SS_RATES, static void rtw89_phy_ra_gi_ltf(struct rtw89_dev *rtwdev, struct rtw89_sta_link *rtwsta_link, + struct ieee80211_link_sta *link_sta, const struct rtw89_chan *chan, bool *fix_giltf_en, u8 *fix_giltf) { struct cfg80211_bitrate_mask *mask = &rtwsta_link->mask; u8 band = chan->band_type; enum nl80211_band nl_band = rtw89_hw_to_nl80211_band(band); - u8 he_gi = mask->control[nl_band].he_gi; u8 he_ltf = mask->control[nl_band].he_ltf; + u8 he_gi = mask->control[nl_band].he_gi; - if (!rtwsta_link->use_cfg_mask) + *fix_giltf_en = 
true; + + if (rtwdev->chip->chip_id == RTL8852C && + chan->band_width == RTW89_CHANNEL_WIDTH_160 && + rtw89_sta_link_has_su_mu_4xhe08(link_sta)) + *fix_giltf = RTW89_GILTF_SGI_4XHE08; + else + *fix_giltf = RTW89_GILTF_2XHE08; + + if (!(rtwsta_link->use_cfg_mask && link_sta->he_cap.has_he)) return; if (he_ltf == 2 && he_gi == 2) { @@ -288,12 +298,7 @@ static void rtw89_phy_ra_gi_ltf(struct rtw89_dev *rtwdev, *fix_giltf = RTW89_GILTF_1XHE16; } else if (he_ltf == 0 && he_gi == 0) { *fix_giltf = RTW89_GILTF_1XHE08; - } else { - *fix_giltf_en = false; - return; } - - *fix_giltf_en = true; } static void rtw89_phy_ra_sta_update(struct rtw89_dev *rtwdev, @@ -326,6 +331,8 @@ static void rtw89_phy_ra_sta_update(struct rtw89_dev *rtwdev, mode |= RTW89_RA_MODE_EHT; ra_mask |= get_eht_ra_mask(link_sta); high_rate_masks = rtw89_ra_mask_eht_rates; + rtw89_phy_ra_gi_ltf(rtwdev, rtwsta_link, link_sta, + chan, &fix_giltf_en, &fix_giltf); } else if (link_sta->he_cap.has_he) { mode |= RTW89_RA_MODE_HE; csi_mode = RTW89_RA_RPT_MODE_HE; @@ -337,7 +344,8 @@ static void rtw89_phy_ra_sta_update(struct rtw89_dev *rtwdev, if (link_sta->he_cap.he_cap_elem.phy_cap_info[1] & IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD) ldpc_en = 1; - rtw89_phy_ra_gi_ltf(rtwdev, rtwsta_link, chan, &fix_giltf_en, &fix_giltf); + rtw89_phy_ra_gi_ltf(rtwdev, rtwsta_link, link_sta, + chan, &fix_giltf_en, &fix_giltf); } else if (link_sta->vht_cap.vht_supported) { u16 mcs_map = le16_to_cpu(link_sta->vht_cap.vht_mcs.rx_mcs_map); From 9ddc6ee0b215783252fdab234661ece8c32e2c61 Mon Sep 17 00:00:00 2001 From: Kuan-Chung Chen Date: Fri, 6 Dec 2024 13:57:14 +0800 Subject: [PATCH 0322/1386] wifi: rtw89: 8852c: disable ER SU when 4x HE-LTF and 0.8 GI capability differ Since hardware only has a single register for HE-LTF setting, to prevent interoperability issues, 8852CE disables ER SU when the AP can handle SU/MU with 4x HE-LTF and 0.8 GI, but does not support ER SU with the same settings.
Signed-off-by: Kuan-Chung Chen Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-6-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.c | 24 ++++++++++++++++++----- drivers/net/wireless/realtek/rtw89/core.h | 10 ++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.c b/drivers/net/wireless/realtek/rtw89/core.c index ee6ad185135c..f848185e2ced 100644 --- a/drivers/net/wireless/realtek/rtw89/core.c +++ b/drivers/net/wireless/realtek/rtw89/core.c @@ -3844,6 +3844,22 @@ int rtw89_core_sta_link_disconnect(struct rtw89_dev *rtwdev, return ret; } +static bool rtw89_sta_link_can_er(struct rtw89_dev *rtwdev, + struct ieee80211_bss_conf *bss_conf, + struct ieee80211_link_sta *link_sta) +{ + if (!bss_conf->he_support || + bss_conf->he_oper.params & IEEE80211_HE_OPERATION_ER_SU_DISABLE) + return false; + + if (rtwdev->chip->chip_id == RTL8852C && + rtw89_sta_link_has_su_mu_4xhe08(link_sta) && + !rtw89_sta_link_has_er_su_4xhe08(link_sta)) + return false; + + return true; +} + int rtw89_core_sta_link_assoc(struct rtw89_dev *rtwdev, struct rtw89_vif_link *rtwvif_link, struct rtw89_sta_link *rtwsta_link) @@ -3854,12 +3870,11 @@ int rtw89_core_sta_link_assoc(struct rtw89_dev *rtwdev, rtwsta_link); const struct rtw89_chan *chan = rtw89_chan_get(rtwdev, rtwvif_link->chanctx_idx); + struct ieee80211_link_sta *link_sta; int ret; if (vif->type == NL80211_IFTYPE_AP || sta->tdls) { if (sta->tdls) { - struct ieee80211_link_sta *link_sta; - rcu_read_lock(); link_sta = rtw89_sta_rcu_dereference_link(rtwsta_link, true); @@ -3910,9 +3925,8 @@ int rtw89_core_sta_link_assoc(struct rtw89_dev *rtwdev, rcu_read_lock(); bss_conf = rtw89_vif_rcu_dereference_link(rtwvif_link, true); - if (bss_conf->he_support && - !(bss_conf->he_oper.params & IEEE80211_HE_OPERATION_ER_SU_DISABLE)) - rtwsta_link->er_cap = true; + link_sta = rtw89_sta_rcu_dereference_link(rtwsta_link, true); + rtwsta_link->er_cap = 
rtw89_sta_link_can_er(rtwdev, bss_conf, link_sta); rcu_read_unlock(); diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 15967978bf4a..c2b5eeb4a4f1 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -6885,6 +6885,16 @@ bool rtw89_sta_link_has_su_mu_4xhe08(struct ieee80211_link_sta *link_sta) return false; } +static inline +bool rtw89_sta_link_has_er_su_4xhe08(struct ieee80211_link_sta *link_sta) +{ + if (link_sta->he_cap.he_cap_elem.phy_cap_info[8] & + IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI) + return true; + + return false; +} + static inline struct rtw89_fw_suit *rtw89_fw_suit_get(struct rtw89_dev *rtwdev, enum rtw89_fw_type type) { From a2854ac3383032310db381b2cfda5b164d8585ec Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Fri, 6 Dec 2024 13:57:15 +0800 Subject: [PATCH 0323/1386] wifi: rtw89: regd: update regulatory map to R68-R51 Sync Realtek Channel Plan R68 and Realtek Regulatory R51. Configure 6 GHz field of Realtek regd for the following countries. BO, DO, EG, LS, MZ, NG, OM, ZW, PK, PH, TH, KM, CG, CD, GE, GI, GU, LR, MH, FM, MP, PW, MF, SX, SZ, TZ, VI Besides, add entries for the following countries. 
CU, SY, SD Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-7-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/regd.c | 57 ++++++++++++----------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/regd.c b/drivers/net/wireless/realtek/rtw89/regd.c index cad5189708e7..80b2f74589eb 100644 --- a/drivers/net/wireless/realtek/rtw89/regd.c +++ b/drivers/net/wireless/realtek/rtw89/regd.c @@ -17,7 +17,7 @@ static const struct rtw89_regd rtw89_ww_regd = static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("AR", RTW89_MEXICO, RTW89_MEXICO, RTW89_FCC), - COUNTRY_REGD("BO", RTW89_FCC, RTW89_FCC, RTW89_FCC), + COUNTRY_REGD("BO", RTW89_FCC, RTW89_FCC, RTW89_NA), COUNTRY_REGD("BR", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("CL", RTW89_CHILE, RTW89_CHILE, RTW89_CHILE), COUNTRY_REGD("CO", RTW89_FCC, RTW89_FCC, RTW89_FCC), @@ -35,7 +35,7 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("UY", RTW89_FCC, RTW89_FCC, RTW89_NA), COUNTRY_REGD("VE", RTW89_FCC, RTW89_FCC, RTW89_NA), COUNTRY_REGD("PR", RTW89_FCC, RTW89_FCC, RTW89_NA), - COUNTRY_REGD("DO", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("DO", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("AT", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("BE", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("CY", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), @@ -72,7 +72,7 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("BA", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("BG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("HR", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("EG", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("EG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("GH", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("IQ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("IL", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), @@ -82,13 +82,13 @@ static 
const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("KW", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("KG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("LB", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("LS", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("LS", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("MK", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MA", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("MZ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("MZ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("NA", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("NG", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("OM", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("NG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), + COUNTRY_REGD("OM", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("QA", RTW89_QATAR, RTW89_QATAR, RTW89_QATAR), COUNTRY_REGD("RO", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("RU", RTW89_ETSI, RTW89_ETSI, RTW89_NA), @@ -101,7 +101,7 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("UA", RTW89_UKRAINE, RTW89_UKRAINE, RTW89_UKRAINE), COUNTRY_REGD("AE", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("YE", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("ZW", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("ZW", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("BD", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("KH", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("CN", RTW89_CN, RTW89_CN, RTW89_CN), @@ -110,12 +110,12 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("ID", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("KR", RTW89_KCC, RTW89_KCC, RTW89_KCC), COUNTRY_REGD("MY", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("PK", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("PH", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("PK", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), + COUNTRY_REGD("PH", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), 
COUNTRY_REGD("SG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("LK", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("TW", RTW89_FCC, RTW89_FCC, RTW89_ETSI), - COUNTRY_REGD("TH", RTW89_ETSI, RTW89_ETSI, RTW89_THAILAND), + COUNTRY_REGD("TH", RTW89_THAILAND, RTW89_THAILAND, RTW89_THAILAND), COUNTRY_REGD("VN", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("AU", RTW89_ACMA, RTW89_ACMA, RTW89_ACMA), COUNTRY_REGD("NZ", RTW89_ACMA, RTW89_ACMA, RTW89_ACMA), @@ -158,9 +158,9 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("TD", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("CX", RTW89_ACMA, RTW89_ACMA, RTW89_NA), COUNTRY_REGD("CC", RTW89_ACMA, RTW89_ACMA, RTW89_NA), - COUNTRY_REGD("KM", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("CG", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("CD", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("KM", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("CG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), + COUNTRY_REGD("CD", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("CK", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("CI", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("DJ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), @@ -176,12 +176,12 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("TF", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("GA", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("GM", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("GE", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("GI", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("GE", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), + COUNTRY_REGD("GI", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("GL", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("GD", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("GP", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("GU", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("GU", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("GG", RTW89_ETSI, RTW89_ETSI, RTW89_NA), 
COUNTRY_REGD("GN", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("GW", RTW89_ETSI, RTW89_ETSI, RTW89_NA), @@ -194,19 +194,19 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("KI", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("XK", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("LA", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("LR", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("LR", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("LY", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MO", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("MW", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MV", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("ML", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("MH", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("MH", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("MQ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MR", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("MU", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("YT", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("FM", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("FM", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("MD", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("MN", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("MS", RTW89_ETSI, RTW89_ETSI, RTW89_NA), @@ -216,15 +216,15 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("NE", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("NU", RTW89_ACMA, RTW89_ACMA, RTW89_NA), COUNTRY_REGD("NF", RTW89_ACMA, RTW89_ACMA, RTW89_NA), - COUNTRY_REGD("MP", RTW89_FCC, RTW89_FCC, RTW89_NA), - COUNTRY_REGD("PW", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("MP", RTW89_FCC, RTW89_FCC, RTW89_FCC), + COUNTRY_REGD("PW", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("RE", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("RW", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("SH", RTW89_ETSI, RTW89_ETSI, 
RTW89_NA), COUNTRY_REGD("KN", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("LC", RTW89_FCC, RTW89_FCC, RTW89_FCC), - COUNTRY_REGD("MF", RTW89_FCC, RTW89_FCC, RTW89_NA), - COUNTRY_REGD("SX", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("MF", RTW89_FCC, RTW89_FCC, RTW89_FCC), + COUNTRY_REGD("SX", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("PM", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("VC", RTW89_FCC, RTW89_FCC, RTW89_NA), COUNTRY_REGD("WS", RTW89_FCC, RTW89_FCC, RTW89_NA), @@ -237,9 +237,9 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("GS", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("SR", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("SJ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("SZ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("SZ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("TJ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), - COUNTRY_REGD("TZ", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("TZ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("TG", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("TK", RTW89_ACMA, RTW89_ACMA, RTW89_NA), COUNTRY_REGD("TO", RTW89_ETSI, RTW89_ETSI, RTW89_NA), @@ -247,13 +247,16 @@ static const struct rtw89_regd rtw89_regd_map[] = { COUNTRY_REGD("TC", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("TV", RTW89_ETSI, RTW89_NA, RTW89_NA), COUNTRY_REGD("UG", RTW89_ETSI, RTW89_ETSI, RTW89_NA), - COUNTRY_REGD("VI", RTW89_FCC, RTW89_FCC, RTW89_NA), + COUNTRY_REGD("VI", RTW89_FCC, RTW89_FCC, RTW89_FCC), COUNTRY_REGD("UZ", RTW89_ETSI, RTW89_ETSI, RTW89_ETSI), COUNTRY_REGD("VU", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("WF", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("EH", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("ZM", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("CU", RTW89_ETSI, RTW89_ETSI, RTW89_NA), COUNTRY_REGD("IR", RTW89_ETSI, RTW89_ETSI, RTW89_NA), + COUNTRY_REGD("SY", RTW89_ETSI, RTW89_NA, RTW89_NA), + COUNTRY_REGD("SD", RTW89_ETSI, RTW89_ETSI, 
RTW89_NA), COUNTRY_REGD("PS", RTW89_ETSI, RTW89_ETSI, RTW89_NA), }; From 9ae817c779df50710218e65593ca8105aa27ff91 Mon Sep 17 00:00:00 2001 From: Chih-Kang Chang Date: Fri, 6 Dec 2024 13:57:16 +0800 Subject: [PATCH 0324/1386] wifi: rtw89: 8922a: update format of RFK pre-notify H2C command v2 The RFK pre-notify H2C command is to tell firmware the channels driver is using. Since the format is changed after 0.35.49.0, update it accordingly. Signed-off-by: Chih-Kang Chang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206055716.18598-8-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 1 + drivers/net/wireless/realtek/rtw89/fw.c | 33 ++++++++++++++++------- drivers/net/wireless/realtek/rtw89/fw.h | 7 ++++- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index c2b5eeb4a4f1..155538370a89 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4462,6 +4462,7 @@ enum rtw89_fw_feature { RTW89_FW_FEATURE_SCAN_OFFLOAD_BE_V0, RTW89_FW_FEATURE_WOW_REASON_V1, RTW89_FW_FEATURE_RFK_PRE_NOTIFY_V0, + RTW89_FW_FEATURE_RFK_PRE_NOTIFY_V1, RTW89_FW_FEATURE_RFK_RXDCK_V0, RTW89_FW_FEATURE_NO_WOW_CPU_IO_RX, RTW89_FW_FEATURE_NOTIFY_AP_INFO, diff --git a/drivers/net/wireless/realtek/rtw89/fw.c b/drivers/net/wireless/realtek/rtw89/fw.c index e5f3efe3a7e6..90db15685728 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.c +++ b/drivers/net/wireless/realtek/rtw89/fw.c @@ -733,6 +733,7 @@ static const struct __fw_feat_cfg fw_feat_tbl[] = { __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 42, 0, RFK_RXDCK_V0), __CFG_FW_FEAT(RTL8922A, ge, 0, 35, 46, 0, NOTIFY_AP_INFO), __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 47, 0, CH_INFO_BE_V0), + __CFG_FW_FEAT(RTL8922A, lt, 0, 35, 49, 0, RFK_PRE_NOTIFY_V1), }; static void rtw89_fw_iterate_feature_cfg(struct rtw89_fw_info *fw, @@ -5540,7 +5541,9 @@ int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev 
*rtwdev, enum rtw89_phy_idx phy_idx) { struct rtw89_rfk_mcc_info *rfk_mcc = &rtwdev->rfk_mcc; + struct rtw89_fw_h2c_rfk_pre_info_common *common; struct rtw89_fw_h2c_rfk_pre_info_v0 *h2c_v0; + struct rtw89_fw_h2c_rfk_pre_info_v1 *h2c_v1; struct rtw89_fw_h2c_rfk_pre_info *h2c; u8 tbl_sel[NUM_OF_RTW89_FW_RFK_PATH]; u32 len = sizeof(*h2c); @@ -5550,7 +5553,10 @@ int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev, u32 val32; int ret; - if (RTW89_CHK_FW_FEATURE(RFK_PRE_NOTIFY_V0, &rtwdev->fw)) { + if (RTW89_CHK_FW_FEATURE(RFK_PRE_NOTIFY_V1, &rtwdev->fw)) { + len = sizeof(*h2c_v1); + ver = 1; + } else if (RTW89_CHK_FW_FEATURE(RFK_PRE_NOTIFY_V0, &rtwdev->fw)) { len = sizeof(*h2c_v0); ver = 0; } @@ -5562,17 +5568,18 @@ int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev, } skb_put(skb, len); h2c = (struct rtw89_fw_h2c_rfk_pre_info *)skb->data; + common = &h2c->base_v1.common; - h2c->common.mlo_mode = cpu_to_le32(rtwdev->mlo_dbcc_mode); + common->mlo_mode = cpu_to_le32(rtwdev->mlo_dbcc_mode); BUILD_BUG_ON(NUM_OF_RTW89_FW_RFK_TBL > RTW89_RFK_CHS_NR); BUILD_BUG_ON(ARRAY_SIZE(rfk_mcc->data) < NUM_OF_RTW89_FW_RFK_PATH); for (tbl = 0; tbl < NUM_OF_RTW89_FW_RFK_TBL; tbl++) { for (path = 0; path < NUM_OF_RTW89_FW_RFK_PATH; path++) { - h2c->common.dbcc.ch[path][tbl] = + common->dbcc.ch[path][tbl] = cpu_to_le32(rfk_mcc->data[path].ch[tbl]); - h2c->common.dbcc.band[path][tbl] = + common->dbcc.band[path][tbl] = cpu_to_le32(rfk_mcc->data[path].band[tbl]); } } @@ -5580,13 +5587,19 @@ int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev, for (path = 0; path < NUM_OF_RTW89_FW_RFK_PATH; path++) { tbl_sel[path] = rfk_mcc->data[path].table_idx; - h2c->common.tbl.cur_ch[path] = + common->tbl.cur_ch[path] = cpu_to_le32(rfk_mcc->data[path].ch[tbl_sel[path]]); - h2c->common.tbl.cur_band[path] = + common->tbl.cur_band[path] = cpu_to_le32(rfk_mcc->data[path].band[tbl_sel[path]]); + + if (ver <= 1) + continue; + + h2c->cur_bandwidth[path] = + cpu_to_le32(rfk_mcc->data[path].bw[tbl_sel[path]]); 
} - h2c->common.phy_idx = cpu_to_le32(phy_idx); + common->phy_idx = cpu_to_le32(phy_idx); if (ver == 0) { /* RFK_PRE_NOTIFY_V0 */ h2c_v0 = (struct rtw89_fw_h2c_rfk_pre_info_v0 *)skb->data; @@ -5612,8 +5625,10 @@ int rtw89_fw_h2c_rf_pre_ntfy(struct rtw89_dev *rtwdev, goto done; } - if (rtw89_is_mlo_1_1(rtwdev)) - h2c->mlo_1_1 = cpu_to_le32(1); + if (rtw89_is_mlo_1_1(rtwdev)) { + h2c_v1 = &h2c->base_v1; + h2c_v1->mlo_1_1 = cpu_to_le32(1); + } done: rtw89_h2c_pkt_set_hdr(rtwdev, skb, FWCMD_TYPE_H2C, H2C_CAT_OUTSRC, H2C_CL_OUTSRC_RF_FW_RFK, diff --git a/drivers/net/wireless/realtek/rtw89/fw.h b/drivers/net/wireless/realtek/rtw89/fw.h index 2dfc584da7d6..a3fe183c2ab0 100644 --- a/drivers/net/wireless/realtek/rtw89/fw.h +++ b/drivers/net/wireless/realtek/rtw89/fw.h @@ -4293,11 +4293,16 @@ struct rtw89_fw_h2c_rfk_pre_info_v0 { } __packed mlo; } __packed; -struct rtw89_fw_h2c_rfk_pre_info { +struct rtw89_fw_h2c_rfk_pre_info_v1 { struct rtw89_fw_h2c_rfk_pre_info_common common; __le32 mlo_1_1; } __packed; +struct rtw89_fw_h2c_rfk_pre_info { + struct rtw89_fw_h2c_rfk_pre_info_v1 base_v1; + __le32 cur_bandwidth[NUM_OF_RTW89_FW_RFK_PATH]; +} __packed; + struct rtw89_h2c_rf_tssi { __le16 len; u8 phy; From 2fdac64c3c35858aa8ac5caa70b232e03456e120 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 6 Dec 2024 14:37:10 -0300 Subject: [PATCH 0325/1386] wifi: rtlwifi: remove unused check_buddy_priv Commit 2461c7d60f9f ("rtlwifi: Update header file") introduced a global list of private data structures. Later on, commit 26634c4b1868 ("rtlwifi Modify existing bits to match vendor version 2013.02.07") started adding the private data to that list at probe time and added a hook, check_buddy_priv to find the private data from a similar device. However, that function was never used. Besides, though there is a lock for that list, it is never used. And when the probe fails, the private data is never removed from the list. 
This would cause a second probe to access freed memory. Remove the unused hook, structures and members, which will prevent the potential race condition on the list and its corruption during a second probe when probe fails. Fixes: 26634c4b1868 ("rtlwifi Modify existing bits to match vendor version 2013.02.07") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206173713.3222187-2-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/base.c | 7 ---- drivers/net/wireless/realtek/rtlwifi/base.h | 1 - drivers/net/wireless/realtek/rtlwifi/pci.c | 44 --------------------- drivers/net/wireless/realtek/rtlwifi/wifi.h | 12 ------ 4 files changed, 64 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index aab4605de9c4..fd28c7a722d8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -2696,9 +2696,6 @@ MODULE_AUTHOR("Larry Finger "); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Realtek 802.11n PCI wireless core"); -struct rtl_global_var rtl_global_var = {}; -EXPORT_SYMBOL_GPL(rtl_global_var); - static int __init rtl_core_module_init(void) { BUILD_BUG_ON(TX_PWR_BY_RATE_NUM_RATE < TX_PWR_BY_RATE_NUM_SECTION); @@ -2712,10 +2709,6 @@ static int __init rtl_core_module_init(void) /* add debugfs */ rtl_debugfs_add_topdir(); - /* init some global vars */ - INIT_LIST_HEAD(&rtl_global_var.glb_priv_list); - spin_lock_init(&rtl_global_var.glb_list_lock); - return 0; } diff --git a/drivers/net/wireless/realtek/rtlwifi/base.h b/drivers/net/wireless/realtek/rtlwifi/base.h index f081a9a90563..f3a6a43a42ec 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.h +++ b/drivers/net/wireless/realtek/rtlwifi/base.h @@ -124,7 +124,6 @@ int rtl_send_smps_action(struct ieee80211_hw *hw, u8 *rtl_find_ie(u8 *data, unsigned int len, u8 ie); void rtl_recognize_peer(struct ieee80211_hw *hw, u8 *data, unsigned int len); u8 
rtl_tid_to_ac(u8 tid); -extern struct rtl_global_var rtl_global_var; void rtl_phy_scan_operation_backup(struct ieee80211_hw *hw, u8 operation); #endif diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index 40fc3c297a8a..4388066eb9e2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -295,46 +295,6 @@ static bool rtl_pci_get_amd_l1_patch(struct ieee80211_hw *hw) return status; } -static bool rtl_pci_check_buddy_priv(struct ieee80211_hw *hw, - struct rtl_priv **buddy_priv) -{ - struct rtl_priv *rtlpriv = rtl_priv(hw); - struct rtl_pci_priv *pcipriv = rtl_pcipriv(hw); - struct rtl_priv *tpriv = NULL, *iter; - struct rtl_pci_priv *tpcipriv = NULL; - - if (!list_empty(&rtlpriv->glb_var->glb_priv_list)) { - list_for_each_entry(iter, &rtlpriv->glb_var->glb_priv_list, - list) { - tpcipriv = (struct rtl_pci_priv *)iter->priv; - rtl_dbg(rtlpriv, COMP_INIT, DBG_LOUD, - "pcipriv->ndis_adapter.funcnumber %x\n", - pcipriv->ndis_adapter.funcnumber); - rtl_dbg(rtlpriv, COMP_INIT, DBG_LOUD, - "tpcipriv->ndis_adapter.funcnumber %x\n", - tpcipriv->ndis_adapter.funcnumber); - - if (pcipriv->ndis_adapter.busnumber == - tpcipriv->ndis_adapter.busnumber && - pcipriv->ndis_adapter.devnumber == - tpcipriv->ndis_adapter.devnumber && - pcipriv->ndis_adapter.funcnumber != - tpcipriv->ndis_adapter.funcnumber) { - tpriv = iter; - break; - } - } - } - - rtl_dbg(rtlpriv, COMP_INIT, DBG_LOUD, - "find_buddy_priv %d\n", tpriv != NULL); - - if (tpriv) - *buddy_priv = tpriv; - - return tpriv != NULL; -} - static void rtl_pci_parse_configuration(struct pci_dev *pdev, struct ieee80211_hw *hw) { @@ -2011,7 +1971,6 @@ static bool _rtl_pci_find_adapter(struct pci_dev *pdev, pcipriv->ndis_adapter.amd_l1_patch); rtl_pci_parse_configuration(pdev, hw); - list_add_tail(&rtlpriv->list, &rtlpriv->glb_var->glb_priv_list); return true; } @@ -2158,7 +2117,6 @@ int rtl_pci_probe(struct pci_dev *pdev, 
rtlpriv->rtlhal.interface = INTF_PCI; rtlpriv->cfg = (struct rtl_hal_cfg *)(id->driver_data); rtlpriv->intf_ops = &rtl_pci_ops; - rtlpriv->glb_var = &rtl_global_var; rtl_efuse_ops_init(hw); /* MEM map */ @@ -2316,7 +2274,6 @@ void rtl_pci_disconnect(struct pci_dev *pdev) if (rtlpci->using_msi) pci_disable_msi(rtlpci->pdev); - list_del(&rtlpriv->list); if (rtlpriv->io.pci_mem_start != 0) { pci_iounmap(pdev, (void __iomem *)rtlpriv->io.pci_mem_start); pci_release_regions(pdev); @@ -2375,7 +2332,6 @@ EXPORT_SYMBOL(rtl_pci_resume); const struct rtl_intf_ops rtl_pci_ops = { .adapter_start = rtl_pci_start, .adapter_stop = rtl_pci_stop, - .check_buddy_priv = rtl_pci_check_buddy_priv, .adapter_tx = rtl_pci_tx, .flush = rtl_pci_flush, .reset_trx_ring = rtl_pci_reset_trx_ring, diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index ae6e351bc83c..f1830ddcdd8c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2270,8 +2270,6 @@ struct rtl_intf_ops { /*com */ int (*adapter_start)(struct ieee80211_hw *hw); void (*adapter_stop)(struct ieee80211_hw *hw); - bool (*check_buddy_priv)(struct ieee80211_hw *hw, - struct rtl_priv **buddy_priv); int (*adapter_tx)(struct ieee80211_hw *hw, struct ieee80211_sta *sta, @@ -2514,14 +2512,6 @@ struct dig_t { u32 rssi_max; }; -struct rtl_global_var { - /* from this list we can get - * other adapter's rtl_priv - */ - struct list_head glb_priv_list; - spinlock_t glb_list_lock; -}; - #define IN_4WAY_TIMEOUT_TIME (30 * MSEC_PER_SEC) /* 30 seconds */ struct rtl_btc_info { @@ -2667,9 +2657,7 @@ struct rtl_scan_list { struct rtl_priv { struct ieee80211_hw *hw; struct completion firmware_loading_complete; - struct list_head list; struct rtl_priv *buddy_priv; - struct rtl_global_var *glb_var; struct rtl_dmsp_ctl dmsp_ctl; struct rtl_locks locks; struct rtl_works works; From d8ece6fc3694657e4886191b32ca1690af11adda Mon Sep 17 00:00:00 2001 From: 
Thadeu Lima de Souza Cascardo Date: Fri, 6 Dec 2024 14:37:11 -0300 Subject: [PATCH 0326/1386] wifi: rtlwifi: destroy workqueue at rtl_deinit_core rtl_wq is allocated at rtl_init_core, so it makes more sense to destroy it at rtl_deinit_core. In the case of USB, where _rtl_usb_init does not require anything to be undone, that is fine. But for PCI, rtl_pci_init, which is called after rtl_init_core, needs to deallocate data, but only if it has been called. That means that destroying the workqueue needs to be done whether rtl_pci_init has been called or not. And since rtl_pci_deinit was doing it, it has to be moved out of there. It makes more sense to move it to rtl_deinit_core and have it done in both cases, USB and PCI. Since this is a requirement for a followup memory leak fix, mark this as fixing such memory leak. Fixes: 0c8173385e54 ("rtl8192ce: Add new driver") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206173713.3222187-3-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/base.c | 6 ++++++ drivers/net/wireless/realtek/rtlwifi/pci.c | 2 -- drivers/net/wireless/realtek/rtlwifi/usb.c | 5 ----- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c index fd28c7a722d8..ff61867d142f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/base.c +++ b/drivers/net/wireless/realtek/rtlwifi/base.c @@ -575,9 +575,15 @@ static void rtl_free_entries_from_ack_queue(struct ieee80211_hw *hw, void rtl_deinit_core(struct ieee80211_hw *hw) { + struct rtl_priv *rtlpriv = rtl_priv(hw); + rtl_c2hcmd_launcher(hw, 0); rtl_free_entries_from_scan_list(hw); rtl_free_entries_from_ack_queue(hw, false); + if (rtlpriv->works.rtl_wq) { + destroy_workqueue(rtlpriv->works.rtl_wq); + rtlpriv->works.rtl_wq = NULL; + } } EXPORT_SYMBOL_GPL(rtl_deinit_core); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c 
b/drivers/net/wireless/realtek/rtlwifi/pci.c index 4388066eb9e2..e60ac910e750 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -1656,8 +1656,6 @@ static void rtl_pci_deinit(struct ieee80211_hw *hw) synchronize_irq(rtlpci->pdev->irq); tasklet_kill(&rtlpriv->works.irq_tasklet); cancel_work_sync(&rtlpriv->works.lps_change_work); - - destroy_workqueue(rtlpriv->works.rtl_wq); } static int rtl_pci_init(struct ieee80211_hw *hw, struct pci_dev *pdev) diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 0368ecea2e81..f5718e570011 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -629,11 +629,6 @@ static void _rtl_usb_cleanup_rx(struct ieee80211_hw *hw) tasklet_kill(&rtlusb->rx_work_tasklet); cancel_work_sync(&rtlpriv->works.lps_change_work); - if (rtlpriv->works.rtl_wq) { - destroy_workqueue(rtlpriv->works.rtl_wq); - rtlpriv->works.rtl_wq = NULL; - } - skb_queue_purge(&rtlusb->rx_queue); while ((urb = usb_get_from_anchor(&rtlusb->rx_cleanup_urbs))) { From e7ceefbfd8d447abc8aca8ab993a942803522c06 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 6 Dec 2024 14:37:12 -0300 Subject: [PATCH 0327/1386] wifi: rtlwifi: fix memory leaks and invalid access at probe error path Deinitialize in reverse order when probe fails. When init_sw_vars fails, rtl_deinit_core should not be called, especially now that it destroys the rtl_wq workqueue. And call rtl_pci_deinit and deinit_sw_vars, otherwise, memory will be leaked. Remove pci_set_drvdata call as it will already be cleaned up by the core driver code and could lead to memory leaks too. cf. commit 8d450935ae7f ("wireless: rtlwifi: remove unnecessary pci_set_drvdata()") and commit 3d86b93064c7 ("rtlwifi: Fix PCI probe error path orphaned memory").
Fixes: 0c8173385e54 ("rtl8192ce: Add new driver") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206173713.3222187-4-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/pci.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index e60ac910e750..a870117cf12a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -2165,7 +2165,7 @@ int rtl_pci_probe(struct pci_dev *pdev, if (rtlpriv->cfg->ops->init_sw_vars(hw)) { pr_err("Can't init_sw_vars\n"); err = -ENODEV; - goto fail3; + goto fail2; } rtl_init_sw_leds(hw); @@ -2183,14 +2183,14 @@ int rtl_pci_probe(struct pci_dev *pdev, err = rtl_pci_init(hw, pdev); if (err) { pr_err("Failed to init PCI\n"); - goto fail3; + goto fail4; } err = ieee80211_register_hw(hw); if (err) { pr_err("Can't register mac80211 hw.\n"); err = -ENODEV; - goto fail3; + goto fail5; } rtlpriv->mac80211.mac80211_registered = 1; @@ -2213,9 +2213,12 @@ int rtl_pci_probe(struct pci_dev *pdev, set_bit(RTL_STATUS_INTERFACE_START, &rtlpriv->status); return 0; -fail3: - pci_set_drvdata(pdev, NULL); +fail5: + rtl_pci_deinit(hw); +fail4: rtl_deinit_core(hw); +fail3: + rtlpriv->cfg->ops->deinit_sw_vars(hw); fail2: if (rtlpriv->io.pci_mem_start != 0) From b59b86c5d08be7d761c04affcbcec8184738c200 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Fri, 6 Dec 2024 14:37:13 -0300 Subject: [PATCH 0328/1386] wifi: rtlwifi: pci: wait for firmware loading before releasing memory At probe error path, the firmware loading work may have already been queued. In such a case, it will try to access memory allocated by the probe function, which is about to be released. In such paths, wait for the firmware worker to finish before releasing memory. 
Fixes: 3d86b93064c7 ("rtlwifi: Fix PCI probe error path orphaned memory") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241206173713.3222187-5-cascardo@igalia.com --- drivers/net/wireless/realtek/rtlwifi/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index a870117cf12a..0eafc4d125f9 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -2218,6 +2218,7 @@ fail5: fail4: rtl_deinit_core(hw); fail3: + wait_for_completion(&rtlpriv->firmware_loading_complete); rtlpriv->cfg->ops->deinit_sw_vars(hw); fail2: From b6c10a19363787ffdffe08049b9e0b71c101d401 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 9 Dec 2024 12:20:19 +0800 Subject: [PATCH 0329/1386] wifi: rtw89: 8852c: rfk: refine target channel calculation in _rx_dck_channel_calc() The channel is not possibly 0, so original code is fine. Still want to avoid Coverity warning, so ensure -32 offset for the channel number which is larger than 125 only. Actually, don't change logic at all. 
Addresses-Coverity-ID: 1628150 ("Overflowed constant") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241209042020.21290-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c index bd17c0a1c684..b92e2ce4f4ad 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852c_rfk.c @@ -1769,10 +1769,10 @@ u8 _rx_dck_channel_calc(struct rtw89_dev *rtwdev, const struct rtw89_chan *chan) target_ch = chan->channel - 33; } } else if (chan->band_type == RTW89_BAND_6G) { - if (chan->channel >= 1 && chan->channel <= 125) - target_ch = chan->channel + 32; - else + if (chan->channel > 125) target_ch = chan->channel - 32; + else + target_ch = chan->channel + 32; } else { target_ch = chan->channel; } From 5fdf5e557f06213ef5134d31770c29e77de205cd Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 9 Dec 2024 12:20:20 +0800 Subject: [PATCH 0330/1386] wifi: rtw89: 8851b: rfk: remove unnecessary assignment of return value of _dpk_dgain_read() The return value of _dpk_dgain_read() is not used afterward, so remove it safely. 
Addresses-Coverity-ID: 1504753 ("Unused value") Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241209042020.21290-2-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c index 364e36354225..f72b3ac6f149 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8851b_rfk.c @@ -2199,7 +2199,7 @@ static u8 _dpk_agc(struct rtw89_dev *rtwdev, enum rtw89_phy_idx phy, if (dgain > 0x5fc || dgain < 0x556) { _dpk_one_shot(rtwdev, phy, path, D_SYNC); - dgain = _dpk_dgain_read(rtwdev); + _dpk_dgain_read(rtwdev); } if (agc_cnt == 0) { From 09489812013f9ff3850c3af9900c88012b8c1e5d Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Mon, 9 Dec 2024 12:21:27 +0800 Subject: [PATCH 0331/1386] wifi: rtw89: phy: add dummy C2H event handler for report of TAS power The newer firmware, like RTL8852C version 0.27.111.0, will notify driver report of TAS (Time Averaged SAR) power by new C2H events. This is to assist in more accurate calculation of TAS. For now, driver doesn't use the report yet, so add a dummy handler to avoid it throwing info like: rtw89_8852ce 0000:03:00.0: c2h class 9 func 6 not support Also add "MAC" and "PHY" to the message to disambiguate the source of C2H event.
Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241209042127.21424-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/mac.c | 4 ++-- drivers/net/wireless/realtek/rtw89/phy.c | 10 ++++++++-- drivers/net/wireless/realtek/rtw89/phy.h | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c index bb4f58118e05..c78066fd4504 100644 --- a/drivers/net/wireless/realtek/rtw89/mac.c +++ b/drivers/net/wireless/realtek/rtw89/mac.c @@ -5558,11 +5558,11 @@ void rtw89_mac_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb, case RTW89_MAC_C2H_CLASS_FWDBG: return; default: - rtw89_info(rtwdev, "c2h class %d not support\n", class); + rtw89_info(rtwdev, "MAC c2h class %d not support\n", class); return; } if (!handler) { - rtw89_info(rtwdev, "c2h class %d func %d not support\n", class, + rtw89_info(rtwdev, "MAC c2h class %d func %d not support\n", class, func); return; } diff --git a/drivers/net/wireless/realtek/rtw89/phy.c b/drivers/net/wireless/realtek/rtw89/phy.c index 604ea048c3ab..4e3754fd18fd 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.c +++ b/drivers/net/wireless/realtek/rtw89/phy.c @@ -3444,10 +3444,16 @@ rtw89_phy_c2h_rfk_report_state(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u3 (int)(len - sizeof(report->hdr)), &report->state); } +static void +rtw89_phy_c2h_rfk_log_tas_pwr(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len) +{ +} + static void (* const rtw89_phy_c2h_rfk_report_handler[])(struct rtw89_dev *rtwdev, struct sk_buff *c2h, u32 len) = { [RTW89_PHY_C2H_RFK_REPORT_FUNC_STATE] = rtw89_phy_c2h_rfk_report_state, + [RTW89_PHY_C2H_RFK_LOG_TAS_PWR] = rtw89_phy_c2h_rfk_log_tas_pwr, }; bool rtw89_phy_c2h_chk_atomic(struct rtw89_dev *rtwdev, u8 class, u8 func) @@ -3501,11 +3507,11 @@ void rtw89_phy_c2h_handle(struct rtw89_dev *rtwdev, struct sk_buff *skb, return; fallthrough; default: - rtw89_info(rtwdev, "c2h class %d not support\n", 
class); + rtw89_info(rtwdev, "PHY c2h class %d not support\n", class); return; } if (!handler) { - rtw89_info(rtwdev, "c2h class %d func %d not support\n", class, + rtw89_info(rtwdev, "PHY c2h class %d func %d not support\n", class, func); return; } diff --git a/drivers/net/wireless/realtek/rtw89/phy.h b/drivers/net/wireless/realtek/rtw89/phy.h index cf33c1655b7a..697ee47fe325 100644 --- a/drivers/net/wireless/realtek/rtw89/phy.h +++ b/drivers/net/wireless/realtek/rtw89/phy.h @@ -151,6 +151,7 @@ enum rtw89_phy_c2h_rfk_log_func { enum rtw89_phy_c2h_rfk_report_func { RTW89_PHY_C2H_RFK_REPORT_FUNC_STATE = 0, + RTW89_PHY_C2H_RFK_LOG_TAS_PWR = 6, }; enum rtw89_phy_c2h_dm_func { From f87e4f2434430b0f750fbdff4fd0601807571bb2 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Tue, 10 Dec 2024 22:56:53 +0000 Subject: [PATCH 0332/1386] nfp: Convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies to avoid the multiplication. 
This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @@ constant C; @@ - msecs_to_jiffies(C * 1000) + secs_to_jiffies(C) @@ constant C; @@ - msecs_to_jiffies(C * MSEC_PER_SEC) + secs_to_jiffies(C) Signed-off-by: Easwar Hariharan Reviewed-by: Louis Peens Link: https://patch.msgid.link/20241210-converge-secs-to-jiffies-v3-20-59479891e658@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 98e098c09c03..abba165738a3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2779,7 +2779,7 @@ static void nfp_net_netdev_init(struct nfp_net *nn) break; } - netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); + netdev->watchdog_timeo = secs_to_jiffies(5); /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; From 67571036635b8136a53b615c6bb57021d982d7da Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 11 Dec 2024 00:19:27 +0000 Subject: [PATCH 0333/1386] gve: Remove unused gve_adminq_set_mtu The last use of gve_adminq_set_mtu() was removed by commit 37149e9374bf ("gve: Implement packet continuation for RX.") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Kalesh AP Reviewed-by: Praveen Kaligineedi Link: https://patch.msgid.link/20241211001927.253161-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_adminq.c | 14 -------------- drivers/net/ethernet/google/gve/gve_adminq.h | 1 - 2 files changed, 15 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 060e0e674938..aa7d723011d0 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -1128,20 +1128,6 @@ int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) return gve_adminq_execute_cmd(priv, &cmd); } -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu) -{ - union gve_adminq_command cmd; - - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = cpu_to_be32(GVE_ADMINQ_SET_DRIVER_PARAMETER); - cmd.set_driver_param = (struct gve_adminq_set_driver_parameter) { - .parameter_type = cpu_to_be32(GVE_SET_PARAM_MTU), - .parameter_value = cpu_to_be64(mtu), - }; - - return gve_adminq_execute_cmd(priv, &cmd); -} - int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval) { diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 863683de9694..228217458275 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -612,7 +612,6 @@ int gve_adminq_destroy_rx_queues(struct gve_priv *priv, u32 queue_id); int gve_adminq_register_page_list(struct gve_priv *priv, struct gve_queue_page_list *qpl); int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); -int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); int gve_adminq_report_stats(struct gve_priv *priv, u64 stats_report_len, dma_addr_t stats_report_addr, u64 interval); int gve_adminq_verify_driver_compatibility(struct gve_priv *priv, From 
b82ca90d5512b07be2b9ee28d0e9a775bc23e3e9 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 11 Dec 2024 11:54:19 +0530 Subject: [PATCH 0334/1386] cn10k-ipsec: Fix compilation error when CONFIG_XFRM_OFFLOAD disabled Define static branch variable "cn10k_ipsec_sa_enabled" in "otx2_txrx.c". This fixes below compilation error when CONFIG_XFRM_OFFLOAD is disabled. drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.o:(__jump_table+0x8): undefined reference to `cn10k_ipsec_sa_enabled' drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.o:(__jump_table+0x18): undefined reference to `cn10k_ipsec_sa_enabled' drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.o:(__jump_table+0x28): undefined reference to `cn10k_ipsec_sa_enabled' Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202412110505.ZKDzGRMv-lkp@intel.com/ Fixes: 6a77a158848a ("cn10k-ipsec: Process outbound ipsec crypto offload") Signed-off-by: Bharat Bhushan Link: https://patch.msgid.link/20241211062419.2587111-1-bbhushan2@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c | 2 -- drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index c333e04daad3..09a5b5268205 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -14,8 +14,6 @@ #include "otx2_struct.h" #include "cn10k_ipsec.h" -DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled); - static bool is_dev_support_ipsec_offload(struct pci_dev *pdev) { return is_dev_cn10ka_b0(pdev) || is_dev_cn10kb(pdev); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 4e0133d1d892..224cef938927 100644 --- 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -27,6 +27,8 @@ */ #define PTP_SYNC_SEC_OFFSET 34 +DEFINE_STATIC_KEY_FALSE(cn10k_ipsec_sa_enabled); + static bool otx2_xdp_rcv_pkt_handler(struct otx2_nic *pfvf, struct bpf_prog *prog, struct nix_cqe_rx_s *cqe, From ae7837bb3d9d0bad1230353bbafb92b3e6ad3941 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 11 Dec 2024 00:58:02 +0000 Subject: [PATCH 0335/1386] isdn: Remove unused get_Bprotocol4id() get_Bprotocol4id() was added in 2008 in commit 1b2b03f8e514 ("Add mISDN core files") but hasn't been used. Remove it. Signed-off-by: Dr. David Alan Gilbert Link: https://patch.msgid.link/20241211005802.258279-1-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/isdn/mISDN/core.c | 14 -------------- drivers/isdn/mISDN/core.h | 1 - 2 files changed, 15 deletions(-) diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index e34a7a46754e..8ec2d4d4f135 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -294,20 +294,6 @@ get_Bprotocol4mask(u_int m) return NULL; } -struct Bprotocol * -get_Bprotocol4id(u_int id) -{ - u_int m; - - if (id < ISDN_P_B_START || id > 63) { - printk(KERN_WARNING "%s id not in range %d\n", - __func__, id); - return NULL; - } - m = 1 << (id & ISDN_P_B_MASK); - return get_Bprotocol4mask(m); -} - int mISDN_register_Bprotocol(struct Bprotocol *bp) { diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 42599f49c189..5617c06de8e4 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -55,7 +55,6 @@ extern void __add_layer2(struct mISDNchannel *, struct mISDNstack *); extern u_int get_all_Bprotocols(void); struct Bprotocol *get_Bprotocol4mask(u_int); -struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); From c4117091d029087abde76e6947d43dca8f1db20b Mon Sep 17 00:00:00 2001 From: 
Vladimir Oltean Date: Tue, 10 Dec 2024 12:27:10 -0800 Subject: [PATCH 0336/1386] lib: packing: create __pack() and __unpack() variants without error checking A future variant of the API, which works on arrays of packed_field structures, will make most of these checks redundant. The idea will be that we want to perform sanity checks at compile time, not once for every function call. Introduce new variants of pack() and unpack(), which elide the sanity checks, assuming that the input was pre-sanitized. Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-1-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- lib/packing.c | 142 +++++++++++++++++++++++++++----------------------- 1 file changed, 78 insertions(+), 64 deletions(-) diff --git a/lib/packing.c b/lib/packing.c index 793942745e34..f237b8af99f5 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -51,64 +51,20 @@ static size_t calculate_box_addr(size_t box, size_t len, u8 quirks) return offset_of_group + offset_in_group; } -/** - * pack - Pack u64 number into bitfield of buffer. - * - * @pbuf: Pointer to a buffer holding the packed value. - * @uval: CPU-readable unpacked value to pack. - * @startbit: The index (in logical notation, compensated for quirks) where - * the packed value starts within pbuf. Must be larger than, or - * equal to, endbit. - * @endbit: The index (in logical notation, compensated for quirks) where - * the packed value ends within pbuf. Must be smaller than, or equal - * to, startbit. - * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. - * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and - * QUIRK_MSB_ON_THE_RIGHT. - * - * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. The @pbuf memory will - * be modified on success. 
- */ -int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, - u8 quirks) +static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) { /* Logical byte indices corresponding to the * start and end of the field. */ - int plogical_first_u8, plogical_last_u8, box; - /* width of the field to access in the pbuf */ - u64 value_width; - - /* startbit is expected to be larger than endbit, and both are - * expected to be within the logically addressable range of the buffer. - */ - if (unlikely(startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen)) - /* Invalid function call */ - return -EINVAL; - - value_width = startbit - endbit + 1; - if (unlikely(value_width > 64)) - return -ERANGE; - - /* Check if "uval" fits in "value_width" bits. - * If value_width is 64, the check will fail, but any - * 64-bit uval will surely fit. - */ - if (unlikely(value_width < 64 && uval >= (1ull << value_width))) - /* Cannot store "uval" inside "value_width" bits. - * Truncating "uval" is most certainly not desirable, - * so simply erroring out is appropriate. - */ - return -ERANGE; + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int box; /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. */ - plogical_first_u8 = startbit / BITS_PER_BYTE; - plogical_last_u8 = endbit / BITS_PER_BYTE; - for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ size_t box_start_bit, box_end_bit, box_addr; @@ -163,15 +119,13 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, ((u8 *)pbuf)[box_addr] &= ~box_mask; ((u8 *)pbuf)[box_addr] |= pval; } - return 0; } -EXPORT_SYMBOL(pack); /** - * unpack - Unpack u64 number from packed buffer. 
+ * pack - Pack u64 number into bitfield of buffer. * * @pbuf: Pointer to a buffer holding the packed value. - * @uval: Pointer to an u64 holding the unpacked value. + * @uval: CPU-readable unpacked value to pack. * @startbit: The index (in logical notation, compensated for quirks) where * the packed value starts within pbuf. Must be larger than, or * equal to, endbit. @@ -183,16 +137,12 @@ EXPORT_SYMBOL(pack); * QUIRK_MSB_ON_THE_RIGHT. * * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming - * correct usage, return code may be discarded. The @uval will be - * modified on success. + * correct usage, return code may be discarded. The @pbuf memory will + * be modified on success. */ -int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, - size_t pbuflen, u8 quirks) +int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, + u8 quirks) { - /* Logical byte indices corresponding to the - * start and end of the field. - */ - int plogical_first_u8, plogical_last_u8, box; /* width of the field to access in the pbuf */ u64 value_width; @@ -207,6 +157,33 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, if (unlikely(value_width > 64)) return -ERANGE; + /* Check if "uval" fits in "value_width" bits. + * If value_width is 64, the check will fail, but any + * 64-bit uval will surely fit. + */ + if (value_width < 64 && uval >= (1ull << value_width)) + /* Cannot store "uval" inside "value_width" bits. + * Truncating "uval" is most certainly not desirable, + * so simply erroring out is appropriate. + */ + return -ERANGE; + + __pack(pbuf, uval, startbit, endbit, pbuflen, quirks); + + return 0; +} +EXPORT_SYMBOL(pack); + +static void __unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* Logical byte indices corresponding to the + * start and end of the field. 
+ */ + int plogical_first_u8 = startbit / BITS_PER_BYTE; + int plogical_last_u8 = endbit / BITS_PER_BYTE; + int box; + /* Initialize parameter */ *uval = 0; @@ -214,9 +191,6 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. */ - plogical_first_u8 = startbit / BITS_PER_BYTE; - plogical_last_u8 = endbit / BITS_PER_BYTE; - for (box = plogical_first_u8; box >= plogical_last_u8; box--) { /* Bit indices into the currently accessed 8-bit box */ size_t box_start_bit, box_end_bit, box_addr; @@ -271,6 +245,46 @@ int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, *uval &= ~proj_mask; *uval |= pval; } +} + +/** + * unpack - Unpack u64 number from packed buffer. + * + * @pbuf: Pointer to a buffer holding the packed value. + * @uval: Pointer to an u64 holding the unpacked value. + * @startbit: The index (in logical notation, compensated for quirks) where + * the packed value starts within pbuf. Must be larger than, or + * equal to, endbit. + * @endbit: The index (in logical notation, compensated for quirks) where + * the packed value ends within pbuf. Must be smaller than, or equal + * to, startbit. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Return: 0 on success, EINVAL or ERANGE if called incorrectly. Assuming + * correct usage, return code may be discarded. The @uval will be + * modified on success. + */ +int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, + size_t pbuflen, u8 quirks) +{ + /* width of the field to access in the pbuf */ + u64 value_width; + + /* startbit is expected to be larger than endbit, and both are + * expected to be within the logically addressable range of the buffer. 
+ */ + if (startbit < endbit || startbit >= BITS_PER_BYTE * pbuflen) + /* Invalid function call */ + return -EINVAL; + + value_width = startbit - endbit + 1; + if (value_width > 64) + return -ERANGE; + + __unpack(pbuf, uval, startbit, endbit, pbuflen, quirks); + return 0; } EXPORT_SYMBOL(unpack); From 48c2752785ad1730e08a64507f05d0e5d5bc79b8 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Dec 2024 12:27:11 -0800 Subject: [PATCH 0337/1386] lib: packing: demote truncation error in pack() to a warning in __pack() Most of the sanity checks in pack() and unpack() can be covered at compile time. There is only one exception, and that is truncation of the uval during a pack() operation. We'd like the error-less __pack() to catch that condition as well. But at the same time, it is currently the responsibility of consumer drivers (currently just sja1105) to print anything at all when this error occurs, and then discard the return code. We can just print a loud warning in the library code and continue with the truncated __pack() operation. In practice, having the warning is very important, see commit 24deec6b9e4a ("net: dsa: sja1105: disallow C45 transactions on the BASE-TX MDIO bus") where the bug was caught exactly by noticing this print. Add the first print to the packing library, and at the same time remove the print for the same condition from the sja1105 driver, to avoid double printing. 
Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-2-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- .../net/dsa/sja1105/sja1105_static_config.c | 8 ++---- lib/packing.c | 26 +++++++------------ 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index baba204ad62f..3d790f8c6f4d 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -26,12 +26,8 @@ void sja1105_pack(void *buf, const u64 *val, int start, int end, size_t len) pr_err("Start bit (%d) expected to be larger than end (%d)\n", start, end); } else if (rc == -ERANGE) { - if ((start - end + 1) > 64) - pr_err("Field %d-%d too large for 64 bits!\n", - start, end); - else - pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n", - *val, start, end); + pr_err("Field %d-%d too large for 64 bits!\n", + start, end); } dump_stack(); } diff --git a/lib/packing.c b/lib/packing.c index f237b8af99f5..09a2d195b943 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -59,8 +59,17 @@ static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, */ int plogical_first_u8 = startbit / BITS_PER_BYTE; int plogical_last_u8 = endbit / BITS_PER_BYTE; + int value_width = startbit - endbit + 1; int box; + /* Check if "uval" fits in "value_width" bits. + * The test only works for value_width < 64, but in the latter case, + * any 64-bit uval will surely fit. + */ + WARN(value_width < 64 && uval >= (1ull << value_width), + "Cannot store 0x%llx inside bits %zu-%zu - will truncate\n", + uval, startbit, endbit); + /* Iterate through an idealistic view of the pbuf as an u64 with * no quirks, u8 by u8 (aligned at u8 boundaries), from high to low * logical bit significance. "box" denotes the current logical u8. 
@@ -143,9 +152,6 @@ static void __pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks) { - /* width of the field to access in the pbuf */ - u64 value_width; - /* startbit is expected to be larger than endbit, and both are * expected to be within the logically addressable range of the buffer. */ @@ -153,19 +159,7 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, /* Invalid function call */ return -EINVAL; - value_width = startbit - endbit + 1; - if (unlikely(value_width > 64)) - return -ERANGE; - - /* Check if "uval" fits in "value_width" bits. - * If value_width is 64, the check will fail, but any - * 64-bit uval will surely fit. - */ - if (value_width < 64 && uval >= (1ull << value_width)) - /* Cannot store "uval" inside "value_width" bits. - * Truncating "uval" is most certainly not desirable, - * so simply erroring out is appropriate. - */ + if (unlikely(startbit - endbit >= 64)) return -ERANGE; __pack(pbuf, uval, startbit, endbit, pbuflen, quirks); From 41d7ea30494cc0dde3e124a75ce0add93f988ba9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Dec 2024 12:27:12 -0800 Subject: [PATCH 0338/1386] lib: packing: add pack_fields() and unpack_fields() This is new API which caters to the following requirements: - Pack or unpack a large number of fields to/from a buffer with a small code footprint. The current alternative is to open-code a large number of calls to pack() and unpack(), or to use packing() to reduce that number to half. But packing() is not const-correct. - Use unpacked numbers stored in variables smaller than u64. This reduces the rodata footprint of the stored field arrays. - Perform error checking at compile time, rather than runtime, and return void from the API functions. Because the C preprocessor can't generate variable length code (loops), this is a bit tricky to do with macros. 
To handle this, implement macros which sanity check the packed field definitions based on their size. Finally, a single macro with a chain of __builtin_choose_expr() is used to select the appropriate macros. We enforce the use of ascending or descending order to avoid O(N^2) scaling when checking for overlap. Note that the macros are written with care to ensure that the compilers can correctly evaluate the resulting code at compile time. In particular, care was taken to avoid too many nested statement expressions. Nested statement expressions trip up some compilers, especially when passing down variables created in previous statement expressions. There are two key design choices intended to keep the overall macro code size small. First, the definition of each CHECK_PACKED_FIELDS_N macro is implemented recursively, by calling the N-1 macro. This avoids needing the code to repeat multiple times. Second, the CHECK_PACKED_FIELD macro enforces that the fields in the array are sorted in order. This allows checking for overlap only with neighboring fields, rather than the general overlap case where each field would need to be checked against other fields. The overlap checks use the first two fields to determine the order of the remaining fields, thus allowing either ascending or descending order. This gives drivers the flexibility to keep the fields ordered in whichever order most naturally fits their hardware design and its associated documentation. The CHECK_PACKED_FIELDS macro is directly called from within pack_fields and unpack_fields, ensuring that all drivers using the API receive the benefits of the compile-time checks. Users do not need to call any of the macros directly. The CHECK_PACKED_FIELDS and its helper macros CHECK_PACKED_FIELDS_(1..50) are generated using a simple C program in scripts/gen_packed_field_checks.c. This program can be compiled on demand and executed to generate the macro code in include/linux/packing.h.
This will aid in the event that a driver needs more than 50 fields. The generator can be updated with a new size, and used to update the packing.h header file. In practice, the ice driver will need to support 27 fields, and the sja1105 driver will need to support 0 fields. This on-demand generation avoids the need to modify Kbuild. We do not anticipate the maximum number of fields to grow very often. - Reduced rodata footprint for the storage of the packed field arrays. To that end, we have struct packed_field_u8 and packed_field_u16, which define the fields with the associated type. More can be added as needed (unlikely for now). On these types, the same generic pack_fields() and unpack_fields() API can be used, thanks to the new C11 _Generic() selection feature, which can call pack_fields_u8() or pack_fields_u16(), depending on the type of the "fields" array - a simplistic form of polymorphism. Which function will actually be called is determined at compile time. Over time, packing() is expected to be completely replaced either with pack() or with pack_fields().
Signed-off-by: Vladimir Oltean Co-developed-by: Jacob Keller Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-3-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + Makefile | 4 + include/linux/packing.h | 425 ++++++++++++++++++++++++++++++ lib/packing.c | 153 +++++++++++ lib/packing_test.c | 61 +++++ scripts/.gitignore | 1 + scripts/Makefile | 2 +- scripts/gen_packed_field_checks.c | 37 +++ 8 files changed, 683 insertions(+), 1 deletion(-) create mode 100644 scripts/gen_packed_field_checks.c diff --git a/MAINTAINERS b/MAINTAINERS index af35519be320..15cf366c0aec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17644,6 +17644,7 @@ F: Documentation/core-api/packing.rst F: include/linux/packing.h F: lib/packing.c F: lib/packing_test.c +F: scripts/gen_packed_field_checks.c PADATA PARALLEL EXECUTION MECHANISM M: Steffen Klassert diff --git a/Makefile b/Makefile index 93ab62cef244..9a9fd5504ae8 100644 --- a/Makefile +++ b/Makefile @@ -1367,6 +1367,10 @@ PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef +PHONY += scripts_gen_packed_field_checks +scripts_gen_packed_field_checks: scripts_basic + $(Q)$(MAKE) $(build)=scripts scripts/gen_packed_field_checks + # --------------------------------------------------------------------------- # Install diff --git a/include/linux/packing.h b/include/linux/packing.h index 5d36dcd06f60..0589d70bbe04 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -8,6 +8,83 @@ #include #include +#define GEN_PACKED_FIELD_STRUCT(__type) \ + struct packed_field_ ## __type { \ + __type startbit; \ + __type endbit; \ + __type offset; \ + __type size; \ + } + +/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and + * unpacked structures < 256B. + */ +GEN_PACKED_FIELD_STRUCT(u8); + +/* struct packed_field_u16. 
Use with bit offsets < 65536, buffers < 8KB and + * unpacked structures < 64KB. + */ +GEN_PACKED_FIELD_STRUCT(u16); + +#define PACKED_FIELD(start, end, struct_name, struct_field) \ +{ \ + (start), \ + (end), \ + offsetof(struct_name, struct_field), \ + sizeof_field(struct_name, struct_field), \ +} + +#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \ + typeof(&(fields)[0]) __f = (fields); \ + typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \ + const bool _ascending = __f[0].startbit < __f[1].startbit; \ + BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks ascending order"); \ + BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks descending order"); \ + BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \ + min(_f1.startbit, _f2.startbit), \ + __stringify(fields) " field " __stringify(index2) \ + " overlaps with previous field"); \ +}) + +#define CHECK_PACKED_FIELD(fields, index) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(_f[0]) __f = _f[index]; \ + BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \ + __stringify(fields) " field " __stringify(index) \ + " start bit must not be smaller than end bit"); \ + BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \ + __f.size != 4 && __f.size != 8, \ + __stringify(fields) " field " __stringify(index) \ + " has unsupported unpacked storage size"); \ + BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \ + __stringify(fields) " field " __stringify(index) \ + " exceeds unpacked storage size"); \ + __builtin_choose_expr(index != 0, \ + CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \ + 1); \ +}) + +/* Note that the packed fields may be either in ascending or descending order. + * Thus, we must check that both the first and last field fit within the + * packed buffer size.
+ */ +#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(pbuflen) _len = (pbuflen); \ + const size_t num_fields = ARRAY_SIZE(fields); \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \ + __stringify(fields) " pbuflen " __stringify(pbuflen) \ + " must be a compile time constant"); \ + BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " first field exceeds packed buffer size"); \ + BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " last field exceeds packed buffer size"); \ +}) + #define QUIRK_MSB_ON_THE_RIGHT BIT(0) #define QUIRK_LITTLE_ENDIAN BIT(1) #define QUIRK_LSW32_IS_FIRST BIT(2) @@ -26,4 +103,352 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +/* Do not hand-edit the following packed field check macros! + * + * They are generated using scripts/gen_packed_field_checks.c, which may be + * built via "make scripts_gen_packed_field_checks". If larger macro sizes are + * needed in the future, please use this program to re-generate the macros and + * insert them here. 
+ */ + +#define CHECK_PACKED_FIELDS_1(fields) \ + CHECK_PACKED_FIELD(fields, 0) + +#define CHECK_PACKED_FIELDS_2(fields) do { \ + CHECK_PACKED_FIELDS_1(fields); \ + CHECK_PACKED_FIELD(fields, 1); \ +} while (0) + +#define CHECK_PACKED_FIELDS_3(fields) do { \ + CHECK_PACKED_FIELDS_2(fields); \ + CHECK_PACKED_FIELD(fields, 2); \ +} while (0) + +#define CHECK_PACKED_FIELDS_4(fields) do { \ + CHECK_PACKED_FIELDS_3(fields); \ + CHECK_PACKED_FIELD(fields, 3); \ +} while (0) + +#define CHECK_PACKED_FIELDS_5(fields) do { \ + CHECK_PACKED_FIELDS_4(fields); \ + CHECK_PACKED_FIELD(fields, 4); \ +} while (0) + +#define CHECK_PACKED_FIELDS_6(fields) do { \ + CHECK_PACKED_FIELDS_5(fields); \ + CHECK_PACKED_FIELD(fields, 5); \ +} while (0) + +#define CHECK_PACKED_FIELDS_7(fields) do { \ + CHECK_PACKED_FIELDS_6(fields); \ + CHECK_PACKED_FIELD(fields, 6); \ +} while (0) + +#define CHECK_PACKED_FIELDS_8(fields) do { \ + CHECK_PACKED_FIELDS_7(fields); \ + CHECK_PACKED_FIELD(fields, 7); \ +} while (0) + +#define CHECK_PACKED_FIELDS_9(fields) do { \ + CHECK_PACKED_FIELDS_8(fields); \ + CHECK_PACKED_FIELD(fields, 8); \ +} while (0) + +#define CHECK_PACKED_FIELDS_10(fields) do { \ + CHECK_PACKED_FIELDS_9(fields); \ + CHECK_PACKED_FIELD(fields, 9); \ +} while (0) + +#define CHECK_PACKED_FIELDS_11(fields) do { \ + CHECK_PACKED_FIELDS_10(fields); \ + CHECK_PACKED_FIELD(fields, 10); \ +} while (0) + +#define CHECK_PACKED_FIELDS_12(fields) do { \ + CHECK_PACKED_FIELDS_11(fields); \ + CHECK_PACKED_FIELD(fields, 11); \ +} while (0) + +#define CHECK_PACKED_FIELDS_13(fields) do { \ + CHECK_PACKED_FIELDS_12(fields); \ + CHECK_PACKED_FIELD(fields, 12); \ +} while (0) + +#define CHECK_PACKED_FIELDS_14(fields) do { \ + CHECK_PACKED_FIELDS_13(fields); \ + CHECK_PACKED_FIELD(fields, 13); \ +} while (0) + +#define CHECK_PACKED_FIELDS_15(fields) do { \ + CHECK_PACKED_FIELDS_14(fields); \ + CHECK_PACKED_FIELD(fields, 14); \ +} while (0) + +#define CHECK_PACKED_FIELDS_16(fields) do { \ + 
CHECK_PACKED_FIELDS_15(fields); \ + CHECK_PACKED_FIELD(fields, 15); \ +} while (0) + +#define CHECK_PACKED_FIELDS_17(fields) do { \ + CHECK_PACKED_FIELDS_16(fields); \ + CHECK_PACKED_FIELD(fields, 16); \ +} while (0) + +#define CHECK_PACKED_FIELDS_18(fields) do { \ + CHECK_PACKED_FIELDS_17(fields); \ + CHECK_PACKED_FIELD(fields, 17); \ +} while (0) + +#define CHECK_PACKED_FIELDS_19(fields) do { \ + CHECK_PACKED_FIELDS_18(fields); \ + CHECK_PACKED_FIELD(fields, 18); \ +} while (0) + +#define CHECK_PACKED_FIELDS_20(fields) do { \ + CHECK_PACKED_FIELDS_19(fields); \ + CHECK_PACKED_FIELD(fields, 19); \ +} while (0) + +#define CHECK_PACKED_FIELDS_21(fields) do { \ + CHECK_PACKED_FIELDS_20(fields); \ + CHECK_PACKED_FIELD(fields, 20); \ +} while (0) + +#define CHECK_PACKED_FIELDS_22(fields) do { \ + CHECK_PACKED_FIELDS_21(fields); \ + CHECK_PACKED_FIELD(fields, 21); \ +} while (0) + +#define CHECK_PACKED_FIELDS_23(fields) do { \ + CHECK_PACKED_FIELDS_22(fields); \ + CHECK_PACKED_FIELD(fields, 22); \ +} while (0) + +#define CHECK_PACKED_FIELDS_24(fields) do { \ + CHECK_PACKED_FIELDS_23(fields); \ + CHECK_PACKED_FIELD(fields, 23); \ +} while (0) + +#define CHECK_PACKED_FIELDS_25(fields) do { \ + CHECK_PACKED_FIELDS_24(fields); \ + CHECK_PACKED_FIELD(fields, 24); \ +} while (0) + +#define CHECK_PACKED_FIELDS_26(fields) do { \ + CHECK_PACKED_FIELDS_25(fields); \ + CHECK_PACKED_FIELD(fields, 25); \ +} while (0) + +#define CHECK_PACKED_FIELDS_27(fields) do { \ + CHECK_PACKED_FIELDS_26(fields); \ + CHECK_PACKED_FIELD(fields, 26); \ +} while (0) + +#define CHECK_PACKED_FIELDS_28(fields) do { \ + CHECK_PACKED_FIELDS_27(fields); \ + CHECK_PACKED_FIELD(fields, 27); \ +} while (0) + +#define CHECK_PACKED_FIELDS_29(fields) do { \ + CHECK_PACKED_FIELDS_28(fields); \ + CHECK_PACKED_FIELD(fields, 28); \ +} while (0) + +#define CHECK_PACKED_FIELDS_30(fields) do { \ + CHECK_PACKED_FIELDS_29(fields); \ + CHECK_PACKED_FIELD(fields, 29); \ +} while (0) + +#define 
CHECK_PACKED_FIELDS_31(fields) do { \ + CHECK_PACKED_FIELDS_30(fields); \ + CHECK_PACKED_FIELD(fields, 30); \ +} while (0) + +#define CHECK_PACKED_FIELDS_32(fields) do { \ + CHECK_PACKED_FIELDS_31(fields); \ + CHECK_PACKED_FIELD(fields, 31); \ +} while (0) + +#define CHECK_PACKED_FIELDS_33(fields) do { \ + CHECK_PACKED_FIELDS_32(fields); \ + CHECK_PACKED_FIELD(fields, 32); \ +} while (0) + +#define CHECK_PACKED_FIELDS_34(fields) do { \ + CHECK_PACKED_FIELDS_33(fields); \ + CHECK_PACKED_FIELD(fields, 33); \ +} while (0) + +#define CHECK_PACKED_FIELDS_35(fields) do { \ + CHECK_PACKED_FIELDS_34(fields); \ + CHECK_PACKED_FIELD(fields, 34); \ +} while (0) + +#define CHECK_PACKED_FIELDS_36(fields) do { \ + CHECK_PACKED_FIELDS_35(fields); \ + CHECK_PACKED_FIELD(fields, 35); \ +} while (0) + +#define CHECK_PACKED_FIELDS_37(fields) do { \ + CHECK_PACKED_FIELDS_36(fields); \ + CHECK_PACKED_FIELD(fields, 36); \ +} while (0) + +#define CHECK_PACKED_FIELDS_38(fields) do { \ + CHECK_PACKED_FIELDS_37(fields); \ + CHECK_PACKED_FIELD(fields, 37); \ +} while (0) + +#define CHECK_PACKED_FIELDS_39(fields) do { \ + CHECK_PACKED_FIELDS_38(fields); \ + CHECK_PACKED_FIELD(fields, 38); \ +} while (0) + +#define CHECK_PACKED_FIELDS_40(fields) do { \ + CHECK_PACKED_FIELDS_39(fields); \ + CHECK_PACKED_FIELD(fields, 39); \ +} while (0) + +#define CHECK_PACKED_FIELDS_41(fields) do { \ + CHECK_PACKED_FIELDS_40(fields); \ + CHECK_PACKED_FIELD(fields, 40); \ +} while (0) + +#define CHECK_PACKED_FIELDS_42(fields) do { \ + CHECK_PACKED_FIELDS_41(fields); \ + CHECK_PACKED_FIELD(fields, 41); \ +} while (0) + +#define CHECK_PACKED_FIELDS_43(fields) do { \ + CHECK_PACKED_FIELDS_42(fields); \ + CHECK_PACKED_FIELD(fields, 42); \ +} while (0) + +#define CHECK_PACKED_FIELDS_44(fields) do { \ + CHECK_PACKED_FIELDS_43(fields); \ + CHECK_PACKED_FIELD(fields, 43); \ +} while (0) + +#define CHECK_PACKED_FIELDS_45(fields) do { \ + CHECK_PACKED_FIELDS_44(fields); \ + CHECK_PACKED_FIELD(fields, 44); \ +} while (0) 
+ +#define CHECK_PACKED_FIELDS_46(fields) do { \ + CHECK_PACKED_FIELDS_45(fields); \ + CHECK_PACKED_FIELD(fields, 45); \ +} while (0) + +#define CHECK_PACKED_FIELDS_47(fields) do { \ + CHECK_PACKED_FIELDS_46(fields); \ + CHECK_PACKED_FIELD(fields, 46); \ +} while (0) + +#define CHECK_PACKED_FIELDS_48(fields) do { \ + CHECK_PACKED_FIELDS_47(fields); \ + CHECK_PACKED_FIELD(fields, 47); \ +} while (0) + +#define CHECK_PACKED_FIELDS_49(fields) do { \ + CHECK_PACKED_FIELDS_48(fields); \ + CHECK_PACKED_FIELD(fields, 48); \ +} while (0) + +#define CHECK_PACKED_FIELDS_50(fields) do { \ + CHECK_PACKED_FIELDS_49(fields); \ + CHECK_PACKED_FIELD(fields, 49); \ +} while (0) + +#define CHECK_PACKED_FIELDS(fields) \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \ + 
__builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \ 
+ __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \ + ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \ +)))))))))))))))))))))))))))))))))))))))))))))))))) + +/* End of generated content */ + +#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : pack_fields_u8, \ + const struct packed_field_u16 * : pack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + +#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); 
\ + _Generic((fields), \ + const struct packed_field_u8 * : unpack_fields_u8, \ + const struct packed_field_u16 * : unpack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + #endif diff --git a/lib/packing.c b/lib/packing.c index 09a2d195b943..bb1643d9e64d 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -5,10 +5,37 @@ #include #include #include +#include #include #include #include +#define __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &(fields)[i]; \ + u64 uval; \ + \ + uval = ustruct_field_to_u64(ustruct, field->offset, field->size); \ + \ + __pack(pbuf, uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + } \ + }) + +#define __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &fields[i]; \ + u64 uval; \ + \ + __unpack(pbuf, &uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + \ + u64_to_ustruct_field(ustruct, field->offset, field->size, uval); \ + } \ + }) + /** * calculate_box_addr - Determine physical location of byte in buffer * @box: Index of byte within buffer seen as a logical big-endian big number @@ -322,4 +349,130 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, } EXPORT_SYMBOL(packing); +static u64 ustruct_field_to_u64(const void *ustruct, size_t field_offset, + size_t field_size) +{ + switch (field_size) { + case 1: + return *((u8 *)(ustruct + field_offset)); + case 2: + return *((u16 *)(ustruct + field_offset)); + case 4: + return *((u32 *)(ustruct + field_offset)); + default: + return *((u64 *)(ustruct + field_offset)); + } +} + +static void u64_to_ustruct_field(void *ustruct, size_t field_offset, + size_t field_size, u64 uval) +{ + switch (field_size) { + case 1: + *((u8 *)(ustruct + field_offset)) = uval; + break; + case 2: + *((u16 
*)(ustruct + field_offset)) = uval; + break; + case 4: + *((u32 *)(ustruct + field_offset)) = uval; + break; + default: + *((u64 *)(ustruct + field_offset)) = uval; + break; + } +} + +/** + * pack_fields_u8 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definition. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u8); + +/** + * pack_fields_u16 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. 
+ */ +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u16); + +/** + * unpack_fields_u8 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u8); + +/** + * unpack_fields_u16 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. 
+ */ +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u16); + MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c index b38ea43c03fd..ce3b83d33b04 100644 --- a/lib/packing_test.c +++ b/lib/packing_test.c @@ -396,9 +396,70 @@ static void packing_test_unpack(struct kunit *test) KUNIT_EXPECT_EQ(test, uval, params->uval); } +#define PACKED_BUF_SIZE 8 + +typedef struct __packed { u8 buf[PACKED_BUF_SIZE]; } packed_buf_t; + +struct test_data { + u32 field3; + u16 field2; + u16 field4; + u16 field6; + u8 field1; + u8 field5; +}; + +static const struct packed_field_u8 test_fields[] = { + PACKED_FIELD(63, 61, struct test_data, field1), + PACKED_FIELD(60, 52, struct test_data, field2), + PACKED_FIELD(51, 28, struct test_data, field3), + PACKED_FIELD(27, 14, struct test_data, field4), + PACKED_FIELD(13, 9, struct test_data, field5), + PACKED_FIELD(8, 0, struct test_data, field6), +}; + +static void packing_test_pack_fields(struct kunit *test) +{ + const struct test_data data = { + .field1 = 0x2, + .field2 = 0x100, + .field3 = 0xF00050, + .field4 = 0x7D3, + .field5 = 0x9, + .field6 = 0x10B, + }; + packed_buf_t expect = { + .buf = { 0x50, 0x0F, 0x00, 0x05, 0x01, 0xF4, 0xD3, 0x0B }, + }; + packed_buf_t buf = {}; + + pack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_MEMEQ(test, &expect, &buf, sizeof(buf)); +} + +static void packing_test_unpack_fields(struct kunit *test) +{ + const packed_buf_t buf = { + .buf = { 0x17, 0x28, 0x10, 0x19, 0x3D, 0xA9, 0x07, 0x9C }, + }; + struct test_data data = {}; + + unpack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_EQ(test, 0, data.field1); + KUNIT_EXPECT_EQ(test, 0x172, data.field2); + KUNIT_EXPECT_EQ(test, 0x810193, data.field3); + 
KUNIT_EXPECT_EQ(test, 0x36A4, data.field4); + KUNIT_EXPECT_EQ(test, 0x3, data.field5); + KUNIT_EXPECT_EQ(test, 0x19C, data.field6); +} + static struct kunit_case packing_test_cases[] = { KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + KUNIT_CASE(packing_test_pack_fields), + KUNIT_CASE(packing_test_unpack_fields), {}, }; diff --git a/scripts/.gitignore b/scripts/.gitignore index 3dbb8bb2457b..c2ef68848da5 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only /asn1_compiler +/gen_packed_field_checks /generate_rust_target /insert-sys-cert /kallsyms diff --git a/scripts/Makefile b/scripts/Makefile index 6bcda4b9d054..546e8175e1c4 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -47,7 +47,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand -hostprogs += unifdef +hostprogs += unifdef gen_packed_field_checks # The module linker script is preprocessed on demand targets += module.lds diff --git a/scripts/gen_packed_field_checks.c b/scripts/gen_packed_field_checks.c new file mode 100644 index 000000000000..60042b7616ee --- /dev/null +++ b/scripts/gen_packed_field_checks.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024, Intel Corporation +#include +#include + +#define MAX_PACKED_FIELD_SIZE 50 + +int main(int argc, char **argv) +{ + /* The first macro doesn't need a 'do {} while(0)' loop */ + printf("#define CHECK_PACKED_FIELDS_1(fields) \\\n"); + printf("\tCHECK_PACKED_FIELD(fields, 0)\n\n"); + + /* Remaining macros require a do/while loop, and are implemented + * recursively by calling the previous iteration's macro. 
+ */ + for (int i = 2; i <= MAX_PACKED_FIELD_SIZE; i++) { + printf("#define CHECK_PACKED_FIELDS_%d(fields) do { \\\n", i); + printf("\tCHECK_PACKED_FIELDS_%d(fields); \\\n", i - 1); + printf("\tCHECK_PACKED_FIELD(fields, %d); \\\n", i - 1); + printf("} while (0)\n\n"); + } + + printf("#define CHECK_PACKED_FIELDS(fields) \\\n"); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf("\t__builtin_choose_expr(ARRAY_SIZE(fields) == %d, ({ CHECK_PACKED_FIELDS_%d(fields); }), \\\n", + i, i); + + printf("\t({ BUILD_BUG_ON_MSG(1, \"CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than %d.\"); }) \\\n", + MAX_PACKED_FIELD_SIZE); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf(")"); + + printf("\n"); +} From a9ad2a8dfb436c55607c8038aa926f55a6d6ca8e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:13 -0800 Subject: [PATCH 0339/1386] lib: packing: document recently added APIs Extend the documentation for the packing library, covering the intended use for the recently added APIs. This includes the pack() and unpack() macros, as well as the pack_fields() and unpack_fields() macros. Add a note that the packing() API is now deprecated in favor of pack() and unpack(). For the pack_fields() and unpack_fields() APIs, explain the rationale for when a driver may want to select this API. Provide an example which shows how to define the fields and call the pack_fields() and unpack_fields() macros. 
Co-developed-by: Vladimir Oltean Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-4-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- Documentation/core-api/packing.rst | 118 +++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 5 deletions(-) diff --git a/Documentation/core-api/packing.rst b/Documentation/core-api/packing.rst index 821691f23c54..0ce2078c8e13 100644 --- a/Documentation/core-api/packing.rst +++ b/Documentation/core-api/packing.rst @@ -227,11 +227,119 @@ Intended use Drivers that opt to use this API first need to identify which of the above 3 quirk combinations (for a total of 8) match what the hardware documentation -describes. Then they should wrap the packing() function, creating a new -xxx_packing() that calls it using the proper QUIRK_* one-hot bits set. +describes. + +There are 3 supported usage patterns, detailed below. + +packing() +^^^^^^^^^ + +This API function is deprecated. The packing() function returns an int-encoded error code, which protects the programmer against incorrect API use. The errors are not expected to occur -during runtime, therefore it is reasonable for xxx_packing() to return void -and simply swallow those errors. Optionally it can dump stack or print the -error description. +during runtime, therefore it is reasonable to wrap packing() into a custom +function which returns void and swallows those errors. Optionally it can +dump stack or print the error description. + +.. 
code-block:: c + + void my_packing(void *buf, u64 *val, int startbit, int endbit, + size_t len, enum packing_op op) + { + int err; + + /* Adjust quirks accordingly */ + err = packing(buf, val, startbit, endbit, len, op, QUIRK_LSW32_IS_FIRST); + if (likely(!err)) + return; + + if (err == -EINVAL) { + pr_err("Start bit (%d) expected to be larger than end (%d)\n", + startbit, endbit); + } else if (err == -ERANGE) { + if ((startbit - endbit + 1) > 64) + pr_err("Field %d-%d too large for 64 bits!\n", + startbit, endbit); + else + pr_err("Cannot store %llx inside bits %d-%d (would truncate)\n", + *val, startbit, endbit); + } + dump_stack(); + } + +pack() and unpack() +^^^^^^^^^^^^^^^^^^^ + +These are const-correct variants of packing(), and eliminate the last "enum +packing_op op" argument. + +Calling pack(...) is equivalent, and preferred, to calling packing(..., PACK). + +Calling unpack(...) is equivalent, and preferred, to calling packing(..., UNPACK). + +pack_fields() and unpack_fields() +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The library exposes optimized functions for the scenario where there are many +fields represented in a buffer, and it encourages consumer drivers to avoid +repetitive calls to pack() and unpack() for each field, but instead use +pack_fields() and unpack_fields(), which reduces the code footprint. + +These APIs use field definitions in arrays of ``struct packed_field_u8`` or +``struct packed_field_u16``, allowing consumer drivers to minimize the size +of these arrays according to their custom requirements. + +The pack_fields() and unpack_fields() API functions are actually macros which +automatically select the appropriate function at compile time, based on the +type of the fields array passed in. + +An additional benefit over pack() and unpack() is that sanity checks on the +field definitions are handled at compile time with ``BUILD_BUG_ON`` rather +than only when the offending code is executed. 
These functions return void and +wrapping them to handle unexpected errors is not necessary. + +It is recommended, but not required, that you wrap your packed buffer into a +structured type with a fixed size. This generally makes it easier for the +compiler to enforce that the correct size buffer is used. + +Here is an example of how to use the fields APIs: + +.. code-block:: c + + /* Ordering inside the unpacked structure is flexible and can be different + * from the packed buffer. Here, it is optimized to reduce padding. + */ + struct data { + u64 field3; + u32 field4; + u16 field1; + u8 field2; + }; + + #define SIZE 13 + + typedef struct __packed { u8 buf[SIZE]; } packed_buf_t; + + static const struct packed_field_u8 fields[] = { + PACKED_FIELD(100, 90, struct data, field1), + PACKED_FIELD(90, 87, struct data, field2), + PACKED_FIELD(86, 30, struct data, field3), + PACKED_FIELD(29, 0, struct data, field4), + }; + + void unpack_your_data(const packed_buf_t *buf, struct data *unpacked) + { + BUILD_BUG_ON(sizeof(*buf) != SIZE); + + unpack_fields(buf, sizeof(*buf), unpacked, fields, + QUIRK_LITTLE_ENDIAN); + } + + void pack_your_data(const struct data *unpacked, packed_buf_t *buf) + { + BUILD_BUG_ON(sizeof(*buf) != SIZE); + + pack_fields(buf, sizeof(*buf), unpacked, fields, + QUIRK_LITTLE_ENDIAN); + } From aeeaa9f891737cadbb0832c2b552f3dca3b04675 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:14 -0800 Subject: [PATCH 0340/1386] ice: remove int_q_state from ice_tlan_ctx The int_q_state field of the ice_tlan_ctx structure represents the internal queue state. However, we never actually need to assign this or read this during normal operation. In fact, trying to unpack it would not be possible as it is larger than a u64. Remove this field from the ice_tlan_ctx structure, and remove its packing field from the ice_tlan_ctx_info array.
Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-5-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_common.c | 1 - drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 496d86cbd13f..e2a4f4897119 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1470,7 +1470,6 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), - ICE_CTX_STORE(ice_tlan_ctx, int_q_state, 122, 171), { 0 } }; diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 611577ebc29d..0e8ed8c226e6 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -590,7 +590,6 @@ struct ice_tlan_ctx { u8 drop_ena; u8 cache_prof_idx; u8 pkt_shaper_prof_idx; - u8 int_q_state; /* width not needed - internal - DO NOT WRITE!!! */ }; #endif /* _ICE_LAN_TX_RX_H_ */ From efe39d8b4b9d8175fd7c3610da4c8fa663154a1e Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:15 -0800 Subject: [PATCH 0341/1386] ice: use structures to keep track of queue context size The ice Tx and Rx queue context are currently stored as arrays of bytes with defined size (ICE_RXQ_CTX_SZ and ICE_TXQ_CTX_SZ). The packed queue context is often passed to other functions as a simple u8 * pointer, which does not allow tracking the size. This makes the queue context API easy to misuse, as you can pass an arbitrary u8 array or pointer. 
Introduce wrapper typedefs which use a __packed structure that has the proper fixed size for the Tx and Rx context buffers. This enables the compiler to track the size of the value and ensures that passing the wrong buffer size will be detected by the compiler. The existing APIs do not benefit much from this change, however the wrapping structures will be used to simplify the arguments of new packing functions based on the recently introduced pack_fields API. Co-developed-by: Vladimir Oltean Signed-off-by: Vladimir Oltean Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-6-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/intel/ice/ice_adminq_cmd.h | 11 +++++++-- drivers/net/ethernet/intel/ice/ice_base.c | 2 +- drivers/net/ethernet/intel/ice/ice_common.c | 24 +++++++++---------- .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 2 -- 4 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 1489a8ceec51..3bf05b135b35 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -12,6 +12,13 @@ #define ICE_AQC_TOPO_MAX_LEVEL_NUM 0x9 #define ICE_AQ_SET_MAC_FRAME_SIZE_MAX 9728 +#define ICE_RXQ_CTX_SIZE_DWORDS 8 +#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) +#define ICE_TXQ_CTX_SZ 22 + +typedef struct __packed { u8 buf[ICE_RXQ_CTX_SZ]; } ice_rxq_ctx_buf_t; +typedef struct __packed { u8 buf[ICE_TXQ_CTX_SZ]; } ice_txq_ctx_buf_t; + struct ice_aqc_generic { __le32 param0; __le32 param1; @@ -2084,10 +2091,10 @@ struct ice_aqc_add_txqs_perq { __le16 txq_id; u8 rsvd[2]; __le32 q_teid; - u8 txq_ctx[22]; + ice_txq_ctx_buf_t txq_ctx; u8 rsvd2[2]; struct ice_aqc_txsched_elem info; -}; +} __packed; /* The format of the command buffer for Add Tx LAN Queues (0x0C30) * is an array of the 
following structs. Please note that the length of diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 82a9cd4ec7ae..e7aaa0624121 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -910,7 +910,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx, + ice_set_ctx(hw, (u8 *)&tlan_ctx, (u8 *)&qg_buf->txqs[0].txq_ctx, ice_tlan_ctx_info); /* init queue specific tail reg. It is referred as diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index e2a4f4897119..64bf25aab673 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1362,29 +1362,27 @@ int ice_reset(struct ice_hw *hw, enum ice_reset_req req) /** * ice_copy_rxq_ctx_to_hw * @hw: pointer to the hardware structure - * @ice_rxq_ctx: pointer to the rxq context + * @rxq_ctx: pointer to the packed Rx queue context * @rxq_index: the index of the Rx queue * * Copies rxq context from dense structure to HW register space */ -static int -ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, u8 *ice_rxq_ctx, u32 rxq_index) +static int ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, + const ice_rxq_ctx_buf_t *rxq_ctx, + u32 rxq_index) { u8 i; - if (!ice_rxq_ctx) - return -EINVAL; - if (rxq_index > QRX_CTRL_MAX_INDEX) return -EINVAL; /* Copy each dword separately to HW */ for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { - wr32(hw, QRX_CONTEXT(i, rxq_index), - *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + u32 ctx = ((const u32 *)rxq_ctx)[i]; - ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, - *((u32 *)(ice_rxq_ctx + (i * sizeof(u32))))); + wr32(hw, QRX_CONTEXT(i, rxq_index), ctx); + + ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: 
%08X\n", i, ctx); } return 0; @@ -1429,15 +1427,15 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index) { - u8 ctx_buf[ICE_RXQ_CTX_SZ] = { 0 }; + ice_rxq_ctx_buf_t buf = {}; if (!rlan_ctx) return -EINVAL; rlan_ctx->prefena = 1; - ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info); - return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index); + ice_set_ctx(hw, (u8 *)rlan_ctx, (u8 *)&buf, ice_rlan_ctx_info); + return ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); } /* LAN Tx Queue Context */ diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 0e8ed8c226e6..a76e5b0e7861 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -371,8 +371,6 @@ enum ice_rx_flex_desc_status_error_1_bits { ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */ }; -#define ICE_RXQ_CTX_SIZE_DWORDS 8 -#define ICE_RXQ_CTX_SZ (ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32)) #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS 22 #define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS 5 #define GLTCLAN_CQ_CNTX(i, CQ) (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800)) From dc4305be467a6f84f66005cfc58557d56b5ab107 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:16 -0800 Subject: [PATCH 0342/1386] ice: use <linux/packing.h> for Tx and Rx queue context data The ice driver needs to write the Tx and Rx queue context when programming Tx and Rx queues. This is currently done using some bespoke custom logic via the ice_set_ctx() and its helper functions, along with bit position definitions in the ice_tlan_ctx_info and ice_rlan_ctx_info structures. This logic does work, but is problematic for several reasons: 1) ice_set_ctx requires a helper function for each byte size being packed, as it uses a separate function to pack u8, u16, u32, and u64 fields.
This requires 4 functions which contain near-duplicate logic with the types changed out. 2) The logic in the ice_pack_ctx_word, ice_pack_ctx_dword, and ice_pack_ctx_qword does not handle values which straddle alignment boundaries very well. This requires that several fields in the ice_tlan_ctx_info and ice_rlan_ctx_info be a size larger than their bit size should require. 3) Future support for live migration will require adding unpacking functions to take the packed hardware context and unpack it into the ice_rlan_ctx and ice_tlan_ctx structures. Implementing this would require implementing ice_get_ctx, and its associated helper functions, which essentially doubles the amount of code required. The Linux kernel has had a packing library that can handle this logic since commit 554aae35007e ("lib: Add support for generic packing operations"). The library was recently extended with support for packing or unpacking an array of fields, with a similar structure as the ice_ctx_ele structure. Replace the ice-specific ice_set_ctx() logic with the recently added pack_fields and packed_field_s infrastructure from <linux/packing.h>. For API simplicity, the Tx and Rx queue context are programmed using separate ice_pack_txq_ctx() and ice_pack_rxq_ctx(). This avoids needing to export the packed_field_s arrays. The functions take pointers to the appropriate ice_txq_ctx_buf_t and ice_rxq_ctx_buf_t types, ensuring that only buffers of the appropriate size are passed.
Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-7-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/Kconfig | 1 + drivers/net/ethernet/intel/ice/ice_base.c | 3 +- drivers/net/ethernet/intel/ice/ice_common.c | 243 +++--------------- drivers/net/ethernet/intel/ice/ice_common.h | 5 +- .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 14 - 5 files changed, 42 insertions(+), 224 deletions(-) diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index 20bc40eec487..24ec9a4f1ffa 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -292,6 +292,7 @@ config ICE select DIMLIB select LIBIE select NET_DEVLINK + select PACKING select PLDMFW select DPLL help diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index e7aaa0624121..5fe7b5a10020 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -910,8 +910,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); - ice_set_ctx(hw, (u8 *)&tlan_ctx, (u8 *)&qg_buf->txqs[0].txq_ctx, - ice_tlan_ctx_info); + ice_pack_txq_ctx(&tlan_ctx, &qg_buf->txqs[0].txq_ctx); /* init queue specific tail reg. It is referred as * transmit comm scheduler queue doorbell. 
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 64bf25aab673..8683c9ac6ced 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -6,6 +6,7 @@ #include "ice_adminq_cmd.h" #include "ice_flow.h" #include "ice_ptp_hw.h" +#include <linux/packing.h> #define ICE_PF_RESET_WAIT_COUNT 300 #define ICE_MAX_NETLIST_SIZE 10 @@ -1388,9 +1389,12 @@ static int ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, return 0; } +#define ICE_CTX_STORE(struct_name, struct_field, width, lsb) \ + PACKED_FIELD((lsb) + (width) - 1, (lsb), struct struct_name, struct_field) + /* LAN Rx Queue Context */ -static const struct ice_ctx_ele ice_rlan_ctx_info[] = { - /* Field Width LSB */ +static const struct packed_field_u8 ice_rlan_ctx_fields[] = { + /* Field Width LSB */ ICE_CTX_STORE(ice_rlan_ctx, head, 13, 0), ICE_CTX_STORE(ice_rlan_ctx, cpuid, 8, 13), ICE_CTX_STORE(ice_rlan_ctx, base, 57, 32), @@ -1411,9 +1415,23 @@ static const struct ice_ctx_ele ice_rlan_ctx_info[] = { ICE_CTX_STORE(ice_rlan_ctx, tphhead_ena, 1, 196), ICE_CTX_STORE(ice_rlan_ctx, lrxqthresh, 3, 198), ICE_CTX_STORE(ice_rlan_ctx, prefena, 1, 201), - { 0 } }; +/** + * ice_pack_rxq_ctx - Pack Rx queue context into a HW buffer + * @ctx: the Rx queue context to pack + * @buf: the HW buffer to pack into + * + * Pack the Rx queue context from the CPU-friendly unpacked buffer into its + * bit-packed HW layout.
+ */ +static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx, + ice_rxq_ctx_buf_t *buf) +{ + pack_fields(buf, sizeof(*buf), ctx, ice_rlan_ctx_fields, + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST); +} + /** * ice_write_rxq_ctx * @hw: pointer to the hardware structure @@ -1434,12 +1452,13 @@ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, rlan_ctx->prefena = 1; - ice_set_ctx(hw, (u8 *)rlan_ctx, (u8 *)&buf, ice_rlan_ctx_info); + ice_pack_rxq_ctx(rlan_ctx, &buf); + return ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); } /* LAN Tx Queue Context */ -const struct ice_ctx_ele ice_tlan_ctx_info[] = { +static const struct packed_field_u8 ice_tlan_ctx_fields[] = { /* Field Width LSB */ ICE_CTX_STORE(ice_tlan_ctx, base, 57, 0), ICE_CTX_STORE(ice_tlan_ctx, port_num, 3, 57), @@ -1468,9 +1487,22 @@ const struct ice_ctx_ele ice_tlan_ctx_info[] = { ICE_CTX_STORE(ice_tlan_ctx, drop_ena, 1, 165), ICE_CTX_STORE(ice_tlan_ctx, cache_prof_idx, 2, 166), ICE_CTX_STORE(ice_tlan_ctx, pkt_shaper_prof_idx, 3, 168), - { 0 } }; +/** + * ice_pack_txq_ctx - Pack Tx queue context into a HW buffer + * @ctx: the Tx queue context to pack + * @buf: the HW buffer to pack into + * + * Pack the Tx queue context from the CPU-friendly unpacked buffer into its + * bit-packed HW layout. 
+ */ +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf) +{ + pack_fields(buf, sizeof(*buf), ctx, ice_tlan_ctx_fields, + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST); +} + /* Sideband Queue command wrappers */ /** @@ -4554,205 +4586,6 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps, /* End of FW Admin Queue command wrappers */ -/** - * ice_pack_ctx_byte - write a byte to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_byte(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u8 src_byte, dest_byte, mask; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - src_byte = *from; - src_byte <<= shift_width; - src_byte &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_byte, dest, sizeof(dest_byte)); - - dest_byte &= ~mask; /* get the bits not changing */ - dest_byte |= src_byte; /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_byte, sizeof(dest_byte)); -} - -/** - * ice_pack_ctx_word - write a word to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_word(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u16 src_word, mask; - __le16 dest_word; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits 
until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_word = *(u16 *)from; - src_word <<= shift_width; - src_word &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_word, dest, sizeof(dest_word)); - - dest_word &= ~(cpu_to_le16(mask)); /* get the bits not changing */ - dest_word |= cpu_to_le16(src_word); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_word, sizeof(dest_word)); -} - -/** - * ice_pack_ctx_dword - write a dword to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_dword(u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u32 src_dword, mask; - __le32 dest_dword; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_dword = *(u32 *)from; - src_dword <<= shift_width; - src_dword &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_dword, dest, sizeof(dest_dword)); - - dest_dword &= ~(cpu_to_le32(mask)); /* get the bits not changing */ - dest_dword |= cpu_to_le32(src_dword); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_dword, sizeof(dest_dword)); -} - -/** - * ice_pack_ctx_qword - write a qword to a packed context structure - * @src_ctx: unpacked source context structure - * @dest_ctx: packed destination context data - * @ce_info: context element description - */ -static void ice_pack_ctx_qword(u8 
*src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - u64 src_qword, mask; - __le64 dest_qword; - u8 *from, *dest; - u16 shift_width; - - /* copy from the next struct field */ - from = src_ctx + ce_info->offset; - - /* prepare the bits and mask */ - shift_width = ce_info->lsb % 8; - mask = GENMASK_ULL(ce_info->width - 1 + shift_width, shift_width); - - /* don't swizzle the bits until after the mask because the mask bits - * will be in a different bit position on big endian machines - */ - src_qword = *(u64 *)from; - src_qword <<= shift_width; - src_qword &= mask; - - /* get the current bits from the target bit string */ - dest = dest_ctx + (ce_info->lsb / 8); - - memcpy(&dest_qword, dest, sizeof(dest_qword)); - - dest_qword &= ~(cpu_to_le64(mask)); /* get the bits not changing */ - dest_qword |= cpu_to_le64(src_qword); /* add in the new bits */ - - /* put it all back */ - memcpy(dest, &dest_qword, sizeof(dest_qword)); -} - -/** - * ice_set_ctx - set context bits in packed structure - * @hw: pointer to the hardware structure - * @src_ctx: pointer to a generic non-packed context structure - * @dest_ctx: pointer to memory for the packed structure - * @ce_info: List of Rx context elements - */ -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info) -{ - int f; - - for (f = 0; ce_info[f].width; f++) { - /* We have to deal with each element of the FW response - * using the correct size so that we are correct regardless - * of the endianness of the machine. - */ - if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) { - ice_debug(hw, ICE_DBG_QCTX, "Field %d width of %d bits larger than size of %d byte(s) ... 
skipping write\n", - f, ce_info[f].width, ce_info[f].size_of); - continue; - } - switch (ce_info[f].size_of) { - case sizeof(u8): - ice_pack_ctx_byte(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u16): - ice_pack_ctx_word(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u32): - ice_pack_ctx_dword(src_ctx, dest_ctx, &ce_info[f]); - break; - case sizeof(u64): - ice_pack_ctx_qword(src_ctx, dest_ctx, &ce_info[f]); - break; - default: - return -EINVAL; - } - } - - return 0; -} - /** * ice_get_lan_q_ctx - get the LAN queue context for the given VSI and TC * @hw: pointer to the HW struct diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 27208a60cece..a68bea3934e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -92,9 +92,8 @@ ice_aq_set_rss_key(struct ice_hw *hw, u16 vsi_handle, bool ice_check_sq_alive(struct ice_hw *hw, struct ice_ctl_q_info *cq); int ice_aq_q_shutdown(struct ice_hw *hw, bool unloading); void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode); -extern const struct ice_ctx_ele ice_tlan_ctx_info[]; -int ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx, - const struct ice_ctx_ele *ce_info); + +void ice_pack_txq_ctx(const struct ice_tlan_ctx *ctx, ice_txq_ctx_buf_t *buf); extern struct mutex ice_global_cfg_lock_sw; diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index a76e5b0e7861..31d4a445d640 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -408,20 +408,6 @@ struct ice_rlan_ctx { u8 prefena; /* NOTE: normally must be set to 1 at init */ }; -struct ice_ctx_ele { - u16 offset; - u16 size_of; - u16 width; - u16 lsb; -}; - -#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \ - .offset = offsetof(struct _struct, _ele), \ - .size_of = sizeof_field(struct _struct, _ele), \ - 
.width = _width, \ - .lsb = _lsb, \ -} - /* for hsplit_0 field of Rx RLAN context */ enum ice_rlan_ctx_rx_hsplit_0 { ICE_RLAN_RX_HSPLIT_0_NO_SPLIT = 0, From f72588a4267b2211d18328433035b629d24f6f03 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:17 -0800 Subject: [PATCH 0343/1386] ice: reduce size of queue context fields The ice_rlan_ctx and ice_tlan_ctx structures have some fields which are intentionally sized larger than necessary relative to the packed sizes the data must fit into. This was done because the original ice_set_ctx() function and its helpers did not correctly handle packing when the packed bits straddled a byte. This is no longer the case with the use of the <linux/packing.h> implementation. Save some bytes in these structures by sizing the variables to the number of bytes the actual bitpacked fields fit into. There are a couple of gaps left in the structure, which is a result of the fields being in the order they appear in the packed bit layout, but where alignment forces some extra gaps. We could fix this, saving ~8 bytes from each structure. However, these structures are not used heavily, and the resulting savings is minimal: $ bloat-o-meter ice-before-reorder.ko ice-after-reorder.ko add/remove: 0/0 grow/shrink: 1/1 up/down: 26/-70 (-44) Function old new delta ice_vsi_cfg_txq 1873 1899 +26 ice_setup_rx_ctx.constprop 1529 1459 -70 Total: Before=1459555, After=1459511, chg -0.00% Thus, the fields are left in the same order as the packed bit layout, despite the gaps this causes.
Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-8-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/intel/ice/ice_lan_tx_rx.h | 32 ++++++------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 31d4a445d640..1479b45738af 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -375,23 +375,17 @@ enum ice_rx_flex_desc_status_error_1_bits { #define ICE_TX_DRBELL_Q_CTX_SIZE_DWORDS 5 #define GLTCLAN_CQ_CNTX(i, CQ) (GLTCLAN_CQ_CNTX0(CQ) + ((i) * 0x0800)) -/* RLAN Rx queue context data - * - * The sizes of the variables may be larger than needed due to crossing byte - * boundaries. If we do not have the width of the variable set to the correct - * size then we could end up shifting bits off the top of the variable when the - * variable is at the top of a byte and crosses over into the next byte. 
- */ +/* RLAN Rx queue context data */ struct ice_rlan_ctx { u16 head; - u16 cpuid; /* bigger than needed, see above for reason */ + u8 cpuid; #define ICE_RLAN_BASE_S 7 u64 base; u16 qlen; #define ICE_RLAN_CTX_DBUF_S 7 - u16 dbuf; /* bigger than needed, see above for reason */ + u8 dbuf; #define ICE_RLAN_CTX_HBUF_S 6 - u16 hbuf; /* bigger than needed, see above for reason */ + u8 hbuf; u8 dtype; u8 dsize; u8 crcstrip; @@ -399,12 +393,12 @@ struct ice_rlan_ctx { u8 hsplit_0; u8 hsplit_1; u8 showiv; - u32 rxmax; /* bigger than needed, see above for reason */ + u16 rxmax; u8 tphrdesc_ena; u8 tphwdesc_ena; u8 tphdata_ena; u8 tphhead_ena; - u16 lrxqthresh; /* bigger than needed, see above for reason */ + u8 lrxqthresh; u8 prefena; /* NOTE: normally must be set to 1 at init */ }; @@ -535,18 +529,12 @@ enum ice_tx_ctx_desc_eipt_offload { #define ICE_LAN_TXQ_MAX_QGRPS 127 #define ICE_LAN_TXQ_MAX_QDIS 1023 -/* Tx queue context data - * - * The sizes of the variables may be larger than needed due to crossing byte - * boundaries. If we do not have the width of the variable set to the correct - * size then we could end up shifting bits off the top of the variable when the - * variable is at the top of a byte and crosses over into the next byte. 
- */ +/* Tx queue context data */ struct ice_tlan_ctx { #define ICE_TLAN_CTX_BASE_S 7 u64 base; /* base is defined in 128-byte units */ u8 port_num; - u16 cgd_num; /* bigger than needed, see above for reason */ + u8 cgd_num; u8 pf_num; u16 vmvf_num; u8 vmvf_type; @@ -557,7 +545,7 @@ struct ice_tlan_ctx { u8 tsyn_ena; u8 internal_usage_flag; u8 alt_vlan; - u16 cpuid; /* bigger than needed, see above for reason */ + u8 cpuid; u8 wb_mode; u8 tphrd_desc; u8 tphrd; @@ -566,7 +554,7 @@ struct ice_tlan_ctx { u16 qnum_in_func; u8 itr_notification_mode; u8 adjust_prof_id; - u32 qlen; /* bigger than needed, see above for reason */ + u16 qlen; u8 quanta_prof_idx; u8 tso_ena; u16 tso_qnum; From ac001acc4d353455dd9f7b8e74d2f9c01e83ace2 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:18 -0800 Subject: [PATCH 0344/1386] ice: move prefetch enable to ice_setup_rx_ctx The ice_write_rxq_ctx() function is responsible for programming the Rx Queue context into hardware. It receives the configuration in unpacked form via the ice_rlan_ctx structure. This function unconditionally modifies the context to set the prefetch enable bit. This was done by commit c31a5c25bb19 ("ice: Always set prefena when configuring an Rx queue"). Setting this bit makes sense, since prefetching descriptors is almost always the preferred behavior. However, the ice_write_rxq_ctx() function is not the place that actually defines the queue context. We initialize the Rx Queue context in ice_setup_rx_ctx(). It is surprising to have the Rx queue context changed by a function who's responsibility is to program the given context to hardware. Following the principle of least surprise, move the setting of the prefetch enable bit out of ice_write_rxq_ctx() and into the ice_setup_rx_ctx(). 
Signed-off-by: Jacob Keller Reviewed-by: Przemek Kitszel Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-9-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_base.c | 3 +++ drivers/net/ethernet/intel/ice/ice_common.c | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5fe7b5a10020..b2af8e3586f7 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -454,6 +454,9 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring) /* Rx queue threshold in units of 64 */ rlan_ctx.lrxqthresh = 1; + /* Enable descriptor prefetch */ + rlan_ctx.prefena = 1; + /* PF acts as uplink for switchdev; set flex descriptor with src_vsi * metadata and flags to allow redirecting to PR netdev */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 8683c9ac6ced..4c6cc48aaef0 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1433,14 +1433,13 @@ static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx, } /** - * ice_write_rxq_ctx + * ice_write_rxq_ctx - Write Rx Queue context to hardware * @hw: pointer to the hardware structure * @rlan_ctx: pointer to the rxq context * @rxq_index: the index of the Rx queue * - * Converts rxq context from sparse to dense structure and then writes - * it to HW register space and enables the hardware to prefetch descriptors - * instead of only fetching them on demand + * Pack the sparse Rx Queue context into dense hardware format and write it + * into the HW register space. 
*/ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index) @@ -1450,8 +1449,6 @@ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, if (!rlan_ctx) return -EINVAL; - rlan_ctx->prefena = 1; - ice_pack_rxq_ctx(rlan_ctx, &buf); return ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); From 39be64c34ca303094d11a23fc9b36e73e3adb9dc Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Tue, 10 Dec 2024 12:27:19 -0800 Subject: [PATCH 0345/1386] ice: cleanup Rx queue context programming functions The ice_copy_rxq_ctx_to_hw() and ice_write_rxq_ctx() functions perform some defensive checks which are typically frowned upon by kernel style guidelines. In particular, NULL checks on buffers which point to the stack are discouraged, especially when the functions are static and only called once. Checks of this sort only serve to hide potential programming error, as we will not produce the normal crash dump on a NULL access. In addition, ice_copy_rxq_ctx_to_hw() cannot fail in another way, so could be made void. Future support for VF Live Migration will need to introduce an inverse function for reading Rx queue context from HW registers to unpack it, as well as functions to pack and unpack Tx queue context from HW. Rather than copying these style issues into the new functions, lets first cleanup the existing code. For the ice_copy_rxq_ctx_to_hw() function: * Move the Rx queue index check out of this function. * Convert the function to a void return. * Use a simple int variable instead of a u8 for the for loop index, and initialize it inside the for loop. * Update the function description to better align with kernel doc style. For the ice_write_rxq_ctx() function: * Move the Rx queue index check into this function. * Update the function description with a Returns: to align with kernel doc style. 
These changes align the existing write functions to current kernel style, and will align with the style of the new functions added when we implement live migration in a future series. Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-10-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_common.c | 28 ++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 4c6cc48aaef0..f89bc6ede315 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -1361,32 +1361,23 @@ int ice_reset(struct ice_hw *hw, enum ice_reset_req req) } /** - * ice_copy_rxq_ctx_to_hw + * ice_copy_rxq_ctx_to_hw - Copy packed Rx queue context to HW registers * @hw: pointer to the hardware structure * @rxq_ctx: pointer to the packed Rx queue context * @rxq_index: the index of the Rx queue - * - * Copies rxq context from dense structure to HW register space */ -static int ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, - const ice_rxq_ctx_buf_t *rxq_ctx, - u32 rxq_index) +static void ice_copy_rxq_ctx_to_hw(struct ice_hw *hw, + const ice_rxq_ctx_buf_t *rxq_ctx, + u32 rxq_index) { - u8 i; - - if (rxq_index > QRX_CTRL_MAX_INDEX) - return -EINVAL; - /* Copy each dword separately to HW */ - for (i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { + for (int i = 0; i < ICE_RXQ_CTX_SIZE_DWORDS; i++) { u32 ctx = ((const u32 *)rxq_ctx)[i]; wr32(hw, QRX_CONTEXT(i, rxq_index), ctx); ice_debug(hw, ICE_DBG_QCTX, "qrxdata[%d]: %08X\n", i, ctx); } - - return 0; } #define ICE_CTX_STORE(struct_name, struct_field, width, lsb) \ @@ -1435,23 +1426,26 @@ static void ice_pack_rxq_ctx(const struct ice_rlan_ctx *ctx, /** * ice_write_rxq_ctx - Write Rx Queue context to hardware * @hw: pointer to the hardware structure - * @rlan_ctx: 
pointer to the rxq context + * @rlan_ctx: pointer to the unpacked Rx queue context * @rxq_index: the index of the Rx queue * * Pack the sparse Rx Queue context into dense hardware format and write it * into the HW register space. + * + * Return: 0 on success, or -EINVAL if the Rx queue index is invalid. */ int ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx, u32 rxq_index) { ice_rxq_ctx_buf_t buf = {}; - if (!rlan_ctx) + if (rxq_index > QRX_CTRL_MAX_INDEX) return -EINVAL; ice_pack_rxq_ctx(rlan_ctx, &buf); + ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); - return ice_copy_rxq_ctx_to_hw(hw, &buf, rxq_index); + return 0; } /* LAN Tx Queue Context */ From d51cfd5f4fe01cd3d212703c8fe5dd6886da969c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2024 18:33:50 +0000 Subject: [PATCH 0346/1386] ipv6: mcast: reduce ipv6_chk_mcast_addr() indentation Add a label and two gotos to shorten lines by two tabulations, to ease code review of following patches. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20241210183352.86530-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/mcast.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index b244dbf61d5f..afe707b6841d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1021,29 +1021,31 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, rcu_read_lock(); idev = __in6_dev_get(dev); - if (idev) { - for_each_mc_rcu(idev, mc) { - if (ipv6_addr_equal(&mc->mca_addr, group)) + if (!idev) + goto unlock; + for_each_mc_rcu(idev, mc) { + if (ipv6_addr_equal(&mc->mca_addr, group)) + break; + } + if (!mc) + goto unlock; + if (src_addr && !ipv6_addr_any(src_addr)) { + struct ip6_sf_list *psf; + + for_each_psf_rcu(mc, psf) { + if (ipv6_addr_equal(&psf->sf_addr, src_addr)) break; } - if (mc) { - if (src_addr && !ipv6_addr_any(src_addr)) { - 
struct ip6_sf_list *psf; - - for_each_psf_rcu(mc, psf) { - if (ipv6_addr_equal(&psf->sf_addr, src_addr)) - break; - } - if (psf) - rv = psf->sf_count[MCAST_INCLUDE] || - psf->sf_count[MCAST_EXCLUDE] != - mc->mca_sfcount[MCAST_EXCLUDE]; - else - rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; - } else - rv = true; /* don't filter unspecified source */ - } + if (psf) + rv = psf->sf_count[MCAST_INCLUDE] || + psf->sf_count[MCAST_EXCLUDE] != + mc->mca_sfcount[MCAST_EXCLUDE]; + else + rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; + } else { + rv = true; /* don't filter unspecified source */ } +unlock: rcu_read_unlock(); return rv; } From 626962911ad886f2b8e6d6f612289f9c7268b435 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2024 18:33:51 +0000 Subject: [PATCH 0347/1386] ipv6: mcast: annotate data-races around mc->mca_sfcount[MCAST_EXCLUDE] mc->mca_sfcount[MCAST_EXCLUDE] is read locklessly from ipv6_chk_mcast_addr(). Add READ_ONCE() and WRITE_ONCE() annotations accordingly. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20241210183352.86530-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/mcast.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index afe707b6841d..09622142b070 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1039,9 +1039,9 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, if (psf) rv = psf->sf_count[MCAST_INCLUDE] || psf->sf_count[MCAST_EXCLUDE] != - mc->mca_sfcount[MCAST_EXCLUDE]; + READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]); else - rv = mc->mca_sfcount[MCAST_EXCLUDE] != 0; + rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0; } else { rv = true; /* don't filter unspecified source */ } @@ -2505,7 +2505,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, sf_markstate(pmc); isexclude = pmc->mca_sfmode == MCAST_EXCLUDE; if (!delta) - pmc->mca_sfcount[sfmode]++; + 
WRITE_ONCE(pmc->mca_sfcount[sfmode], + pmc->mca_sfcount[sfmode] + 1); err = 0; for (i = 0; i < sfcount; i++) { err = ip6_mc_add1_src(pmc, sfmode, &psfsrc[i]); @@ -2516,7 +2517,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca, int j; if (!delta) - pmc->mca_sfcount[sfmode]--; + WRITE_ONCE(pmc->mca_sfcount[sfmode], + pmc->mca_sfcount[sfmode] - 1); for (j = 0; j < i; j++) ip6_mc_del1_src(pmc, sfmode, &psfsrc[j]); } else if (isexclude != (pmc->mca_sfcount[MCAST_EXCLUDE] != 0)) { @@ -2561,7 +2563,8 @@ static void ip6_mc_clear_src(struct ifmcaddr6 *pmc) RCU_INIT_POINTER(pmc->mca_sources, NULL); pmc->mca_sfmode = MCAST_EXCLUDE; pmc->mca_sfcount[MCAST_INCLUDE] = 0; - pmc->mca_sfcount[MCAST_EXCLUDE] = 1; + /* Paired with the READ_ONCE() from ipv6_chk_mcast_addr() */ + WRITE_ONCE(pmc->mca_sfcount[MCAST_EXCLUDE], 1); } /* called with mc_lock */ From 00bf2032e97691c4b53427b33a85b134324e2a94 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 10 Dec 2024 18:33:52 +0000 Subject: [PATCH 0348/1386] ipv6: mcast: annotate data-race around psf->sf_count[MCAST_XXX] psf->sf_count[MCAST_XXX] fields are read locklessly from ipv6_chk_mcast_addr() and igmp6_mcf_seq_show(). Add READ_ONCE() and WRITE_ONCE() annotations accordingly. 
Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://patch.msgid.link/20241210183352.86530-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/mcast.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 09622142b070..5ca8692d565d 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1037,8 +1037,8 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, break; } if (psf) - rv = psf->sf_count[MCAST_INCLUDE] || - psf->sf_count[MCAST_EXCLUDE] != + rv = READ_ONCE(psf->sf_count[MCAST_INCLUDE]) || + READ_ONCE(psf->sf_count[MCAST_EXCLUDE]) != READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]); else rv = READ_ONCE(mc->mca_sfcount[MCAST_EXCLUDE]) != 0; @@ -2287,7 +2287,7 @@ static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode, /* source filter not found, or count wrong => bug */ return -ESRCH; } - psf->sf_count[sfmode]--; + WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] - 1); if (!psf->sf_count[MCAST_INCLUDE] && !psf->sf_count[MCAST_EXCLUDE]) { struct inet6_dev *idev = pmc->idev; @@ -2393,7 +2393,7 @@ static int ip6_mc_add1_src(struct ifmcaddr6 *pmc, int sfmode, rcu_assign_pointer(pmc->mca_sources, psf); } } - psf->sf_count[sfmode]++; + WRITE_ONCE(psf->sf_count[sfmode], psf->sf_count[sfmode] + 1); return 0; } @@ -3079,8 +3079,8 @@ static int igmp6_mcf_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, &state->im->mca_addr, &psf->sf_addr, - psf->sf_count[MCAST_INCLUDE], - psf->sf_count[MCAST_EXCLUDE]); + READ_ONCE(psf->sf_count[MCAST_INCLUDE]), + READ_ONCE(psf->sf_count[MCAST_EXCLUDE])); } return 0; } From 19ce8cd3046587efbd2c6253947be7c22dfccc18 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 9 Dec 2024 20:38:03 +0100 Subject: [PATCH 0349/1386] tcp: Measure TIME-WAIT reuse delay with millisecond precision Prepare ground for TIME-WAIT socket reuse with subsecond delay. 
Today the last TS.Recent update timestamp, recorded in seconds and stored in the tp->ts_recent_stamp and tw->tw_ts_recent_stamp fields, has two purposes. Firstly, it is used to track the age of the last recorded TS.Recent value to detect when that value becomes outdated due to potential wrap-around of the other TCP timestamp clock (RFC 7323, section 5.5). For this purpose a second-based timestamp is completely sufficient as even in the worst case scenario of a peer using a high resolution microsecond timestamp, the wrap-around interval is ~36 minutes long. Secondly, it serves as a threshold value for allowing TIME-WAIT socket reuse. A TIME-WAIT socket can be reused only once the virtual 1 Hz clock, ktime_get_seconds, is past the TS.Recent update timestamp. The purpose behind delaying the TIME-WAIT socket reuse is to wait for the other TCP timestamp clock to tick at least once before reusing the connection. It is only then that the PAWS mechanism for the reopened connection can detect old duplicate segments from the previous connection incarnation (RFC 7323, appendix B.2). In this case using a timestamp with second resolution not only blocks the way toward allowing faster TIME-WAIT reuse after a shorter subsecond delay, but also makes it impossible to reliably delay TW reuse by one second. As Eric Dumazet has pointed out [1], due to timestamp rounding, the TW reuse delay will actually be between (0, 1] seconds, and 0.5 seconds on average. We delay TW reuse for one full second only when the last TS.Recent update coincides with our virtual 1 Hz clock tick. Considering the above, introduce a dedicated field to store a millisecond timestamp of transition into the TIME-WAIT state. Place it in an existing 4-byte hole inside the inet_timewait_sock structure to avoid an additional memory cost. Use the new timestamp to (i) reliably delay TIME-WAIT reuse by one second, and (ii) prepare for configurable subsecond reuse delay in the subsequent change.
We assume here that a full one second delay was the original intention in [2] because it accounts for the worst case scenario of the other TCP using the slowest recommended 1 Hz timestamp clock. A more involved alternative would be to change the resolution of the last TS.Recent update timestamp, tw->tw_ts_recent_stamp, to milliseconds. [1] https://lore.kernel.org/netdev/CANn89iKB4GFd8sVzCbRttqw_96o3i2wDhX-3DraQtsceNGYwug@mail.gmail.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8439924316d5bcb266d165b93d632a4b4b859af Signed-off-by: Jakub Sitnicki Reviewed-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20241209-jakub-krn-909-poc-msec-tw-tstamp-v2-1-66aca0eed03e@cloudflare.com Signed-off-by: Jakub Kicinski --- include/net/inet_timewait_sock.h | 4 ++++ net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv4/tcp_minisocks.c | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 62c0a7e65d6b..67a313575780 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -74,6 +74,10 @@ struct inet_timewait_sock { tw_tos : 8; u32 tw_txhash; u32 tw_priority; + /** + * @tw_reuse_stamp: Time of entry into %TCP_TIME_WAIT state in msec. + */ + u32 tw_entry_stamp; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a38c8b1f44db..3b6ba1d16921 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -120,6 +120,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); int ts_recent_stamp; + u32 reuse_thresh; if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) reuse = 0; @@ -162,9 +163,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. 
*/ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); + reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + MSEC_PER_SEC; if (ts_recent_stamp && - (!twp || (reuse && time_after32(ktime_get_seconds(), - ts_recent_stamp)))) { + (!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) { /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk * and releasing the bucket lock. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7121d8573928..b089b08e9617 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -157,8 +157,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, rcv_nxt); if (tmp_opt.saw_tstamp) { + u64 ts = tcp_clock_ms(); + + WRITE_ONCE(tw->tw_entry_stamp, ts); WRITE_ONCE(tcptw->tw_ts_recent_stamp, - ktime_get_seconds()); + div_u64(ts, MSEC_PER_SEC)); WRITE_ONCE(tcptw->tw_ts_recent, tmp_opt.rcv_tsval); } @@ -316,6 +319,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_mark = sk->sk_mark; tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; + /* refreshed when we enter true TIME-WAIT state */ + tw->tw_entry_stamp = tcp_time_stamp_ms(tp); tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_rcv_wnd = tcp_receive_window(tp); From ca6a6f93867a9763bdf8685c788e2e558d10975f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 9 Dec 2024 20:38:04 +0100 Subject: [PATCH 0350/1386] tcp: Add sysctl to configure TIME-WAIT reuse delay Today we have a hardcoded delay of 1 sec before a TIME-WAIT socket can be reused by reopening a connection. This is a safe choice based on an assumption that the other TCP timestamp clock frequency, which is unknown to us, may be as low as 1 Hz (RFC 7323, section 5.4). 
However, this means that in the presence of short-lived connections with an RTT of a couple of milliseconds, the time during which a 4-tuple is blocked from reuse can be orders of magnitude longer than the connection lifetime. Combined with a reduced pool of ephemeral ports, when using IP_LOCAL_PORT_RANGE to share an egress IP address between hosts [1], the long TIME-WAIT reuse delay can lead to port exhaustion, where all available 4-tuples are tied up in TIME-WAIT state. Turn the reuse delay into a per-netns setting so that sysadmins can make more aggressive assumptions about remote TCP timestamp clock frequency and shorten the delay in order to allow connections to reincarnate faster. Note that applications can completely bypass the TIME-WAIT delay protection already today by locking the local port with bind() before connecting. Such immediate connection reuse may result in PAWS failing to detect old duplicate segments, leaving us with just the sequence number check as a safety net. This new configurable offers a trade-off where the sysadmin can balance between the risk of PAWS detection failing to act versus exhausting ports by having sockets tied up in TIME-WAIT state for too long.
[1] https://lpc.events/event/16/contributions/1349/ Signed-off-by: Jakub Sitnicki Reviewed-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20241209-jakub-krn-909-poc-msec-tw-tstamp-v2-2-66aca0eed03e@cloudflare.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 14 ++++++++++++++ .../net_cachelines/netns_ipv4_sysctl.rst | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ net/ipv4/tcp_ipv4.c | 4 +++- 5 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index eacf8983e230..2f2b00295836 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1000,6 +1000,20 @@ tcp_tw_reuse - INTEGER Default: 2 +tcp_tw_reuse_delay - UNSIGNED INTEGER + The delay in milliseconds before a TIME-WAIT socket can be reused by a + new connection, if TIME-WAIT socket reuse is enabled. The actual reuse + threshold is within [N, N+1] range, where N is the requested delay in + milliseconds, to ensure the delay interval is never shorter than the + configured value. + + This setting contains an assumption about the other TCP timestamp clock + tick interval. It should not be set to a value lower than the peer's + clock tick for PAWS (Protection Against Wrapped Sequence numbers) + mechanism work correctly for the reused connection. + + Default: 1000 (milliseconds) + tcp_window_scaling - BOOLEAN Enable window scaling as defined in RFC1323. 
diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index 629da6dc6d74..de0263302f16 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -79,6 +79,7 @@ u8 sysctl_tcp_retries1 u8 sysctl_tcp_retries2 u8 sysctl_tcp_orphan_retries u8 sysctl_tcp_tw_reuse timewait_sock_ops +unsigned_int sysctl_tcp_tw_reuse_delay timewait_sock_ops int sysctl_tcp_fin_timeout TCP_LAST_ACK/tcp_rcv_state_process unsigned_int sysctl_tcp_notsent_lowat read_mostly tcp_notsent_lowat/tcp_stream_memory_free u8 sysctl_tcp_sack tcp_syn_options diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3c014170e001..46452da35206 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -175,6 +175,7 @@ struct netns_ipv4 { u8 sysctl_tcp_retries2; u8 sysctl_tcp_orphan_retries; u8 sysctl_tcp_tw_reuse; + unsigned int sysctl_tcp_tw_reuse_delay; int sysctl_tcp_fin_timeout; u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a79b2a52ce01..42cb5dc9cb24 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,6 +45,7 @@ static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX; static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; +static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1065,6 +1066,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "tcp_tw_reuse_delay", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse_delay, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = 
&tcp_tw_reuse_delay_max, + }, { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3b6ba1d16921..e45222d5fc2e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -163,7 +163,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); - reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + MSEC_PER_SEC; + reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse_delay); if (ts_recent_stamp && (!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) { /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk @@ -3458,6 +3459,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_tw_reuse = 2; + net->ipv4.sysctl_tcp_tw_reuse_delay = 1 * MSEC_PER_SEC; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); From 175dd9079ecbd86d0e10927c442d64519baf5809 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 10 Dec 2024 10:45:37 +0100 Subject: [PATCH 0351/1386] mlxsw: spectrum_flower: Do not allow mixing sample and mirror actions The device does not support multiple mirror actions per rule and the driver rejects such configuration: # tc filter add dev swp1 ingress pref 1 proto ip flower skip_sw action mirred egress mirror dev swp2 action mirred egress mirror dev swp3 Error: mlxsw_spectrum: Multiple mirror actions per rule are not supported. We have an error talking to the kernel Internally, the sample action is implemented by the device by mirroring to the CPU port. Therefore, mixing sample and mirror actions in a single rule does not work correctly and results in the last action effect. 
Solve by rejecting such misconfiguration: # tc filter add dev swp1 ingress pref 1 proto ip flower skip_sw action mirred egress mirror dev swp2 action sample rate 100 group 1 Error: mlxsw_spectrum: Sample action after mirror action is not supported. We have an error talking to the kernel # tc filter add dev swp1 ingress pref 1 proto ip flower skip_sw action sample rate 100 group 1 action mirred egress mirror dev swp2 Error: mlxsw_spectrum: Mirror action after sample action is not supported. We have an error talking to the kernel Reported-by: Vladyslav Mykhaliuk Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Michal Swiatkowski Reviewed-by: Kalesh AP Link: https://patch.msgid.link/d6c979914e8706dbe1dedbaf29ffffb0b8d71166.1733822570.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index f07955b5439f..6a4a81c63451 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -192,6 +192,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } + if (sample_act_count) { + NL_SET_ERR_MSG_MOD(extack, "Mirror action after sample action is not supported"); + return -EOPNOTSUPP; + } + err = mlxsw_sp_acl_rulei_act_mirror(mlxsw_sp, rulei, block, out_dev, extack); @@ -265,6 +270,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, return -EOPNOTSUPP; } + if (mirror_act_count) { + NL_SET_ERR_MSG_MOD(extack, "Sample action after mirror action is not supported"); + return -EOPNOTSUPP; + } + err = mlxsw_sp_acl_rulei_act_sample(mlxsw_sp, rulei, block, act->sample.psample_group, From 3fa2540d93d85ad18456dbd29386c737ad3f7e02 Mon Sep 17 00:00:00 2001 From: "Russell 
King (Oracle)" Date: Tue, 10 Dec 2024 12:38:26 +0000 Subject: [PATCH 0352/1386] net: fec: use phydev->eee_cfg.tx_lpi_timer Rather than maintaining a private copy of the LPI timer, make use of the LPI timer maintained by phylib. In any case, phylib overwrites the value of tx_lpi_timer set by the driver in phy_ethtool_get_eee(). Note that feb->eee.tx_lpi_timer is initialised to zero, which is just the same with phylib's copy, so there should be no functional change. Signed-off-by: Russell King (Oracle) Tested-by: Wei Fang Link: https://patch.msgid.link/E1tKzVS-006c67-IJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fec.h | 2 -- drivers/net/ethernet/freescale/fec_main.c | 16 ++++++---------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 1cca0425d493..c81f2ea588f2 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -671,8 +671,6 @@ struct fec_enet_private { unsigned int tx_time_itr; unsigned int itr_clk_rate; - /* tx lpi eee mode */ - struct ethtool_keee eee; unsigned int clk_ref_rate; /* ptp clock period in ns*/ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1b55047c0237..b2daed55bf6c 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2045,14 +2045,14 @@ static int fec_enet_us_to_tx_cycle(struct net_device *ndev, int us) return us * (fep->clk_ref_rate / 1000) / 1000; } -static int fec_enet_eee_mode_set(struct net_device *ndev, bool enable) +static int fec_enet_eee_mode_set(struct net_device *ndev, u32 lpi_timer, + bool enable) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_keee *p = &fep->eee; unsigned int sleep_cycle, wake_cycle; if (enable) { - sleep_cycle = fec_enet_us_to_tx_cycle(ndev, p->tx_lpi_timer); + sleep_cycle = fec_enet_us_to_tx_cycle(ndev, 
lpi_timer); wake_cycle = sleep_cycle; } else { sleep_cycle = 0; @@ -2105,7 +2105,9 @@ static void fec_enet_adjust_link(struct net_device *ndev) napi_enable(&fep->napi); } if (fep->quirks & FEC_QUIRK_HAS_EEE) - fec_enet_eee_mode_set(ndev, phy_dev->enable_tx_lpi); + fec_enet_eee_mode_set(ndev, + phy_dev->eee_cfg.tx_lpi_timer, + phy_dev->enable_tx_lpi); } else { if (fep->link) { netif_stop_queue(ndev); @@ -3181,7 +3183,6 @@ static int fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_keee *p = &fep->eee; if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) return -EOPNOTSUPP; @@ -3189,8 +3190,6 @@ fec_enet_get_eee(struct net_device *ndev, struct ethtool_keee *edata) if (!netif_running(ndev)) return -ENETDOWN; - edata->tx_lpi_timer = p->tx_lpi_timer; - return phy_ethtool_get_eee(ndev->phydev, edata); } @@ -3198,7 +3197,6 @@ static int fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata) { struct fec_enet_private *fep = netdev_priv(ndev); - struct ethtool_keee *p = &fep->eee; if (!(fep->quirks & FEC_QUIRK_HAS_EEE)) return -EOPNOTSUPP; @@ -3206,8 +3204,6 @@ fec_enet_set_eee(struct net_device *ndev, struct ethtool_keee *edata) if (!netif_running(ndev)) return -ENETDOWN; - p->tx_lpi_timer = edata->tx_lpi_timer; - return phy_ethtool_set_eee(ndev->phydev, edata); } From 66c366392e55ae07e37699eeacca50f01b0bb879 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:11 +0000 Subject: [PATCH 0353/1386] net: dsa: remove check for dp->pl in EEE methods When user ports are initialised, a phylink instance is always created, and so dp->pl will always be non-NULL. The EEE methods are only used for user ports, so checking for dp->pl to be NULL makes no sense. No other phylink-calling method implements similar checks in DSA. Remove this unnecessary check. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL13z-006cZ7-BZ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- net/dsa/user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/user.c b/net/dsa/user.c index c736c019e2af..e1a0b153c353 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1230,7 +1230,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev || !dp->pl) + if (!dev->phydev) return -ENODEV; if (!ds->ops->set_mac_eee) @@ -1250,7 +1250,7 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e) int ret; /* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev || !dp->pl) + if (!dev->phydev) return -ENODEV; if (!ds->ops->get_mac_eee) From 9723a77318b7c0cfd06ea207e52a042f8c815318 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:16 +0000 Subject: [PATCH 0354/1386] net: dsa: add hook to determine whether EEE is supported Add a hook to determine whether the switch supports EEE. This will return false if the switch does not, or true if it does. If the method is not implemented, we assume (currently) that the switch supports EEE. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL144-006cZD-El@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 1 + net/dsa/user.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index 72ae65e7246a..aaa75bbaa0ea 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -988,6 +988,7 @@ struct dsa_switch_ops { /* * Port's MAC EEE settings */ + bool (*support_eee)(struct dsa_switch *ds, int port); int (*set_mac_eee)(struct dsa_switch *ds, int port, struct ethtool_keee *e); int (*get_mac_eee)(struct dsa_switch *ds, int port, diff --git a/net/dsa/user.c b/net/dsa/user.c index e1a0b153c353..a74339680010 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1229,6 +1229,10 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) struct dsa_switch *ds = dp->ds; int ret; + /* Check whether the switch supports EEE */ + if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + return -EOPNOTSUPP; + /* Port's PHY and MAC both need to be EEE capable */ if (!dev->phydev) return -ENODEV; @@ -1249,6 +1253,10 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e) struct dsa_switch *ds = dp->ds; int ret; + /* Check whether the switch supports EEE */ + if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + return -EOPNOTSUPP; + /* Port's PHY and MAC both need to be EEE capable */ if (!dev->phydev) return -ENODEV; From 99379f587278c818777cb4778e2c79c6c1440c65 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:21 +0000 Subject: [PATCH 0355/1386] net: dsa: provide implementation of .support_eee() Provide a trivial implementation for the .support_eee() method which switch drivers can use to simply indicate that they support EEE on all their user ports. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL149-006cZJ-JJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 1 + net/dsa/port.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/net/dsa.h b/include/net/dsa.h index aaa75bbaa0ea..4aeedb296d67 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1384,5 +1384,6 @@ static inline bool dsa_user_dev_check(const struct net_device *dev) netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); +bool dsa_supports_eee(struct dsa_switch *ds, int port); #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index ee0aaec4c8e0..5c9d1798e830 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1575,6 +1575,22 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, cpu_dp->tag_ops = tag_ops; } +/* dsa_supports_eee - indicate that EEE is supported + * @ds: pointer to &struct dsa_switch + * @port: port index + * + * A default implementation for the .support_eee() DSA operations member, + * which drivers can use to indicate that they support EEE on all of their + * user ports. + * + * Returns: true + */ +bool dsa_supports_eee(struct dsa_switch *ds, int port) +{ + return true; +} +EXPORT_SYMBOL_GPL(dsa_supports_eee); + static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) From c86692fc2cb77d94dd8c166c2b9017f196d02a84 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:26 +0000 Subject: [PATCH 0356/1386] net: dsa: b53/bcm_sf2: implement .support_eee() method Implement the .support_eee() method to indicate that EEE is not supported by two switch variants, rather than making these checks in the .set_mac_eee() and .get_mac_eee() methods. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14E-006cZU-Nc@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/b53/b53_common.c | 13 +++++++------ drivers/net/dsa/b53/b53_priv.h | 1 + drivers/net/dsa/bcm_sf2.c | 1 + 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 285785c942b0..0561b60f668f 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2224,13 +2224,16 @@ int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) } EXPORT_SYMBOL(b53_eee_init); -int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) +bool b53_support_eee(struct dsa_switch *ds, int port) { struct b53_device *dev = ds->priv; - if (is5325(dev) || is5365(dev)) - return -EOPNOTSUPP; + return !is5325(dev) && !is5365(dev); +} +EXPORT_SYMBOL(b53_support_eee); +int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) +{ return 0; } EXPORT_SYMBOL(b53_get_mac_eee); @@ -2240,9 +2243,6 @@ int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) struct b53_device *dev = ds->priv; struct ethtool_keee *p = &dev->ports[port].eee; - if (is5325(dev) || is5365(dev)) - return -EOPNOTSUPP; - p->eee_enabled = e->eee_enabled; b53_eee_enable_set(ds, port, e->eee_enabled); @@ -2298,6 +2298,7 @@ static const struct dsa_switch_ops b53_switch_ops = { .phylink_get_caps = b53_phylink_get_caps, .port_enable = b53_enable_port, .port_disable = b53_disable_port, + .support_eee = b53_support_eee, .get_mac_eee = b53_get_mac_eee, .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 05141176daf5..99e5cfc98ae8 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -384,6 +384,7 @@ int b53_enable_port(struct 
dsa_switch *ds, int port, struct phy_device *phy); void b53_disable_port(struct dsa_switch *ds, int port); void b53_brcm_hdr_setup(struct dsa_switch *ds, int port); int b53_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy); +bool b53_support_eee(struct dsa_switch *ds, int port); int b53_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e); int b53_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 43bde1f583ff..a53fb6191e6b 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1232,6 +1232,7 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .set_wol = bcm_sf2_sw_set_wol, .port_enable = bcm_sf2_port_setup, .port_disable = bcm_sf2_port_disable, + .support_eee = b53_support_eee, .get_mac_eee = b53_get_mac_eee, .set_mac_eee = b53_set_mac_eee, .port_bridge_join = b53_br_join, From 7eb4f3d9fe173d71b9f9fad7e426e528fcb59b74 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:31 +0000 Subject: [PATCH 0357/1386] net: dsa: mt753x: implement .support_eee() method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the .support_eee() method by using the generic helper as all user ports support EEE. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Arınç ÜNAL Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14J-006cZa-Rh@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mt7530.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 086b8b3d5b40..9605febd3573 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -3238,6 +3238,7 @@ const struct dsa_switch_ops mt7530_switch_ops = { .port_mirror_add = mt753x_port_mirror_add, .port_mirror_del = mt753x_port_mirror_del, .phylink_get_caps = mt753x_phylink_get_caps, + .support_eee = dsa_supports_eee, .get_mac_eee = mt753x_get_mac_eee, .set_mac_eee = mt753x_set_mac_eee, .conduit_state_change = mt753x_conduit_state_change, From fe3ef44385b217c49fd1bfcab3c221d34174e1b4 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:36 +0000 Subject: [PATCH 0358/1386] net: dsa: qca8k: implement .support_eee() method Implement the .support_eee() method by using the generic helper as all user ports support EEE. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14O-006cZg-VM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/qca/qca8k-8xxx.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index 59b4a7240b58..ec74e3c2b0e9 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -2016,6 +2016,7 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .get_ethtool_stats = qca8k_get_ethtool_stats, .get_sset_count = qca8k_get_sset_count, .set_ageing_time = qca8k_set_ageing_time, + .support_eee = dsa_supports_eee, .get_mac_eee = qca8k_get_mac_eee, .set_mac_eee = qca8k_set_mac_eee, .port_enable = qca8k_port_enable, From eb3126e720e7629474332417dae256d471727865 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:42 +0000 Subject: [PATCH 0359/1386] net: dsa: mv88e6xxx: implement .support_eee() method Implement the .support_eee() method by using the generic helper as all user ports support EEE. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14U-006cZm-2K@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/mv88e6xxx/chip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 34708c739b04..570c8642d387 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -7099,6 +7099,7 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .get_sset_count = mv88e6xxx_get_sset_count, .port_max_mtu = mv88e6xxx_get_max_mtu, .port_change_mtu = mv88e6xxx_change_mtu, + .support_eee = dsa_supports_eee, .get_mac_eee = mv88e6xxx_get_mac_eee, .set_mac_eee = mv88e6xxx_set_mac_eee, .get_eeprom_len = mv88e6xxx_get_eeprom_len, From 801fd546c1cad69a00cef0a300e9f293d8e50432 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:47 +0000 Subject: [PATCH 0360/1386] net: dsa: ksz: implement .support_eee() method Implement the .support_eee() method by reusing the ksz_validate_eee() method as a template, renaming the function, changing the return type and values, and removing it from the ksz_set_mac_eee() and ksz_get_mac_eee() methods. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14Z-006cZs-6o@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index f5822c57be32..94f9aa983ff6 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -3454,12 +3454,12 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) return -EOPNOTSUPP; } -static int ksz_validate_eee(struct dsa_switch *ds, int port) +static bool ksz_support_eee(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; if (!dev->info->internal_phy[port]) - return -EOPNOTSUPP; + return false; switch (dev->chip_id) { case KSZ8563_CHIP_ID: @@ -3471,21 +3471,15 @@ static int ksz_validate_eee(struct dsa_switch *ds, int port) case KSZ9896_CHIP_ID: case KSZ9897_CHIP_ID: case LAN9646_CHIP_ID: - return 0; + return true; } - return -EOPNOTSUPP; + return false; } static int ksz_get_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { - int ret; - - ret = ksz_validate_eee(ds, port); - if (ret) - return ret; - /* There is no documented control of Tx LPI configuration. 
*/ e->tx_lpi_enabled = true; @@ -3501,11 +3495,6 @@ static int ksz_set_mac_eee(struct dsa_switch *ds, int port, struct ethtool_keee *e) { struct ksz_device *dev = ds->priv; - int ret; - - ret = ksz_validate_eee(ds, port); - if (ret) - return ret; if (!e->tx_lpi_enabled) { dev_err(dev->dev, "Disabling EEE Tx LPI is not supported\n"); @@ -4651,6 +4640,7 @@ static const struct dsa_switch_ops ksz_switch_ops = { .cls_flower_add = ksz_cls_flower_add, .cls_flower_del = ksz_cls_flower_del, .port_setup_tc = ksz_setup_tc, + .support_eee = ksz_support_eee, .get_mac_eee = ksz_get_mac_eee, .set_mac_eee = ksz_set_mac_eee, .port_get_default_prio = ksz_port_get_default_prio, From 88325a291a0cd077bf49b889af605e683b5f956e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:52 +0000 Subject: [PATCH 0361/1386] net: dsa: require .support_eee() method to be implemented Now that we have updated all drivers, switch DSA to require an implementation of the .support_eee() method for EEE to be usable, rather than defaulting to being permissive when not implemented. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL14e-006cZy-AT@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- net/dsa/user.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dsa/user.c b/net/dsa/user.c index a74339680010..4a8de48a6f24 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1230,7 +1230,7 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) int ret; /* Check whether the switch supports EEE */ - if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index)) return -EOPNOTSUPP; /* Port's PHY and MAC both need to be EEE capable */ @@ -1254,7 +1254,7 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e) int ret; /* Check whether the switch supports EEE */ - if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + if (!ds->ops->support_eee || !ds->ops->support_eee(ds, dp->index)) return -EOPNOTSUPP; /* Port's PHY and MAC both need to be EEE capable */ From 4aa567b1df8b88e3d49a1896b0d4467d5bec6c89 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 10 Dec 2024 10:30:41 -0800 Subject: [PATCH 0362/1386] ionic: add asic codes to firmware interface file Now that the firmware has learned how to properly report the asic type id, add the values to our interface file. The sharp-eyed reviewers will catch that the CAPRI value changed here from 0 to 1. This comes with the FW actually defining it correctly. This is safe for us to change as nothing actually uses that value yet. 
Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic.h | 2 -- drivers/net/ethernet/pensando/ionic/ionic_if.h | 6 +++++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 1c61390677f7..0639bf56bd3a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -18,8 +18,6 @@ struct ionic_lif; #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002 #define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003 -#define IONIC_ASIC_TYPE_ELBA 2 - #define DEVCMD_TIMEOUT 5 #define IONIC_ADMINQ_TIME_SLICE msecs_to_jiffies(100) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 9c85c0706c6e..6ea190f1a706 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -3209,7 +3209,11 @@ union ionic_adminq_comp { #define IONIC_BAR0_INTR_CTRL_OFFSET 0x2000 #define IONIC_DEV_CMD_DONE 0x00000001 -#define IONIC_ASIC_TYPE_CAPRI 0 +#define IONIC_ASIC_TYPE_NONE 0 +#define IONIC_ASIC_TYPE_CAPRI 1 +#define IONIC_ASIC_TYPE_ELBA 2 +#define IONIC_ASIC_TYPE_GIGLIO 3 +#define IONIC_ASIC_TYPE_SALINA 4 /** * struct ionic_doorbell - Doorbell register layout From 33ce1d41c133b053ccea5ac4aaaab260d42706b3 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Tue, 10 Dec 2024 10:30:42 -0800 Subject: [PATCH 0363/1386] ionic: Use VLAN_ETH_HLEN when possible Replace when ETH_HLEN and VLAN_HLEN are used together with VLAN_ETH_HLEN since it's the same value and uses 1 define instead of 2. 
Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_lif.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 40496587b2b3..052c767a2c75 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3265,7 +3265,7 @@ int ionic_lif_alloc(struct ionic *ionic) lif->netdev->min_mtu = max_t(unsigned int, ETH_MIN_MTU, le32_to_cpu(lif->identity->eth.min_frame_size)); lif->netdev->max_mtu = - le32_to_cpu(lif->identity->eth.max_frame_size) - ETH_HLEN - VLAN_HLEN; + le32_to_cpu(lif->identity->eth.max_frame_size) - VLAN_ETH_HLEN; lif->neqs = ionic->neqs_per_lif; lif->nxqs = ionic->ntxqs_per_lif; From 7c372bac12b2003a44aa333773001c83bcb07d09 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Tue, 10 Dec 2024 10:30:43 -0800 Subject: [PATCH 0364/1386] ionic: Translate IONIC_RC_ENOSUPP to EOPNOTSUPP Instead of reporting -EINVAL when IONIC_RC_ENOSUPP is returned use the -EOPNOTSUPP value. This aligns better since the FW only returns IONIC_RC_ENOSUPP when operations aren't supported not when invalid values are used. 
Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c index 0f817c3f92d8..daf1e82cb76b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_main.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c @@ -81,8 +81,9 @@ static int ionic_error_to_errno(enum ionic_status_code code) case IONIC_RC_EQTYPE: case IONIC_RC_EQID: case IONIC_RC_EINVAL: - case IONIC_RC_ENOSUPP: return -EINVAL; + case IONIC_RC_ENOSUPP: + return -EOPNOTSUPP; case IONIC_RC_EPERM: return -EPERM; case IONIC_RC_ENOENT: From a8b05dd3389f313b2ba858165a6ded53c274e5fd Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 10 Dec 2024 10:30:44 -0800 Subject: [PATCH 0365/1386] ionic: add speed defines for 200G and 400G Add higher speed defines to the ionic_if.h API and decode them in the ethtool get_link_ksettings callback. 
Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Signed-off-by: Paolo Abeni --- .../ethernet/pensando/ionic/ionic_ethtool.c | 39 +++++++++++++++++++ .../net/ethernet/pensando/ionic/ionic_if.h | 16 +++++++- 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index dda22fa4448c..272317048cb9 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -158,6 +158,20 @@ static int ionic_get_link_ksettings(struct net_device *netdev, 25000baseCR_Full); copper_seen++; break; + case IONIC_XCVR_PID_QSFP_50G_CR2_FC: + case IONIC_XCVR_PID_QSFP_50G_CR2: + ethtool_link_ksettings_add_link_mode(ks, supported, + 50000baseCR2_Full); + copper_seen++; + break; + case IONIC_XCVR_PID_QSFP_200G_CR4: + ethtool_link_ksettings_add_link_mode(ks, supported, 200000baseCR4_Full); + copper_seen++; + break; + case IONIC_XCVR_PID_QSFP_400G_CR4: + ethtool_link_ksettings_add_link_mode(ks, supported, 400000baseCR4_Full); + copper_seen++; + break; case IONIC_XCVR_PID_SFP_10GBASE_AOC: case IONIC_XCVR_PID_SFP_10GBASE_CU: ethtool_link_ksettings_add_link_mode(ks, supported, @@ -196,6 +210,31 @@ static int ionic_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, supported, 25000baseSR_Full); break; + case IONIC_XCVR_PID_QSFP_200G_AOC: + case IONIC_XCVR_PID_QSFP_200G_SR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 200000baseSR4_Full); + break; + case IONIC_XCVR_PID_QSFP_200G_FR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 200000baseLR4_ER4_FR4_Full); + break; + case IONIC_XCVR_PID_QSFP_200G_DR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 200000baseDR4_Full); + break; + case IONIC_XCVR_PID_QSFP_400G_FR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 400000baseLR4_ER4_FR4_Full); + break; + case IONIC_XCVR_PID_QSFP_400G_DR4: + 
ethtool_link_ksettings_add_link_mode(ks, supported, + 400000baseDR4_Full); + break; + case IONIC_XCVR_PID_QSFP_400G_SR4: + ethtool_link_ksettings_add_link_mode(ks, supported, + 400000baseSR4_Full); + break; case IONIC_XCVR_PID_SFP_10GBASE_SR: ethtool_link_ksettings_add_link_mode(ks, supported, 10000baseSR_Full); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h index 6ea190f1a706..830c8adbfbee 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_if.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h @@ -1277,7 +1277,10 @@ enum ionic_xcvr_pid { IONIC_XCVR_PID_SFP_25GBASE_CR_S = 3, IONIC_XCVR_PID_SFP_25GBASE_CR_L = 4, IONIC_XCVR_PID_SFP_25GBASE_CR_N = 5, - + IONIC_XCVR_PID_QSFP_50G_CR2_FC = 6, + IONIC_XCVR_PID_QSFP_50G_CR2 = 7, + IONIC_XCVR_PID_QSFP_200G_CR4 = 8, + IONIC_XCVR_PID_QSFP_400G_CR4 = 9, /* Fiber */ IONIC_XCVR_PID_QSFP_100G_AOC = 50, IONIC_XCVR_PID_QSFP_100G_ACC = 51, @@ -1303,6 +1306,15 @@ enum ionic_xcvr_pid { IONIC_XCVR_PID_SFP_25GBASE_ACC = 71, IONIC_XCVR_PID_SFP_10GBASE_T = 72, IONIC_XCVR_PID_SFP_1000BASE_T = 73, + IONIC_XCVR_PID_QSFP_200G_AOC = 74, + IONIC_XCVR_PID_QSFP_200G_FR4 = 75, + IONIC_XCVR_PID_QSFP_200G_DR4 = 76, + IONIC_XCVR_PID_QSFP_200G_SR4 = 77, + IONIC_XCVR_PID_QSFP_200G_ACC = 78, + IONIC_XCVR_PID_QSFP_400G_FR4 = 79, + IONIC_XCVR_PID_QSFP_400G_DR4 = 80, + IONIC_XCVR_PID_QSFP_400G_SR4 = 81, + IONIC_XCVR_PID_QSFP_400G_VR4 = 82, }; /** @@ -1404,6 +1416,8 @@ struct ionic_xcvr_status { */ union ionic_port_config { struct { +#define IONIC_SPEED_400G 400000 /* 400G in Mbps */ +#define IONIC_SPEED_200G 200000 /* 200G in Mbps */ #define IONIC_SPEED_100G 100000 /* 100G in Mbps */ #define IONIC_SPEED_50G 50000 /* 50G in Mbps */ #define IONIC_SPEED_40G 40000 /* 40G in Mbps */ From a857c841e7ea0f8ac18bcd3944e2e32cfd82efed Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 10 Dec 2024 10:30:45 -0800 Subject: [PATCH 0366/1386] ionic: add support for QSFP_PLUS_CMIS Teach the driver to 
recognize and decode the sfp pid SFF8024_ID_QSFP_PLUS_CMIS correctly. Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Signed-off-by: Paolo Abeni --- drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 272317048cb9..720092b1633a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -968,6 +968,7 @@ static int ionic_get_module_info(struct net_device *netdev, break; case SFF8024_ID_QSFP_8436_8636: case SFF8024_ID_QSFP28_8636: + case SFF8024_ID_QSFP_PLUS_CMIS: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; From a3b16198d3df38aa2fc6de167b919ecb3fae74a6 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 11 Dec 2024 01:35:40 +0200 Subject: [PATCH 0367/1386] selftests: forwarding: add a pvid_change test to bridge_vlan_unaware Historically, DSA drivers have seen problems with the model in which bridge VLANs work, particularly with them being offloaded to switchdev asynchronously relative to when they become active (vlan_filtering=1). This switchdev API peculiarity was papered over by commit 2ea7a679ca2a ("net: dsa: Don't add vlans when vlan filtering is disabled"), which introduced other problems, fixed by commit 54a0ed0df496 ("net: dsa: provide an option for drivers to always receive bridge VLANs") through an opt-in ds->configure_vlan_while_not_filtering bool (which later became an opt-out). The point is that some DSA drivers still skip VLAN configuration while VLAN-unaware, and there is a desire to get rid of that behavior. It's hard to deduce from the wording "at least one corner case" what Andrew saw, but my best guess is that there is a discrepancy of meaning between bridge pvid and hardware port pvid which caused breakage. 
On the one hand, the Linux bridge with vlan_filtering=0 is completely VLAN-unaware, and will accept and process a packet the same way irrespective of the VLAN groups on the ports or the bridge itself (there may not even be a pvid, and this makes no difference). On the other hand, DSA switches still do VLAN processing internally, even with vlan_filtering disabled, but they are expected to classify all packets to the port pvid. That pvid shouldn't be confused with the bridge pvid, and there lies the problem. When a switch port is under a VLAN-unaware bridge, the hardware pvid must be explicitly managed by the driver to classify all received packets to it, regardless of bridge VLAN groups. When under a VLAN-aware bridge, the hardware pvid must be synchronized to the bridge port pvid. To do this correctly, the pattern is unfortunately a bit complicated, and involves hooking the pvid change logic into quite a few places (the ones that change the input variables which determine the value to use as hardware pvid for a port). See mv88e6xxx_port_commit_pvid(), sja1105_commit_pvid(), ocelot_port_set_pvid() etc. The point is that not all drivers used to do that, especially in older kernels. If a driver is to blindly program a bridge pvid VLAN received from switchdev while it's VLAN-unaware, this might in turn change the hardware pvid used by a VLAN-unaware bridge port, which might result in packet loss depending on which other ports have that pvid too (on that same note, it might also go unnoticed). To capture that condition, it is sufficient to take a VLAN-unaware bridge and change the [VLAN-aware] bridge pvid on a single port, to a VID that isn't present on any other port. This should have absolutely no effect on packet classification or forwarding. However, broken drivers will take the bait, and change their PVID to 3 (the VID used by the test), causing packet loss. 
Signed-off-by: Vladimir Oltean Tested-by: Ido Schimmel Link: https://patch.msgid.link/20241210233541.1401837-1-vladimir.oltean@nxp.com Signed-off-by: Paolo Abeni --- .../net/forwarding/bridge_vlan_unaware.sh | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare From 27ef6a9981fe74191849966a6d5e0400a4008ab8 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Wed, 11 Dec 2024 10:30:54 +0800 Subject: [PATCH 0368/1386] net/smc: support SMC-R V2 for rdma devices with max_recv_sge equals to 1 For SMC-R V2, llc msg can be larger than SMC_WR_BUF_SIZE, thus every recv wr has 2 sges, the first sge with length SMC_WR_BUF_SIZE is for V1/V2 compatible llc/cdc msg, and the second sge with length SMC_WR_BUF_V2_SIZE-SMC_WR_TX_SIZE is 
for V2 specific llc msg, like SMC_LLC_DELETE_RKEY and SMC_LLC_ADD_LINK for SMC-R V2. The memory buffer in the second sge is shared by all recv wrs in one link and all links in one lgr in order to save memory. But not all RDMA devices have max_recv_sge greater than 1. Thus SMC-R V2 cannot be supported on such RDMA devices, and an SMC_CLC_DECL_INTERR fallback happens because creating the qp fails. This patch introduces support for SMC-R V2 on RDMA devices with max_recv_sge equal to 1. Every recv wr has only one sge with an individual buffer whose size is SMC_WR_BUF_V2_SIZE when the RDMA device's max_recv_sge equals 1. It may use more memory, but it is better than an SMC_CLC_DECL_INTERR fallback. Co-developed-by: Wen Gu Signed-off-by: Wen Gu Signed-off-by: Guangguan Wang Reviewed-by: Wenjia Zhang Signed-off-by: Paolo Abeni --- net/smc/smc_core.c | 5 +++++ net/smc/smc_core.h | 11 ++++++++++- net/smc/smc_ib.c | 3 +-- net/smc/smc_llc.c | 21 +++++++++++++++------ net/smc/smc_wr.c | 42 +++++++++++++++++++++--------------------- 5 files changed, 52 insertions(+), 30 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 500952c2e67b..ede4d5f3111b 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -795,9 +795,14 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, if (lgr->smc_version == SMC_V2) { lnk->smcibdev = ini->smcrv2.ib_dev_v2; lnk->ibport = ini->smcrv2.ib_port_v2; + lnk->wr_rx_sge_cnt = lnk->smcibdev->ibdev->attrs.max_recv_sge < 2 ? 1 : 2; + lnk->wr_rx_buflen = smc_link_shared_v2_rxbuf(lnk) ? 
+ SMC_WR_BUF_SIZE : SMC_WR_BUF_V2_SIZE; } else { lnk->smcibdev = ini->ib_dev; lnk->ibport = ini->ib_port; + lnk->wr_rx_sge_cnt = 1; + lnk->wr_rx_buflen = SMC_WR_BUF_SIZE; } get_device(&lnk->smcibdev->ibdev->dev); atomic_inc(&lnk->smcibdev->lnk_cnt); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 69b54ecd6503..48a1b1dcb576 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -122,10 +122,14 @@ struct smc_link { } ____cacheline_aligned_in_smp; struct completion tx_ref_comp; - struct smc_wr_buf *wr_rx_bufs; /* WR recv payload buffers */ + u8 *wr_rx_bufs; /* WR recv payload buffers */ struct ib_recv_wr *wr_rx_ibs; /* WR recv meta data */ struct ib_sge *wr_rx_sges; /* WR recv scatter meta data */ /* above three vectors have wr_rx_cnt elements and use the same index */ + int wr_rx_sge_cnt; /* rx sge, V1 is 1, V2 is either 2 or 1 */ + int wr_rx_buflen; /* buffer len for the first sge, len for the + * second sge is lgr shared if rx sge is 2. + */ dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ u64 wr_rx_id; /* seq # of last recv WR */ @@ -506,6 +510,11 @@ static inline bool smc_link_active(struct smc_link *lnk) return lnk->state == SMC_LNK_ACTIVE; } +static inline bool smc_link_shared_v2_rxbuf(struct smc_link *lnk) +{ + return lnk->wr_rx_sge_cnt > 1; +} + static inline void smc_gid_be16_convert(__u8 *buf, u8 *gid_raw) { sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 9c563cdbea90..53828833a3f7 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -662,7 +662,6 @@ void smc_ib_destroy_queue_pair(struct smc_link *lnk) /* create a queue pair within the protection domain for a link */ int smc_ib_create_queue_pair(struct smc_link *lnk) { - int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 
2 : 1; struct ib_qp_init_attr qp_attr = { .event_handler = smc_ib_qp_event_handler, .qp_context = lnk, @@ -676,7 +675,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, - .max_recv_sge = sges_per_buf, + .max_recv_sge = lnk->wr_rx_sge_cnt, .max_inline_data = 0, }, .sq_sig_type = IB_SIGNAL_REQ_WR, diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 018ce8133b02..f865c58c3aa7 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -997,13 +997,14 @@ static int smc_llc_cli_conf_link(struct smc_link *link, } static void smc_llc_save_add_link_rkeys(struct smc_link *link, - struct smc_link *link_new) + struct smc_link *link_new, + u8 *llc_msg) { struct smc_llc_msg_add_link_v2_ext *ext; struct smc_link_group *lgr = link->lgr; int max, i; - ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 + + ext = (struct smc_llc_msg_add_link_v2_ext *)(llc_msg + SMC_WR_TX_SIZE); max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); down_write(&lgr->rmbs_lock); @@ -1098,7 +1099,9 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry) if (rc) goto out_clear_lnk; if (lgr->smc_version == SMC_V2) { - smc_llc_save_add_link_rkeys(link, lnk_new); + u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ? + (u8 *)lgr->wr_rx_buf_v2 : (u8 *)llc; + smc_llc_save_add_link_rkeys(link, lnk_new, llc_msg); } else { rc = smc_llc_cli_rkey_exchange(link, lnk_new); if (rc) { @@ -1498,7 +1501,9 @@ int smc_llc_srv_add_link(struct smc_link *link, if (rc) goto out_err; if (lgr->smc_version == SMC_V2) { - smc_llc_save_add_link_rkeys(link, link_new); + u8 *llc_msg = smc_link_shared_v2_rxbuf(link) ? 
+ (u8 *)lgr->wr_rx_buf_v2 : (u8 *)add_llc; + smc_llc_save_add_link_rkeys(link, link_new, llc_msg); } else { rc = smc_llc_srv_rkey_exchange(link, link_new); if (rc) @@ -1807,8 +1812,12 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr) if (lgr->smc_version == SMC_V2) { struct smc_llc_msg_delete_rkey_v2 *llcv2; - memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); - llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; + if (smc_link_shared_v2_rxbuf(link)) { + memcpy(lgr->wr_rx_buf_v2, llc, sizeof(*llc)); + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)lgr->wr_rx_buf_v2; + } else { + llcv2 = (struct smc_llc_msg_delete_rkey_v2 *)llc; + } llcv2->num_inval_rkeys = 0; max = min_t(u8, llcv2->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2); diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 994c0cd4fddb..b04a21b8c511 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -439,7 +439,7 @@ static inline void smc_wr_rx_demultiplex(struct ib_wc *wc) return; /* short message */ temp_wr_id = wc->wr_id; index = do_div(temp_wr_id, link->wr_rx_cnt); - wr_rx = (struct smc_wr_rx_hdr *)&link->wr_rx_bufs[index]; + wr_rx = (struct smc_wr_rx_hdr *)(link->wr_rx_bufs + index * link->wr_rx_buflen); hash_for_each_possible(smc_wr_rx_hash, handler, list, wr_rx->type) { if (handler->type == wr_rx->type) handler->handler(wc, wr_rx); @@ -555,7 +555,6 @@ void smc_wr_remember_qp_attr(struct smc_link *lnk) static void smc_wr_init_sge(struct smc_link *lnk) { - int sges_per_buf = (lnk->lgr->smc_version == SMC_V2) ? 2 : 1; bool send_inline = (lnk->qp_attr.cap.max_inline_data > SMC_WR_TX_SIZE); u32 i; @@ -608,13 +607,14 @@ static void smc_wr_init_sge(struct smc_link *lnk) * the larger spillover buffer, allowing easy data mapping. 
*/ for (i = 0; i < lnk->wr_rx_cnt; i++) { - int x = i * sges_per_buf; + int x = i * lnk->wr_rx_sge_cnt; lnk->wr_rx_sges[x].addr = - lnk->wr_rx_dma_addr + i * SMC_WR_BUF_SIZE; - lnk->wr_rx_sges[x].length = SMC_WR_TX_SIZE; + lnk->wr_rx_dma_addr + i * lnk->wr_rx_buflen; + lnk->wr_rx_sges[x].length = smc_link_shared_v2_rxbuf(lnk) ? + SMC_WR_TX_SIZE : lnk->wr_rx_buflen; lnk->wr_rx_sges[x].lkey = lnk->roce_pd->local_dma_lkey; - if (lnk->lgr->smc_version == SMC_V2) { + if (lnk->lgr->smc_version == SMC_V2 && smc_link_shared_v2_rxbuf(lnk)) { lnk->wr_rx_sges[x + 1].addr = lnk->wr_rx_v2_dma_addr + SMC_WR_TX_SIZE; lnk->wr_rx_sges[x + 1].length = @@ -624,7 +624,7 @@ static void smc_wr_init_sge(struct smc_link *lnk) } lnk->wr_rx_ibs[i].next = NULL; lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[x]; - lnk->wr_rx_ibs[i].num_sge = sges_per_buf; + lnk->wr_rx_ibs[i].num_sge = lnk->wr_rx_sge_cnt; } lnk->wr_reg.wr.next = NULL; lnk->wr_reg.wr.num_sge = 0; @@ -655,7 +655,7 @@ void smc_wr_free_link(struct smc_link *lnk) if (lnk->wr_rx_dma_addr) { ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, - SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, + lnk->wr_rx_buflen * lnk->wr_rx_cnt, DMA_FROM_DEVICE); lnk->wr_rx_dma_addr = 0; } @@ -740,13 +740,11 @@ int smc_wr_alloc_lgr_mem(struct smc_link_group *lgr) int smc_wr_alloc_link_mem(struct smc_link *link) { - int sges_per_buf = link->lgr->smc_version == SMC_V2 ? 
2 : 1; - /* allocate link related memory */ link->wr_tx_bufs = kcalloc(SMC_WR_BUF_CNT, SMC_WR_BUF_SIZE, GFP_KERNEL); if (!link->wr_tx_bufs) goto no_mem; - link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, SMC_WR_BUF_SIZE, + link->wr_rx_bufs = kcalloc(SMC_WR_BUF_CNT * 3, link->wr_rx_buflen, GFP_KERNEL); if (!link->wr_rx_bufs) goto no_mem_wr_tx_bufs; @@ -774,7 +772,7 @@ int smc_wr_alloc_link_mem(struct smc_link *link) if (!link->wr_tx_sges) goto no_mem_wr_tx_rdma_sges; link->wr_rx_sges = kcalloc(SMC_WR_BUF_CNT * 3, - sizeof(link->wr_rx_sges[0]) * sges_per_buf, + sizeof(link->wr_rx_sges[0]) * link->wr_rx_sge_cnt, GFP_KERNEL); if (!link->wr_rx_sges) goto no_mem_wr_tx_sges; @@ -872,7 +870,7 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_tx_set_wr_id(&lnk->wr_tx_id, 0); lnk->wr_rx_id = 0; lnk->wr_rx_dma_addr = ib_dma_map_single( - ibdev, lnk->wr_rx_bufs, SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, + ibdev, lnk->wr_rx_bufs, lnk->wr_rx_buflen * lnk->wr_rx_cnt, DMA_FROM_DEVICE); if (ib_dma_mapping_error(ibdev, lnk->wr_rx_dma_addr)) { lnk->wr_rx_dma_addr = 0; @@ -880,13 +878,15 @@ int smc_wr_create_link(struct smc_link *lnk) goto out; } if (lnk->lgr->smc_version == SMC_V2) { - lnk->wr_rx_v2_dma_addr = ib_dma_map_single(ibdev, - lnk->lgr->wr_rx_buf_v2, SMC_WR_BUF_V2_SIZE, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { - lnk->wr_rx_v2_dma_addr = 0; - rc = -EIO; - goto dma_unmap; + if (smc_link_shared_v2_rxbuf(lnk)) { + lnk->wr_rx_v2_dma_addr = + ib_dma_map_single(ibdev, lnk->lgr->wr_rx_buf_v2, + SMC_WR_BUF_V2_SIZE, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ibdev, lnk->wr_rx_v2_dma_addr)) { + lnk->wr_rx_v2_dma_addr = 0; + rc = -EIO; + goto dma_unmap; + } } lnk->wr_tx_v2_dma_addr = ib_dma_map_single(ibdev, lnk->lgr->wr_tx_buf_v2, SMC_WR_BUF_V2_SIZE, @@ -935,7 +935,7 @@ dma_unmap: lnk->wr_tx_v2_dma_addr = 0; } ib_dma_unmap_single(ibdev, lnk->wr_rx_dma_addr, - SMC_WR_BUF_SIZE * lnk->wr_rx_cnt, + lnk->wr_rx_buflen * lnk->wr_rx_cnt, DMA_FROM_DEVICE); 
lnk->wr_rx_dma_addr = 0; out: From c12b2704a678b8a116eeb03f5b91895b90b4dd6f Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Wed, 11 Dec 2024 10:30:55 +0800 Subject: [PATCH 0369/1386] net/smc: support ipv4 mapped ipv6 addr client for smc-r v2 AF_INET6 was not supported for smc-r v2 clients before, even if the ipv6 addr is ipv4 mapped. Thus, when using AF_INET6, smc-r connection will fall back to tcp, especially for java applications running smc-r. This patch adds support for ipv4 mapped ipv6 addr clients for smc-r v2. Clients using a real global ipv6 addr are still not supported. Signed-off-by: Guangguan Wang Reviewed-by: Wen Gu Reviewed-by: Dust Li Reviewed-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Halil Pasic Signed-off-by: Paolo Abeni --- net/smc/af_smc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9e6c69d18581..19ebff1c2579 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1117,7 +1117,10 @@ static int smc_find_proposal_devices(struct smc_sock *smc, ini->check_smcrv2 = true; ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; if (!(ini->smcr_version & SMC_V2) || - smc->clcsock->sk->sk_family != AF_INET || +#if IS_ENABLED(CONFIG_IPV6) + (smc->clcsock->sk->sk_family == AF_INET6 && + !ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) || +#endif !smc_clc_ueid_count() || smc_find_rdma_device(smc, ini)) ini->smcr_version &= ~SMC_V2; From 73e456b402faddf354ff587a859121163709ad2d Mon Sep 17 00:00:00 2001 From: Liu Jing Date: Mon, 9 Dec 2024 14:24:25 +0800 Subject: [PATCH 0370/1386] wifi: qtnfmac: fix spelling error in core.h Fix specific spelling error in core.h. 
Signed-off-by: Liu Jing Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209062425.4139-1-liujing@cmss.chinamobile.com --- drivers/net/wireless/quantenna/qtnfmac/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.h b/drivers/net/wireless/quantenna/qtnfmac/core.h index b375a4751580..a377d85c2451 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/core.h +++ b/drivers/net/wireless/quantenna/qtnfmac/core.h @@ -102,7 +102,7 @@ struct qtnf_wmac { struct qtnf_mac_info macinfo; struct qtnf_vif iflist[QTNF_MAX_INTF]; struct cfg80211_scan_request *scan_req; - struct mutex mac_lock; /* lock during wmac speicific ops */ + struct mutex mac_lock; /* lock during wmac specific ops */ struct delayed_work scan_timeout; struct ieee80211_regdomain *rd; struct platform_device *pdev; From 5a10971c7645a95f5d5dc23c26fbac4bf61801d0 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Tue, 10 Dec 2024 10:56:33 +0530 Subject: [PATCH 0371/1386] wifi: ath12k: fix read pointer after free in ath12k_mac_assign_vif_to_vdev() In ath12k_mac_assign_vif_to_vdev(), if arvif is created on a different radio, it gets deleted from that radio through a call to ath12k_mac_unassign_link_vif(). This action frees the arvif pointer. Subsequently, there is a check involving arvif, which will result in a read-after-free scenario. Fix this by moving this check after arvif is again assigned via call to ath12k_mac_assign_link_vif(). 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Closes: https://scan5.scan.coverity.com/#/project-view/63541/10063?selectedIssue=1636423 Fixes: b5068bc9180d ("wifi: ath12k: Cache vdev configs before vdev create") Signed-off-by: Aditya Kumar Singh Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241210-read_after_free-v1-1-969f69c7d66c@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index c4eab4c1c10e..47a80d28d1d7 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -8064,9 +8064,6 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, ab = ar->ab; - if (arvif->is_created) - goto flush; - /* Assign arvif again here since previous radio switch block * would've unassigned and cleared it. */ @@ -8077,6 +8074,9 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, goto unlock; } + if (arvif->is_created) + goto flush; + if (ar->num_created_vdevs > (TARGET_NUM_VDEVS - 1)) { ath12k_warn(ab, "failed to create vdev, reached max vdev limit %d\n", TARGET_NUM_VDEVS); From a42d71e322a8066dcfa228ce8529bb073c521ae9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Wed, 11 Dec 2024 11:17:09 +0100 Subject: [PATCH 0372/1386] net_sched: sch_cake: Add drop reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three qdisc-specific drop reasons and use them in sch_cake: 1) SKB_DROP_REASON_QDISC_OVERLIMIT Whenever the total queue limit for a qdisc instance is exceeded and a packet is dropped to make room. 2) SKB_DROP_REASON_QDISC_CONGESTED Whenever a packet is dropped by the qdisc AQM algorithm because congestion is detected. 
3) SKB_DROP_REASON_CAKE_FLOOD Whenever a packet is dropped by the flood protection part of the CAKE AQM algorithm (BLUE). Also use the existing SKB_DROP_REASON_QUEUE_PURGE in cake_clear_tin(). Reasons show up as: perf record -a -e skb:kfree_skb sleep 1; perf script iperf3 665 [005] 848.656964: skb:kfree_skb: skbaddr=0xffff98168a333500 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x10f0 reason: QDISC_OVERLIMIT swapper 0 [001] 909.166055: skb:kfree_skb: skbaddr=0xffff98168280cee0 rx_sk=(nil) protocol=34525 location=cake_dequeue+0x5ef reason: QDISC_CONGESTED Reviewed-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Acked-by: Dave Taht Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241211-cake-drop-reason-v2-1-920afadf4d1b@redhat.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 18 +++++++++++++++ net/sched/sch_cake.c | 43 +++++++++++++++++++---------------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index c29282fabae6..ead4170a1d0a 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -58,6 +58,9 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(QDISC_OVERLIMIT) \ + FN(QDISC_CONGESTED) \ + FN(CAKE_FLOOD) \ FN(FQ_BAND_LIMIT) \ FN(FQ_HORIZON_LIMIT) \ FN(FQ_FLOW_LIMIT) \ @@ -314,6 +317,21 @@ enum skb_drop_reason { * failed to enqueue to current qdisc) */ SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc + * instance exceeds its total buffer size limit. + */ + SKB_DROP_REASON_QDISC_OVERLIMIT, + /** + * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm + * due to congestion. + */ + SKB_DROP_REASON_QDISC_CONGESTED, + /** + * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of + * CAKE qdisc AQM algorithm (BLUE). 
+ */ + SKB_DROP_REASON_CAKE_FLOOD, /** * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band * limit is reached. diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 8d8b2db4653c..deb0925f536d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -484,13 +484,14 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, /* Call this with a freshly dequeued packet for possible congestion marking. * Returns true as an instruction to drop the packet, false for delivery. */ -static bool cobalt_should_drop(struct cobalt_vars *vars, - struct cobalt_params *p, - ktime_t now, - struct sk_buff *skb, - u32 bulk_flows) +static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) { - bool next_due, over_target, drop = false; + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; + bool next_due, over_target; ktime_t schedule; u64 sojourn; @@ -533,7 +534,8 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, if (next_due && vars->dropping) { /* Use ECN mark if possible, otherwise drop */ - drop = !(vars->ecn_marked = INET_ECN_set_ce(skb)); + if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) + reason = SKB_DROP_REASON_QDISC_CONGESTED; vars->count++; if (!vars->count) @@ -556,16 +558,17 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, } /* Simple BLUE implementation. Lack of ECN is deliberate. 
*/ - if (vars->p_drop) - drop |= (get_random_u32() < vars->p_drop); + if (vars->p_drop && reason == SKB_NOT_DROPPED_YET && + get_random_u32() < vars->p_drop) + reason = SKB_DROP_REASON_CAKE_FLOOD; /* Overload the drop_next field as an activity timeout */ if (!vars->count) vars->drop_next = ktime_add_ns(now, p->interval); - else if (ktime_to_ns(schedule) > 0 && !drop) + else if (ktime_to_ns(schedule) > 0 && reason == SKB_NOT_DROPPED_YET) vars->drop_next = now; - return drop; + return reason; } static bool cake_update_flowkeys(struct flow_keys *keys, @@ -1528,12 +1531,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) flow->dropped++; b->tin_dropped++; - sch->qstats.drops++; if (q->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); - __qdisc_drop(skb, to_free); + qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); sch->q.qlen--; qdisc_tree_reduce_backlog(sch, 1, len); @@ -1926,7 +1928,7 @@ static void cake_clear_tin(struct Qdisc *sch, u16 tin) q->cur_tin = tin; for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) while (!!(skb = cake_dequeue_one(sch))) - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QUEUE_PURGE); } static struct sk_buff *cake_dequeue(struct Qdisc *sch) @@ -1934,6 +1936,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[q->cur_tin]; struct cake_host *srchost, *dsthost; + enum skb_drop_reason reason; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; @@ -2143,12 +2146,12 @@ retry: goto begin; } + reason = cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, + (b->bulk_flow_count * + !!(q->rate_flags & + CAKE_FLAG_INGRESS))); /* Last packet in queue may be marked, shouldn't be dropped */ - if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, - (b->bulk_flow_count * - !!(q->rate_flags & - CAKE_FLAG_INGRESS))) || - !flow->head) + if (reason 
== SKB_NOT_DROPPED_YET || !flow->head) break; /* drop this packet, get another one */ @@ -2162,7 +2165,7 @@ retry: b->tin_dropped++; qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); - kfree_skb(skb); + kfree_skb_reason(skb, reason); if (q->rate_flags & CAKE_FLAG_INGRESS) goto retry; } From fcc680a647ba77370480fe753664cc10d572b240 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:38 +0100 Subject: [PATCH 0373/1386] page_pool: allow mixing PPs within one bulk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main reason for this change was to allow mixing pages from different &page_pools within one &xdp_buff/&xdp_frame. Why not? With stuff like devmem and io_uring zerocopy Rx, it's required to have separate PPs for header buffers and payload buffers. Adjust xdp_return_frame_bulk() and page_pool_put_netmem_bulk(), so that they won't be tied to a particular pool. Let the latter create a separate bulk of pages whose PP is different from the first netmem of the bulk and process it after the main loop. This greatly optimizes xdp_return_frame_bulk(): no more hashtable lookups and forced flushes on PP mismatch. Also make xdp_flush_frame_bulk() inline, as it's just one if + function call + one u32 read, not worth extending the call ladder. 
Co-developed-by: Toke Høiland-Jørgensen # iterative Signed-off-by: Toke Høiland-Jørgensen Suggested-by: Jakub Kicinski # while (count) Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 6 +- include/net/xdp.h | 16 +++-- net/core/page_pool.c | 109 ++++++++++++++++++++++------------ net/core/xdp.c | 29 +-------- 4 files changed, 87 insertions(+), 73 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1ea16b0e9c79..05a864031271 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count); +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +271,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_netmem_bulk(struct page_pool *pool, - netmem_ref *data, u32 count) +static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index f4020b29122f..9e7eb8223513 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -11,6 +11,8 @@ #include #include /* skb_shared_info */ +#include + /** * DOC: XDP RX-queue information * @@ -193,14 +195,12 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; - void *xa; netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) { - /* bq->count will be zero'ed when bq->xa gets updated 
*/ - bq->xa = NULL; + bq->count = 0; } static inline struct skb_shared_info * @@ -317,10 +317,18 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); +static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) +{ + if (unlikely(!bq->count)) + return; + + page_pool_put_netmem_bulk(bq->q, bq->count); + bq->count = 0; +} + static __always_inline unsigned int xdp_get_frame_len(const struct xdp_frame *xdpf) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 4c85b77cfdac..8292e3edbbfd 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -839,9 +839,41 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, } EXPORT_SYMBOL(page_pool_put_unrefed_page); +static void page_pool_recycle_ring_bulk(struct page_pool *pool, + netmem_ref *bulk, + u32 bulk_len) +{ + bool in_softirq; + u32 i; + + /* Bulk produce into ptr_ring page_pool cache */ + in_softirq = page_pool_producer_lock(pool); + + for (i = 0; i < bulk_len; i++) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } + } + + page_pool_producer_unlock(pool, in_softirq); + recycle_stat_add(pool, ring, i); + + /* Hopefully all pages were returned into ptr_ring */ + if (likely(i == bulk_len)) + return; + + /* + * ptr_ring cache is full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation. 
+ */ + for (; i < bulk_len; i++) + page_pool_return_page(pool, bulk[i]); +} + /** * page_pool_put_netmem_bulk() - release references on multiple netmems - * @pool: pool from which pages were allocated * @data: array holding netmem references * @count: number of entries in @data * @@ -854,52 +886,55 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page); * Please note the caller must not use data area after running * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count) +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { - int i, bulk_len = 0; - bool allow_direct; - bool in_softirq; + u32 bulk_len = 0; - allow_direct = page_pool_napi_local(pool); - - for (i = 0; i < count; i++) { + for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(netmem)) - continue; - - netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); - /* Approved for bulk recycling in ptr_ring cache */ - if (netmem) + if (page_pool_is_last_ref(netmem)) data[bulk_len++] = netmem; } - if (!bulk_len) - return; + count = bulk_len; + while (count) { + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + struct page_pool *pool = NULL; + bool allow_direct; + u32 foreign = 0; - /* Bulk producer into ptr_ring page_pool cache */ - in_softirq = page_pool_producer_lock(pool); - for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { - /* ring full */ - recycle_stat_inc(pool, ring_full); - break; + bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + struct page_pool *netmem_pp; + netmem_ref netmem = data[i]; + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different + * page_pool, save it for another round. 
+ */ + data[foreign++] = netmem; + continue; + } + + netmem = __page_pool_put_page(pool, netmem, -1, + allow_direct); + /* Approved for bulk recycling in ptr_ring cache */ + if (netmem) + bulk[bulk_len++] = netmem; } + + if (bulk_len) + page_pool_recycle_ring_bulk(pool, bulk, bulk_len); + + count = foreign; } - recycle_stat_add(pool, ring, i); - page_pool_producer_unlock(pool, in_softirq); - - /* Hopefully all pages was return into ptr_ring */ - if (likely(i == bulk_len)) - return; - - /* ptr_ring cache full, free remaining pages outside producer lock - * since put_page() with refcnt == 1 can be an expensive operation - */ - for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); } EXPORT_SYMBOL(page_pool_put_netmem_bulk); diff --git a/net/core/xdp.c b/net/core/xdp.c index 938ad15c9857..56127e8ec85f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -511,46 +511,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); * xdp_frame_bulk is usually stored/allocated on the function * call-stack to avoid locking penalties. 
*/ -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) -{ - struct xdp_mem_allocator *xa = bq->xa; - - if (unlikely(!xa || !bq->count)) - return; - - page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); - /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ - bq->count = 0; -} -EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); /* Must be called with rcu_read_lock held */ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - struct xdp_mem_info *mem = &xdpf->mem; - struct xdp_mem_allocator *xa; - - if (mem->type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } - xa = bq->xa; - if (unlikely(!xa)) { - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - bq->count = 0; - bq->xa = xa; - } - if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); - if (unlikely(mem->id != xa->mem.id)) { - xdp_flush_frame_bulk(bq); - bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - } - if (unlikely(xdp_frame_has_frags(xdpf))) { struct skb_shared_info *sinfo; int i; From 56d95b0adfa224bb1c67733dbcad30dd8debd39e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:39 +0100 Subject: [PATCH 0374/1386] xdp: get rid of xdp_frame::mem.id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially, xdp_frame::mem.id was used to search for the corresponding &page_pool to return the page correctly. However, after that struct page was extended to have a direct pointer to its PP (netmem has it as well), further keeping of this field makes no sense. xdp_return_frame_bulk() still used it to do a lookup, and this leftover is now removed. Remove xdp_frame::mem and replace it with ::mem_type, as only memory type still matters and we need to know it to be able to free the frame correctly. 
As a cute side effect, we can now make every scalar field in &xdp_frame of 4 byte width, speeding up accesses to them. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-3-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- drivers/net/veth.c | 4 +-- include/net/xdp.h | 14 +++++----- kernel/bpf/cpumap.c | 2 +- net/bpf/test_run.c | 4 +-- net/core/filter.c | 12 ++++---- net/core/xdp.c | 28 +++++++++---------- 7 files changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index bf5baef5c3e0..4948b4906584 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2281,7 +2281,7 @@ static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv, new_xdpf->len = xdpf->len; new_xdpf->headroom = priv->tx_headroom; new_xdpf->frame_sz = DPAA_BP_RAW_SIZE; - new_xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; + new_xdpf->mem_type = MEM_TYPE_PAGE_ORDER0; /* Release the initial buffer */ xdp_return_frame_rx_napi(xdpf); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 07ebb800edf1..01251868a9c2 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -634,7 +634,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, break; case XDP_TX: orig_frame = *frame; - xdp->rxq->mem = frame->mem; + xdp->rxq->mem.type = frame->mem_type; if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); frame = &orig_frame; @@ -646,7 +646,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, goto xdp_xmit; case XDP_REDIRECT: orig_frame = *frame; - xdp->rxq->mem = frame->mem; + xdp->rxq->mem.type = frame->mem_type; if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { frame = &orig_frame; stats->rx_drops++; diff --git a/include/net/xdp.h b/include/net/xdp.h index 
9e7eb8223513..1c260869a353 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -169,13 +169,13 @@ out: struct xdp_frame { void *data; - u16 len; - u16 headroom; + u32 len; + u32 headroom; u32 metasize; /* uses lower 8-bits */ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, - * while mem info is valid on remote CPU. + * while mem_type is valid on remote CPU. */ - struct xdp_mem_info mem; + enum xdp_mem_type mem_type:32; struct net_device *dev_rx; /* used by cpumap */ u32 frame_sz; u32 flags; /* supported values defined in xdp_buff_flags */ @@ -306,13 +306,13 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0)) return NULL; - /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ - xdp_frame->mem = xdp->rxq->mem; + /* rxq only valid until napi_schedule ends, convert to xdp_mem_type */ + xdp_frame->mem_type = xdp->rxq->mem.type; return xdp_frame; } -void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, +void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a2f46785ac3b..774accbd4a22 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -190,7 +190,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, int err; rxq.dev = xdpf->dev_rx; - rxq.mem = xdpf->mem; + rxq.mem.type = xdpf->mem_type; /* TODO: report queue_index to xdp_rxq_info */ xdp_convert_frame_to_buff(xdpf, &xdp); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 501ec4249fed..9ae2a7f1738b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -153,7 +153,7 @@ static void xdp_test_run_init_page(netmem_ref netmem, void *arg) new_ctx->data = new_ctx->data_meta + meta_len; xdp_update_frame_from_buff(new_ctx, frm); - frm->mem = 
new_ctx->rxq->mem; + frm->mem_type = new_ctx->rxq->mem.type; memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx)); } @@ -246,7 +246,7 @@ static void reset_ctx(struct xdp_page_head *head) head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; xdp_update_frame_from_buff(&head->ctx, head->frame); - head->frame->mem = head->orig_ctx.rxq->mem; + head->frame->mem_type = head->orig_ctx.rxq->mem.type; } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, diff --git a/net/core/filter.c b/net/core/filter.c index fac245065b0a..6c036708634b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4119,13 +4119,13 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) } static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, - struct xdp_mem_info *mem_info, bool release) + enum xdp_mem_type mem_type, bool release) { struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); if (release) { xsk_buff_del_tail(zc_frag); - __xdp_return(NULL, mem_info, false, zc_frag); + __xdp_return(NULL, mem_type, false, zc_frag); } else { zc_frag->data_end -= shrink; } @@ -4134,18 +4134,18 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, int shrink) { - struct xdp_mem_info *mem_info = &xdp->rxq->mem; + enum xdp_mem_type mem_type = xdp->rxq->mem.type; bool release = skb_frag_size(frag) == shrink; - if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) { - bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release); + if (mem_type == MEM_TYPE_XSK_BUFF_POOL) { + bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release); goto out; } if (release) { struct page *page = skb_frag_page(frag); - __xdp_return(page_address(page), mem_info, false, NULL); + __xdp_return(page_address(page), mem_type, false, NULL); } out: diff --git a/net/core/xdp.c b/net/core/xdp.c index 56127e8ec85f..d367571c5838 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ 
-430,12 +430,12 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, +void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, struct xdp_buff *xdp) { struct page *page; - switch (mem->type) { + switch (mem_type) { case MEM_TYPE_PAGE_POOL: page = virt_to_head_page(data); if (napi_direct && xdp_return_frame_no_direct()) @@ -458,7 +458,7 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ - WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); + WARN(1, "Incorrect XDP memory type (%d) usage", mem_type); break; } } @@ -475,10 +475,10 @@ void xdp_return_frame(struct xdp_frame *xdpf) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdpf->mem, false, NULL); + __xdp_return(page_address(page), xdpf->mem_type, false, NULL); } out: - __xdp_return(xdpf->data, &xdpf->mem, false, NULL); + __xdp_return(xdpf->data, xdpf->mem_type, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); @@ -494,10 +494,10 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdpf->mem, true, NULL); + __xdp_return(page_address(page), xdpf->mem_type, true, NULL); } out: - __xdp_return(xdpf->data, &xdpf->mem, true, NULL); + __xdp_return(xdpf->data, xdpf->mem_type, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -516,7 +516,7 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } 
@@ -553,10 +553,11 @@ void xdp_return_buff(struct xdp_buff *xdp) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp); + __xdp_return(page_address(page), xdp->rxq->mem.type, true, + xdp); } out: - __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); + __xdp_return(xdp->data, xdp->rxq->mem.type, true, xdp); } EXPORT_SYMBOL_GPL(xdp_return_buff); @@ -602,7 +603,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->headroom = 0; xdpf->metasize = metasize; xdpf->frame_sz = PAGE_SIZE; - xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; + xdpf->mem_type = MEM_TYPE_PAGE_ORDER0; xsk_buff_free(xdp); return xdpf; @@ -672,7 +673,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + if (xdpf->mem_type == MEM_TYPE_PAGE_POOL) skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ @@ -719,8 +720,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) nxdpf = addr; nxdpf->data = addr + headroom; nxdpf->frame_sz = PAGE_SIZE; - nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0; - nxdpf->mem.id = 0; + nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0; return nxdpf; } From 207ff83cecaeaacf0d47c8ccbe927c8354ac1280 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:40 +0100 Subject: [PATCH 0375/1386] xdp: make __xdp_return() MP-agnostic Currently, __xdp_return() takes pointer to the virtual memory to free a buffer. Apart from that this sometimes provokes redundant data <--> page conversions, taking data pointer effectively prevents lots of XDP code to support non-page-backed buffers, as there's no mapping for the non-host memory (data is always NULL). Just convert it to always take netmem reference. 
For xdp_return_{buff,frame*}(), this chops off one page_address() per each frag and adds one virt_to_netmem() (same as virt_to_page()) per header buffer. For __xdp_return() itself, it removes one virt_to_page() for MEM_TYPE_PAGE_POOL and another one for MEM_TYPE_PAGE_ORDER0, adding one page_address() for [not really common nowadays] MEM_TYPE_PAGE_SHARED, but the main effect is that the abovementioned functions won't die or memleak anymore if the frame has non-host memory attached and will correctly free those. Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 4 ++-- net/core/filter.c | 9 +++------ net/core/xdp.c | 47 +++++++++++++++++++---------------------------- 3 files changed, 24 insertions(+), 36 deletions(-) diff --git a/include/net/xdp.h b/include/net/xdp.h index 1c260869a353..d2089cfecefd 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -312,8 +312,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } -void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, - struct xdp_buff *xdp); +void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, + bool napi_direct, struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); diff --git a/net/core/filter.c b/net/core/filter.c index 6c036708634b..5fea874025d3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4125,7 +4125,7 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, if (release) { xsk_buff_del_tail(zc_frag); - __xdp_return(NULL, mem_type, false, zc_frag); + __xdp_return(0, mem_type, false, zc_frag); } else { zc_frag->data_end -= shrink; } @@ -4142,11 +4142,8 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, goto out; } - if (release) { - struct page *page = 
skb_frag_page(frag); - - __xdp_return(page_address(page), mem_type, false, NULL); - } + if (release) + __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL); out: return release; diff --git a/net/core/xdp.c b/net/core/xdp.c index d367571c5838..f1165a35411b 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -430,27 +430,25 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, - struct xdp_buff *xdp) +void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, + bool napi_direct, struct xdp_buff *xdp) { - struct page *page; - switch (mem_type) { case MEM_TYPE_PAGE_POOL: - page = virt_to_head_page(data); + netmem = netmem_compound_head(netmem); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) * as mem->type knows this a page_pool page */ - page_pool_put_full_page(page->pp, page, napi_direct); + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, + napi_direct); break; case MEM_TYPE_PAGE_SHARED: - page_frag_free(data); + page_frag_free(__netmem_address(netmem)); break; case MEM_TYPE_PAGE_ORDER0: - page = virt_to_page(data); /* Assumes order0 page*/ - put_page(page); + put_page(__netmem_to_page(netmem)); break; case MEM_TYPE_XSK_BUFF_POOL: /* NB! Only valid from an xdp_buff! 
*/ @@ -466,38 +464,34 @@ void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_frame_has_frags(xdpf))) goto out; sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type, + false, NULL); - __xdp_return(page_address(page), xdpf->mem_type, false, NULL); - } out: - __xdp_return(xdpf->data, xdpf->mem_type, false, NULL); + __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_frame_has_frags(xdpf))) goto out; sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type, + true, NULL); - __xdp_return(page_address(page), xdpf->mem_type, true, NULL); - } out: - __xdp_return(xdpf->data, xdpf->mem_type, true, NULL); + __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -544,20 +538,17 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); void xdp_return_buff(struct xdp_buff *xdp) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_buff_has_frags(xdp))) goto out; sinfo = xdp_get_shared_info_from_buff(xdp); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), + xdp->rxq->mem.type, true, xdp); - __xdp_return(page_address(page), xdp->rxq->mem.type, true, - xdp); - } out: - __xdp_return(xdp->data, 
xdp->rxq->mem.type, true, xdp); + __xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp); } EXPORT_SYMBOL_GPL(xdp_return_buff); From 0dffdb3b3366c932fb7d210f5032476c552f7000 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:47 +0100 Subject: [PATCH 0376/1386] skbuff: allow 2-4-argument skb_frag_dma_map() skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE) is repeated across dozens of drivers and really wants a shorthand. Add a macro which will count args and handle all possible number from 2 to 5. Semantics: skb_frag_dma_map(dev, frag) -> __skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset) -> __skb_frag_dma_map(dev, frag, offset, skb_frag_size(frag) - offset, DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset, size) -> __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset, size, dir) -> __skb_frag_dma_map(dev, frag, offset, size, dir) No object code size changes for the existing callers. Users passing less arguments also won't have bigger size comparing to the full equivalent call. 
Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-11-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69624b394cd9..b2509cd0b930 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3674,7 +3674,7 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** - * skb_frag_dma_map - maps a paged fragment via the DMA API + * __skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the @@ -3684,15 +3684,36 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); * * Maps the page associated with @frag to @device. */ -static inline dma_addr_t skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) +static inline dma_addr_t __skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } +#define skb_frag_dma_map(dev, frag, ...) 
\ + CONCATENATE(_skb_frag_dma_map, \ + COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__) + +#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \ + const skb_frag_t *uf = (frag); \ + size_t uo = (offset); \ + \ + __skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \ + DMA_TO_DEVICE); \ +}) +#define _skb_frag_dma_map1(dev, frag, offset) \ + __skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \ + __UNIQUE_ID(offset_)) +#define _skb_frag_dma_map0(dev, frag) \ + _skb_frag_dma_map1(dev, frag, 0) +#define _skb_frag_dma_map2(dev, frag, offset, size) \ + __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) +#define _skb_frag_dma_map3(dev, frag, offset, size, dir) \ + __skb_frag_dma_map(dev, frag, offset, size, dir) + static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { From 91a152cbb49c26609d217cf2f116d46143b9b8be Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:28 +0000 Subject: [PATCH 0377/1386] net: page_pool: rename page_pool_alloc_netmem to *_netmems page_pool_alloc_netmem (without an s) was the mirror of page_pool_alloc_pages (with an s), which was confusing. Rename to page_pool_alloc_netmems so it's the mirror of page_pool_alloc_pages. 
Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241211212033.1684197-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 2 +- net/core/page_pool.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 05a864031271..3270c92841b4 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -242,7 +242,7 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8292e3edbbfd..7a17af286a9e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -574,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. 
*/ -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem; @@ -590,11 +590,11 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; } -EXPORT_SYMBOL(page_pool_alloc_netmem); +EXPORT_SYMBOL(page_pool_alloc_netmems); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { - return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -992,7 +992,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, } if (!netmem) { - netmem = page_pool_alloc_netmem(pool, gfp); + netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0; From 8156c310499a34c8f42b2e2b7360abb805683bbe Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:29 +0000 Subject: [PATCH 0378/1386] net: page_pool: create page_pool_alloc_netmem Create page_pool_alloc_netmem to be the mirror of page_pool_alloc. This enables drivers that currently use page_pool_alloc to transition to netmem by converting the call sites to page_pool_alloc_netmem.
Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241211212033.1684197-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 26caa2c20912..95af7f0b029e 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -115,22 +115,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); } -static inline struct page *page_pool_alloc(struct page_pool *pool, - unsigned int *offset, - unsigned int *size, gfp_t gfp) +static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page; + netmem_ref netmem; if ((*size << 1) > max_size) { *size = max_size; *offset = 0; - return page_pool_alloc_pages(pool, gfp); + return page_pool_alloc_netmems(pool, gfp); } - page = page_pool_alloc_frag(pool, offset, *size, gfp); - if (unlikely(!page)) - return NULL; + netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp); + if (unlikely(!netmem)) + return 0; /* There is very likely not enough space for another fragment, so append * the remaining size to the current fragment to avoid truesize @@ -141,7 +141,14 @@ static inline struct page *page_pool_alloc(struct page_pool *pool, pool->frag_offset = max_size; } - return page; + return netmem; +} + +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp)); } /** From b400f4b87430c105d92550cee5a72aea01fdf3d6 Mon Sep 17 00:00:00 2001 From: Samiullah Khawaja Date: Wed, 11 Dec 2024 21:20:30 +0000 Subject: [PATCH 0379/1386] 
page_pool: Set `dma_sync` to false for devmem memory provider Move the `dma_map` and `dma_sync` checks to `page_pool_init` to make them generic. Set dma_sync to false for devmem memory provider because the dma_sync APIs should not be used for dma_buf backed devmem memory provider. Cc: Jason Gunthorpe Signed-off-by: Samiullah Khawaja Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-4-almasrymina@google.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 9 ++++----- net/core/page_pool.c | 3 +++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 11b91c12ee11..3ebdeed2bf18 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -331,11 +331,10 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) if (!binding) return -EINVAL; - if (!pool->dma_map) - return -EOPNOTSUPP; - - if (pool->dma_sync) - return -EOPNOTSUPP; + /* dma-buf dma addresses do not need and should not be used with + * dma_sync_for_cpu/device. Force disable dma_sync. + */ + pool->dma_sync = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 7a17af286a9e..275a7fd209d7 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -287,6 +287,9 @@ static int page_pool_init(struct page_pool *pool, } if (pool->mp_priv) { + if (!pool->dma_map || !pool->dma_sync) + return -EOPNOTSUPP; + err = mp_dmabuf_devmem_init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, From 7dba339faae991a23c54f7b93a58798c58f8c16f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:31 +0000 Subject: [PATCH 0380/1386] page_pool: disable sync for cpu for dmabuf memory provider dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically it's the driver's responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider.
The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem. Also minimize the impact on the dma syncing performance for pages. Special case the dma-sync path for pages to not go through the overhead checks for dma-syncing and conversion to netmem. Cc: Alexander Lobakin Cc: Jason Gunthorpe Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-5-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 35 ++++++++++++++++++++++++++++----- include/net/page_pool/types.h | 3 ++- net/core/devmem.c | 1 + net/core/page_pool.c | 1 + 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 95af7f0b029e..e555921e5233 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + +static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const dma_addr_t dma_addr, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, dma_addr, + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); } /** @@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, const struct page *page, u32 offset, u32 dma_sync_size) { - dma_sync_single_range_for_cpu(pool->p.dev, - page_pool_get_dma_addr(page), - offset + pool->p.offset, dma_sync_size, - 
page_pool_get_dma_dir(pool)); + __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset, + dma_sync_size); +} + +static inline void +page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, + const netmem_ref netmem, u32 offset, + u32 dma_sync_size) +{ + if (!pool->dma_sync_for_cpu) + return; + + __page_pool_dma_sync_for_cpu(pool, + page_pool_get_dma_addr_netmem(netmem), + offset, dma_sync_size); } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3270c92841b4..ed4cd114180a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -164,7 +164,8 @@ struct page_pool { bool has_init_callback:1; /* slow::init_callback is set */ bool dma_map:1; /* Perform DMA mapping */ - bool dma_sync:1; /* Perform DMA sync */ + bool dma_sync:1; /* Perform DMA sync for device */ + bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */ #ifdef CONFIG_PAGE_POOL_STATS bool system:1; /* This is a global percpu pool */ #endif diff --git a/net/core/devmem.c b/net/core/devmem.c index 3ebdeed2bf18..0b6ed7525b22 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) * dma_sync_for_cpu/device. Force disable dma_sync. 
*/ pool->dma_sync = false; + pool->dma_sync_for_cpu = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 275a7fd209d7..e07ad7315955 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool, memcpy(&pool->slow, &params->slow, sizeof(pool->slow)); pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) From 3f4a0948c3524ae50f166dbc6572a3296b014e62 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Tue, 10 Dec 2024 10:04:41 +0300 Subject: [PATCH 0381/1386] wifi: brcmsmac: add gain range check to wlc_phy_iqcal_gainparams_nphy() In 'wlc_phy_iqcal_gainparams_nphy()', add gain range check to WARN() instead of possible out-of-bounds 'tbl_iqcal_gainparams_nphy' access. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. Signed-off-by: Dmitry Antipov Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241210070441.836362-1-dmantipov@yandex.ru --- drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c index d69879e1bd87..d362c4337616 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/phy/phy_n.c @@ -23423,6 +23423,9 @@ wlc_phy_iqcal_gainparams_nphy(struct brcms_phy *pi, u16 core_no, break; } + if (WARN_ON(k == NPHY_IQCAL_NUMGAINS)) + return; + params->txgm = tbl_iqcal_gainparams_nphy[band_idx][k][1]; params->pga = tbl_iqcal_gainparams_nphy[band_idx][k][2]; params->pad = tbl_iqcal_gainparams_nphy[band_idx][k][3]; From b05d30c2b6df7e2172b18bf1baee9b202f9c6b53 Mon Sep 17 00:00:00 2001 From: Marcel Hamer Date: Wed, 11 Dec 2024 14:36:18 +0100 Subject:
[PATCH 0382/1386] wifi: brcmfmac: add missing header include for brcmf_dbg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Including the fwil.h header file can lead to a build error: drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h: \ In function ‘brcmf_fil_cmd_int_set’: drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h:90:9: error: implicit \ declaration of function ‘brcmf_dbg’ [-Werror=implicit-function-declaration] 90 | brcmf_dbg(FIL, "ifidx=%d, cmd=%d, value=%d\n", ifp->ifidx, cmd, data); | ^~~~~~~~~ The error is often avoided because the debug.h header file is included before the fwil.h header file. This makes sure the header include order is irrelevant by explicitly adding the debug.h header. Fixes: 31343230abb1 ("wifi: brcmfmac: export firmware interface functions") Signed-off-by: Marcel Hamer Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211133618.2014083-1-marcel.hamer@windriver.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h index 31e080e4da66..ab3d6cfcb02b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil.h @@ -6,6 +6,8 @@ #ifndef _fwil_h_ #define _fwil_h_ +#include "debug.h" + /******************************************************************************* * Dongle command codes that are interpreted by firmware ******************************************************************************/ From 9bc5c9515b4817e994579b21c32c033cbb3b0e6c Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Thu, 12 Dec 2024 11:33:25 +0800 Subject: [PATCH 0383/1386] net: stmmac: Drop redundant dwxgmac_tc_ops variable dwmac510_tc_ops and dwxgmac_tc_ops are completely identical, keep dwmac510_tc_ops to provide 
better backward compatibility. Signed-off-by: Furong Xu <0x1207@gmail.com> Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Reviewed-by: Mateusz Polchlopek Link: https://patch.msgid.link/20241212033325.282817-1-0x1207@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/hwif.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/hwif.h | 1 - drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 11 ----------- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 4bd79de2e222..31bdbab9a46c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -267,7 +267,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = NULL, - .tc = &dwxgmac_tc_ops, + .tc = &dwmac510_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxgmac2_setup, @@ -290,7 +290,7 @@ static const struct stmmac_hwif_entry { .hwtimestamp = &stmmac_ptp, .ptp = &stmmac_ptp_clock_ops, .mode = NULL, - .tc = &dwxgmac_tc_ops, + .tc = &dwmac510_tc_ops, .mmc = &dwxgmac_mmc_ops, .est = &dwmac510_est_ops, .setup = dwxlgmac2_setup, diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index e428c82b7d31..2f7295b6c1c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -685,7 +685,6 @@ extern const struct stmmac_dma_ops dwmac410_dma_ops; extern const struct stmmac_ops dwmac510_ops; extern const struct stmmac_tc_ops dwmac4_tc_ops; extern const struct stmmac_tc_ops dwmac510_tc_ops; -extern const struct stmmac_tc_ops dwxgmac_tc_ops; #define GMAC_VERSION 0x00000020 /* GMAC CORE Version */ #define GMAC4_VERSION 0x00000110 /* GMAC4+ CORE Version */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 
6a79e6a111ed..694d6ee14381 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1284,14 +1284,3 @@ const struct stmmac_tc_ops dwmac510_tc_ops = { .query_caps = tc_query_caps, .setup_mqprio = tc_setup_dwmac510_mqprio, }; - -const struct stmmac_tc_ops dwxgmac_tc_ops = { - .init = tc_init, - .setup_cls_u32 = tc_setup_cls_u32, - .setup_cbs = tc_setup_cbs, - .setup_cls = tc_setup_cls, - .setup_taprio = tc_setup_taprio, - .setup_etf = tc_setup_etf, - .query_caps = tc_query_caps, - .setup_mqprio = tc_setup_dwmac510_mqprio, -}; From 2c2b61d2138f472e50b5531ec0cb4a1485837e21 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Wed, 11 Dec 2024 17:22:41 +0900 Subject: [PATCH 0384/1386] netlink: add IGMP/MLD join/leave notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces netlink notifications for multicast address changes. The following features are included: * Addition and deletion of multicast addresses are reported using RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and AF_INET6. * Two new notification groups: RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR are introduced for receiving these events. This change allows user space applications (e.g., ip monitor) to efficiently track multicast group memberships by listening for netlink events. Previously, applications relied on inefficient polling of procfs, introducing delays. With netlink notifications, applications receive realtime updates on multicast group membership changes, enabling more precise metrics collection and system monitoring. This change also unlocks the potential for implementing a wide range of sophisticated multicast related features in user space by allowing applications to combine kernel provided multicast address information with user space data and communicate decisions back to the kernel for more fine grained control.
This mechanism can be used for various purposes, including multicast filtering, IGMP/MLD offload, and IGMP/MLD snooping. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Co-developed-by: Patrick Ruddy Signed-off-by: Patrick Ruddy Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com Signed-off-by: Yuyang Huang Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ include/net/addrconf.h | 21 +++++++++++ include/uapi/linux/rtnetlink.h | 10 +++++- net/ipv4/igmp.c | 64 ++++++++++++++++++++++++++++++++++ net/ipv6/addrconf.c | 29 +++++---------- net/ipv6/mcast.c | 39 +++++++++++++++++++++ 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 5171231f70a8..073b30a9b850 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -87,6 +87,8 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + unsigned long mca_cstamp; + unsigned long mca_tstamp; struct rcu_head rcu; }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 363dd63babe7..58337898fa21 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -88,6 +88,23 @@ struct ifa6_config { u16 scope; }; +enum addr_type_t { + UNICAST_ADDR, + MULTICAST_ADDR, + ANYCAST_ADDR, +}; + +struct inet6_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; + enum addr_type_t type; + bool force_rt_scope_universe; +}; + int addrconf_init(void); void addrconf_cleanup(void); @@ -525,4 +542,8 @@ int if6_proc_init(void); void if6_proc_exit(void); #endif +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args); + #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index db7254d52d93..eccc0e7dcb7d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -93,7 +93,11 @@ enum { RTM_NEWPREFIX = 52, #define
RTM_NEWPREFIX RTM_NEWPREFIX - RTM_GETMULTICAST = 58, + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, #define RTM_GETMULTICAST RTM_GETMULTICAST RTM_GETANYCAST = 62, @@ -774,6 +778,10 @@ enum rtnetlink_groups { #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL RTNLGRP_STATS, #define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6a238398acc9..8a370ef37d3f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,8 @@ #include #include +#include +#include #include #include #include @@ -1430,6 +1432,63 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct ifa_cacheinfo ci; + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = 32; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + ifm->ifa_index = dev->ifindex; + + ci.cstamp = (READ_ONCE(im->mca_cstamp) - INITIAL_JIFFIES) * 100UL / HZ; + ci.tstamp = ci.cstamp; + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + + if (nla_put_in_addr(skb, IFA_MULTICAST, im->multiaddr) < 0 || + nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static void inet_ifmcaddr_notify(struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = 
nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet_fill_ifmcaddr(skb, dev, im, event); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); +} /* * A socket has joined a multicast group on device dev. @@ -1473,6 +1532,8 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, im->interface = in_dev; in_dev_hold(in_dev); im->multiaddr = addr; + im->mca_cstamp = jiffies; + im->mca_tstamp = im->mca_cstamp; /* initial mode is (EX, empty) */ im->sfmode = mode; im->sfcount[mode] = 1; @@ -1492,6 +1553,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); + inet_ifmcaddr_notify(in_dev->dev, im, RTM_NEWMULTICAST); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: @@ -1705,6 +1767,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); + inet_ifmcaddr_notify(in_dev->dev, i, + RTM_DELMULTICAST); ip_mc_clear_src(i); if (!in_dev->dead) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e765466d7f7..2e2684886953 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5127,22 +5127,6 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(4) /* IFA_RT_PRIORITY */; } -enum addr_type_t { - UNICAST_ADDR, - MULTICAST_ADDR, - ANYCAST_ADDR, -}; - -struct inet6_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; - enum addr_type_t type; -}; - static int inet6_fill_ifaddr(struct sk_buff *skb, const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) @@ -5221,15 +5205,16 @@ error: return -EMSGSIZE; } -static int inet6_fill_ifmcaddr(struct sk_buff *skb, - const 
struct ifmcaddr6 *ifmca, - struct inet6_fill_args *args) +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args) { int ifindex = ifmca->idev->dev->ifindex; u8 scope = RT_SCOPE_UNIVERSE; struct nlmsghdr *nlh; - if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) + if (!args->force_rt_scope_universe && + ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, @@ -5254,6 +5239,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, nlmsg_end(skb, nlh); return 0; } +EXPORT_SYMBOL(inet6_fill_ifmcaddr); static int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, @@ -5418,6 +5404,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .flags = NLM_F_MULTI, .netnsid = -1, .type = type, + .force_rt_scope_universe = false, }; struct { unsigned long ifindex; @@ -5546,6 +5533,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .event = RTM_NEWADDR, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; @@ -5617,6 +5605,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) .event = event, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; int err = -ENOBUFS; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ca8692d565d..587831c148de 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -33,8 +33,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include +#include #include #include @@ -901,6 +904,39 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, return mc; } +static void inet6_ifmcaddr_notify(struct net_device *dev, + const struct ifmcaddr6 *ifmca, int event) +{ + struct inet6_fill_args fillargs = { + .portid = 0, + .seq = 0, + .event = event, + .flags = 0, + .netnsid = -1, + 
.force_rt_scope_universe = true, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err); +} + /* * device multicast group inc (add if not found) */ @@ -948,6 +984,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mld_del_delrec(idev, mc); igmp6_group_added(mc); + inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; @@ -977,6 +1014,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) *map = ma->next; igmp6_group_dropped(ma); + inet6_ifmcaddr_notify(idev->dev, ma, + RTM_DELMULTICAST); ip6_mc_clear_src(ma); mutex_unlock(&idev->mc_lock); From a2d8af57452e60ff93a3525704788ad566433070 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Thu, 12 Dec 2024 09:44:06 +0100 Subject: [PATCH 0385/1386] dt-bindings: net: dp83822: Add support for GPIO2 clock output The GPIO2 pin on the DP83822 can be configured as clock output. Add binding to support this feature. Signed-off-by: Dimitri Fedrau Reviewed-by: Krzysztof Kozlowski Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ti,dp83822.yaml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml index 784866ea392b..50c24248df26 100644 --- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml +++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml @@ -96,6 +96,32 @@ properties: - master - slave + ti,gpio2-clk-out: + description: | + DP83822 PHY only. 
+ The GPIO2 pin on the DP83822 can be configured as clock output. When + omitted, the PHY's default will be left as is. + + - 'mac-if': In MII mode the clock frequency is 25-MHz, in RMII Mode the + clock frequency is 50-MHz and in RGMII Mode the clock frequency is + 25-MHz. + - 'xi': XI clock(pass-through clock from XI pin). + - 'int-ref': Internal reference clock 25-MHz. + - 'rmii-master-mode-ref': RMII master mode reference clock 50-MHz. RMII + master mode reference clock is identical to MAC IF clock in RMII master + mode. + - 'free-running': Free running clock 125-MHz. + - 'recovered': Recovered clock is a 125-MHz recovered clock from a + connected link partner. + $ref: /schemas/types.yaml#/definitions/string + enum: + - mac-if + - xi + - int-ref + - rmii-master-mode-ref + - free-running + - recovered + required: - reg @@ -110,6 +136,7 @@ examples: reg = <0>; rx-internal-delay-ps = <1>; tx-internal-delay-ps = <1>; + ti,gpio2-clk-out = "xi"; }; }; From 53e3b540952c14aa190233c173ba56d2987aa527 Mon Sep 17 00:00:00 2001 From: Dimitri Fedrau Date: Thu, 12 Dec 2024 09:44:07 +0100 Subject: [PATCH 0386/1386] net: phy: dp83822: Add support for GPIO2 clock output The GPIO2 pin on the DP83822 can be configured as clock output. Add support for configuration via DT. Signed-off-by: Dimitri Fedrau Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/dp83822.c | 48 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 25ee09c48027..334c17a68edd 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -30,6 +30,7 @@ #define MII_DP83822_FCSCR 0x14 #define MII_DP83822_RCSR 0x17 #define MII_DP83822_RESET_CTRL 0x1f +#define MII_DP83822_IOCTRL2 0x463 #define MII_DP83822_GENCFG 0x465 #define MII_DP83822_SOR1 0x467 @@ -104,6 +105,18 @@ #define DP83822_RX_CLK_SHIFT BIT(12) #define DP83822_TX_CLK_SHIFT BIT(11) +/* IOCTRL2 bits */ +#define DP83822_IOCTRL2_GPIO2_CLK_SRC GENMASK(6, 4) +#define DP83822_IOCTRL2_GPIO2_CTRL GENMASK(2, 0) +#define DP83822_IOCTRL2_GPIO2_CTRL_CLK_REF GENMASK(1, 0) + +#define DP83822_CLK_SRC_MAC_IF 0x0 +#define DP83822_CLK_SRC_XI 0x1 +#define DP83822_CLK_SRC_INT_REF 0x2 +#define DP83822_CLK_SRC_RMII_MASTER_MODE_REF 0x4 +#define DP83822_CLK_SRC_FREE_RUNNING 0x6 +#define DP83822_CLK_SRC_RECOVERED 0x7 + /* SOR1 mode */ #define DP83822_STRAP_MODE1 0 #define DP83822_STRAP_MODE2 BIT(0) @@ -139,6 +152,8 @@ struct dp83822_private { u8 cfg_dac_minus; u8 cfg_dac_plus; struct ethtool_wolinfo wol; + bool set_gpio2_clk_out; + u32 gpio2_clk_out; }; static int dp83822_config_wol(struct phy_device *phydev, @@ -413,6 +428,15 @@ static int dp83822_config_init(struct phy_device *phydev) int err = 0; int bmcr; + if (dp83822->set_gpio2_clk_out) + phy_modify_mmd(phydev, MDIO_MMD_VEND2, MII_DP83822_IOCTRL2, + DP83822_IOCTRL2_GPIO2_CTRL | + DP83822_IOCTRL2_GPIO2_CLK_SRC, + FIELD_PREP(DP83822_IOCTRL2_GPIO2_CTRL, + DP83822_IOCTRL2_GPIO2_CTRL_CLK_REF) | + FIELD_PREP(DP83822_IOCTRL2_GPIO2_CLK_SRC, + dp83822->gpio2_clk_out)); + if (phy_interface_is_rgmii(phydev)) { rx_int_delay = phy_get_internal_delay(phydev, dev, NULL, 0, true); @@ -611,6 +635,7 @@ static int dp83822_of_init(struct phy_device *phydev) { struct dp83822_private *dp83822 = phydev->priv; struct device *dev = &phydev->mdio.dev; + 
const char *of_val; /* Signal detection for the PHY is only enabled if the FX_EN and the * SD_EN pins are strapped. Signal detection can only enabled if FX_EN @@ -623,6 +648,29 @@ static int dp83822_of_init(struct phy_device *phydev) dp83822->fx_enabled = device_property_present(dev, "ti,fiber-mode"); + if (!device_property_read_string(dev, "ti,gpio2-clk-out", &of_val)) { + if (strcmp(of_val, "mac-if") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_MAC_IF; + } else if (strcmp(of_val, "xi") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_XI; + } else if (strcmp(of_val, "int-ref") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_INT_REF; + } else if (strcmp(of_val, "rmii-master-mode-ref") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_RMII_MASTER_MODE_REF; + } else if (strcmp(of_val, "free-running") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_FREE_RUNNING; + } else if (strcmp(of_val, "recovered") == 0) { + dp83822->gpio2_clk_out = DP83822_CLK_SRC_RECOVERED; + } else { + phydev_err(phydev, + "Invalid value for ti,gpio2-clk-out property (%s)\n", + of_val); + return -EINVAL; + } + + dp83822->set_gpio2_clk_out = true; + } + return 0; } From 329365dc46b8cedb9c4fd5cfb80b29cb85b84c39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20G=C3=BCntner?= Date: Thu, 12 Dec 2024 17:19:11 +0100 Subject: [PATCH 0387/1386] ipv4: output metric as unsigned int MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a route metric greater than 0x7fff_ffff leads to an unintended wrap when printing the underlying u32 as a signed int (`%d`) thus incorrectly rendering the metric as negative. Formatting using `%u` corrects the issue.
Signed-off-by: Maximilian Güntner Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241212161911.51598-1-code@mguentner.de Signed-off-by: Jakub Kicinski --- net/ipv4/fib_trie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 161f5526b86c..d6411ac81096 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2999,7 +2999,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%s\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", + "%u\t%08X\t%d\t%u\t%u", nhc->nhc_dev ? nhc->nhc_dev->name : "*", prefix, gw, flags, 0, 0, fi->fib_priority, @@ -3011,7 +3011,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) } else { seq_printf(seq, "*\t%08X\t%08X\t%04X\t%d\t%u\t" - "%d\t%08X\t%d\t%u\t%u", + "%u\t%08X\t%d\t%u\t%u", prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0); } From 36e32b33d81152e1911a70750e0fe5c7621797ba Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Thu, 12 Dec 2024 13:59:08 +0200 Subject: [PATCH 0388/1386] net: ena: Fix incorrect indentation The assignment was accidentally aligned to the string one line before. This was raised by the kernel bot. 
Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202412101739.umNl7yYu-lkp@intel.com/ Signed-off-by: David Arinzon Signed-off-by: Shay Agroskin Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241212115910.2485851-1-shayagr@amazon.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 63c8a2328142..c1295dfad0d0 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -74,7 +74,7 @@ static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) if (threshold < time_since_last_napi && napi_scheduled) { netdev_err(dev, "napi handler hasn't been called for a long time but is scheduled\n"); - reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION; + reset_reason = ENA_REGS_RESET_SUSPECTED_POLL_STARVATION; } schedule_reset: /* Change the state of the device to trigger reset From ffcbfb5f9779637792547356a4fb8b0cbf645fa9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 12 Dec 2024 16:08:34 +0200 Subject: [PATCH 0389/1386] net: phylink: improve phylink_sfp_config_phy() error message with missing PHY driver It seems that phylink does not support driving PHYs in SFP modules using the Generic PHY or Generic Clause 45 PHY driver. I've come to this conclusion after analyzing these facts: - sfp_sm_probe_phy(), who is our caller here, first calls phy_device_register() and then sfp_add_phy() -> ... -> phylink_sfp_connect_phy(). - phydev->supported is populated by phy_probe() - phy_probe() is usually called synchronously from phy_device_register() via phy_bus_match(), if a precise device driver is found for the PHY. In that case, phydev->supported has a good chance of being set to a non-zero mask. 
- There is an exceptional case for the PHYs for which phy_bus_match() didn't find a driver. Those devices sit for a while without a driver, then phy_attach_direct() force-binds the genphy_c45_driver or genphy_driver to them. Again, this triggers phy_probe() and renders a good chance of phydev->supported being populated, assuming compatibility with genphy_read_abilities() or genphy_c45_pma_read_abilities(). - phylink_sfp_config_phy() does not support the exceptional case of retrieving phydev->supported from the Generic PHY driver, due to its code flow. It expects the phydev->supported mask to already be non-empty, because it first calls phylink_validate() on it, and only calls phylink_attach_phy() if that succeeds. Thus, phylink_attach_phy() -> phy_attach_direct() has no chance of running. It is not my wish to change the state of affairs by altering the code flow, but merely to document the limitation rather than have the current unspecific error: [ 61.800079] mv88e6085 d0032004.mdio-mii:12 sfp: validation with support 00,00000000,00000000,00000000 failed: -EINVAL [ 61.820743] sfp sfp: sfp_add_phy failed: -EINVAL On the premise that an empty phydev->supported is going to make phylink_validate() fail anyway, and that this is caused by a missing PHY driver, it would be more informative to single out that case, undercut the entire phylink_sfp_config_phy() call, including phylink_validate(), and print a more specific message for this common gotcha: [ 37.076403] mv88e6085 d0032004.mdio-mii:12 sfp: PHY i2c:sfp:16 (id 0x01410cc2) has no driver loaded [ 37.089157] mv88e6085 d0032004.mdio-mii:12 sfp: Drivers which handle known common cases: CONFIG_BCM84881_PHY, CONFIG_MARVELL_PHY [ 37.108047] sfp sfp: sfp_add_phy failed: -EINVAL Link: https://lore.kernel.org/netdev/20241113144229.3ff4bgsalvj7spb7@skbuf/ Signed-off-by: Vladimir Oltean Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20241212140834.278894-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski 
--- drivers/net/phy/phylink.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 95fbc363f9a6..6d50c2fdb190 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3623,6 +3623,13 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) { struct phylink *pl = upstream; + if (!phy->drv) { + phylink_err(pl, "PHY %s (id 0x%.8lx) has no driver loaded\n", + phydev_name(phy), (unsigned long)phy->phy_id); + phylink_err(pl, "Drivers which handle known common cases: CONFIG_BCM84881_PHY, CONFIG_MARVELL_PHY\n"); + return -EINVAL; + } + /* * This is the new way of dealing with flow control for PHYs, * as described by Timur Tabi in commit 529ed1275263 ("net: phy: From 0193eebbb1fcade01331f9d7cc24e57fc28a577d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 12 Dec 2024 10:11:43 +0100 Subject: [PATCH 0390/1386] ethernet: Make OA_TC6 config symbol invisible Commit aa58bec064ab1622 ("net: ethernet: oa_tc6: implement register write operation") introduced a library that implements the OPEN Alliance TC6 10BASE-T1x MAC-PHY Serial Interface protocol for supporting 10BASE-T1x MAC-PHYs. There is no need to ask the user about enabling this library, as all drivers that use it select the OA_TC6 symbol. Hence make the symbol invisible, unless when compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Link: https://patch.msgid.link/3b600550745af10ab7d7c3526353931c1d39f641.1733994552.git.geert+renesas@glider.be Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 9a542e3c9b05..977b42bc1e8c 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -159,7 +159,7 @@ config ETHOC Say Y here if you want to use the OpenCores 10/100 Mbps Ethernet MAC. 
config OA_TC6 - tristate "OPEN Alliance TC6 10BASE-T1x MAC-PHY support" + tristate "OPEN Alliance TC6 10BASE-T1x MAC-PHY support" if COMPILE_TEST depends on SPI select PHYLIB help From a63bb695396641d91201b9226b09652c1a647ff4 Mon Sep 17 00:00:00 2001 From: Brett Creeley Date: Thu, 12 Dec 2024 13:20:42 -0800 Subject: [PATCH 0391/1386] ionic: remove the unused nb_work Remove the empty and unused nb_work and associated ionic_lif_notify_work() function. v2: separated from previous net patch Link: https://lore.kernel.org/netdev/20241210174828.69525-2-shannon.nelson@amd.com/ Signed-off-by: Brett Creeley Signed-off-by: Shannon Nelson Reviewed-by: Jacob Keller Reviewed-by: Kalesh AP Link: https://patch.msgid.link/20241212212042.9348-1-shannon.nelson@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/pensando/ionic/ionic.h | 1 - drivers/net/ethernet/pensando/ionic/ionic_lif.c | 7 ------- 2 files changed, 8 deletions(-) diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h index 0639bf56bd3a..04f00ea94230 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic.h +++ b/drivers/net/ethernet/pensando/ionic/ionic.h @@ -57,7 +57,6 @@ struct ionic { DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX); cpumask_var_t *affinity_masks; struct delayed_work doorbell_check_dwork; - struct work_struct nb_work; struct notifier_block nb; struct rw_semaphore vf_op_lock; /* lock for VF operations */ struct ionic_vf *vfs; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 052c767a2c75..05fb46effe0d 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -3804,10 +3804,6 @@ err_out_adminq_deinit: return err; } -static void ionic_lif_notify_work(struct work_struct *ws) -{ -} - static void ionic_lif_set_netdev_info(struct ionic_lif *lif) { struct ionic_admin_ctx ctx = { @@ -3858,8 +3854,6 @@ int 
ionic_lif_register(struct ionic_lif *lif) ionic_lif_register_phc(lif); - INIT_WORK(&lif->ionic->nb_work, ionic_lif_notify_work); - lif->ionic->nb.notifier_call = ionic_lif_notify; err = register_netdevice_notifier(&lif->ionic->nb); @@ -3885,7 +3879,6 @@ void ionic_lif_unregister(struct ionic_lif *lif) { if (lif->ionic->nb.notifier_call) { unregister_netdevice_notifier(&lif->ionic->nb); - cancel_work_sync(&lif->ionic->nb_work); lif->ionic->nb.notifier_call = NULL; } From 410cd938511ff18a13bea39e1af80e4821dca14a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Dec 2024 09:32:18 +0100 Subject: [PATCH 0392/1386] octeontx2-af: fix build regression without CONFIG_DCB When DCB is disabled, the pfc_en struct member cannot be accessed: drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c: In function 'otx2_is_pfc_enabled': drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c:22:48: error: 'struct otx2_nic' has no member named 'pfc_en' 22 | return IS_ENABLED(CONFIG_DCB) && !!pfvf->pfc_en; | ^~ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c: In function 'otx2_nix_config_bp': drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c:1755:33: error: 'IEEE_8021QAZ_MAX_TCS' undeclared (first use in this function) 1755 | req->chan_cnt = IEEE_8021QAZ_MAX_TCS; | ^~~~~~~~~~~~~~~~~~~~ Move the member out of the #ifdef block to avoid putting back another check in the source file and add the missing include file unconditionally. 
Fixes: a7ef63dbd588 ("octeontx2-af: Disable backpressure between CPT and NIX") Signed-off-by: Arnd Bergmann Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241213083228.2645757-1-arnd@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 1 + drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index bf56888e7fe7..2b49bfec7869 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "otx2_reg.h" diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 44d737a0dd09..65814e3dc93f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -505,9 +505,9 @@ struct otx2_nic { /* Devlink */ struct otx2_devlink *dl; -#ifdef CONFIG_DCB /* PFC */ u8 pfc_en; +#ifdef CONFIG_DCB u8 *queue_to_pfc_map; u16 pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; bool pfc_alloc_status[NIX_PF_PFC_PRIO_MAX]; From a35d00d5512accd337510fa4de756b743d331a87 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 13 Dec 2024 11:08:27 +0000 Subject: [PATCH 0393/1386] netlink: specs: add uint, sint to netlink-raw schema Add uint, sint to the list of attr types in the netlink-raw schema. 
This fixes the rt_link spec which had a uint attr added in commit f858cc9eed5b ("net: add IFLA_MAX_PACING_OFFLOAD_HORIZON device attribute") Signed-off-by: Donald Hunter Link: https://patch.msgid.link/20241213110827.32250-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/netlink-raw.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 914aa1c0a273..1b0772c8e333 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -221,7 +221,7 @@ properties: type: &attr-type description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, - u8, u16, u32, u64, s8, s16, s32, s64, + uint, sint, u8, u16, u32, u64, s8, s16, s32, s64, string, nest, indexed-array, nest-type-value, sub-message ] doc: From 734ff310d38cfdc27a1b3eac9fa83ff754356ae7 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan Date: Thu, 12 Dec 2024 17:33:01 +0000 Subject: [PATCH 0394/1386] gve: Convert timeouts to secs_to_jiffies() Commit b35108a51cf7 ("jiffies: Define secs_to_jiffies()") introduced secs_to_jiffies(). As the value here is a multiple of 1000, use secs_to_jiffies() instead of msecs_to_jiffies to avoid the multiplication. 
This is converted using scripts/coccinelle/misc/secs_to_jiffies.cocci with the following Coccinelle rules: @@ constant C; @@ - msecs_to_jiffies(C * 1000) + secs_to_jiffies(C) @@ constant C; @@ - msecs_to_jiffies(C * MSEC_PER_SEC) + secs_to_jiffies(C) Signed-off-by: Easwar Hariharan Reviewed-by: Praveen Kaligineedi Link: https://patch.msgid.link/20241212-netdev-converge-secs-to-jiffies-v4-1-6dac97a6d6ab@linux.microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/google/gve/gve_tx_dqo.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index f879426cb552..394debc62268 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -1146,8 +1146,7 @@ static void gve_handle_miss_completion(struct gve_priv *priv, /* jiffies can wraparound but time comparisons can handle overflows. */ pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_REINJECT_COMPL_TIMEOUT); add_to_list(tx, &tx->dqo_compl.miss_completions, pending_packet); *bytes += pending_packet->skb->len; @@ -1191,8 +1190,7 @@ static void remove_miss_completions(struct gve_priv *priv, pending_packet->state = GVE_PACKET_STATE_TIMED_OUT_COMPL; pending_packet->timeout_jiffies = jiffies + - msecs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT * - MSEC_PER_SEC); + secs_to_jiffies(GVE_DEALLOCATE_COMPL_TIMEOUT); /* Maintain pending packet in another list so the packet can be * unallocated at a later time. */ From dcacb364772eb463bde225176086bd7738b7102f Mon Sep 17 00:00:00 2001 From: Zijun Hu Date: Fri, 13 Dec 2024 20:09:11 +0800 Subject: [PATCH 0395/1386] net: wan: framer: Simplify API framer_provider_simple_of_xlate() implementation Simplify framer_provider_simple_of_xlate() implementation by API class_find_device_by_of_node(). 
Also correct comments to mark its parameter @dev as unused instead of @args in passing. Cc: Greg Kroah-Hartman Signed-off-by: Zijun Hu Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241213-net_fix-v2-1-6d06130d630f@quicinc.com Signed-off-by: Jakub Kicinski --- drivers/net/wan/framer/framer-core.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/drivers/net/wan/framer/framer-core.c b/drivers/net/wan/framer/framer-core.c index f547c22e26ac..58f5143359df 100644 --- a/drivers/net/wan/framer/framer-core.c +++ b/drivers/net/wan/framer/framer-core.c @@ -732,8 +732,8 @@ EXPORT_SYMBOL_GPL(devm_framer_create); /** * framer_provider_simple_of_xlate() - returns the framer instance from framer provider - * @dev: the framer provider device - * @args: of_phandle_args (not used here) + * @dev: the framer provider device (not used here) + * @args: of_phandle_args * * Intended to be used by framer provider for the common case where #framer-cells is * 0. 
For other cases where #framer-cells is greater than '0', the framer provider @@ -743,21 +743,14 @@ EXPORT_SYMBOL_GPL(devm_framer_create); struct framer *framer_provider_simple_of_xlate(struct device *dev, const struct of_phandle_args *args) { - struct class_dev_iter iter; - struct framer *framer; + struct device *target_dev; - class_dev_iter_init(&iter, &framer_class, NULL, NULL); - while ((dev = class_dev_iter_next(&iter))) { - framer = dev_to_framer(dev); - if (args->np != framer->dev.of_node) - continue; + target_dev = class_find_device_by_of_node(&framer_class, args->np); + if (!target_dev) + return ERR_PTR(-ENODEV); - class_dev_iter_exit(&iter); - return framer; - } - - class_dev_iter_exit(&iter); - return ERR_PTR(-ENODEV); + put_device(target_dev); + return dev_to_framer(target_dev); } EXPORT_SYMBOL_GPL(framer_provider_simple_of_xlate); From e7b4083b90b7213902124d13fd1ed808360e32b1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:52 +0100 Subject: [PATCH 0396/1386] mptcp: add mptcp_userspace_pm_lookup_addr helper Like __lookup_addr() helper in pm_netlink.c, a new helper mptcp_userspace_pm_lookup_addr() is also defined in pm_userspace.c. It looks up the corresponding mptcp_pm_addr_entry address in userspace_pm_local_addr_list through the passed "addr" parameter and returns the found address entry. This helper can be used in mptcp_userspace_pm_delete_local_addr(), mptcp_userspace_pm_set_flags(), mptcp_userspace_pm_get_local_id() and mptcp_userspace_pm_is_backup() to simplify the code. Please note that with this change now list_for_each_entry() is used in mptcp_userspace_pm_delete_local_addr(), not list_for_each_entry_safe(), but that's OK to do so because mptcp_userspace_pm_lookup_addr() only returns an entry from the list, the list hasn't been modified here.
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-1-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 69 ++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index e35178f5205f..3664f3c1572e 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -26,6 +26,19 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk) } } +static struct mptcp_pm_addr_entry * +mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, addr, false)) + return entry; + } + return NULL; +} + static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry, bool needs_id) @@ -90,22 +103,20 @@ append_err: static int mptcp_userspace_pm_delete_local_addr(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *addr) { - struct mptcp_pm_addr_entry *entry, *tmp; struct sock *sk = (struct sock *)msk; + struct mptcp_pm_addr_entry *entry; - list_for_each_entry_safe(entry, tmp, &msk->pm.userspace_pm_local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, &addr->addr, false)) { - /* TODO: a refcount is needed because the entry can - * be used multiple times (e.g. fullmesh mode). - */ - list_del_rcu(&entry->list); - sock_kfree_s(sk, entry, sizeof(*entry)); - msk->pm.local_addr_used--; - return 0; - } - } + entry = mptcp_userspace_pm_lookup_addr(msk, &addr->addr); + if (!entry) + return -EINVAL; - return -EINVAL; + /* TODO: a refcount is needed because the entry can + * be used multiple times (e.g. fullmesh mode). 
+ */ + list_del_rcu(&entry->list); + sock_kfree_s(sk, entry, sizeof(*entry)); + msk->pm.local_addr_used--; + return 0; } static struct mptcp_pm_addr_entry * @@ -123,17 +134,12 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { - struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry; + struct mptcp_pm_addr_entry *entry = NULL, new_entry; __be16 msk_sport = ((struct inet_sock *) inet_sk((struct sock *)msk))->inet_sport; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { - if (mptcp_addresses_equal(&e->addr, skc, false)) { - entry = e; - break; - } - } + entry = mptcp_userspace_pm_lookup_addr(msk, skc); spin_unlock_bh(&msk->pm.lock); if (entry) return entry->addr.id; @@ -153,15 +159,11 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) { struct mptcp_pm_addr_entry *entry; - bool backup = false; + bool backup; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, skc, false)) { - backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); - break; - } - } + entry = mptcp_userspace_pm_lookup_addr(msk, skc); + backup = entry && !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); spin_unlock_bh(&msk->pm.lock); return backup; @@ -606,13 +608,12 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) bkup = 1; spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { - if (mptcp_addresses_equal(&entry->addr, &loc.addr, false)) { - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; - } + entry = mptcp_userspace_pm_lookup_addr(msk, &loc.addr); + if (entry) { + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= 
~MPTCP_PM_ADDR_FLAG_BACKUP; } spin_unlock_bh(&msk->pm.lock); From a28717d8414e965a3ce6c83f744aa1c70ac8722f Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:53 +0100 Subject: [PATCH 0397/1386] mptcp: add mptcp_for_each_userspace_pm_addr macro Similar to mptcp_for_each_subflow() macro, this patch adds a new macro mptcp_for_each_userspace_pm_addr() for userspace PM to iterate over the address entries on the local address list userspace_pm_local_addr_list of the mptcp socket. This patch doesn't change the behaviour of the code, just refactoring. Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-2-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 3664f3c1572e..6a27fab238f1 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -8,6 +8,10 @@ #include "mib.h" #include "mptcp_pm_gen.h" +#define mptcp_for_each_userspace_pm_addr(__msk, __entry) \ + list_for_each_entry(__entry, \ + &((__msk)->pm.userspace_pm_local_addr_list), list) + void mptcp_free_local_addr_list(struct mptcp_sock *msk) { struct mptcp_pm_addr_entry *entry, *tmp; @@ -32,7 +36,7 @@ mptcp_userspace_pm_lookup_addr(struct mptcp_sock *msk, { struct mptcp_pm_addr_entry *entry; - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + mptcp_for_each_userspace_pm_addr(msk, entry) { if (mptcp_addresses_equal(&entry->addr, addr, false)) return entry; } @@ -54,7 +58,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk, bitmap_zero(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); spin_lock_bh(&msk->pm.lock); - list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) { + mptcp_for_each_userspace_pm_addr(msk, e) { addr_match = 
mptcp_addresses_equal(&e->addr, &entry->addr, true); if (addr_match && entry->addr.id == 0 && needs_id) entry->addr.id = e->addr.id; @@ -124,7 +128,7 @@ mptcp_userspace_pm_lookup_addr_by_id(struct mptcp_sock *msk, unsigned int id) { struct mptcp_pm_addr_entry *entry; - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + mptcp_for_each_userspace_pm_addr(msk, entry) { if (entry->addr.id == id) return entry; } @@ -659,7 +663,7 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, lock_sock(sk); spin_lock_bh(&msk->pm.lock); - list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + mptcp_for_each_userspace_pm_addr(msk, entry) { if (test_bit(entry->addr.id, bitmap->map)) continue; From 6a389c8ceeb75cf3c523ebf652a90958267c7b13 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:54 +0100 Subject: [PATCH 0398/1386] mptcp: add mptcp_userspace_pm_get_sock helper Each userspace pm netlink function uses nla_get_u32() to get the msk token value, then pass it to mptcp_token_get_sock() to get the msk. Finally check whether userspace PM is selected on this msk. It makes sense to wrap them into a helper, named mptcp_userspace_pm_get_sock(), to do this. This patch doesn't change the behaviour of the code, just refactoring. 
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-3-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 144 +++++++++++++-------------------------- 1 file changed, 47 insertions(+), 97 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 6a27fab238f1..afb04343e74d 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -173,36 +173,50 @@ bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, return backup; } -int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) +static struct mptcp_sock *mptcp_userspace_pm_get_sock(const struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; + struct mptcp_sock *msk; + + if (!token) { + GENL_SET_ERR_MSG(info, "missing required token"); + return NULL; + } + + msk = mptcp_token_get_sock(genl_info_net(info), nla_get_u32(token)); + if (!msk) { + NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + return NULL; + } + + if (!mptcp_pm_is_userspace(msk)) { + GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); + sock_put((struct sock *)msk); + return NULL; + } + + return msk; +} + +int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) +{ struct nlattr *addr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry addr_val; struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; - u32 token_val; - if (!addr || !token) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + if (!addr) { + GENL_SET_ERR_MSG(info, "missing required address"); return err; } - token_val = nla_get_u32(token); - - msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return err; - } sk = (struct sock *)msk; - if 
(!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto announce_err; - } - err = mptcp_pm_parse_entry(addr, info, true, &addr_val); if (err < 0) { GENL_SET_ERR_MSG(info, "error parsing local address"); @@ -275,7 +289,6 @@ remove_err: int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; struct mptcp_pm_addr_entry *match; struct mptcp_pm_addr_entry *entry; @@ -283,30 +296,21 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) LIST_HEAD(free_list); int err = -EINVAL; struct sock *sk; - u32 token_val; u8 id_val; - if (!id || !token) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + if (!id) { + GENL_SET_ERR_MSG(info, "missing required ID"); return err; } id_val = nla_get_u8(id); - token_val = nla_get_u32(token); - msk = mptcp_token_get_sock(sock_net(skb->sk), token_val); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return err; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto out; - } - if (id_val == 0) { err = mptcp_userspace_pm_remove_id_zero_address(msk, info); goto out; @@ -343,7 +347,6 @@ out: int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_pm_addr_entry entry = { 0 }; struct mptcp_addr_info addr_r; @@ -351,28 +354,18 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info) struct mptcp_sock *msk; int err = -EINVAL; struct sock *sk; - u32 token_val; - if (!laddr || !raddr || !token) { - GENL_SET_ERR_MSG(info, "missing 
required inputs"); + if (!laddr || !raddr) { + GENL_SET_ERR_MSG(info, "missing required address(es)"); return err; } - token_val = nla_get_u32(token); - - msk = mptcp_token_get_sock(genl_info_net(info), token_val); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return err; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto create_err; - } - err = mptcp_pm_parse_entry(laddr, info, true, &entry); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); @@ -475,35 +468,24 @@ static struct sock *mptcp_nl_find_ssk(struct mptcp_sock *msk, int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; struct mptcp_addr_info addr_l; struct mptcp_addr_info addr_r; struct mptcp_sock *msk; struct sock *sk, *ssk; int err = -EINVAL; - u32 token_val; - if (!laddr || !raddr || !token) { - GENL_SET_ERR_MSG(info, "missing required inputs"); + if (!laddr || !raddr) { + GENL_SET_ERR_MSG(info, "missing required address(es)"); return err; } - token_val = nla_get_u32(token); - - msk = mptcp_token_get_sock(genl_info_net(info), token_val); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return err; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto destroy_err; - } - err = mptcp_pm_parse_addr(laddr, info, &addr_l); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); @@ -566,31 +548,19 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info) struct 
mptcp_pm_addr_entry loc = { .addr = { .family = AF_UNSPEC }, }; struct mptcp_pm_addr_entry rem = { .addr = { .family = AF_UNSPEC }, }; struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct net *net = sock_net(skb->sk); struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; int ret = -EINVAL; struct sock *sk; - u32 token_val; u8 bkup = 0; - token_val = nla_get_u32(token); - - msk = mptcp_token_get_sock(net, token_val); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return ret; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "userspace PM not selected"); - goto set_flags_err; - } - ret = mptcp_pm_parse_entry(attr, info, false, &loc); if (ret < 0) goto set_flags_err; @@ -637,30 +607,20 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, DECLARE_BITMAP(map, MPTCP_PM_MAX_ADDR_ID + 1); } *bitmap; const struct genl_info *info = genl_info_dump(cb); - struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; struct mptcp_sock *msk; - struct nlattr *token; int ret = -EINVAL; struct sock *sk; void *hdr; bitmap = (struct id_bitmap *)cb->ctx; - token = info->attrs[MPTCP_PM_ATTR_TOKEN]; - msk = mptcp_token_get_sock(net, nla_get_u32(token)); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return ret; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto out; - } - lock_sock(sk); spin_lock_bh(&msk->pm.lock); mptcp_for_each_userspace_pm_addr(msk, entry) { @@ -685,7 +645,6 @@ int mptcp_userspace_pm_dump_addr(struct sk_buff *msg, release_sock(sk); ret = msg->len; -out: sock_put(sk); return ret; } @@ -694,28 +653,19 @@ int 
mptcp_userspace_pm_get_addr(struct sk_buff *skb, struct genl_info *info) { struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR]; - struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; struct mptcp_pm_addr_entry addr, *entry; - struct net *net = sock_net(skb->sk); struct mptcp_sock *msk; struct sk_buff *msg; int ret = -EINVAL; struct sock *sk; void *reply; - msk = mptcp_token_get_sock(net, nla_get_u32(token)); - if (!msk) { - NL_SET_ERR_MSG_ATTR(info->extack, token, "invalid token"); + msk = mptcp_userspace_pm_get_sock(info); + if (!msk) return ret; - } sk = (struct sock *)msk; - if (!mptcp_pm_is_userspace(msk)) { - GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected"); - goto out; - } - ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) goto out; From 8008e77e07418a2a43235c2288430602b0d4c8da Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:55 +0100 Subject: [PATCH 0399/1386] mptcp: move mptcp_pm_remove_addrs into pm_userspace Since mptcp_pm_remove_addrs() is only called from the userspace PM, this patch moves it into pm_userspace.c. For this, lookup_subflow_by_saddr() and remove_anno_list_by_saddr() helpers need to be exported in protocol.h. Also add "mptcp_" prefix for these helpers. Here, mptcp_pm_remove_addrs() is not changed to a static function because it will be used in BPF Path Manager. This patch doesn't change the behaviour of the code, just refactoring. 
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-4-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 46 +++++++--------------------------------- net/mptcp/pm_userspace.c | 28 ++++++++++++++++++++++++ net/mptcp/protocol.h | 4 ++++ 3 files changed, 40 insertions(+), 38 deletions(-) diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 7a0f7998376a..98ac73938bd8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -107,8 +107,8 @@ static void remote_address(const struct sock_common *skc, #endif } -static bool lookup_subflow_by_saddr(const struct list_head *list, - const struct mptcp_addr_info *saddr) +bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, + const struct mptcp_addr_info *saddr) { struct mptcp_subflow_context *subflow; struct mptcp_addr_info cur; @@ -1447,8 +1447,8 @@ out_free: return ret; } -static bool remove_anno_list_by_saddr(struct mptcp_sock *msk, - const struct mptcp_addr_info *addr) +bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; @@ -1476,7 +1476,7 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, list.ids[list.nr++] = mptcp_endp_get_local_id(msk, addr); - ret = remove_anno_list_by_saddr(msk, addr); + ret = mptcp_remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); if (ret) { @@ -1520,7 +1520,7 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, } lock_sock(sk); - remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); + remove_subflow = mptcp_lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); @@ -1633,36 +1633,6 @@ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info) 
return ret; } -/* Called from the userspace PM only */ -void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) -{ - struct mptcp_rm_list alist = { .nr = 0 }; - struct mptcp_pm_addr_entry *entry; - int anno_nr = 0; - - list_for_each_entry(entry, rm_list, list) { - if (alist.nr >= MPTCP_RM_IDS_MAX) - break; - - /* only delete if either announced or matching a subflow */ - if (remove_anno_list_by_saddr(msk, &entry->addr)) - anno_nr++; - else if (!lookup_subflow_by_saddr(&msk->conn_list, - &entry->addr)) - continue; - - alist.ids[alist.nr++] = entry->addr.id; - } - - if (alist.nr) { - spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_signaled -= anno_nr; - mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); - } -} - -/* Called from the in-kernel PM only */ static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, struct list_head *rm_list) { @@ -1671,11 +1641,11 @@ static void mptcp_pm_flush_addrs_and_subflows(struct mptcp_sock *msk, list_for_each_entry(entry, rm_list, list) { if (slist.nr < MPTCP_RM_IDS_MAX && - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) + mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); if (alist.nr < MPTCP_RM_IDS_MAX && - remove_anno_list_by_saddr(msk, &entry->addr)) + mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = mptcp_endp_get_local_id(msk, &entry->addr); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index afb04343e74d..cac4b4a7b1e5 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -287,6 +287,34 @@ remove_err: return err; } +void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) +{ + struct mptcp_rm_list alist = { .nr = 0 }; + struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; + + list_for_each_entry(entry, rm_list, list) { + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or 
matching a subflow */ + if (mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!mptcp_lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; + } + + if (alist.nr) { + spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); + } +} + int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a93e661ef5c4..5ba67cb601e0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1027,6 +1027,10 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk, struct mptcp_pm_add_entry * mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk, const struct mptcp_addr_info *addr); +bool mptcp_lookup_subflow_by_saddr(const struct list_head *list, + const struct mptcp_addr_info *saddr); +bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, + const struct mptcp_addr_info *addr); int mptcp_pm_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_pm_nl_set_flags(struct sk_buff *skb, struct genl_info *info); int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info); From 88d0973163711a5313ddd479a1ff543b5ac93d51 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:56 +0100 Subject: [PATCH 0400/1386] mptcp: drop free_list for deleting entries mptcp_pm_remove_addrs() actually only deletes one address, which does not match its name. This patch renames it to mptcp_pm_remove_addr_entry() and changes the parameter "rm_list" to "entry". With the help of mptcp_pm_remove_addr_entry(), it's no longer necessary to move the entry to be deleted to free_list and then traverse the list to delete the entry, which is not allowed in BPF. The entry can be directly deleted through list_del_rcu() and sock_kfree_s() now. 
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-5-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 42 ++++++++++++++-------------------------- net/mptcp/protocol.h | 3 ++- 2 files changed, 17 insertions(+), 28 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index cac4b4a7b1e5..7689ea987be3 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -287,41 +287,31 @@ remove_err: return err; } -void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) +void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry) { struct mptcp_rm_list alist = { .nr = 0 }; - struct mptcp_pm_addr_entry *entry; int anno_nr = 0; - list_for_each_entry(entry, rm_list, list) { - if (alist.nr >= MPTCP_RM_IDS_MAX) - break; + /* only delete if either announced or matching a subflow */ + if (mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!mptcp_lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) + return; - /* only delete if either announced or matching a subflow */ - if (mptcp_remove_anno_list_by_saddr(msk, &entry->addr)) - anno_nr++; - else if (!mptcp_lookup_subflow_by_saddr(&msk->conn_list, - &entry->addr)) - continue; + alist.ids[alist.nr++] = entry->addr.id; - alist.ids[alist.nr++] = entry->addr.id; - } - - if (alist.nr) { - spin_lock_bh(&msk->pm.lock); - msk->pm.add_addr_signaled -= anno_nr; - mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); - } + spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; + mptcp_pm_remove_addr(msk, &alist); + spin_unlock_bh(&msk->pm.lock); } int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *id = info->attrs[MPTCP_PM_ATTR_LOC_ID]; struct mptcp_pm_addr_entry *match; - struct mptcp_pm_addr_entry *entry; 
struct mptcp_sock *msk; - LIST_HEAD(free_list); int err = -EINVAL; struct sock *sk; u8 id_val; @@ -355,16 +345,14 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info) goto out; } - list_move(&match->list, &free_list); + list_del_rcu(&match->list); spin_unlock_bh(&msk->pm.lock); - mptcp_pm_remove_addrs(msk, &free_list); + mptcp_pm_remove_addr_entry(msk, match); release_sock(sk); - list_for_each_entry_safe(match, entry, &free_list, list) { - sock_kfree_s(sk, match, sizeof(*match)); - } + sock_kfree_s(sk, match, sizeof(*match)); err = 0; out: diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 5ba67cb601e0..cd5132fe7d22 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -1038,7 +1038,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); +void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *entry); void mptcp_free_local_addr_list(struct mptcp_sock *msk); From 1c670b39cec7603893e7d0169578409dccf63e94 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:57 +0100 Subject: [PATCH 0401/1386] mptcp: change local addr type of subflow_destroy Generally, in the path manager interfaces, the local address is defined as an mptcp_pm_addr_entry type address, while the remote address is defined as an mptcp_addr_info type one: (struct mptcp_pm_addr_entry *local, struct mptcp_addr_info *remote) But subflow_destroy() interface uses two mptcp_addr_info type parameters. This patch changes the first one to mptcp_pm_addr_entry type and use helper mptcp_pm_parse_entry() to parse it instead of using mptcp_pm_parse_addr(). This patch doesn't change the behaviour of the code, just refactoring. 
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-6-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 7689ea987be3..1d5b77e0a722 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -485,7 +485,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info { struct nlattr *raddr = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE]; struct nlattr *laddr = info->attrs[MPTCP_PM_ATTR_ADDR]; - struct mptcp_addr_info addr_l; + struct mptcp_pm_addr_entry addr_l; struct mptcp_addr_info addr_r; struct mptcp_sock *msk; struct sock *sk, *ssk; @@ -502,7 +502,7 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info sk = (struct sock *)msk; - err = mptcp_pm_parse_addr(laddr, info, &addr_l); + err = mptcp_pm_parse_entry(laddr, info, true, &addr_l); if (err < 0) { NL_SET_ERR_MSG_ATTR(info->extack, laddr, "error parsing local addr"); goto destroy_err; @@ -515,35 +515,34 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { - ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6); - addr_l.family = AF_INET6; + if (addr_l.addr.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) { + ipv6_addr_set_v4mapped(addr_l.addr.addr.s_addr, &addr_l.addr.addr6); + addr_l.addr.family = AF_INET6; } - if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) { - ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6); + if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr.addr6)) { + ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_l.addr.addr6); addr_r.family = AF_INET6; } 
#endif - if (addr_l.family != addr_r.family) { + if (addr_l.addr.family != addr_r.family) { GENL_SET_ERR_MSG(info, "address families do not match"); err = -EINVAL; goto destroy_err; } - if (!addr_l.port || !addr_r.port) { + if (!addr_l.addr.port || !addr_r.port) { GENL_SET_ERR_MSG(info, "missing local or remote port"); err = -EINVAL; goto destroy_err; } lock_sock(sk); - ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r); + ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); if (ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - struct mptcp_pm_addr_entry entry = { .addr = addr_l }; spin_lock_bh(&msk->pm.lock); - mptcp_userspace_pm_delete_local_addr(msk, &entry); + mptcp_userspace_pm_delete_local_addr(msk, &addr_l); spin_unlock_bh(&msk->pm.lock); mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); mptcp_close_ssk(sk, ssk, subflow); From 5409fd6fec680d59111708ba337b09c1d36db0a8 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 13 Dec 2024 20:52:58 +0100 Subject: [PATCH 0402/1386] mptcp: drop useless "err = 0" in subflow_destroy Upon successful return, mptcp_pm_parse_addr() returns 0. There is no need to set "err = 0" after this. So after mptcp_nl_find_ssk() returns, just need to set "err = -ESRCH", then release and free msk socket if it returns NULL. Also, no need to define the variable "subflow" in subflow_destroy(), use mptcp_subflow_ctx(ssk) directly. This patch doesn't change the behaviour of the code, just refactoring. 
Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241213-net-next-mptcp-pm-misc-cleanup-v1-7-ddb6d00109a8@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_userspace.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 1d5b77e0a722..740a10d669f8 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -538,19 +538,18 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info lock_sock(sk); ssk = mptcp_nl_find_ssk(msk, &addr_l.addr, &addr_r); - if (ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); - - spin_lock_bh(&msk->pm.lock); - mptcp_userspace_pm_delete_local_addr(msk, &addr_l); - spin_unlock_bh(&msk->pm.lock); - mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); - mptcp_close_ssk(sk, ssk, subflow); - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); - err = 0; - } else { + if (!ssk) { err = -ESRCH; + goto release_sock; } + + spin_lock_bh(&msk->pm.lock); + mptcp_userspace_pm_delete_local_addr(msk, &addr_l); + spin_unlock_bh(&msk->pm.lock); + mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN); + mptcp_close_ssk(sk, ssk, mptcp_subflow_ctx(ssk)); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW); +release_sock: release_sock(sk); destroy_err: From aeb3ec99026979287266e4b5a1194789c1488c1a Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Fri, 13 Dec 2024 00:13:20 +0200 Subject: [PATCH 0403/1386] net/mlx5: Add device cap abs_native_port_num When the abs_native_port_num is set, the native_port_num reported by the device may not be continuous and bigger than the num_lag_ports. 
Signed-off-by: Rongwei Liu Reviewed-by: Shay Drory Reviewed-by: Saeed Mahameed Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241212221329.961628-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5451ff1d4356..43b3cb4bf8d1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1599,7 +1599,8 @@ enum { struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; - u8 reserved_at_7[0x9]; + u8 abs_native_port_num[0x1]; + u8 reserved_at_8[0x8]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; From 0471b1093e3a5d702ba2bf5987c35ee0e2336855 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:04 +0100 Subject: [PATCH 0404/1386] tls: block decryption when a rekey is pending When a TLS handshake record carrying a KeyUpdate message is received, all subsequent records will be encrypted with a new key. We need to stop decrypting incoming records with the old key, and wait until userspace provides a new key. Make a note of this in the RX context just after decrypting that record, and stop recvmsg/splice calls with EKEYEXPIRED until the new key is available. key_update_pending can't be combined with the existing bitfield, because we will read it locklessly in ->poll. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- include/net/tls.h | 3 +++ net/tls/tls_sw.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index 61fef2880114..857340338b69 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -59,6 +59,8 @@ struct tls_rec; #define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) +#define TLS_HANDSHAKE_KEYUPDATE 24 /* rfc8446 B.3: Key update */ + #define TLS_AAD_SPACE_SIZE 13 #define TLS_MAX_IV_SIZE 16 @@ -130,6 +132,7 @@ struct tls_sw_context_rx { u8 async_capable:1; u8 zc_capable:1; u8 reader_contended:1; + bool key_update_pending; struct tls_strparser strp; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bbf26cc4f6ee..3dcf8ee60fea 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1314,6 +1314,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, int ret = 0; long timeo; + /* a rekey is pending, let userspace deal with it */ + if (unlikely(ctx->key_update_pending)) + return -EKEYEXPIRED; + timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { @@ -1720,6 +1724,34 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } +static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) +{ + const struct strp_msg *rxm = strp_msg(skb); + const struct tls_msg *tlm = tls_msg(skb); + char hs_type; + int err; + + if (likely(tlm->control != TLS_RECORD_TYPE_HANDSHAKE)) + return 0; + + if (rxm->full_len < 1) + return 0; + + err = skb_copy_bits(skb, rxm->offset, &hs_type, 1); + if (err < 0) { + DEBUG_NET_WARN_ON_ONCE(1); + return err; + } + + if (hs_type == TLS_HANDSHAKE_KEYUPDATE) { + struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; + + WRITE_ONCE(rx_ctx->key_update_pending, true); + } + + return 0; +} + static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, struct tls_decrypt_arg *darg) { @@ -1739,7 +1771,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, 
rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return 0; + return tls_check_pending_rekey(tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) @@ -2719,6 +2751,7 @@ int tls_set_sw_offload(struct sock *sk, int tx) crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; + sw_ctx_rx->key_update_pending = false; } cipher_desc = get_cipher_desc(crypto_info->cipher_type); From 47069594e67e882ec5c1d8d374f6aab037511509 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:05 +0100 Subject: [PATCH 0405/1386] tls: implement rekey for TLS1.3 This adds the possibility to change the key and IV when using TLS1.3. Changing the cipher or TLS version is not supported. Once we have updated the RX key, we can unblock the receive side. If the rekey fails, the context is unmodified and userspace is free to retry the update or close the socket. This change only affects tls_sw, since 1.3 offload isn't supported. Signed-off-by: Sabrina Dubroca Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/tls/tls.h | 3 +- net/tls/tls_device.c | 2 +- net/tls/tls_main.c | 46 ++++++++++++++----- net/tls/tls_sw.c | 105 +++++++++++++++++++++++++++++-------------- 4 files changed, 108 insertions(+), 48 deletions(-) diff --git a/net/tls/tls.h b/net/tls/tls.h index e5e47452308a..774859b63f0d 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -145,7 +145,8 @@ void tls_err_abort(struct sock *sk, int err); int init_prot_info(struct tls_prot_info *prot, const struct tls_crypto_info *crypto_info, const struct tls_cipher_desc *cipher_desc); -int tls_set_sw_offload(struct sock *sk, int tx); +int tls_set_sw_offload(struct sock *sk, int tx, + struct tls_crypto_info *new_crypto_info); void tls_update_rx_zc_capable(struct tls_context *tls_ctx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); void tls_sw_strparser_done(struct tls_context *tls_ctx); diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index dc063c2c7950..e50b6e71df13 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1227,7 +1227,7 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) context->resync_nh_reset = 1; ctx->priv_ctx_rx = context; - rc = tls_set_sw_offload(sk, 0); + rc = tls_set_sw_offload(sk, 0, NULL); if (rc) goto release_ctx; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 6b4b9f2749a6..68b5735dafc1 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -423,9 +423,10 @@ static __poll_t tls_sk_poll(struct file *file, struct socket *sock, ctx = tls_sw_ctx_rx(tls_ctx); psock = sk_psock_get(sk); - if (skb_queue_empty_lockless(&ctx->rx_list) && - !tls_strp_msg_ready(ctx) && - sk_psock_queue_empty(psock)) + if ((skb_queue_empty_lockless(&ctx->rx_list) && + !tls_strp_msg_ready(ctx) && + sk_psock_queue_empty(psock)) || + READ_ONCE(ctx->key_update_pending)) mask &= ~(EPOLLIN | EPOLLRDNORM); if (psock) @@ -612,11 +613,13 @@ static int validate_crypto_info(const struct tls_crypto_info *crypto_info, static int 
do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, unsigned int optlen, int tx) { - struct tls_crypto_info *crypto_info; - struct tls_crypto_info *alt_crypto_info; + struct tls_crypto_info *crypto_info, *alt_crypto_info; + struct tls_crypto_info *old_crypto_info = NULL; struct tls_context *ctx = tls_get_ctx(sk); const struct tls_cipher_desc *cipher_desc; union tls_crypto_context *crypto_ctx; + union tls_crypto_context tmp = {}; + bool update = false; int rc = 0; int conf; @@ -633,9 +636,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, crypto_info = &crypto_ctx->info; - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) - return -EBUSY; + if (TLS_CRYPTO_INFO_READY(crypto_info)) { + /* Currently we only support setting crypto info more + * than one time for TLS 1.3 + */ + if (crypto_info->version != TLS_1_3_VERSION) + return -EBUSY; + + update = true; + old_crypto_info = crypto_info; + crypto_info = &tmp.info; + crypto_ctx = &tmp; + } rc = copy_from_sockptr(crypto_info, optval, sizeof(*crypto_info)); if (rc) { @@ -643,7 +655,14 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, goto err_crypto_info; } - rc = validate_crypto_info(crypto_info, alt_crypto_info); + if (update) { + /* Ensure that TLS version and ciphers are not modified */ + if (crypto_info->version != old_crypto_info->version || + crypto_info->cipher_type != old_crypto_info->cipher_type) + rc = -EINVAL; + } else { + rc = validate_crypto_info(crypto_info, alt_crypto_info); + } if (rc) goto err_crypto_info; @@ -673,7 +692,8 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXDEVICE); } else { - rc = tls_set_sw_offload(sk, 1); + rc = tls_set_sw_offload(sk, 1, + update ? 
crypto_info : NULL); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); @@ -687,14 +707,16 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXDEVICE); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXDEVICE); } else { - rc = tls_set_sw_offload(sk, 0); + rc = tls_set_sw_offload(sk, 0, + update ? crypto_info : NULL); if (rc) goto err_crypto_info; TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); conf = TLS_SW; } - tls_sw_strparser_arm(sk, ctx); + if (!update) + tls_sw_strparser_arm(sk, ctx); } if (tx) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 3dcf8ee60fea..9e5aff5bab98 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2716,12 +2716,22 @@ int init_prot_info(struct tls_prot_info *prot, return 0; } -int tls_set_sw_offload(struct sock *sk, int tx) +static void tls_finish_key_update(struct sock *sk, struct tls_context *tls_ctx) { + struct tls_sw_context_rx *ctx = tls_ctx->priv_ctx_rx; + + WRITE_ONCE(ctx->key_update_pending, false); + /* wake-up pre-existing poll() */ + ctx->saved_data_ready(sk); +} + +int tls_set_sw_offload(struct sock *sk, int tx, + struct tls_crypto_info *new_crypto_info) +{ + struct tls_crypto_info *crypto_info, *src_crypto_info; struct tls_sw_context_tx *sw_ctx_tx = NULL; struct tls_sw_context_rx *sw_ctx_rx = NULL; const struct tls_cipher_desc *cipher_desc; - struct tls_crypto_info *crypto_info; char *iv, *rec_seq, *key, *salt; struct cipher_context *cctx; struct tls_prot_info *prot; @@ -2733,45 +2743,47 @@ int tls_set_sw_offload(struct sock *sk, int tx) ctx = tls_get_ctx(sk); prot = &ctx->prot_info; - if (tx) { - ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); - if (!ctx->priv_ctx_tx) - return -ENOMEM; + /* new_crypto_info != NULL means rekey */ + if (!new_crypto_info) { + if (tx) { + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; + } else { + ctx->priv_ctx_rx = 
init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + } + } + if (tx) { sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; } else { - ctx->priv_ctx_rx = init_ctx_rx(ctx); - if (!ctx->priv_ctx_rx) - return -ENOMEM; - sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; - sw_ctx_rx->key_update_pending = false; } - cipher_desc = get_cipher_desc(crypto_info->cipher_type); + src_crypto_info = new_crypto_info ?: crypto_info; + + cipher_desc = get_cipher_desc(src_crypto_info->cipher_type); if (!cipher_desc) { rc = -EINVAL; goto free_priv; } - rc = init_prot_info(prot, crypto_info, cipher_desc); + rc = init_prot_info(prot, src_crypto_info, cipher_desc); if (rc) goto free_priv; - iv = crypto_info_iv(crypto_info, cipher_desc); - key = crypto_info_key(crypto_info, cipher_desc); - salt = crypto_info_salt(crypto_info, cipher_desc); - rec_seq = crypto_info_rec_seq(crypto_info, cipher_desc); - - memcpy(cctx->iv, salt, cipher_desc->salt); - memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); - memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); + iv = crypto_info_iv(src_crypto_info, cipher_desc); + key = crypto_info_key(src_crypto_info, cipher_desc); + salt = crypto_info_salt(src_crypto_info, cipher_desc); + rec_seq = crypto_info_rec_seq(src_crypto_info, cipher_desc); if (!*aead) { *aead = crypto_alloc_aead(cipher_desc->cipher_name, 0, 0); @@ -2784,20 +2796,30 @@ int tls_set_sw_offload(struct sock *sk, int tx) ctx->push_pending_record = tls_sw_push_pending_record; + /* setkey is the last operation that could fail during a + * rekey. if it succeeds, we can start modifying the + * context. 
+ */ rc = crypto_aead_setkey(*aead, key, cipher_desc->key); - if (rc) - goto free_aead; + if (rc) { + if (new_crypto_info) + goto out; + else + goto free_aead; + } - rc = crypto_aead_setauthsize(*aead, prot->tag_size); - if (rc) - goto free_aead; + if (!new_crypto_info) { + rc = crypto_aead_setauthsize(*aead, prot->tag_size); + if (rc) + goto free_aead; + } - if (sw_ctx_rx) { + if (!tx && !new_crypto_info) { tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv); tls_update_rx_zc_capable(ctx); sw_ctx_rx->async_capable = - crypto_info->version != TLS_1_3_VERSION && + src_crypto_info->version != TLS_1_3_VERSION && !!(tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC); rc = tls_strp_init(&sw_ctx_rx->strp, sk); @@ -2805,18 +2827,33 @@ int tls_set_sw_offload(struct sock *sk, int tx) goto free_aead; } + memcpy(cctx->iv, salt, cipher_desc->salt); + memcpy(cctx->iv + cipher_desc->salt, iv, cipher_desc->iv); + memcpy(cctx->rec_seq, rec_seq, cipher_desc->rec_seq); + + if (new_crypto_info) { + unsafe_memcpy(crypto_info, new_crypto_info, + cipher_desc->crypto_info, + /* size was checked in do_tls_setsockopt_conf */); + memzero_explicit(new_crypto_info, cipher_desc->crypto_info); + if (!tx) + tls_finish_key_update(sk, ctx); + } + goto out; free_aead: crypto_free_aead(*aead); *aead = NULL; free_priv: - if (tx) { - kfree(ctx->priv_ctx_tx); - ctx->priv_ctx_tx = NULL; - } else { - kfree(ctx->priv_ctx_rx); - ctx->priv_ctx_rx = NULL; + if (!new_crypto_info) { + if (tx) { + kfree(ctx->priv_ctx_tx); + ctx->priv_ctx_tx = NULL; + } else { + kfree(ctx->priv_ctx_rx); + ctx->priv_ctx_rx = NULL; + } } out: return rc; From 510128b30f2db1600172e9aaec44f66db3c16e15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:06 +0100 Subject: [PATCH 0406/1386] tls: add counters for rekey This introduces 5 counters to keep track of key updates: Tls{Rx,Tx}Rekey{Ok,Error} and TlsRxRekeyReceived. Suggested-by: Jakub Kicinski Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- include/uapi/linux/snmp.h | 5 +++++ net/tls/tls_main.c | 27 ++++++++++++++++++++++----- net/tls/tls_proc.c | 5 +++++ net/tls/tls_sw.c | 6 ++++-- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..51da2e00112d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -358,6 +358,11 @@ enum LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ LINUX_MIB_TLSDECRYPTRETRY, /* TlsDecryptRetry */ LINUX_MIB_TLSRXNOPADVIOL, /* TlsRxNoPadViolation */ + LINUX_MIB_TLSRXREKEYOK, /* TlsRxRekeyOk */ + LINUX_MIB_TLSRXREKEYERROR, /* TlsRxRekeyError */ + LINUX_MIB_TLSTXREKEYOK, /* TlsTxRekeyOk */ + LINUX_MIB_TLSTXREKEYERROR, /* TlsTxRekeyError */ + LINUX_MIB_TLSRXREKEYRECEIVED, /* TlsRxRekeyReceived */ __LINUX_MIB_TLSMAX }; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 68b5735dafc1..9ee5a83c5b40 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -640,8 +640,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, /* Currently we only support setting crypto info more * than one time for TLS 1.3 */ - if (crypto_info->version != TLS_1_3_VERSION) + if (crypto_info->version != TLS_1_3_VERSION) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); return -EBUSY; + } update = true; old_crypto_info = crypto_info; @@ -696,8 +699,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + } conf = TLS_SW; } } else { @@ -711,8 +719,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? 
crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } conf = TLS_SW; } if (!update) @@ -735,6 +748,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: + if (update) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + } memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index 68982728f620..367666aa07b8 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -22,6 +22,11 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY), SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL), + SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK), + SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR), + SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK), + SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR), + SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9e5aff5bab98..47550d485819 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1724,7 +1724,8 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } -static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) +static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx, + struct sk_buff *skb) { const struct strp_msg *rxm = strp_msg(skb); const struct tls_msg *tlm = tls_msg(skb); @@ -1747,6 +1748,7 @@ static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) 
struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; WRITE_ONCE(rx_ctx->key_update_pending, true); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED); } return 0; @@ -1771,7 +1773,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return tls_check_pending_rekey(tls_ctx, darg->skb); + return tls_check_pending_rekey(sk, tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) From 5aa97a43d042fffa8bd0f0bc2723f3574310686e Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:07 +0100 Subject: [PATCH 0407/1386] docs: tls: document TLS1.3 key updates Document the kernel's behavior and userspace expectations. Suggested-by: Jakub Kicinski Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- Documentation/networking/tls.rst | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Documentation/networking/tls.rst b/Documentation/networking/tls.rst index 658ed3a71e1b..c7904a1bc167 100644 --- a/Documentation/networking/tls.rst +++ b/Documentation/networking/tls.rst @@ -200,6 +200,32 @@ received without a cmsg buffer set. recv will never return data from mixed types of TLS records. +TLS 1.3 Key Updates +------------------- + +In TLS 1.3, KeyUpdate handshake messages signal that the sender is +updating its TX key. Any message sent after a KeyUpdate will be +encrypted using the new key. The userspace library can pass the new +key to the kernel using the TLS_TX and TLS_RX socket options, as for +the initial keys. TLS version and cipher cannot be changed. + +To prevent attempting to decrypt incoming records using the wrong key, +decryption will be paused when a KeyUpdate message is received by the +kernel, until the new key has been provided using the TLS_RX socket +option. 
Any read occurring after the KeyUpdate has been read and +before the new key is provided will fail with EKEYEXPIRED. poll() will +not report any read events from the socket until the new key is +provided. There is no pausing on the transmit side. + +Userspace should make sure that the crypto_info provided has been set +properly. In particular, the kernel will not check for key/nonce +reuse. + +The number of successful and failed key updates is tracked in the +``TlsTxRekeyOk``, ``TlsRxRekeyOk``, ``TlsTxRekeyError``, +``TlsRxRekeyError`` statistics. The ``TlsRxRekeyReceived`` statistic +counts KeyUpdate handshake messages that have been received. + Integrating in to userspace TLS library --------------------------------------- @@ -286,3 +312,13 @@ TLS implementation exposes the following per-namespace statistics - ``TlsRxNoPadViolation`` - number of data RX records which had to be re-decrypted due to ``TLS_RX_EXPECT_NO_PAD`` mis-prediction. + +- ``TlsTxRekeyOk``, ``TlsRxRekeyOk`` - + number of successful rekeys on existing sessions for TX and RX + +- ``TlsTxRekeyError``, ``TlsRxRekeyError`` - + number of failed rekeys on existing sessions for TX and RX + +- ``TlsRxRekeyReceived`` - + number of received KeyUpdate handshake messages, requiring userspace + to provide a new RX key From b2e584aa3c710802600b690f34a56fb526aebf2f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:08 +0100 Subject: [PATCH 0408/1386] selftests: tls: add key_generation argument to tls_crypto_info_init This allows us to generate different keys, so that we can test that rekey is using the correct one. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tls.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 1a706d03bb6b..b1f52d2bb096 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap) if (self->notls) SKIP(return, "no TLS support"); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); @@ -391,7 +393,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -1175,7 +1177,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, 
&get, &len), 0); @@ -1696,7 +1698,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -2118,7 +2120,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2177,7 +2179,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); From 555f0edb9ff043196655a5b7cc65f67dfd05b530 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:09 +0100 Subject: [PATCH 0409/1386] selftests: tls: add rekey tests Test the kernel's ability to: - update the key (but not the version or cipher), only for TLS1.3 - pause decryption after receiving a KeyUpdate message, until a new RX key has been provided - reflect the pause/non-readable socket in poll() Signed-off-by: Sabrina Dubroca Signed-off-by: David S. 
Miller --- tools/testing/selftests/net/tls.c | 458 ++++++++++++++++++++++++++++++ 1 file changed, 458 insertions(+) diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index b1f52d2bb096..9a85f93c33d8 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1670,6 +1670,464 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + 
+/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + 
EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == 
TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, 
send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 
0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + 
EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 
0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + FIXTURE(tls_err) { int fd, cfd; From 5e51e50e2324c9374d06ab05e3d7d09123e1114f Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:41 +0100 Subject: [PATCH 0410/1386] net: Make dev_get_hwtstamp_phylib accessible Make the dev_get_hwtstamp_phylib function accessible in preparation for using it from ethtool to read the current hwtstamp configuration. Reviewed-by: Florian Fainelli Reviewed-by: Jacob Keller Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- net/core/dev.h | 2 ++ net/core/dev_ioctl.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/dev.h b/net/core/dev.h index d043dee25a68..357543cbde65 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -310,5 +310,7 @@ static inline void dev_xmit_recursion_dec(void) int dev_set_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg, struct netlink_ext_ack *extack); +int dev_get_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg); #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 46d43b950471..67cf68817f23 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -266,8 +266,8 @@ static int dev_eth_ioctl(struct net_device *dev, * -EOPNOTSUPP for phylib for now, which is still more accurate than letting * the netdev handle the GET request.
 */ -static int dev_get_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg) +int dev_get_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg) { if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); From b18fe47c0c093cc429f7e4d7694fdf0fc362aaf5 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:42 +0100 Subject: [PATCH 0411/1386] net: Make net_hwtstamp_validate accessible Make the net_hwtstamp_validate function accessible in preparation for using it from ethtool to validate the hwtstamp configuration before setting it. Reviewed-by: Florian Fainelli Reviewed-by: Jacob Keller Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- net/core/dev.h | 1 + net/core/dev_ioctl.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/dev.h b/net/core/dev.h index 357543cbde65..aa91eed55a40 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -312,5 +312,6 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack); int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg); +int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg); #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 67cf68817f23..1f09930fca26 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -184,7 +184,7 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm return err; } -static int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) +int net_hwtstamp_validate(const struct kernel_hwtstamp_config *cfg) { enum hwtstamp_tx_types tx_type; enum hwtstamp_rx_filters rx_filter; From 35f7cad1743e04bf2944a2aadb6b6a42adc57bca Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:43 +0100 Subject: [PATCH 0412/1386] net: Add the possibility to support a selected hwtstamp in netdevice Introduce the description of a hwtstamp provider,
mainly defined with the hwtstamp source and the phydev pointer. Add a hwtstamp provider description within the netdev structure to allow saving the hwtstamp we want to use. This prepares for future support of an ethtool netlink command to select the desired hwtstamp provider. By default, the old API that does not support hwtstamp selectability is used, meaning the hwtstamp provider pointer is unset. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 10 +++++++ include/linux/net_tstamp.h | 29 ++++++++++++++++++ include/linux/netdevice.h | 4 +++ include/uapi/linux/net_tstamp.h | 11 +++++++ net/core/dev_ioctl.c | 41 ++++++++++++++++++++++++-- net/core/timestamping.c | 52 +++++++++++++++++++++++++++++---- 6 files changed, 140 insertions(+), 7 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b26bb33cd1d4..1a908af4175b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1998,6 +1999,15 @@ void phy_detach(struct phy_device *phydev) phy_suspend(phydev); if (dev) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* Disable timestamp if it is the one selected */ + if (hwprov && hwprov->phydev == phydev) { + rcu_assign_pointer(dev->hwprov, NULL); + kfree_rcu(hwprov, rcu_head); + } + phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; phy_link_topo_del_phy(dev, phydev); diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 662074b08c94..ff0758e88ea1 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -19,6 +19,33 @@ enum hwtstamp_source { HWTSTAMP_SOURCE_PHYLIB, }; +/** + * struct hwtstamp_provider_desc - hwtstamp provider description + * + * @index: index of the hwtstamp provider. + * @qualifier: hwtstamp provider qualifier.
+ */ +struct hwtstamp_provider_desc { + int index; + enum hwtstamp_provider_qualifier qualifier; +}; + +/** + * struct hwtstamp_provider - hwtstamp provider object + * + * @rcu_head: RCU callback used to free the struct. + * @source: source of the hwtstamp provider. + * @phydev: pointer of the phydev source in case a PTP coming from phylib + * @desc: hwtstamp provider description. + */ + +struct hwtstamp_provider { + struct rcu_head rcu_head; + enum hwtstamp_source source; + struct phy_device *phydev; + struct hwtstamp_provider_desc desc; +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * @@ -31,6 +58,7 @@ enum hwtstamp_source { * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY + * @qualifier: qualifier of the hwtstamp provider * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -43,6 +71,7 @@ struct kernel_hwtstamp_config { struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; + enum hwtstamp_provider_qualifier qualifier; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d917949bba03..2593019ad5b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -82,6 +82,7 @@ struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; +struct hwtstamp_provider; typedef u32 xdp_features_t; @@ -2045,6 +2046,7 @@ enum netdev_reg_state { * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. + * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. 
@@ -2457,6 +2459,8 @@ struct net_device { struct hlist_head neighbours[NEIGH_NR_TABLES]; + struct hwtstamp_provider __rcu *hwprov; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 858339d1c1c4..55b0ab51096c 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,6 +13,17 @@ #include #include /* for SO_TIMESTAMPING */ +/* + * Possible type of hwtstamp provider. Mainly "precise" the default one + * is for IEEE 1588 quality and "approx" is for NICs DMA point. + */ +enum hwtstamp_provider_qualifier { + HWTSTAMP_PROVIDER_QUALIFIER_PRECISE, + HWTSTAMP_PROVIDER_QUALIFIER_APPROX, + + HWTSTAMP_PROVIDER_QUALIFIER_CNT, +}; + /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1f09930fca26..087a57b7e4fa 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,21 @@ static int dev_eth_ioctl(struct net_device *dev, int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + cfg->qualifier = hwprov->desc.qualifier; + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) + return phy_hwtstamp_get(hwprov->phydev, cfg); + + if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) + return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); + + return -EOPNOTSUPP; + } + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); @@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; + struct hwtstamp_provider 
*hwprov; + struct phy_device *phydev; bool changed = false; + bool phy_ts; int err; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) { + phy_ts = true; + phydev = hwprov->phydev; + } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) { + phy_ts = false; + } else { + return -EOPNOTSUPP; + } + + cfg->qualifier = hwprov->desc.qualifier; + } else { + phy_ts = phy_is_default_hwtstamp(dev->phydev); + if (phy_ts) + phydev = dev->phydev; + } + cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; if (phy_ts && dev->see_all_hwtstamp_requests) { @@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { - err = phy_hwtstamp_set(dev->phydev, cfg, extack); + err = phy_hwtstamp_set(phydev, cfg, extack); if (err) { if (changed) ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 3717fb152ecc..a50a7ef49ae8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -9,6 +9,7 @@ #include #include #include +#include static unsigned int classify(const struct sk_buff *skb) { @@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; struct sk_buff *clone; unsigned int type; - if (!skb->sk || !skb->dev || - !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->sk || !skb->dev) return; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + type = classify(skb); if (type == 
PTP_CLASS_NONE) return; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->txtstamp)) { clone = skb_clone_sk(skb); if (!clone) @@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; unsigned int type; - if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->dev) return false; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return false; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + if (skb_headroom(skb) < ETH_HLEN) return false; @@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return false; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->rxtstamp)) return mii_ts->rxtstamp(mii_ts, skb, type); From b9e3f7dc9ed95daeb83cfa45b821cacaa01aa906 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:44 +0100 Subject: [PATCH 0413/1386] net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology Either the MAC or the PHY can provide hwtstamp, so we should be able to read the tsinfo for any hwtstamp provider. Enhance 'get' command to retrieve tsinfo of hwtstamp providers within a network topology. Add support for a specific dump command to retrieve all hwtstamp providers within the network topology, with added functionality for filtered dump to target a single interface. Signed-off-by: Kory Maincent Signed-off-by: David S. 
Miller --- Documentation/netlink/specs/ethtool.yaml | 20 + Documentation/networking/ethtool-netlink.rst | 7 +- include/linux/ethtool.h | 4 + .../uapi/linux/ethtool_netlink_generated.h | 10 + net/ethtool/common.c | 141 ++++++- net/ethtool/common.h | 13 + net/ethtool/netlink.c | 6 +- net/ethtool/netlink.h | 5 +- net/ethtool/ts.h | 20 + net/ethtool/tsinfo.c | 358 +++++++++++++++++- 10 files changed, 563 insertions(+), 21 deletions(-) create mode 100644 net/ethtool/ts.h diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index c7634e957d9c..4082816e5f3d 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -836,6 +836,20 @@ attribute-sets: - name: tx-err type: uint + - + name: ts-hwtstamp-provider + attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: index + type: u32 + - + name: qualifier + type: u32 - name: tsinfo attr-cnt-name: __ethtool-a-tsinfo-cnt @@ -867,6 +881,10 @@ attribute-sets: name: stats type: nest nested-attributes: ts-stat + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider - name: cable-result attr-cnt-name: __ethtool-a-cable-result-cnt @@ -1912,6 +1930,7 @@ operations: request: attributes: - header + - hwtstamp-provider reply: attributes: - header @@ -1920,6 +1939,7 @@ operations: - rx-filters - phc-index - stats + - hwtstamp-provider dump: *tsinfo-get-op - name: cable-test-act diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index b25926071ece..c585e2f0ddfa 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1245,9 +1245,10 @@ Gets timestamping information like ``ETHTOOL_GET_TS_INFO`` ioctl request. 
Request contents: - ===================================== ====== ========================== - ``ETHTOOL_A_TSINFO_HEADER`` nested request header - ===================================== ====== ========================== + ======================================== ====== ============================ + ``ETHTOOL_A_TSINFO_HEADER`` nested request header + ``ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ======================================== ====== ============================ Kernel response contents: diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e217c6321ed0..f711bfd75c4d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -711,6 +711,7 @@ struct ethtool_rxfh_param { * @cmd: command number = %ETHTOOL_GET_TS_INFO * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none + * @phc_qualifier: qualifier of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -718,6 +719,7 @@ struct kernel_ethtool_ts_info { u32 cmd; u32 so_timestamping; int phc_index; + enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; @@ -749,6 +751,7 @@ struct kernel_ethtool_ts_info { * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. + * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier. * @get_drvinfo: Report driver/device information. Modern drivers no * longer have to implement this callback. 
Most fields are * correctly filled in by the core using system information, or @@ -966,6 +969,7 @@ struct ethtool_ops { u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; + u32 supported_hwtstamp_qualifiers; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index b58f352fe4f2..df289dde0f61 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -385,6 +385,15 @@ enum { ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) }; +enum { + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_UNSPEC, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + + __ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX = (__ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT - 1) +}; + enum { ETHTOOL_A_TSINFO_UNSPEC, ETHTOOL_A_TSINFO_HEADER, @@ -393,6 +402,7 @@ enum { ETHTOOL_A_TSINFO_RX_FILTERS, ETHTOOL_A_TSINFO_PHC_INDEX, ETHTOOL_A_TSINFO_STATS, + ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER, __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 05ce4f8080b3..666db40bcfda 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -5,9 +5,12 @@ #include #include #include +#include #include "netlink.h" #include "common.h" +#include "../core/dev.h" + const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_SG_BIT] = "tx-scatter-gather", @@ -763,20 +766,91 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) +static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { - const struct ethtool_ops *ops = dev->ethtool_ops; - 
struct phy_device *phydev = dev->phydev; - int err = 0; - memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; info->phc_index = -1; +} - if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) - err = phy_ts_info(phydev, info); - else if (ops->get_ts_info) - err = ops->get_ts_info(dev, info); +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int err; + + if (!ops->get_ts_info) + return -ENODEV; + + /* Does ptp comes from netdev */ + ethtool_init_tsinfo(info); + info->phc_qualifier = hwprov_desc->qualifier; + err = ops->get_ts_info(dev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index && + net_support_hwtstamp_qualifier(dev, hwprov_desc->qualifier)) + return 0; + + return -ENODEV; +} + +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int err; + + /* Only precise qualifier is supported in phydev */ + if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return -ENODEV; + + /* Look in the phy topology */ + if (dev->link_topo) { + struct phy_device_node *pdn; + unsigned long phy_index; + + xa_for_each(&dev->link_topo->phys, phy_index, pdn) { + if (!phy_has_tsinfo(pdn->phy)) + continue; + + ethtool_init_tsinfo(info); + err = phy_ts_info(pdn->phy, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + return -ENODEV; + } + + /* Look on the dev->phydev */ + if (phy_has_tsinfo(dev->phydev)) { + ethtool_init_tsinfo(info); + err = phy_ts_info(dev->phydev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + + return -ENODEV; +} + +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int 
err; + + err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) + err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; @@ -784,6 +858,55 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info return err; } +int __ethtool_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) +{ + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* No provider specified, use default behavior */ + if (!hwprov) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + int err = 0; + + ethtool_init_tsinfo(info); + if (phy_is_default_hwtstamp(phydev) && + phy_has_tsinfo(phydev)) + err = phy_ts_info(phydev, info); + else if (ops->get_ts_info) + err = ops->get_ts_info(dev, info); + + info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + return err; + } + + return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc); +} + +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return false; + + /* Return true with precise qualifier and with NIC without + * qualifier description to not break the old behavior. 
+ */ + if (!ops->supported_hwtstamp_qualifiers && + qualifier == HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return true; + + if (ops->supported_hwtstamp_qualifiers & BIT(qualifier)) + return true; + + return false; +} + int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { struct kernel_ethtool_ts_info info = { }; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 4a2de3ce7354..f5119204c8ff 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -21,6 +21,7 @@ struct link_mode_info { }; struct genl_info; +struct hwtstamp_provider_desc; extern const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]; @@ -49,6 +50,18 @@ int ethtool_check_max_channel(struct net_device *dev, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e3f0ef6b851b..6ae1d91f36e7 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1074,9 +1074,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_TSINFO_GET, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_tsinfo_start, + .dumpit = 
ethnl_tsinfo_dumpit, + .done = ethnl_tsinfo_done, .policy = ethnl_tsinfo_get_policy, .maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1, }, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 203b08eb6c6f..960cda13e4fc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -464,7 +464,7 @@ extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1]; extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1]; extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1]; -extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + 1]; +extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1]; extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1]; extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1]; extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; @@ -499,6 +499,9 @@ int ethnl_phy_start(struct netlink_callback *cb); int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_phy_done(struct netlink_callback *cb); +int ethnl_tsinfo_start(struct netlink_callback *cb); +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_tsinfo_done(struct netlink_callback *cb); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/ts.h b/net/ethtool/ts.h new file mode 100644 index 000000000000..d901a879a671 --- /dev/null +++ b/net/ethtool/ts.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _NET_ETHTOOL_TS_H +#define _NET_ETHTOOL_TS_H + +#include "netlink.h" + +static const struct 
nla_policy +ethnl_ts_hwtst_prov_policy[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX + 1] = { + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER] = + NLA_POLICY_MAX(NLA_U32, HWTSTAMP_PROVIDER_QUALIFIER_CNT - 1) +}; + +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod); + +#endif /* _NET_ETHTOOL_TS_H */ diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 03d12d6f79ca..7e495a41aeec 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include +#include +#include #include "netlink.h" #include "common.h" #include "bitset.h" +#include "ts.h" struct tsinfo_req_info { struct ethnl_req_info base; + struct hwtstamp_provider_desc hwprov_desc; }; struct tsinfo_reply_data { @@ -16,34 +21,96 @@ struct tsinfo_reply_data { struct ethtool_ts_stats stats; }; +#define TSINFO_REQINFO(__req_base) \ + container_of(__req_base, struct tsinfo_req_info, base) + #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) #define ETHTOOL_TS_STAT_CNT \ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) -const struct nla_policy ethnl_tsinfo_get_policy[] = { +const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = { [ETHTOOL_A_TSINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), + [ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), }; +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod) +{ + struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)]; + int ret; + + ret = nla_parse_nested(tb, + ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1, + nest, + ethnl_ts_hwtst_prov_policy, extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(extack, nest, tb, + 
ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) || + NL_REQ_ATTR_CHECK(extack, nest, tb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER)) + return -EINVAL; + + ethnl_update_u32(&hwprov_desc->index, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX], + mod); + ethnl_update_u32(&hwprov_desc->qualifier, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER], + mod); + + return 0; +} + +static int +tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); + bool mod = false; + + req->hwprov_desc.index = -1; + + if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) + return 0; + + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], + &req->hwprov_desc, extack, &mod); +} + static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + + if (req->hwprov_desc.index != -1) { + ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info, + &req->hwprov_desc); + ethnl_ops_complete(dev); + return ret; + } + if (req_base->flags & ETHTOOL_FLAG_STATS) { ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); if (dev->ethtool_ops->get_ts_stats) dev->ethtool_ops->get_ts_stats(dev, &data->stats); } + ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); @@ -87,8 +154,11 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, return ret; len += ret; /* _TSINFO_RX_FILTERS */ } - if (ts_info->phc_index >= 0) + if (ts_info->phc_index >= 0) { len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ + /* _TSINFO_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32)); + } if (req_base->flags & ETHTOOL_FLAG_STATS) len += 
nla_total_size(0) + /* _TSINFO_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; @@ -163,9 +233,29 @@ static int tsinfo_fill_reply(struct sk_buff *skb, if (ret < 0) return ret; } - if (ts_info->phc_index >= 0 && - nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index)) - return -EMSGSIZE; + if (ts_info->phc_index >= 0) { + struct nlattr *nest; + + ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, + ts_info->phc_index); + if (ret) + return -EMSGSIZE; + + nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ts_info->phc_index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + ts_info->phc_qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } if (req_base->flags & ETHTOOL_FLAG_STATS && tsinfo_put_stats(skb, &data->stats)) return -EMSGSIZE; @@ -173,6 +263,263 @@ static int tsinfo_fill_reply(struct sk_buff *skb, return 0; } +struct ethnl_tsinfo_dump_ctx { + struct tsinfo_req_info *req_info; + struct tsinfo_reply_data *reply_data; + unsigned long pos_ifindex; + bool netdev_dump_done; + unsigned long pos_phyindex; + enum hwtstamp_provider_qualifier pos_phcqualifier; +}; + +static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_reply_data *reply_data, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + void *ehdr = NULL; + + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TSINFO_GET_REPLY); + if (!ehdr) + return ERR_PTR(-EMSGSIZE); + + reply_data = ctx->reply_data; + memset(reply_data, 0, sizeof(*reply_data)); + reply_data->base.dev = dev; + memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + + return ehdr; +} + +static int ethnl_tsinfo_end_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_req_info *req_info, + struct tsinfo_reply_data *reply_data, + void *ehdr) +{ 
+ int ret; + + reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER); + if (ret < 0) + return ret; + + ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base); + if (ret < 0) + return ret; + + reply_data->base.dev = NULL; + genlmsg_end(skb, ehdr); + + return ret; +} + +static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb, + struct net_device *dev, + struct phy_device *phydev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!phy_has_tsinfo(phydev)) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); + + ret = phy_ts_info(phydev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); + if (ret < 0) + goto err; + + return ret; +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!ops->get_ts_info) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT; + ctx->pos_phcqualifier++) { + if (!net_support_hwtstamp_qualifier(dev, + ctx->pos_phcqualifier)) + continue; + + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) { + ret = PTR_ERR(ehdr); + goto err; + } + + reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; + 
ret = ops->get_ts_info(dev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, + ehdr); + if (ret < 0) + goto err; + } + + return ret; + +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct phy_device_node *pdn; + int ret = 0; + + if (!ctx->netdev_dump_done) { + ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + ctx->netdev_dump_done = true; + } + + if (!dev->link_topo) { + if (phy_has_tsinfo(dev->phydev)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + dev->phydev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + + return 0; + } + + xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn, + ctx->pos_phyindex) { + if (phy_has_tsinfo(pdn->phy)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + pdn->phy, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + } + + return ret; +} + +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + if (ctx->req_info->base.dev) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, + ctx->req_info->base.dev, + cb); + } else { + for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + break; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + } + } + rtnl_unlock(); + + return ret; +} + +int ethnl_tsinfo_start(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct nlattr **tb = 
info->info.attrs; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + ret = -ENOMEM; + goto free_req_info; + } + + ret = ethnl_parse_header_dev_get(&req_info->base, + tb[ETHTOOL_A_TSINFO_HEADER], + sock_net(cb->skb->sk), cb->extack, + false); + if (ret < 0) + goto free_reply_data; + + ctx->req_info = req_info; + ctx->reply_data = reply_data; + ctx->pos_ifindex = 0; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + + return 0; + +free_reply_data: + kfree(reply_data); +free_req_info: + kfree(req_info); + + return ret; +} + +int ethnl_tsinfo_done(struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_req_info *req_info = ctx->req_info; + + ethnl_parse_header_dev_put(&req_info->base); + kfree(ctx->reply_data); + kfree(ctx->req_info); + + return 0; +} + const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .request_cmd = ETHTOOL_MSG_TSINFO_GET, .reply_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY, @@ -180,6 +527,7 @@ const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .req_info_size = sizeof(struct tsinfo_req_info), .reply_data_size = sizeof(struct tsinfo_reply_data), + .parse_request = tsinfo_parse_request, .prepare_data = tsinfo_prepare_data, .reply_size = tsinfo_reply_size, .fill_reply = tsinfo_fill_reply, From 6e9e2eed4f39d52edf5fd006409d211facf49f6b Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:45 +0100 Subject: [PATCH 0414/1386] net: ethtool: Add support for tsconfig command to get/set hwtstamp config Introduce support for ETHTOOL_MSG_TSCONFIG_GET/SET ethtool netlink socket to read and configure hwtstamp configuration of a PHC provider. 
Note that simultaneous hwtstamp isn't supported; configuring a new one disables the previous setting. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 56 +++ Documentation/networking/ethtool-netlink.rst | 75 +++ Documentation/networking/timestamping.rst | 38 +- .../uapi/linux/ethtool_netlink_generated.h | 16 + net/ethtool/Makefile | 2 +- net/ethtool/common.c | 27 +- net/ethtool/common.h | 2 +- net/ethtool/netlink.c | 18 + net/ethtool/netlink.h | 3 + net/ethtool/tsconfig.c | 444 ++++++++++++++++++ 10 files changed, 655 insertions(+), 26 deletions(-) create mode 100644 net/ethtool/tsconfig.c diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 4082816e5f3d..60f85fbf4156 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1489,6 +1489,33 @@ attribute-sets: - name: downstream-sfp-name type: string + - + name: tsconfig + attr-cnt-name: __ethtool-a-tsconfig-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: header + type: nest + nested-attributes: header + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider + - + name: tx-types + type: nest + nested-attributes: bitset + - + name: rx-filters + type: nest + nested-attributes: bitset + - + name: hwtstamp-flags + type: u32 operations: enum-model: directional @@ -2314,3 +2341,32 @@ operations: name: phy-ntf doc: Notification for change in PHY devices. notify: phy-get + - + name: tsconfig-get + doc: Get hwtstamp config. + + attribute-set: tsconfig + + do: &tsconfig-get-op + request: + attributes: + - header + reply: + attributes: &tsconfig + - header + - hwtstamp-provider + - tx-types + - rx-filters + - hwtstamp-flags + dump: *tsconfig-get-op + - + name: tsconfig-set + doc: Set hwtstamp config. 
+ + attribute-set: tsconfig + + do: + request: + attributes: *tsconfig + reply: + attributes: *tsconfig diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index c585e2f0ddfa..a7ba6368a4d5 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -237,6 +237,8 @@ Userspace to kernel: ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware ``ETHTOOL_MSG_PHY_GET`` get Ethernet PHY information + ``ETHTOOL_MSG_TSCONFIG_GET`` get hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET`` set hw timestamping configuration ===================================== ================================= Kernel to userspace: @@ -286,6 +288,8 @@ Kernel to userspace: ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates ``ETHTOOL_MSG_PHY_GET_REPLY`` Ethernet PHY information ``ETHTOOL_MSG_PHY_NTF`` Ethernet PHY information change + ``ETHTOOL_MSG_TSCONFIG_GET_REPLY`` hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET_REPLY`` new hw timestamping configuration ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2244,6 +2248,75 @@ Kernel response contents: When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is another PHY. +TSCONFIG_GET +============ + +Retrieves the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCGHWTSTAMP`` ioctl request. 
+ +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +When set the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` attribute identifies the +source of the hw timestamping provider. It is composed by +``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX`` attribute which describe the index of +the PTP device and ``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER`` which describe +the qualifier of the timestamp. + +When set the ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` +and the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` attributes identify the Tx +type, the Rx filter and the flags configured for the current hw timestamping +provider. The attributes are propagated to the driver through the following +structure: + +.. kernel-doc:: include/linux/net_tstamp.h + :identifiers: kernel_hwtstamp_config + +TSCONFIG_SET +============ + +Set the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCSHWTSTAMP`` ioctl request. 
+ +Request contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +For a description of each attribute, see ``TSCONFIG_GET``. + Request translation =================== @@ -2352,4 +2425,6 @@ are netlink only. n/a ``ETHTOOL_MSG_MM_SET`` n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` n/a ``ETHTOOL_MSG_PHY_GET`` + ``SIOCGHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_GET`` + ``SIOCSHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_SET`` =================================== ===================================== diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index b37bfbfc7d79..61ef9da10e28 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -525,8 +525,8 @@ implicitly defined. ts[0] holds a software timestamp if set, ts[1] is again deprecated and ts[2] holds a hardware timestamp if set. -3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP -======================================================================= +3. 
Hardware Timestamping configuration: ETHTOOL_MSG_TSCONFIG_SET/GET +==================================================================== Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in @@ -539,12 +539,14 @@ include/uapi/linux/net_tstamp.h as:: }; Desired behavior is passed into the kernel and to a specific device by -calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose -ifr_data points to a struct hwtstamp_config. The tx_type and -rx_filter are hints to the driver what it is expected to do. If -the requested fine-grained filtering for incoming packets is not -supported, the driver may time stamp more than just the requested types -of packets. +calling the tsconfig netlink socket ``ETHTOOL_MSG_TSCONFIG_SET``. +The ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` and +``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` netlink attributes are then used to set +the struct hwtstamp_config accordingly. + +The ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` netlink nested attribute is used +to select the source of the hardware time stamping. It is composed of an index +for the device source and a qualifier for the type of time stamping. Drivers are free to use a more permissive configuration than the requested configuration. It is expected that drivers should only implement directly the @@ -563,9 +565,16 @@ Only a processes with admin rights may change the configuration. User space is responsible to ensure that multiple processes don't interfere with each other and that the settings are reset. -Any process can read the actual configuration by passing this -structure to ioctl(SIOCGHWTSTAMP) in the same way. However, this has -not been implemented in all drivers. +Any process can read the actual configuration by requesting tsconfig netlink +socket ``ETHTOOL_MSG_TSCONFIG_GET``. 
+ +The legacy configuration is the use of the ioctl(SIOCSHWTSTAMP) with a pointer +to a struct ifreq whose ifr_data points to a struct hwtstamp_config. +The tx_type and rx_filter are hints to the driver what it is expected to do. +If the requested fine-grained filtering for incoming packets is not +supported, the driver may time stamp more than just the requested types +of packets. ioctl(SIOCGHWTSTAMP) is used in the same way as the +ioctl(SIOCSHWTSTAMP). However, this has not been implemented in all drivers. :: @@ -610,9 +619,10 @@ not been implemented in all drivers. -------------------------------------------------------- A driver which supports hardware time stamping must support the -SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with -the actual values as described in the section on SIOCSHWTSTAMP. It -should also support SIOCGHWTSTAMP. +ndo_hwtstamp_set NDO or the legacy SIOCSHWTSTAMP ioctl and update the +supplied struct hwtstamp_config with the actual values as described in +the section on SIOCSHWTSTAMP. It should also support ndo_hwtstamp_get or +the legacy SIOCGHWTSTAMP. Time stamps for received packets must be stored in the skb. To get a pointer to the shared time stamp structure of the skb call skb_hwtstamps(). 
Then diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index df289dde0f61..43993a2d68e5 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -694,6 +694,18 @@ enum { ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; +enum { + ETHTOOL_A_TSCONFIG_UNSPEC, + ETHTOOL_A_TSCONFIG_HEADER, + ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER, + ETHTOOL_A_TSCONFIG_TX_TYPES, + ETHTOOL_A_TSCONFIG_RX_FILTERS, + ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + + __ETHTOOL_A_TSCONFIG_CNT, + ETHTOOL_A_TSCONFIG_MAX = (__ETHTOOL_A_TSCONFIG_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -741,6 +753,8 @@ enum { ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, + ETHTOOL_MSG_TSCONFIG_GET, + ETHTOOL_MSG_TSCONFIG_SET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -794,6 +808,8 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_NTF, ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, + ETHTOOL_MSG_TSCONFIG_GET_REPLY, + ETHTOOL_MSG_TSCONFIG_SET_REPLY, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 9b540644ba31..a1490c4afe6b 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -9,4 +9,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \ - phy.o + phy.o tsconfig.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 666db40bcfda..02f941f667dd 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -797,7 +797,7 @@ int ethtool_net_get_ts_info_by_phc(struct net_device *dev, return -ENODEV; } -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, 
struct hwtstamp_provider_desc *hwprov_desc) @@ -806,7 +806,7 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, /* Only precise qualifier is supported in phydev */ if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) - return -ENODEV; + return ERR_PTR(-ENODEV); /* Look in the phy topology */ if (dev->link_topo) { @@ -820,12 +820,12 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(pdn->phy, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return pdn->phy; } - return -ENODEV; + return ERR_PTR(-ENODEV); } /* Look on the dev->phydev */ @@ -833,13 +833,13 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(dev->phydev, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return dev->phydev; } - return -ENODEV; + return ERR_PTR(-ENODEV); } int ethtool_get_ts_info_by_phc(struct net_device *dev, @@ -849,8 +849,15 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int err; err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); - if (err == -ENODEV) - err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) { + struct phy_device *phy; + + phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (IS_ERR(phy)) + err = PTR_ERR(phy); + else + err = 0; + } info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index f5119204c8ff..850eadde4bfc 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -56,7 +56,7 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int ethtool_net_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct 
kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6ae1d91f36e7..849c98e637c6 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -394,6 +394,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, + [ETHTOOL_MSG_TSCONFIG_GET] = ðnl_tsconfig_request_ops, + [ETHTOOL_MSG_TSCONFIG_SET] = ðnl_tsconfig_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1243,6 +1245,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_phy_get_policy, .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_tsconfig_get_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_set_doit, + .policy = ethnl_tsconfig_set_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 960cda13e4fc..0a09298fff92 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -435,6 +435,7 @@ extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; extern const struct ethnl_request_ops ethnl_phy_request_ops; +extern const struct ethnl_request_ops ethnl_tsconfig_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 
1]; @@ -485,6 +486,8 @@ extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1]; extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c new file mode 100644 index 000000000000..9188e088fb2f --- /dev/null +++ b/net/ethtool/tsconfig.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "netlink.h" +#include "common.h" +#include "bitset.h" +#include "../core/dev.h" +#include "ts.h" + +struct tsconfig_req_info { + struct ethnl_req_info base; +}; + +struct tsconfig_reply_data { + struct ethnl_reply_data base; + struct hwtstamp_provider_desc hwprov_desc; + struct { + u32 tx_type; + u32 rx_filter; + u32 flags; + } hwtst_config; +}; + +#define TSCONFIG_REPDATA(__reply_base) \ + container_of(__reply_base, struct tsconfig_reply_data, base) + +const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = reply_base->dev; + struct kernel_hwtstamp_config cfg = {}; + int ret; + + if (!dev->netdev_ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + ret = 
ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = dev_get_hwtstamp_phylib(dev, &cfg); + if (ret) + goto out; + + data->hwtst_config.tx_type = BIT(cfg.tx_type); + data->hwtst_config.rx_filter = BIT(cfg.rx_filter); + data->hwtst_config.flags = BIT(cfg.flags); + + data->hwprov_desc.index = -1; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + data->hwprov_desc.index = hwprov->desc.index; + data->hwprov_desc.qualifier = hwprov->desc.qualifier; + } else { + struct kernel_ethtool_ts_info ts_info = {}; + + ts_info.phc_index = -1; + ret = __ethtool_get_ts_info(dev, &ts_info); + if (ret) + goto out; + + if (ts_info.phc_index == -1) + return -ENODEV; + + data->hwprov_desc.index = ts_info.phc_index; + data->hwprov_desc.qualifier = ts_info.phc_qualifier; + } + +out: + ethnl_ops_complete(dev); + return ret; +} + +static int tsconfig_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int len = 0; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + + if (data->hwtst_config.flags) + /* _TSCONFIG_HWTSTAMP_FLAGS */ + len += nla_total_size(sizeof(u32)); + + if (data->hwtst_config.tx_type) { + ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, + NULL, __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_TX_TYPES */ + } + if (data->hwtst_config.rx_filter) { + ret = ethnl_bitset32_size(&data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_RX_FILTERS */ + } + + if (data->hwprov_desc.index >= 0) + /* _TSCONFIG_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + + 2 * nla_total_size(sizeof(u32)); + + return len; +} + +static int tsconfig_fill_reply(struct sk_buff *skb, + const struct 
ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + if (data->hwtst_config.flags) { + ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + data->hwtst_config.flags); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.tx_type) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_TX_TYPES, + &data->hwtst_config.tx_type, NULL, + __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.rx_filter) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_RX_FILTERS, + &data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwprov_desc.index >= 0) { + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + data->hwprov_desc.index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + data->hwprov_desc.qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } + return 0; +} + +/* TSCONFIG_SET */ +const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, + [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, +}; + +static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) +{ + struct tsconfig_reply_data *reply_data; + struct tsconfig_req_info *req_info; + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + req_info 
= kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kmalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + kfree(req_info); + return -ENOMEM; + } + + ASSERT_RTNL(); + reply_data->base.dev = dev; + ret = tsconfig_prepare_data(&req_info->base, &reply_data->base, info); + if (ret < 0) + goto err_cleanup; + + ret = tsconfig_reply_size(&req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + reply_len = ret + ethnl_reply_header_size(); + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, + ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); + if (!rskb) + goto err_cleanup; + + ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + +err_cleanup: + kfree(reply_data); + kfree(req_info); + return ret; +} + +static int ethnl_set_tsconfig_validate(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + const struct net_device_ops *ops = req_base->dev->netdev_ops; + + if (!ops->ndo_hwtstamp_set || !ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + return 1; +} + +static struct hwtstamp_provider * +tsconfig_set_hwprov_from_desc(struct net_device *dev, + struct genl_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + struct kernel_ethtool_ts_info ts_info; + struct hwtstamp_provider *hwprov; + struct nlattr **tb = info->attrs; + struct phy_device *phy = NULL; + enum hwtstamp_source source; + int ret; + + ret = ethtool_net_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (!ret) { + /* Found */ + source = HWTSTAMP_SOURCE_NETDEV; + } else { + phy = ethtool_phy_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (IS_ERR(phy)) { + if (PTR_ERR(phy) == -ENODEV) + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + "phc not in this net device topology"); + return ERR_CAST(phy); + } + + source = HWTSTAMP_SOURCE_PHYLIB; + } + + 
hwprov = kzalloc(sizeof(*hwprov), GFP_KERNEL); + if (!hwprov) + return ERR_PTR(-ENOMEM); + + hwprov->desc.index = hwprov_desc->index; + hwprov->desc.qualifier = hwprov_desc->qualifier; + hwprov->source = source; + hwprov->phydev = phy; + + return hwprov; +} + +static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + struct kernel_hwtstamp_config hwtst_config = {0}; + bool hwprov_mod = false, config_mod = false; + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = req_base->dev; + struct nlattr **tb = info->attrs; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + + if (!netif_device_present(dev)) + return -ENODEV; + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER]) { + struct hwtstamp_provider_desc __hwprov_desc = {.index = -1}; + struct hwtstamp_provider *__hwprov; + + __hwprov = rtnl_dereference(dev->hwprov); + if (__hwprov) { + __hwprov_desc.index = __hwprov->desc.index; + __hwprov_desc.qualifier = __hwprov->desc.qualifier; + } + + ret = ts_parse_hwtst_provider(tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + &__hwprov_desc, info->extack, + &hwprov_mod); + if (ret < 0) + return ret; + + if (hwprov_mod) { + hwprov = tsconfig_set_hwprov_from_desc(dev, info, + &__hwprov_desc); + if (IS_ERR(hwprov)) + return PTR_ERR(hwprov); + } + } + + /* Get current hwtstamp config if we are not changing the + * hwtstamp source. It will be zeroed in the other case. 
+ */ + if (!hwprov_mod) { + ret = dev_get_hwtstamp_phylib(dev, &hwtst_config); + if (ret < 0 && ret != -EOPNOTSUPP) + goto err_free_hwprov; + } + + /* Get the hwtstamp config from netlink */ + if (tb[ETHTOOL_A_TSCONFIG_TX_TYPES]) { + u32 req_tx_type; + + req_tx_type = BIT(hwtst_config.tx_type); + ret = ethnl_update_bitset32(&req_tx_type, + __HWTSTAMP_TX_CNT, + tb[ETHTOOL_A_TSCONFIG_TX_TYPES], + ts_tx_type_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one tx type at a time */ + if (ffs(req_tx_type) != fls(req_tx_type)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.tx_type = ffs(req_tx_type) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_RX_FILTERS]) { + u32 req_rx_filter; + + req_rx_filter = BIT(hwtst_config.rx_filter); + ret = ethnl_update_bitset32(&req_rx_filter, + __HWTSTAMP_FILTER_CNT, + tb[ETHTOOL_A_TSCONFIG_RX_FILTERS], + ts_rx_filter_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one rx filter at a time */ + if (ffs(req_rx_filter) != fls(req_rx_filter)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.rx_filter = ffs(req_rx_filter) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { + ethnl_update_u32(&hwtst_config.flags, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + &config_mod); + } + + ret = net_hwtstamp_validate(&hwtst_config); + if (ret) + goto err_free_hwprov; + + if (hwprov_mod) { + struct kernel_hwtstamp_config zero_config = {0}; + struct hwtstamp_provider *__hwprov; + + /* Disable current time stamping if we try to enable + * another one + */ + ret = dev_set_hwtstamp_phylib(dev, &zero_config, info->extack); + if (ret < 0) + goto err_free_hwprov; + + /* Change the selected hwtstamp source */ + __hwprov = rcu_replace_pointer_rtnl(dev->hwprov, hwprov); + if (__hwprov) + kfree_rcu(__hwprov, rcu_head); + } + + if (config_mod) { + ret = dev_set_hwtstamp_phylib(dev, &hwtst_config, + info->extack); + if (ret < 0) + return ret; + } + + 
if (hwprov_mod || config_mod) { + ret = tsconfig_send_reply(dev, info); + if (ret && ret != -EOPNOTSUPP) { + NL_SET_ERR_MSG(info->extack, + "error while reading the new configuration set"); + return ret; + } + } + + /* tsconfig has no notification */ + return 0; + +err_free_hwprov: + kfree(hwprov); + + return ret; +} + +const struct ethnl_request_ops ethnl_tsconfig_request_ops = { + .request_cmd = ETHTOOL_MSG_TSCONFIG_GET, + .reply_cmd = ETHTOOL_MSG_TSCONFIG_GET_REPLY, + .hdr_attr = ETHTOOL_A_TSCONFIG_HEADER, + .req_info_size = sizeof(struct tsconfig_req_info), + .reply_data_size = sizeof(struct tsconfig_reply_data), + + .prepare_data = tsconfig_prepare_data, + .reply_size = tsconfig_reply_size, + .fill_reply = tsconfig_fill_reply, + + .set_validate = ethnl_set_tsconfig_validate, + .set = ethnl_set_tsconfig, +}; From f86e09fd393adfbbf078985e2cbf322e9892fbe3 Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Wed, 11 Dec 2024 17:34:25 +0200 Subject: [PATCH 0415/1386] dt-bindings: net: wireless: Describe ath12k PCI module with WSI The QCN9274 WiFi device supports WSI (WLAN Serial Interface). WSI is used to exchange specific control information across radios using a doorbell mechanism. This WSI connection is essential for exchanging control information among these devices. The WSI interface in the QCN9274 includes TX and RX ports, which are used to connect multiple WSI-supported devices together, forming a WSI group. Describe QCN9274 PCI wifi device with WSI interface. 
Signed-off-by: Raj Kumar Bhagat Signed-off-by: Kalle Valo Reviewed-by: Conor Dooley Link: https://patch.msgid.link/20241211153432.775335-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- .../net/wireless/qcom,ath12k-wsi.yaml | 204 ++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml new file mode 100644 index 000000000000..cbfb559f6b69 --- /dev/null +++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath12k-wsi.yaml @@ -0,0 +1,204 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/wireless/qcom,ath12k-wsi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Technologies ath12k wireless devices (PCIe) with WSI interface + +maintainers: + - Jeff Johnson + - Kalle Valo + +description: | + Qualcomm Technologies IEEE 802.11be PCIe devices with WSI interface. + + The ath12k devices (QCN9274) feature WSI support. WSI stands for + WLAN Serial Interface. It is used for the exchange of specific + control information across radios based on the doorbell mechanism. + This WSI connection is essential to exchange control information + among these devices. + + The WSI interface includes TX and RX ports, which are used to connect + multiple WSI-supported devices together, forming a WSI group. + + Diagram to represent one WSI connection (one WSI group) among + three devices. 
+ + +-------+ +-------+ +-------+ + | pcie1 | | pcie2 | | pcie3 | + | | | | | | + +----->| wsi |------->| wsi |------->| wsi |-----+ + | | grp 0 | | grp 0 | | grp 0 | | + | +-------+ +-------+ +-------+ | + +------------------------------------------------------+ + + Diagram to represent two WSI connections (two separate WSI groups) + among four devices. + + +-------+ +-------+ +-------+ +-------+ + | pcie0 | | pcie1 | | pcie2 | | pcie3 | + | | | | | | | | + +-->| wsi |--->| wsi |--+ +-->| wsi |--->| wsi |--+ + | | grp 0 | | grp 0 | | | | grp 1 | | grp 1 | | + | +-------+ +-------+ | | +-------+ +-------+ | + +---------------------------+ +---------------------------+ + +properties: + compatible: + enum: + - pci17cb,1109 # QCN9274 + + reg: + maxItems: 1 + + qcom,ath12k-calibration-variant: + $ref: /schemas/types.yaml#/definitions/string + description: + String to uniquely identify variant of the calibration data for designs + with colliding bus and device ids + + qcom,wsi-controller: + $ref: /schemas/types.yaml#/definitions/flag + description: + The WSI controller device in the WSI group aids (is capable) to + synchronize the Timing Synchronization Function (TSF) clock across + all devices in the WSI group. + + ports: + $ref: /schemas/graph.yaml#/properties/ports + properties: + port@0: + $ref: /schemas/graph.yaml#/properties/port + description: + This is the TX port of WSI interface. It is attached to the RX + port of the next device in the WSI connection. + + port@1: + $ref: /schemas/graph.yaml#/properties/port + description: + This is the RX port of WSI interface. It is attached to the TX + port of the previous device in the WSI connection. 
+ +required: + - compatible + - reg + +additionalProperties: false + +examples: + - | + pcie { + #address-cells = <3>; + #size-cells = <2>; + + pcie@0 { + device_type = "pci"; + reg = <0x0 0x0 0x0 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + ranges; + + wifi@0 { + compatible = "pci17cb,1109"; + reg = <0x0 0x0 0x0 0x0 0x0>; + + qcom,ath12k-calibration-variant = "RDP433_1"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + wifi1_wsi_tx: endpoint { + remote-endpoint = <&wifi2_wsi_rx>; + }; + }; + + port@1 { + reg = <1>; + + wifi1_wsi_rx: endpoint { + remote-endpoint = <&wifi3_wsi_tx>; + }; + }; + }; + }; + }; + + pcie@1 { + device_type = "pci"; + reg = <0x0 0x0 0x1 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + ranges; + + wifi@0 { + compatible = "pci17cb,1109"; + reg = <0x0 0x0 0x0 0x0 0x0>; + + qcom,ath12k-calibration-variant = "RDP433_2"; + qcom,wsi-controller; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + wifi2_wsi_tx: endpoint { + remote-endpoint = <&wifi3_wsi_rx>; + }; + }; + + port@1 { + reg = <1>; + + wifi2_wsi_rx: endpoint { + remote-endpoint = <&wifi1_wsi_tx>; + }; + }; + }; + }; + }; + + pcie@2 { + device_type = "pci"; + reg = <0x0 0x0 0x2 0x0 0x0>; + #address-cells = <3>; + #size-cells = <2>; + ranges; + + wifi@0 { + compatible = "pci17cb,1109"; + reg = <0x0 0x0 0x0 0x0 0x0>; + + qcom,ath12k-calibration-variant = "RDP433_3"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + + wifi3_wsi_tx: endpoint { + remote-endpoint = <&wifi1_wsi_rx>; + }; + }; + + port@1 { + reg = <1>; + + wifi3_wsi_rx: endpoint { + remote-endpoint = <&wifi2_wsi_tx>; + }; + }; + }; + }; + }; + }; From 908c10c860e012f961bfb2b8e0823b23426beb9d Mon Sep 17 00:00:00 2001 From: Raj Kumar Bhagat Date: Wed, 11 Dec 2024 17:34:26 +0200 Subject: [PATCH 0416/1386] wifi: ath12k: parse multiple device information from Device Tree Currently, a single device is part of 
the device group abstraction. However, for multi-link operations, multiple devices need to be combined. This multi-device grouping is done via WSI (WLAN Serial Interface), which is described in the Device Tree. Information about different WSI groups and the number of devices involved in each group can be parsed from the Device Tree. Add changes to parse the Device Tree and determine WSI information, such as the different WSI groups and the number of devices per WSI group. Assign WSI index zero to the WSI controller device (to synchronize the clock among the devices within the WSI group), and increment the WSI index of each device in the order of the WSI connection. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Co-developed-by: Harshitha Prem Signed-off-by: Harshitha Prem Signed-off-by: Raj Kumar Bhagat Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 183 +++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/core.h | 8 ++ 2 files changed, 178 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 49d1ac15cb7a..1a43e00cffb2 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "core.h" #include "dp_tx.h" #include "dp_rx.h" @@ -1383,20 +1384,24 @@ bool ath12k_core_hw_group_create_ready(struct ath12k_hw_group *ag) return (ag->num_probed == ag->num_devices); } -static struct ath12k_hw_group *ath12k_core_hw_group_alloc(u8 id, u8 max_devices) +static struct ath12k_hw_group *ath12k_core_hw_group_alloc(struct ath12k_base *ab) { struct ath12k_hw_group *ag; + int count = 0; lockdep_assert_held(&ath12k_hw_group_mutex); + list_for_each_entry(ag, &ath12k_hw_group_list, 
list) + count++; + ag = kzalloc(sizeof(*ag), GFP_KERNEL); if (!ag) return NULL; - ag->id = id; - ag->num_devices = max_devices; + ag->id = count; list_add(&ag->list, &ath12k_hw_group_list); mutex_init(&ag->mutex); + ag->mlo_capable = false; return ag; } @@ -1411,35 +1416,180 @@ static void ath12k_core_hw_group_free(struct ath12k_hw_group *ag) mutex_unlock(&ath12k_hw_group_mutex); } +static struct ath12k_hw_group *ath12k_core_hw_group_find_by_dt(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag; + int i; + + if (!ab->dev->of_node) + return NULL; + + list_for_each_entry(ag, &ath12k_hw_group_list, list) + for (i = 0; i < ag->num_devices; i++) + if (ag->wsi_node[i] == ab->dev->of_node) + return ag; + + return NULL; +} + +static int ath12k_core_get_wsi_info(struct ath12k_hw_group *ag, + struct ath12k_base *ab) +{ + struct device_node *wsi_dev = ab->dev->of_node, *next_wsi_dev; + struct device_node *tx_endpoint, *next_rx_endpoint; + int device_count = 0; + + next_wsi_dev = wsi_dev; + + if (!next_wsi_dev) + return -ENODEV; + + do { + ag->wsi_node[device_count] = next_wsi_dev; + + tx_endpoint = of_graph_get_endpoint_by_regs(next_wsi_dev, 0, -1); + if (!tx_endpoint) { + of_node_put(next_wsi_dev); + return -ENODEV; + } + + next_rx_endpoint = of_graph_get_remote_endpoint(tx_endpoint); + if (!next_rx_endpoint) { + of_node_put(next_wsi_dev); + of_node_put(tx_endpoint); + return -ENODEV; + } + + of_node_put(tx_endpoint); + of_node_put(next_wsi_dev); + + next_wsi_dev = of_graph_get_port_parent(next_rx_endpoint); + if (!next_wsi_dev) { + of_node_put(next_rx_endpoint); + return -ENODEV; + } + + of_node_put(next_rx_endpoint); + + device_count++; + if (device_count > ATH12K_MAX_SOCS) { + ath12k_warn(ab, "device count in DT %d is more than limit %d\n", + device_count, ATH12K_MAX_SOCS); + of_node_put(next_wsi_dev); + return -EINVAL; + } + } while (wsi_dev != next_wsi_dev); + + of_node_put(next_wsi_dev); + ag->num_devices = device_count; + + return 0; +} + +static int 
ath12k_core_get_wsi_index(struct ath12k_hw_group *ag, + struct ath12k_base *ab) +{ + int i, wsi_controller_index = -1, node_index = -1; + bool control; + + for (i = 0; i < ag->num_devices; i++) { + control = of_property_read_bool(ag->wsi_node[i], "qcom,wsi-controller"); + if (control) + wsi_controller_index = i; + + if (ag->wsi_node[i] == ab->dev->of_node) + node_index = i; + } + + if (wsi_controller_index == -1) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, "wsi controller is not defined in dt"); + return -EINVAL; + } + + if (node_index == -1) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, "unable to get WSI node index"); + return -EINVAL; + } + + ab->wsi_info.index = (ag->num_devices + node_index - wsi_controller_index) % + ag->num_devices; + + return 0; +} + static struct ath12k_hw_group *ath12k_core_hw_group_assign(struct ath12k_base *ab) { - u32 group_id = ATH12K_INVALID_GROUP_ID; + struct ath12k_wsi_info *wsi = &ab->wsi_info; struct ath12k_hw_group *ag; lockdep_assert_held(&ath12k_hw_group_mutex); /* The grouping of multiple devices will be done based on device tree file. - * TODO: device tree file parsing to know about the devices involved in group. + * The platforms that do not have any valid group information would have + * each device to be part of its own invalid group. * - * The platforms that do not have any valid group information would have each - * device to be part of its own invalid group. - * - * Currently, we are not parsing any device tree information and hence, grouping - * of multiple devices is not involved. Thus, single device is added to device - * group. + * We use group id ATH12K_INVALID_GROUP_ID for single device group + * which didn't have dt entry or wrong dt entry, there could be many + * groups with same group id, i.e ATH12K_INVALID_GROUP_ID. 
So + * default group id of ATH12K_INVALID_GROUP_ID combined with + * num devices in ath12k_hw_group determines if the group is + * multi device or single device group */ - ag = ath12k_core_hw_group_alloc(group_id, 1); + + ag = ath12k_core_hw_group_find_by_dt(ab); + if (!ag) { + ag = ath12k_core_hw_group_alloc(ab); + if (!ag) { + ath12k_warn(ab, "unable to create new hw group\n"); + return NULL; + } + + if (ath12k_core_get_wsi_info(ag, ab) || + ath12k_core_get_wsi_index(ag, ab)) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, + "unable to get wsi info from dt, grouping single device"); + ag->id = ATH12K_INVALID_GROUP_ID; + ag->num_devices = 1; + memset(ag->wsi_node, 0, sizeof(ag->wsi_node)); + wsi->index = 0; + } + + goto exit; + } else if (test_bit(ATH12K_GROUP_FLAG_UNREGISTER, &ag->flags)) { + ath12k_dbg(ab, ATH12K_DBG_BOOT, "group id %d in unregister state\n", + ag->id); + goto invalid_group; + } else { + if (ath12k_core_get_wsi_index(ag, ab)) + goto invalid_group; + goto exit; + } + +invalid_group: + ag = ath12k_core_hw_group_alloc(ab); if (!ag) { ath12k_warn(ab, "unable to create new hw group\n"); return NULL; } + ag->id = ATH12K_INVALID_GROUP_ID; + ag->num_devices = 1; + wsi->index = 0; + ath12k_dbg(ab, ATH12K_DBG_BOOT, "single device added to hardware group\n"); +exit: + if (ag->num_probed >= ag->num_devices) { + ath12k_warn(ab, "unable to add new device to group, max limit reached\n"); + goto invalid_group; + } + ab->device_id = ag->num_probed++; ag->ab[ab->device_id] = ab; ab->ag = ag; - ag->mlo_capable = false; + + ath12k_dbg(ab, ATH12K_DBG_BOOT, "wsi group-id %d num-devices %d index %d", + ag->id, ag->num_devices, wsi->index); return ag; } @@ -1507,6 +1657,13 @@ static void ath12k_core_hw_group_cleanup(struct ath12k_hw_group *ag) mutex_lock(&ag->mutex); + if (test_bit(ATH12K_GROUP_FLAG_UNREGISTER, &ag->flags)) { + mutex_unlock(&ag->mutex); + return; + } + + set_bit(ATH12K_GROUP_FLAG_UNREGISTER, &ag->flags); + ath12k_core_hw_group_stop(ag); for (i = 0; i < 
ag->num_devices; i++) { diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 458e3d0071a8..d0e466819036 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -219,6 +219,7 @@ enum ath12k_scan_state { enum ath12k_hw_group_flags { ATH12K_GROUP_FLAG_REGISTERED, + ATH12K_GROUP_FLAG_UNREGISTER, }; enum ath12k_dev_flags { @@ -845,6 +846,12 @@ struct ath12k_hw_group { struct ath12k_hw *ah[ATH12K_GROUP_MAX_RADIO]; u8 num_hw; bool mlo_capable; + struct device_node *wsi_node[ATH12K_MAX_SOCS]; +}; + +/* Holds WSI info specific to each device, excluding WSI group info */ +struct ath12k_wsi_info { + u32 index; }; /* Master structure to hold the hw data which may be used in core module */ @@ -1028,6 +1035,7 @@ struct ath12k_base { struct notifier_block panic_nb; struct ath12k_hw_group *ag; + struct ath12k_wsi_info wsi_info; /* must be last */ u8 drv_priv[] __aligned(sizeof(void *)); From 08a4c51c6ea0790d8abcb713410833fcb0019a69 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 11 Dec 2024 17:34:27 +0200 Subject: [PATCH 0417/1386] wifi: ath12k: send partner device details in QMI MLO capability Currently, QMI MLO host capability is sent with the details of local links and hw_link id only for particular device. But in the case of multi device group abstraction, it has to include the details of hw_link_id, num_local_links of every partner device that is involved in the group during QMI MLO capability exchange. Add changes to send partner device details to the firmware in QMI MLO capability exchange. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 88 ++++++++++++++++++++++----- 1 file changed, 72 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index ba3cd2342465..2f10c83ef54a 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2016,17 +2016,19 @@ static const struct qmi_elem_info qmi_wlanfw_wlan_ini_resp_msg_v01_ei[] = { }, }; -static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, - struct qmi_wlanfw_host_cap_req_msg_v01 *req) +static int ath12k_host_cap_parse_mlo(struct ath12k_base *ab, + struct qmi_wlanfw_host_cap_req_msg_v01 *req) { struct wlfw_host_mlo_chip_info_s_v01 *info; + struct ath12k_hw_group *ag = ab->ag; + struct ath12k_base *partner_ab; u8 hw_link_id = 0; - int i; + int i, j, ret; - if (!ab->ag->mlo_capable) { + if (!ag->mlo_capable) { ath12k_dbg(ab, ATH12K_DBG_QMI, "MLO is disabled hence skip QMI MLO cap"); - return; + return 0; } if (!ab->qmi.num_radios || ab->qmi.num_radios == U8_MAX) { @@ -2035,7 +2037,12 @@ static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, ath12k_dbg(ab, ATH12K_DBG_QMI, "skip QMI MLO cap due to invalid num_radio %d\n", ab->qmi.num_radios); - return; + return 0; + } + + if (ab->device_id == ATH12K_INVALID_DEVICE_ID) { + ath12k_err(ab, "failed to send MLO cap due to invalid device id\n"); + return -EINVAL; } req->mlo_capable_valid = 1; @@ -2043,27 +2050,74 @@ static void ath12k_host_cap_parse_mlo(struct ath12k_base *ab, req->mlo_chip_id_valid = 1; req->mlo_chip_id = ab->device_id; req->mlo_group_id_valid = 1; - req->mlo_group_id = 0; + 
req->mlo_group_id = ag->id; req->max_mlo_peer_valid = 1; /* Max peer number generally won't change for the same device * but needs to be synced with host driver. */ req->max_mlo_peer = ab->hw_params->max_mlo_peer; req->mlo_num_chips_valid = 1; - req->mlo_num_chips = 1; + req->mlo_num_chips = ag->num_devices; - info = &req->mlo_chip_info[0]; - info->chip_id = ab->device_id; - info->num_local_links = ab->qmi.num_radios; + mutex_lock(&ag->mutex); - for (i = 0; i < info->num_local_links; i++) { - info->hw_link_id[i] = hw_link_id; - info->valid_mlo_link_id[i] = 1; + for (i = 0; i < ag->num_devices; i++) { + info = &req->mlo_chip_info[i]; + partner_ab = ag->ab[i]; - hw_link_id++; + if (partner_ab->device_id == ATH12K_INVALID_DEVICE_ID) { + ath12k_err(ab, "failed to send MLO cap due to invalid partner device id\n"); + ret = -EINVAL; + goto device_cleanup; + } + + info->chip_id = partner_ab->device_id; + info->num_local_links = partner_ab->qmi.num_radios; + + ath12k_dbg(ab, ATH12K_DBG_QMI, "mlo device id %d num_link %d\n", + info->chip_id, info->num_local_links); + + for (j = 0; j < info->num_local_links; j++) { + info->hw_link_id[j] = hw_link_id; + info->valid_mlo_link_id[j] = 1; + + hw_link_id++; + } } + if (hw_link_id <= 0) + ag->mlo_capable = false; + req->mlo_chip_info_valid = 1; + + mutex_unlock(&ag->mutex); + + return 0; + +device_cleanup: + for (i = i - 1; i >= 0; i--) { + info = &req->mlo_chip_info[i]; + + memset(info, 0, sizeof(*info)); + } + + req->mlo_num_chips = 0; + req->mlo_num_chips_valid = 0; + + req->max_mlo_peer = 0; + req->max_mlo_peer_valid = 0; + req->mlo_group_id = 0; + req->mlo_group_id_valid = 0; + req->mlo_chip_id = 0; + req->mlo_chip_id_valid = 0; + req->mlo_capable = 0; + req->mlo_capable_valid = 0; + + ag->mlo_capable = false; + + mutex_unlock(&ag->mutex); + + return ret; } /* clang stack usage explodes if this is inlined */ @@ -2113,7 +2167,9 @@ int ath12k_qmi_host_cap_send(struct ath12k_base *ab) req.nm_modem |= 
PLATFORM_CAP_PCIE_GLOBAL_RESET; } - ath12k_host_cap_parse_mlo(ab, &req); + ret = ath12k_host_cap_parse_mlo(ab, &req); + if (ret < 0) + goto out; ret = qmi_txn_init(&ab->qmi.handle, &txn, qmi_wlanfw_host_cap_resp_msg_v01_ei, &resp); From 786f34b5b4a408f466f762ca7785121ef3dbf540 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 11 Dec 2024 17:34:28 +0200 Subject: [PATCH 0418/1386] wifi: ath12k: refactor ath12k_qmi_alloc_target_mem_chunk() Currently, all QMI target memory types share the same allocation logic within ath12k_qmi_alloc_target_mem_chunk(). However, for Multi-Link Operation (MLO), the firmware requests a new MLO global memory region. This memory is shared across different firmware (SoC) participating in the MLO. To accommodate this logic change, refactor ath12k_qmi_alloc_target_mem_chunk() and introduce a helper function ath12k_qmi_alloc_chunk() for memory chunk allocation. Subsequent patch will add MLO global memory allocation logic. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.1.1-00210-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Raj Kumar Bhagat Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 82 ++++++++++++++------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 2f10c83ef54a..7f3d5b269b9e 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2423,9 +2423,50 @@ static void ath12k_qmi_free_target_mem_chunk(struct ath12k_base *ab) } } +static int ath12k_qmi_alloc_chunk(struct ath12k_base *ab, + struct target_mem_chunk *chunk) +{ + /* Firmware reloads in recovery/resume. + * In such cases, no need to allocate memory for FW again. 
+ */ + if (chunk->v.addr) { + if (chunk->prev_type == chunk->type && + chunk->prev_size == chunk->size) + goto this_chunk_done; + + /* cannot reuse the existing chunk */ + dma_free_coherent(ab->dev, chunk->prev_size, + chunk->v.addr, chunk->paddr); + chunk->v.addr = NULL; + } + + chunk->v.addr = dma_alloc_coherent(ab->dev, + chunk->size, + &chunk->paddr, + GFP_KERNEL | __GFP_NOWARN); + if (!chunk->v.addr) { + if (chunk->size > ATH12K_QMI_MAX_CHUNK_SIZE) { + ab->qmi.target_mem_delayed = true; + ath12k_warn(ab, + "qmi dma allocation failed (%d B type %u), will try later with small size\n", + chunk->size, + chunk->type); + ath12k_qmi_free_target_mem_chunk(ab); + return 0; + } + ath12k_warn(ab, "memory allocation failure for %u size: %d\n", + chunk->type, chunk->size); + return -ENOMEM; + } + chunk->prev_type = chunk->type; + chunk->prev_size = chunk->size; +this_chunk_done: + return 0; +} + static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) { - int i; + int i, ret = 0; struct target_mem_chunk *chunk; ab->qmi.target_mem_delayed = false; @@ -2442,42 +2483,7 @@ static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) case M3_DUMP_REGION_TYPE: case PAGEABLE_MEM_REGION_TYPE: case CALDB_MEM_REGION_TYPE: - /* Firmware reloads in recovery/resume. - * In such cases, no need to allocate memory for FW again. 
- */ - if (chunk->v.addr) { - if (chunk->prev_type == chunk->type && - chunk->prev_size == chunk->size) - goto this_chunk_done; - - /* cannot reuse the existing chunk */ - dma_free_coherent(ab->dev, chunk->prev_size, - chunk->v.addr, chunk->paddr); - chunk->v.addr = NULL; - } - - chunk->v.addr = dma_alloc_coherent(ab->dev, - chunk->size, - &chunk->paddr, - GFP_KERNEL | __GFP_NOWARN); - if (!chunk->v.addr) { - if (chunk->size > ATH12K_QMI_MAX_CHUNK_SIZE) { - ab->qmi.target_mem_delayed = true; - ath12k_warn(ab, - "qmi dma allocation failed (%d B type %u), will try later with small size\n", - chunk->size, - chunk->type); - ath12k_qmi_free_target_mem_chunk(ab); - return 0; - } - ath12k_warn(ab, "memory allocation failure for %u size: %d\n", - chunk->type, chunk->size); - return -ENOMEM; - } - - chunk->prev_type = chunk->type; - chunk->prev_size = chunk->size; -this_chunk_done: + ret = ath12k_qmi_alloc_chunk(ab, chunk); break; default: ath12k_warn(ab, "memory type %u not supported\n", @@ -2487,7 +2493,7 @@ this_chunk_done: break; } } - return 0; + return ret; } /* clang stack usage explodes if this is inlined */ From 48090fae676ecef4bb39cc6a2faa9765a248b4f8 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 11 Dec 2024 17:34:29 +0200 Subject: [PATCH 0419/1386] wifi: ath12k: add support to allocate MLO global memory region To enable Multi-Link Operation (MLO), QCN9274 firmware requests MLO global memory (MLO_GLOBAL_MEM_REGION_TYPE). This memory region is shared across all the firmware (SoC) that are participating in the MLO. Hence, add support to allocate and free MLO global memory region. Allocate one MLO global memory per struct ath12k_hw_group and assign the same memory to all firmware in the same struct ath12k_hw_group. WCN7850 firmware does not request this memory type, therefore this change will have no impact on WCN7850 device.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.1.1-00210-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-6-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 7 ++ drivers/net/wireless/ath/ath12k/qmi.c | 127 +++++++++++++++++++++++-- drivers/net/wireless/ath/ath12k/qmi.h | 1 + 3 files changed, 125 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index d0e466819036..bf310df3d8f7 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -822,6 +822,12 @@ struct ath12k_soc_dp_stats { struct ath12k_soc_dp_tx_err_stats tx_err; }; +struct ath12k_mlo_memory { + struct target_mem_chunk chunk[ATH12K_QMI_WLANFW_MAX_NUM_MEM_SEG_V01]; + int mlo_mem_size; + bool init_done; +}; + /* Holds info on the group of devices that are registered as a single * wiphy, protected with struct ath12k_hw_group::mutex. 
*/ @@ -847,6 +853,7 @@ struct ath12k_hw_group { u8 num_hw; bool mlo_capable; struct device_node *wsi_node[ATH12K_MAX_SOCS]; + struct ath12k_mlo_memory mlo_mem; }; /* Holds WSI info specific to each device, excluding WSI group info */ diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 7f3d5b269b9e..e7846aaca10a 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2407,19 +2407,64 @@ out: return ret; } +static void ath12k_qmi_free_mlo_mem_chunk(struct ath12k_base *ab, + struct target_mem_chunk *chunk, + int idx) +{ + struct ath12k_hw_group *ag = ab->ag; + struct target_mem_chunk *mlo_chunk; + + lockdep_assert_held(&ag->mutex); + + if (!ag->mlo_mem.init_done || ag->num_started) + return; + + if (idx >= ARRAY_SIZE(ag->mlo_mem.chunk)) { + ath12k_warn(ab, "invalid index for MLO memory chunk free: %d\n", idx); + return; + } + + mlo_chunk = &ag->mlo_mem.chunk[idx]; + if (mlo_chunk->v.addr) { + dma_free_coherent(ab->dev, + mlo_chunk->size, + mlo_chunk->v.addr, + mlo_chunk->paddr); + mlo_chunk->v.addr = NULL; + } + + mlo_chunk->paddr = 0; + mlo_chunk->size = 0; + chunk->v.addr = NULL; + chunk->paddr = 0; + chunk->size = 0; +} + static void ath12k_qmi_free_target_mem_chunk(struct ath12k_base *ab) { - int i; + struct ath12k_hw_group *ag = ab->ag; + int i, mlo_idx; - for (i = 0; i < ab->qmi.mem_seg_count; i++) { + for (i = 0, mlo_idx = 0; i < ab->qmi.mem_seg_count; i++) { if (!ab->qmi.target_mem[i].v.addr) continue; - dma_free_coherent(ab->dev, - ab->qmi.target_mem[i].prev_size, - ab->qmi.target_mem[i].v.addr, - ab->qmi.target_mem[i].paddr); - ab->qmi.target_mem[i].v.addr = NULL; + if (ab->qmi.target_mem[i].type == MLO_GLOBAL_MEM_REGION_TYPE) { + ath12k_qmi_free_mlo_mem_chunk(ab, + &ab->qmi.target_mem[i], + mlo_idx++); + } else { + dma_free_coherent(ab->dev, + ab->qmi.target_mem[i].prev_size, + ab->qmi.target_mem[i].v.addr, + ab->qmi.target_mem[i].paddr); + ab->qmi.target_mem[i].v.addr = 
NULL; + } + } + + if (!ag->num_started && ag->mlo_mem.init_done) { + ag->mlo_mem.init_done = false; + ag->mlo_mem.mlo_mem_size = 0; } } @@ -2466,12 +2511,21 @@ this_chunk_done: static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) { - int i, ret = 0; - struct target_mem_chunk *chunk; + struct target_mem_chunk *chunk, *mlo_chunk; + struct ath12k_hw_group *ag = ab->ag; + int i, mlo_idx, ret; + int mlo_size = 0; + + mutex_lock(&ag->mutex); + + if (!ag->mlo_mem.init_done) { + memset(ag->mlo_mem.chunk, 0, sizeof(ag->mlo_mem.chunk)); + ag->mlo_mem.init_done = true; + } ab->qmi.target_mem_delayed = false; - for (i = 0; i < ab->qmi.mem_seg_count; i++) { + for (i = 0, mlo_idx = 0; i < ab->qmi.mem_seg_count; i++) { chunk = &ab->qmi.target_mem[i]; /* Allocate memory for the region and the functionality supported @@ -2484,6 +2538,40 @@ static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) case PAGEABLE_MEM_REGION_TYPE: case CALDB_MEM_REGION_TYPE: ret = ath12k_qmi_alloc_chunk(ab, chunk); + if (ret) + goto err; + break; + case MLO_GLOBAL_MEM_REGION_TYPE: + mlo_size += chunk->size; + if (ag->mlo_mem.mlo_mem_size && + mlo_size > ag->mlo_mem.mlo_mem_size) { + ath12k_err(ab, "QMI MLO memory allocation failure, requested size %d is more than allocated size %d", + mlo_size, ag->mlo_mem.mlo_mem_size); + ret = -EINVAL; + goto err; + } + + mlo_chunk = &ag->mlo_mem.chunk[mlo_idx]; + if (mlo_chunk->paddr) { + if (chunk->size != mlo_chunk->size) { + ath12k_err(ab, "QMI MLO chunk memory allocation failure for index %d, requested size %d is more than allocated size %d", + mlo_idx, chunk->size, mlo_chunk->size); + ret = -EINVAL; + goto err; + } + } else { + mlo_chunk->size = chunk->size; + mlo_chunk->type = chunk->type; + ret = ath12k_qmi_alloc_chunk(ab, mlo_chunk); + if (ret) + goto err; + memset(mlo_chunk->v.addr, 0, mlo_chunk->size); + } + + chunk->paddr = mlo_chunk->paddr; + chunk->v.addr = mlo_chunk->v.addr; + mlo_idx++; + break; default: ath12k_warn(ab, 
"memory type %u not supported\n", @@ -2493,6 +2581,25 @@ static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) break; } } + + if (!ag->mlo_mem.mlo_mem_size) { + ag->mlo_mem.mlo_mem_size = mlo_size; + } else if (ag->mlo_mem.mlo_mem_size != mlo_size) { + ath12k_err(ab, "QMI MLO memory size error, expected size is %d but requestted size is %d", + ag->mlo_mem.mlo_mem_size, mlo_size); + ret = -EINVAL; + goto err; + } + + mutex_unlock(&ag->mutex); + + return 0; + +err: + ath12k_qmi_free_target_mem_chunk(ab); + + mutex_unlock(&ag->mutex); + return ret; } diff --git a/drivers/net/wireless/ath/ath12k/qmi.h b/drivers/net/wireless/ath/ath12k/qmi.h index 98f6009ab21e..45d7c3fcafdd 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.h +++ b/drivers/net/wireless/ath/ath12k/qmi.h @@ -172,6 +172,7 @@ enum ath12k_qmi_target_mem { BDF_MEM_REGION_TYPE = 0x2, M3_DUMP_REGION_TYPE = 0x3, CALDB_MEM_REGION_TYPE = 0x4, + MLO_GLOBAL_MEM_REGION_TYPE = 0x8, PAGEABLE_MEM_REGION_TYPE = 0x9, }; From cc64deef0bdb52d6b2d6f1a2cd427ae680ed1936 Mon Sep 17 00:00:00 2001 From: Bhagavathi Perumal S Date: Wed, 11 Dec 2024 17:34:30 +0200 Subject: [PATCH 0420/1386] wifi: ath12k: Add MLO WMI setup and teardown functions In case of multi device group abstraction, host has to exchange the MLO commands such as setup, teardown and ready to firmware. Once multi device group is ready, host has to exchange MLO setup command with partner devices link information and followed by MLO ready command to firmware. During deinit, MLO teardown command should be sent to firmware. Firmware would send MLO setup complete and MLO teardown complete to host for MLO setup command and MLO teardown command respectively. Added WMI helper functions for the MLO setup, ready and teardown command and the handling for corresponding event from firmware. Add appropriate WMI tag, command id and event id to parse the event and send request. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Co-developed-by: Karthikeyan Periyasamy Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Bhagavathi Perumal S Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-7-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 177 ++++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/wmi.h | 48 +++++++ 2 files changed, 225 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 46c5027e4f1c..705e0973ebb0 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -7327,6 +7327,76 @@ static void ath12k_wmi_gtk_offload_status_event(struct ath12k_base *ab, kfree(tb); } +static void ath12k_wmi_event_mlo_setup_complete(struct ath12k_base *ab, + struct sk_buff *skb) +{ + const struct wmi_mlo_setup_complete_event *ev; + struct ath12k *ar = NULL; + struct ath12k_pdev *pdev; + const void **tb; + int ret, i; + + tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath12k_warn(ab, "failed to parse mlo setup complete event tlv: %d\n", + ret); + return; + } + + ev = tb[WMI_TAG_MLO_SETUP_COMPLETE_EVENT]; + if (!ev) { + ath12k_warn(ab, "failed to fetch mlo setup complete event\n"); + kfree(tb); + return; + } + + if (le32_to_cpu(ev->pdev_id) > ab->num_radios) + goto skip_lookup; + + for (i = 0; i < ab->num_radios; i++) { + pdev = &ab->pdevs[i]; + if (pdev && pdev->pdev_id == le32_to_cpu(ev->pdev_id)) { + ar = pdev->ar; + break; + } + } + +skip_lookup: + if (!ar) { + ath12k_warn(ab, "invalid pdev_id %d status %u in setup complete event\n", + ev->pdev_id, ev->status); + goto out; + } + +out: + kfree(tb); +} + +static void ath12k_wmi_event_teardown_complete(struct ath12k_base *ab, + struct sk_buff *skb) +{ + 
const struct wmi_mlo_teardown_complete_event *ev; + const void **tb; + int ret; + + tb = ath12k_wmi_tlv_parse_alloc(ab, skb, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath12k_warn(ab, "failed to parse teardown complete event tlv: %d\n", ret); + return; + } + + ev = tb[WMI_TAG_MLO_TEARDOWN_COMPLETE]; + if (!ev) { + ath12k_warn(ab, "failed to fetch teardown complete event\n"); + kfree(tb); + return; + } + + kfree(tb); +} + static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; @@ -7453,6 +7523,12 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) case WMI_GTK_OFFLOAD_STATUS_EVENTID: ath12k_wmi_gtk_offload_status_event(ab, skb); break; + case WMI_MLO_SETUP_COMPLETE_EVENTID: + ath12k_wmi_event_mlo_setup_complete(ab, skb); + break; + case WMI_MLO_TEARDOWN_COMPLETE_EVENTID: + ath12k_wmi_event_teardown_complete(ab, skb); + break; /* TODO: Add remaining events */ default: ath12k_dbg(ab, ATH12K_DBG_WMI, "Unknown eventid: 0x%x\n", id); @@ -8269,3 +8345,104 @@ int ath12k_wmi_sta_keepalive(struct ath12k *ar, return ath12k_wmi_cmd_send(wmi, skb, WMI_STA_KEEPALIVE_CMDID); } + +int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params) +{ + struct wmi_mlo_setup_cmd *cmd; + struct ath12k_wmi_pdev *wmi = ar->wmi; + u32 *partner_links, num_links; + int i, ret, buf_len, arg_len; + struct sk_buff *skb; + struct wmi_tlv *tlv; + void *ptr; + + num_links = mlo_params->num_partner_links; + arg_len = num_links * sizeof(u32); + buf_len = sizeof(*cmd) + TLV_HDR_SIZE + arg_len; + + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, buf_len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_setup_cmd *)skb->data; + cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_SETUP_CMD, + sizeof(*cmd)); + cmd->mld_group_id = mlo_params->group_id; + cmd->pdev_id = cpu_to_le32(ar->pdev->pdev_id); + ptr = skb->data + sizeof(*cmd); + + tlv = ptr; + tlv->header = 
ath12k_wmi_tlv_hdr(WMI_TAG_ARRAY_UINT32, arg_len); + ptr += TLV_HDR_SIZE; + + partner_links = ptr; + for (i = 0; i < num_links; i++) + partner_links[i] = mlo_params->partner_link_id[i]; + + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_MLO_SETUP_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to submit WMI_MLO_SETUP_CMDID command: %d\n", + ret); + dev_kfree_skb(skb); + return ret; + } + + return 0; +} + +int ath12k_wmi_mlo_ready(struct ath12k *ar) +{ + struct wmi_mlo_ready_cmd *cmd; + struct ath12k_wmi_pdev *wmi = ar->wmi; + struct sk_buff *skb; + int ret, len; + + len = sizeof(*cmd); + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_ready_cmd *)skb->data; + cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_READY_CMD, + sizeof(*cmd)); + cmd->pdev_id = cpu_to_le32(ar->pdev->pdev_id); + + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_MLO_READY_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to submit WMI_MLO_READY_CMDID command: %d\n", + ret); + dev_kfree_skb(skb); + return ret; + } + + return 0; +} + +int ath12k_wmi_mlo_teardown(struct ath12k *ar) +{ + struct wmi_mlo_teardown_cmd *cmd; + struct ath12k_wmi_pdev *wmi = ar->wmi; + struct sk_buff *skb; + int ret, len; + + len = sizeof(*cmd); + skb = ath12k_wmi_alloc_skb(wmi->wmi_ab, len); + if (!skb) + return -ENOMEM; + + cmd = (struct wmi_mlo_teardown_cmd *)skb->data; + cmd->tlv_header = ath12k_wmi_tlv_cmd_hdr(WMI_TAG_MLO_TEARDOWN_CMD, + sizeof(*cmd)); + cmd->pdev_id = cpu_to_le32(ar->pdev->pdev_id); + cmd->reason_code = WMI_MLO_TEARDOWN_SSR_REASON; + + ret = ath12k_wmi_cmd_send(wmi, skb, WMI_MLO_TEARDOWN_CMDID); + if (ret) { + ath12k_warn(ar->ab, "failed to submit WMI MLO teardown command: %d\n", + ret); + dev_kfree_skb(skb); + return ret; + } + + return 0; +} diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 05aa9754118a..640720b68782 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ 
b/drivers/net/wireless/ath/ath12k/wmi.h @@ -285,6 +285,7 @@ enum wmi_cmd_group { WMI_GRP_TWT = 0x3e, WMI_GRP_MOTION_DET = 0x3f, WMI_GRP_SPATIAL_REUSE = 0x40, + WMI_GRP_MLO = 0x48, }; #define WMI_CMD_GRP(grp_id) (((grp_id) << 12) | 0x1) @@ -665,6 +666,10 @@ enum wmi_tlv_cmd_id { WMI_PDEV_OBSS_PD_SPATIAL_REUSE_CMDID = WMI_TLV_CMD(WMI_GRP_SPATIAL_REUSE), WMI_PDEV_OBSS_PD_SPATIAL_REUSE_SET_DEF_OBSS_THRESH_CMDID, + WMI_MLO_LINK_SET_ACTIVE_CMDID = WMI_TLV_CMD(WMI_GRP_MLO), + WMI_MLO_SETUP_CMDID, + WMI_MLO_READY_CMDID, + WMI_MLO_TEARDOWN_CMDID, }; enum wmi_tlv_event_id { @@ -874,6 +879,9 @@ enum wmi_tlv_event_id { WMI_TWT_DEL_DIALOG_EVENTID, WMI_TWT_PAUSE_DIALOG_EVENTID, WMI_TWT_RESUME_DIALOG_EVENTID, + WMI_MLO_LINK_SET_ACTIVE_RESP_EVENTID = WMI_EVT_GRP_START_ID(WMI_GRP_MLO), + WMI_MLO_SETUP_COMPLETE_EVENTID, + WMI_MLO_TEARDOWN_COMPLETE_EVENTID, }; enum wmi_tlv_pdev_param { @@ -5026,6 +5034,43 @@ struct wmi_twt_disable_event { __le32 status; } __packed; +struct wmi_mlo_setup_cmd { + __le32 tlv_header; + __le32 mld_group_id; + __le32 pdev_id; +} __packed; + +struct wmi_mlo_setup_arg { + __le32 group_id; + u8 num_partner_links; + u8 *partner_link_id; +}; + +struct wmi_mlo_ready_cmd { + __le32 tlv_header; + __le32 pdev_id; +} __packed; + +enum wmi_mlo_tear_down_reason_code_type { + WMI_MLO_TEARDOWN_SSR_REASON, +}; + +struct wmi_mlo_teardown_cmd { + __le32 tlv_header; + __le32 pdev_id; + __le32 reason_code; +} __packed; + +struct wmi_mlo_setup_complete_event { + __le32 pdev_id; + __le32 status; +} __packed; + +struct wmi_mlo_teardown_complete_event { + __le32 pdev_id; + __le32 status; +} __packed; + /* WOW structures */ enum wmi_wow_wakeup_event { WOW_BMISS_EVENT = 0, @@ -5751,5 +5796,8 @@ int ath12k_wmi_gtk_rekey_getinfo(struct ath12k *ar, struct ath12k_link_vif *arvif); int ath12k_wmi_sta_keepalive(struct ath12k *ar, const struct wmi_sta_keepalive_arg *arg); +int ath12k_wmi_mlo_setup(struct ath12k *ar, struct wmi_mlo_setup_arg *mlo_params); +int ath12k_wmi_mlo_ready(struct 
ath12k *ar); +int ath12k_wmi_mlo_teardown(struct ath12k *ar); #endif From b716a10d99a287681fc5cef46a7f9399bec5f055 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 11 Dec 2024 17:34:31 +0200 Subject: [PATCH 0421/1386] wifi: ath12k: enable MLO setup and teardown from core In case of multi device group abstraction, host has to exchange the multi-link operation commands such as setup and ready to firmware before registering the device group to mac80211. The multi-link operation commands - setup, ready and teardown are necessary for many commands such as WMI_PEER_ASSOC_CMD, WMI_BCN_TMPL_CMD in case of multi-link interfaces. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Harshitha Prem Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-8-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 73 ++++++++++++- drivers/net/wireless/ath/ath12k/core.h | 3 + drivers/net/wireless/ath/ath12k/mac.c | 142 +++++++++++++++++++++++++ drivers/net/wireless/ath/ath12k/mac.h | 3 + drivers/net/wireless/ath/ath12k/wmi.c | 3 + drivers/net/wireless/ath/ath12k/wmi.h | 1 + 6 files changed, 224 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index 1a43e00cffb2..af642b466ea0 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -887,6 +887,70 @@ static void ath12k_core_hw_group_stop(struct ath12k_hw_group *ag) ath12k_mac_destroy(ag); } +static int __ath12k_mac_mlo_ready(struct ath12k *ar) +{ + int ret; + + ret = ath12k_wmi_mlo_ready(ar); + if (ret) { + ath12k_err(ar->ab, "MLO ready failed for pdev %d: %d\n", + ar->pdev_idx, ret); + return ret; + } + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mlo ready done for pdev %d\n", + ar->pdev_idx); + + 
return 0; +} + +int ath12k_mac_mlo_ready(struct ath12k_hw_group *ag) +{ + struct ath12k_hw *ah; + struct ath12k *ar; + int ret; + int i, j; + + for (i = 0; i < ag->num_hw; i++) { + ah = ag->ah[i]; + if (!ah) + continue; + + for_each_ar(ah, ar, j) { + ar = &ah->radio[j]; + ret = __ath12k_mac_mlo_ready(ar); + if (ret) + goto out; + } + } + +out: + return ret; +} + +static int ath12k_core_mlo_setup(struct ath12k_hw_group *ag) +{ + int ret; + + if (!ag->mlo_capable || ag->num_devices == 1) + return 0; + + ret = ath12k_mac_mlo_setup(ag); + if (ret) + return ret; + + ret = ath12k_mac_mlo_ready(ag); + if (ret) + goto err_mlo_teardown; + + return 0; + +err_mlo_teardown: + ath12k_mac_mlo_teardown(ag); + + return ret; +} + static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) { struct ath12k_base *ab; @@ -901,10 +965,14 @@ static int ath12k_core_hw_group_start(struct ath12k_hw_group *ag) if (WARN_ON(ret)) return ret; - ret = ath12k_mac_register(ag); + ret = ath12k_core_mlo_setup(ag); if (WARN_ON(ret)) goto err_mac_destroy; + ret = ath12k_mac_register(ag); + if (WARN_ON(ret)) + goto err_mlo_teardown; + set_bit(ATH12K_GROUP_FLAG_REGISTERED, &ag->flags); core_pdev_create: @@ -939,6 +1007,9 @@ err: ath12k_core_hw_group_stop(ag); return ret; +err_mlo_teardown: + ath12k_mac_mlo_teardown(ag); + err_mac_destroy: ath12k_mac_destroy(ag); diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index bf310df3d8f7..dc01f7b3fd73 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -715,6 +715,9 @@ struct ath12k { u32 freq_high; bool nlo_enabled; + + struct completion mlo_setup_done; + u32 mlo_setup_status; }; struct ath12k_hw { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 47a80d28d1d7..161cc018230f 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10810,6 +10810,7 @@ static void 
ath12k_mac_setup(struct ath12k *ar) init_completion(&ar->scan.started); init_completion(&ar->scan.completed); init_completion(&ar->scan.on_channel); + init_completion(&ar->mlo_setup_done); INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work); INIT_WORK(&ar->regd_update_work, ath12k_regd_update_work); @@ -10818,6 +10819,147 @@ static void ath12k_mac_setup(struct ath12k *ar) skb_queue_head_init(&ar->wmi_mgmt_tx_queue); } +static int __ath12k_mac_mlo_setup(struct ath12k *ar) +{ + u8 num_link = 0, partner_link_id[ATH12K_GROUP_MAX_RADIO] = {}; + struct ath12k_base *partner_ab, *ab = ar->ab; + struct ath12k_hw_group *ag = ab->ag; + struct wmi_mlo_setup_arg mlo = {}; + struct ath12k_pdev *pdev; + unsigned long time_left; + int i, j, ret; + + lockdep_assert_held(&ag->mutex); + + reinit_completion(&ar->mlo_setup_done); + + for (i = 0; i < ag->num_devices; i++) { + partner_ab = ag->ab[i]; + + for (j = 0; j < partner_ab->num_radios; j++) { + pdev = &partner_ab->pdevs[j]; + + /* Avoid the self link */ + if (ar == pdev->ar) + continue; + + partner_link_id[num_link] = pdev->hw_link_id; + num_link++; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "device %d pdev %d hw_link_id %d num_link %d\n", + i, j, pdev->hw_link_id, num_link); + } + } + + mlo.group_id = cpu_to_le32(ag->id); + mlo.partner_link_id = partner_link_id; + mlo.num_partner_links = num_link; + ar->mlo_setup_status = 0; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "group id %d num_link %d\n", ag->id, num_link); + + ret = ath12k_wmi_mlo_setup(ar, &mlo); + if (ret) { + ath12k_err(ab, "failed to send setup MLO WMI command for pdev %d: %d\n", + ar->pdev_idx, ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->mlo_setup_done, + WMI_MLO_CMD_TIMEOUT_HZ); + + if (!time_left || ar->mlo_setup_status) + return ar->mlo_setup_status ? 
: -ETIMEDOUT; + + ath12k_dbg(ab, ATH12K_DBG_MAC, "mlo setup done for pdev %d\n", ar->pdev_idx); + + return 0; +} + +static int __ath12k_mac_mlo_teardown(struct ath12k *ar) +{ + struct ath12k_base *ab = ar->ab; + int ret; + + if (test_bit(ATH12K_FLAG_RECOVERY, &ab->dev_flags)) + return 0; + + ret = ath12k_wmi_mlo_teardown(ar); + if (ret) { + ath12k_warn(ab, "failed to send MLO teardown WMI command for pdev %d: %d\n", + ar->pdev_idx, ret); + return ret; + } + + ath12k_dbg(ab, ATH12K_DBG_MAC, "mlo teardown for pdev %d\n", ar->pdev_idx); + + return 0; +} + +int ath12k_mac_mlo_setup(struct ath12k_hw_group *ag) +{ + struct ath12k_hw *ah; + struct ath12k *ar; + int ret; + int i, j; + + for (i = 0; i < ag->num_hw; i++) { + ah = ag->ah[i]; + if (!ah) + continue; + + for_each_ar(ah, ar, j) { + ar = &ah->radio[j]; + ret = __ath12k_mac_mlo_setup(ar); + if (ret) { + ath12k_err(ar->ab, "failed to setup MLO: %d\n", ret); + goto err_setup; + } + } + } + + return 0; + +err_setup: + for (i = i - 1; i >= 0; i--) { + ah = ag->ah[i]; + if (!ah) + continue; + + for (j = j - 1; j >= 0; j--) { + ar = &ah->radio[j]; + if (!ar) + continue; + + __ath12k_mac_mlo_teardown(ar); + } + } + + return ret; +} + +void ath12k_mac_mlo_teardown(struct ath12k_hw_group *ag) +{ + struct ath12k_hw *ah; + struct ath12k *ar; + int ret, i, j; + + for (i = 0; i < ag->num_hw; i++) { + ah = ag->ah[i]; + if (!ah) + continue; + + for_each_ar(ah, ar, j) { + ar = &ah->radio[j]; + ret = __ath12k_mac_mlo_teardown(ar); + if (ret) { + ath12k_err(ar->ab, "failed to teardown MLO: %d\n", ret); + break; + } + } + } +} + int ath12k_mac_register(struct ath12k_hw_group *ag) { struct ath12k_base *ab = ag->ab[0]; diff --git a/drivers/net/wireless/ath/ath12k/mac.h b/drivers/net/wireless/ath/ath12k/mac.h index ccfc215d83ff..81cfb950e6cd 100644 --- a/drivers/net/wireless/ath/ath12k/mac.h +++ b/drivers/net/wireless/ath/ath12k/mac.h @@ -96,6 +96,9 @@ int ath12k_mac_vif_set_keepalive(struct ath12k_link_vif *arvif, enum 
wmi_sta_keepalive_method method, u32 interval); u8 ath12k_mac_get_target_pdev_id(struct ath12k *ar); +int ath12k_mac_mlo_setup(struct ath12k_hw_group *ag); +int ath12k_mac_mlo_ready(struct ath12k_hw_group *ag); +void ath12k_mac_mlo_teardown(struct ath12k_hw_group *ag); int ath12k_mac_vdev_stop(struct ath12k_link_vif *arvif); #endif diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 705e0973ebb0..892cc4846e4f 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -7369,6 +7369,9 @@ skip_lookup: goto out; } + ar->mlo_setup_status = le32_to_cpu(ev->status); + complete(&ar->mlo_setup_done); + out: kfree(tb); } diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 640720b68782..270ed458302e 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -4938,6 +4938,7 @@ struct wmi_probe_tmpl_cmd { #define MAX_RADIOS 2 +#define WMI_MLO_CMD_TIMEOUT_HZ (5 * HZ) #define WMI_SERVICE_READY_TIMEOUT_HZ (5 * HZ) #define WMI_SEND_TIMEOUT_HZ (3 * HZ) From 628bbaa551da94d879d7aa5abc3b9632ed743fbe Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Wed, 11 Dec 2024 17:34:32 +0200 Subject: [PATCH 0422/1386] wifi: ath12k: avoid redundant code in DP Rx error process Currently, in DP rx error processing, the MAC id is fetched redundantly from the same descriptor for each MSDU. To avoid this redundancy, move the fetch handling before the iteration. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211153432.775335-9-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index f8e79eff2089..536a90ff9796 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -3517,6 +3517,13 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, ret); continue; } + + mac_id = le32_get_bits(reo_desc->info0, + HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); + + pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + ar = ab->pdevs[pdev_id].ar; + link_desc_va = link_desc_banks[desc_bank].vaddr + (paddr - link_desc_banks[desc_bank].paddr); ath12k_hal_rx_msdu_link_info_get(link_desc_va, &num_msdus, msdu_cookies, @@ -3545,12 +3552,6 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, } for (i = 0; i < num_msdus; i++) { - mac_id = le32_get_bits(reo_desc->info0, - HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); - - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); - ar = ab->pdevs[pdev_id].ar; - if (!ath12k_dp_process_rx_err_buf(ar, reo_desc, &rx_desc_used_list, drop, From 1a73acb5fba4d85ab5eed1147282a07d56af8550 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:13 +0200 Subject: [PATCH 0423/1386] wifi: ath12k: move to HW link id based receive handling Currently, all the rx processing treats the hardware link id as the MAC id. The HW link id is a unique identifier for all hardware links participating in the multi link group. 
Therefore, the current MAC id derivation is insufficient to process the partner rx buffer in the Multi-Link Operation. So derive the MAC id from the HW link id. Implementing this change in rx processing will enable the scaling of partner buffer processing in Multi-Link Operation. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 8 +++- drivers/net/wireless/ath/ath12k/dp_rx.c | 49 +++++++++++++++---------- drivers/net/wireless/ath/ath12k/mac.c | 3 ++ 3 files changed, 39 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index dc01f7b3fd73..9386e9592dff 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -136,7 +136,7 @@ struct ath12k_skb_rxcb { struct hal_rx_desc *rx_desc; u8 err_rel_src; u8 err_code; - u8 mac_id; + u8 hw_link_id; u8 unmapped; u8 is_frag; u8 tid; @@ -831,6 +831,11 @@ struct ath12k_mlo_memory { bool init_done; }; +struct ath12k_hw_link { + u8 device_id; + u8 pdev_idx; +}; + /* Holds info on the group of devices that are registered as a single * wiphy, protected with struct ath12k_hw_group::mutex.
*/ @@ -857,6 +862,7 @@ struct ath12k_hw_group { bool mlo_capable; struct device_node *wsi_node[ATH12K_MAX_SOCS]; struct ath12k_mlo_memory mlo_mem; + struct ath12k_hw_link hw_links[ATH12K_GROUP_MAX_RADIO]; }; /* Holds WSI info specific to each device, excluding WSI group info */ diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 536a90ff9796..b68ceb403866 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2595,11 +2595,13 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, struct sk_buff_head *msdu_list, int ring_id) { + struct ath12k_hw_group *ag = ab->ag; struct ieee80211_rx_status rx_status = {0}; struct ath12k_skb_rxcb *rxcb; struct sk_buff *msdu; struct ath12k *ar; - u8 mac_id, pdev_id; + struct ath12k_hw_link *hw_links = ag->hw_links; + u8 hw_link_id, pdev_id; int ret; if (skb_queue_empty(msdu_list)) @@ -2609,8 +2611,10 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, while ((msdu = __skb_dequeue(msdu_list))) { rxcb = ATH12K_SKB_RXCB(msdu); - mac_id = rxcb->mac_id; - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + hw_link_id = rxcb->hw_link_id; + + pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + hw_links[hw_link_id].pdev_idx); ar = ab->pdevs[pdev_id].ar; if (!rcu_dereference(ab->pdevs_active[pdev_id])) { dev_kfree_skb_any(msdu); @@ -2674,7 +2678,7 @@ int ath12k_dp_rx_process(struct ath12k_base *ab, int ring_id, struct hal_srng *srng; struct sk_buff *msdu; bool done = false; - int mac_id; + u8 hw_link_id; u64 desc_va; __skb_queue_head_init(&msdu_list); @@ -2695,8 +2699,8 @@ try_again: cookie = le32_get_bits(desc->buf_addr_info.info1, BUFFER_ADDR_INFO1_SW_COOKIE); - mac_id = le32_get_bits(desc->info0, - HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); + hw_link_id = le32_get_bits(desc->info0, + HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); desc_va = ((u64)le32_to_cpu(desc->buf_va_hi) << 32 | 
le32_to_cpu(desc->buf_va_lo)); @@ -2745,7 +2749,7 @@ try_again: RX_MSDU_DESC_INFO0_LAST_MSDU_IN_MPDU); rxcb->is_continuation = !!(le32_to_cpu(msdu_info->info0) & RX_MSDU_DESC_INFO0_MSDU_CONTINUATION); - rxcb->mac_id = mac_id; + rxcb->hw_link_id = hw_link_id; rxcb->peer_id = ath12k_dp_rx_get_peer_id(ab, dp->peer_metadata_ver, mpdu_info->peer_meta_data); rxcb->tid = le32_get_bits(mpdu_info->info0, @@ -3473,6 +3477,7 @@ exit: int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, int budget) { + struct ath12k_hw_group *ag = ab->ag; u32 msdu_cookies[HAL_NUM_RX_MSDUS_PER_LINK_DESC]; struct dp_link_desc_bank *link_desc_banks; enum hal_rx_buf_return_buf_manager rbm; @@ -3481,11 +3486,12 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, struct hal_reo_dest_ring *reo_desc; struct dp_rxdma_ring *rx_ring; struct dp_srng *reo_except; + struct ath12k_hw_link *hw_links = ag->hw_links; LIST_HEAD(rx_desc_used_list); u32 desc_bank, num_msdus; struct hal_srng *srng; struct ath12k_dp *dp; - int mac_id; + u8 hw_link_id; struct ath12k *ar; dma_addr_t paddr; bool is_frag; @@ -3518,10 +3524,11 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, continue; } - mac_id = le32_get_bits(reo_desc->info0, - HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); + hw_link_id = le32_get_bits(reo_desc->info0, + HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + hw_links[hw_link_id].pdev_idx); ar = ab->pdevs[pdev_id].ar; link_desc_va = link_desc_banks[desc_bank].vaddr + @@ -3802,9 +3809,10 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, struct sk_buff_head msdu_list, scatter_msdu_list; struct ath12k_skb_rxcb *rxcb; void *rx_desc; - u8 mac_id; + u8 hw_link_id; int num_buffs_reaped = 0; struct ath12k_rx_desc_info *desc_info; + struct ath12k_hw_link *hw_links = ab->ag->hw_links; int ret, pdev_id; struct hal_rx_desc 
*msdu_data; @@ -3879,9 +3887,9 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, continue; } - mac_id = ath12k_dp_rx_get_msdu_src_link(ab, - msdu_data); - if (mac_id >= MAX_RADIOS) { + hw_link_id = ath12k_dp_rx_get_msdu_src_link(ab, + msdu_data); + if (hw_link_id >= MAX_RADIOS) { dev_kfree_skb_any(msdu); /* In any case continuation bit is set @@ -3896,7 +3904,7 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, skb_queue_walk(&scatter_msdu_list, msdu) { rxcb = ATH12K_SKB_RXCB(msdu); - rxcb->mac_id = mac_id; + rxcb->hw_link_id = hw_link_id; } skb_queue_splice_tail_init(&scatter_msdu_list, @@ -3904,7 +3912,7 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, } rxcb = ATH12K_SKB_RXCB(msdu); - rxcb->mac_id = mac_id; + rxcb->hw_link_id = hw_link_id; __skb_queue_tail(&msdu_list, msdu); } @@ -3926,12 +3934,13 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, rcu_read_lock(); while ((msdu = __skb_dequeue(&msdu_list))) { rxcb = ATH12K_SKB_RXCB(msdu); - mac_id = rxcb->mac_id; + hw_link_id = rxcb->hw_link_id; - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, mac_id); + pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + hw_links[hw_link_id].pdev_idx); ar = ab->pdevs[pdev_id].ar; - if (!ar || !rcu_dereference(ar->ab->pdevs_active[mac_id])) { + if (!ar || !rcu_dereference(ar->ab->pdevs_active[hw_link_id])) { dev_kfree_skb_any(msdu); continue; } diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 161cc018230f..b946f889c4fe 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -11050,6 +11050,9 @@ static struct ath12k_hw *ath12k_mac_hw_allocate(struct ath12k_hw_group *ag, ar->pdev_idx = pdev_idx; pdev->ar = ar; + ag->hw_links[ar->hw_link_id].device_id = ab->device_id; + ag->hw_links[ar->hw_link_id].pdev_idx = pdev_idx; + ath12k_mac_setup(ar); ath12k_dp_pdev_pre_alloc(ar); } From 1d18b197bc4b9884c3b53945356afe054b5340f4 Mon Sep 17 00:00:00 2001 
From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:14 +0200 Subject: [PATCH 0424/1386] wifi: ath12k: add partner device buffer support in receive data path Currently, partner device buffer is not handled in the receive data path. In Multi-Link Operation, the partner device buffer is reported to the primary upper MAC rings. Therefore, add partner device buffer processing in the REO and Exception ring handler. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 6 + drivers/net/wireless/ath/ath12k/dp.c | 1 + drivers/net/wireless/ath/ath12k/dp.h | 3 +- drivers/net/wireless/ath/ath12k/dp_rx.c | 179 +++++++++++++++++------- 4 files changed, 140 insertions(+), 49 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 9386e9592dff..d7caa58bb262 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -1229,4 +1229,10 @@ static inline void ath12k_core_stopped(struct ath12k_base *ab) ab->ag->num_started--; } +static inline struct ath12k_base *ath12k_ag_to_ab(struct ath12k_hw_group *ag, + u8 device_id) +{ + return ag->ab[device_id]; +} + #endif /* _CORE_H_ */ diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 68abe9d4ab45..9a7df54bf570 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1445,6 +1445,7 @@ static int ath12k_dp_cc_desc_init(struct ath12k_base *ab) for (j = 0; j < ATH12K_MAX_SPT_ENTRIES; j++) { rx_descs[j].cookie = ath12k_dp_cc_cookie_gen(cookie_ppt_idx, j); rx_descs[j].magic = ATH12K_DP_RX_DESC_MAGIC; + rx_descs[j].device_id = ab->device_id; 
list_add_tail(&rx_descs[j].list, &dp->rx_desc_free_list); /* Update descriptor VA in SPT */ diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 021cd9e8ee1d..d3f3d39a1cd0 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -287,7 +287,8 @@ struct ath12k_rx_desc_info { u32 cookie; u32 magic; u8 in_use : 1, - reserved : 7; + device_id : 3, + reserved : 4; }; struct ath12k_tx_desc_info { diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index b68ceb403866..899a25f5994d 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2601,6 +2601,7 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, struct sk_buff *msdu; struct ath12k *ar; struct ath12k_hw_link *hw_links = ag->hw_links; + struct ath12k_base *partner_ab; u8 hw_link_id, pdev_id; int ret; @@ -2612,11 +2613,12 @@ static void ath12k_dp_rx_process_received_packets(struct ath12k_base *ab, while ((msdu = __skb_dequeue(msdu_list))) { rxcb = ATH12K_SKB_RXCB(msdu); hw_link_id = rxcb->hw_link_id; - - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + partner_ab = ath12k_ag_to_ab(ag, + hw_links[hw_link_id].device_id); + pdev_id = ath12k_hw_mac_id_to_pdev_id(partner_ab->hw_params, hw_links[hw_link_id].pdev_idx); - ar = ab->pdevs[pdev_id].ar; - if (!rcu_dereference(ab->pdevs_active[pdev_id])) { + ar = partner_ab->pdevs[pdev_id].ar; + if (!rcu_dereference(partner_ab->pdevs_active[pdev_id])) { dev_kfree_skb_any(msdu); continue; } @@ -2666,23 +2668,29 @@ static u16 ath12k_dp_rx_get_peer_id(struct ath12k_base *ab, int ath12k_dp_rx_process(struct ath12k_base *ab, int ring_id, struct napi_struct *napi, int budget) { - LIST_HEAD(rx_desc_used_list); + struct ath12k_hw_group *ag = ab->ag; + struct list_head rx_desc_used_list[ATH12K_MAX_SOCS]; + struct ath12k_hw_link *hw_links = ag->hw_links; + int num_buffs_reaped[ATH12K_MAX_SOCS] 
= {}; struct ath12k_rx_desc_info *desc_info; struct ath12k_dp *dp = &ab->dp; struct dp_rxdma_ring *rx_ring = &dp->rx_refill_buf_ring; struct hal_reo_dest_ring *desc; - int num_buffs_reaped = 0; + struct ath12k_base *partner_ab; struct sk_buff_head msdu_list; struct ath12k_skb_rxcb *rxcb; int total_msdu_reaped = 0; + u8 hw_link_id, device_id; struct hal_srng *srng; struct sk_buff *msdu; bool done = false; - u8 hw_link_id; u64 desc_va; __skb_queue_head_init(&msdu_list); + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) + INIT_LIST_HEAD(&rx_desc_used_list[device_id]); + srng = &ab->hal.srng_list[dp->reo_dst_ring[ring_id].ring_id]; spin_lock_bh(&srng->lock); @@ -2706,11 +2714,22 @@ try_again: le32_to_cpu(desc->buf_va_lo)); desc_info = (struct ath12k_rx_desc_info *)((unsigned long)desc_va); + device_id = hw_links[hw_link_id].device_id; + partner_ab = ath12k_ag_to_ab(ag, device_id); + if (unlikely(!partner_ab)) { + if (desc_info->skb) { + dev_kfree_skb_any(desc_info->skb); + desc_info->skb = NULL; + } + + continue; + } + /* retry manual desc retrieval */ if (!desc_info) { - desc_info = ath12k_dp_get_rx_desc(ab, cookie); + desc_info = ath12k_dp_get_rx_desc(partner_ab, cookie); if (!desc_info) { - ath12k_warn(ab, "Invalid cookie in manual descriptor retrieval: 0x%x\n", + ath12k_warn(partner_ab, "Invalid cookie in manual descriptor retrieval: 0x%x\n", cookie); continue; } @@ -2722,14 +2741,14 @@ try_again: msdu = desc_info->skb; desc_info->skb = NULL; - list_add_tail(&desc_info->list, &rx_desc_used_list); + list_add_tail(&desc_info->list, &rx_desc_used_list[device_id]); rxcb = ATH12K_SKB_RXCB(msdu); - dma_unmap_single(ab->dev, rxcb->paddr, + dma_unmap_single(partner_ab->dev, rxcb->paddr, msdu->len + skb_tailroom(msdu), DMA_FROM_DEVICE); - num_buffs_reaped++; + num_buffs_reaped[device_id]++; push_reason = le32_get_bits(desc->info0, HAL_REO_DEST_RING_INFO0_PUSH_REASON); @@ -2786,8 +2805,17 @@ try_again: if (!total_msdu_reaped) goto exit; - 
ath12k_dp_rx_bufs_replenish(ab, rx_ring, &rx_desc_used_list, - num_buffs_reaped); + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) { + if (!num_buffs_reaped[device_id]) + continue; + + partner_ab = ath12k_ag_to_ab(ag, device_id); + rx_ring = &partner_ab->dp.rx_refill_buf_ring; + + ath12k_dp_rx_bufs_replenish(partner_ab, rx_ring, + &rx_desc_used_list[device_id], + num_buffs_reaped[device_id]); + } ath12k_dp_rx_process_received_packets(ab, napi, &msdu_list, ring_id); @@ -3478,7 +3506,9 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, int budget) { struct ath12k_hw_group *ag = ab->ag; + struct list_head rx_desc_used_list[ATH12K_MAX_SOCS]; u32 msdu_cookies[HAL_NUM_RX_MSDUS_PER_LINK_DESC]; + int num_buffs_reaped[ATH12K_MAX_SOCS] = {}; struct dp_link_desc_bank *link_desc_banks; enum hal_rx_buf_return_buf_manager rbm; struct hal_rx_msdu_link *link_desc_va; @@ -3487,11 +3517,10 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, struct dp_rxdma_ring *rx_ring; struct dp_srng *reo_except; struct ath12k_hw_link *hw_links = ag->hw_links; - LIST_HEAD(rx_desc_used_list); + struct ath12k_base *partner_ab; + u8 hw_link_id, device_id; u32 desc_bank, num_msdus; struct hal_srng *srng; - struct ath12k_dp *dp; - u8 hw_link_id; struct ath12k *ar; dma_addr_t paddr; bool is_frag; @@ -3501,9 +3530,10 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, tot_n_bufs_reaped = 0; quota = budget; - dp = &ab->dp; - reo_except = &dp->reo_except_ring; - link_desc_banks = dp->link_desc_banks; + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) + INIT_LIST_HEAD(&rx_desc_used_list[device_id]); + + reo_except = &ab->dp.reo_except_ring; srng = &ab->hal.srng_list[reo_except->ring_id]; @@ -3526,21 +3556,24 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, hw_link_id = le32_get_bits(reo_desc->info0, HAL_REO_DEST_RING_INFO0_SRC_LINK_ID); + device_id = 
hw_links[hw_link_id].device_id; + partner_ab = ath12k_ag_to_ab(ag, device_id); - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + pdev_id = ath12k_hw_mac_id_to_pdev_id(partner_ab->hw_params, hw_links[hw_link_id].pdev_idx); - ar = ab->pdevs[pdev_id].ar; + ar = partner_ab->pdevs[pdev_id].ar; + link_desc_banks = partner_ab->dp.link_desc_banks; link_desc_va = link_desc_banks[desc_bank].vaddr + (paddr - link_desc_banks[desc_bank].paddr); ath12k_hal_rx_msdu_link_info_get(link_desc_va, &num_msdus, msdu_cookies, &rbm); - if (rbm != dp->idle_link_rbm && + if (rbm != partner_ab->dp.idle_link_rbm && rbm != HAL_RX_BUF_RBM_SW3_BM && - rbm != ab->hw_params->hal_params->rx_buf_rbm) { + rbm != partner_ab->hw_params->hal_params->rx_buf_rbm) { ab->soc_stats.invalid_rbm++; ath12k_warn(ab, "invalid return buffer manager %d\n", rbm); - ath12k_dp_rx_link_desc_return(ab, reo_desc, + ath12k_dp_rx_link_desc_return(partner_ab, reo_desc, HAL_WBM_REL_BM_ACT_REL_MSDU); continue; } @@ -3550,20 +3583,26 @@ int ath12k_dp_rx_process_err(struct ath12k_base *ab, struct napi_struct *napi, /* Process only rx fragments with one msdu per link desc below, and drop * msdu's indicated due to error reasons. + * Dynamic fragmentation not supported in Multi-link client, so drop the + * partner device buffers. 
*/ - if (!is_frag || num_msdus > 1) { + if (!is_frag || num_msdus > 1 || + partner_ab->device_id != ab->device_id) { drop = true; + /* Return the link desc back to wbm idle list */ - ath12k_dp_rx_link_desc_return(ab, reo_desc, + ath12k_dp_rx_link_desc_return(partner_ab, reo_desc, HAL_WBM_REL_BM_ACT_PUT_IN_IDLE); } for (i = 0; i < num_msdus; i++) { if (!ath12k_dp_process_rx_err_buf(ar, reo_desc, - &rx_desc_used_list, + &rx_desc_used_list[device_id], drop, - msdu_cookies[i])) + msdu_cookies[i])) { + num_buffs_reaped[device_id]++; tot_n_bufs_reaped++; + } } if (tot_n_bufs_reaped >= quota) { @@ -3579,10 +3618,17 @@ exit: spin_unlock_bh(&srng->lock); - rx_ring = &dp->rx_refill_buf_ring; + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) { + if (!num_buffs_reaped[device_id]) + continue; - ath12k_dp_rx_bufs_replenish(ab, rx_ring, &rx_desc_used_list, - tot_n_bufs_reaped); + partner_ab = ath12k_ag_to_ab(ag, device_id); + rx_ring = &partner_ab->dp.rx_refill_buf_ring; + + ath12k_dp_rx_bufs_replenish(partner_ab, rx_ring, + &rx_desc_used_list[device_id], + num_buffs_reaped[device_id]); + } return tot_n_bufs_reaped; } @@ -3799,7 +3845,8 @@ static void ath12k_dp_rx_wbm_err(struct ath12k *ar, int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, struct napi_struct *napi, int budget) { - LIST_HEAD(rx_desc_used_list); + struct list_head rx_desc_used_list[ATH12K_MAX_SOCS]; + struct ath12k_hw_group *ag = ab->ag; struct ath12k *ar; struct ath12k_dp *dp = &ab->dp; struct dp_rxdma_ring *rx_ring; @@ -3809,18 +3856,22 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, struct sk_buff_head msdu_list, scatter_msdu_list; struct ath12k_skb_rxcb *rxcb; void *rx_desc; - u8 hw_link_id; - int num_buffs_reaped = 0; + int num_buffs_reaped[ATH12K_MAX_SOCS] = {}; + int total_num_buffs_reaped = 0; struct ath12k_rx_desc_info *desc_info; - struct ath12k_hw_link *hw_links = ab->ag->hw_links; + struct ath12k_hw_link *hw_links = ag->hw_links; + struct ath12k_base *partner_ab; + u8 
hw_link_id, device_id; int ret, pdev_id; struct hal_rx_desc *msdu_data; __skb_queue_head_init(&msdu_list); __skb_queue_head_init(&scatter_msdu_list); + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) + INIT_LIST_HEAD(&rx_desc_used_list[device_id]); + srng = &ab->hal.srng_list[dp->rx_rel_ring.ring_id]; - rx_ring = &dp->rx_refill_buf_ring; spin_lock_bh(&srng->lock); ath12k_hal_srng_access_begin(ab, srng); @@ -3856,14 +3907,27 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, msdu = desc_info->skb; desc_info->skb = NULL; - list_add_tail(&desc_info->list, &rx_desc_used_list); + device_id = desc_info->device_id; + partner_ab = ath12k_ag_to_ab(ag, device_id); + if (unlikely(!partner_ab)) { + dev_kfree_skb_any(msdu); + + /* In any case continuation bit is set + * in the previous record, cleanup scatter_msdu_list + */ + ath12k_dp_clean_up_skb_list(&scatter_msdu_list); + continue; + } + + list_add_tail(&desc_info->list, &rx_desc_used_list[device_id]); rxcb = ATH12K_SKB_RXCB(msdu); - dma_unmap_single(ab->dev, rxcb->paddr, + dma_unmap_single(partner_ab->dev, rxcb->paddr, msdu->len + skb_tailroom(msdu), DMA_FROM_DEVICE); - num_buffs_reaped++; + num_buffs_reaped[device_id]++; + total_num_buffs_reaped++; if (!err_info.continuation) budget--; @@ -3887,9 +3951,9 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, continue; } - hw_link_id = ath12k_dp_rx_get_msdu_src_link(ab, + hw_link_id = ath12k_dp_rx_get_msdu_src_link(partner_ab, msdu_data); - if (hw_link_id >= MAX_RADIOS) { + if (hw_link_id >= ATH12K_GROUP_MAX_RADIO) { dev_kfree_skb_any(msdu); /* In any case continuation bit is set @@ -3925,20 +3989,39 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, spin_unlock_bh(&srng->lock); - if (!num_buffs_reaped) + if (!total_num_buffs_reaped) goto done; - ath12k_dp_rx_bufs_replenish(ab, rx_ring, &rx_desc_used_list, - num_buffs_reaped); + for (device_id = 0; device_id < ATH12K_MAX_SOCS; device_id++) { + if (!num_buffs_reaped[device_id]) + 
continue; + + partner_ab = ath12k_ag_to_ab(ag, device_id); + rx_ring = &partner_ab->dp.rx_refill_buf_ring; + + ath12k_dp_rx_bufs_replenish(ab, rx_ring, + &rx_desc_used_list[device_id], + num_buffs_reaped[device_id]); + } rcu_read_lock(); while ((msdu = __skb_dequeue(&msdu_list))) { rxcb = ATH12K_SKB_RXCB(msdu); hw_link_id = rxcb->hw_link_id; - pdev_id = ath12k_hw_mac_id_to_pdev_id(ab->hw_params, + device_id = hw_links[hw_link_id].device_id; + partner_ab = ath12k_ag_to_ab(ag, device_id); + if (unlikely(!partner_ab)) { + ath12k_dbg(ab, ATH12K_DBG_DATA, + "Unable to process WBM error msdu due to invalid hw link id %d device id %d\n", + hw_link_id, device_id); + dev_kfree_skb_any(msdu); + continue; + } + + pdev_id = ath12k_hw_mac_id_to_pdev_id(partner_ab->hw_params, hw_links[hw_link_id].pdev_idx); - ar = ab->pdevs[pdev_id].ar; + ar = partner_ab->pdevs[pdev_id].ar; if (!ar || !rcu_dereference(ar->ab->pdevs_active[hw_link_id])) { dev_kfree_skb_any(msdu); @@ -3953,7 +4036,7 @@ int ath12k_dp_rx_process_wbm_err(struct ath12k_base *ab, } rcu_read_unlock(); done: - return num_buffs_reaped; + return total_num_buffs_reaped; } void ath12k_dp_rx_process_reo_status(struct ath12k_base *ab) From 555872c477958e1a12244e79f3ef9f976ca2a077 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:15 +0200 Subject: [PATCH 0425/1386] wifi: ath12k: add helper function to init partner cmem configuration In the Inter Multi-Link Operation, the primary device is expected to receive the partner device buffer. Therefore, each device initializes the partner device buffer in their cmem configuration. So add a helper function to initialize the partner device buffer in their cmem configuration. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 5 ++++- drivers/net/wireless/ath/ath12k/dp.c | 13 +++++++++++++ drivers/net/wireless/ath/ath12k/dp.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index af642b466ea0..ff79cb910523 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -930,7 +930,7 @@ out: static int ath12k_core_mlo_setup(struct ath12k_hw_group *ag) { - int ret; + int ret, i; if (!ag->mlo_capable || ag->num_devices == 1) return 0; @@ -939,6 +939,9 @@ static int ath12k_core_mlo_setup(struct ath12k_hw_group *ag) if (ret) return ret; + for (i = 0; i < ag->num_devices; i++) + ath12k_dp_partner_cc_init(ag->ab[i]); + ret = ath12k_mac_mlo_ready(ag); if (ret) goto err_mlo_teardown; diff --git a/drivers/net/wireless/ath/ath12k/dp.c b/drivers/net/wireless/ath/ath12k/dp.c index 9a7df54bf570..9e5a4e75f2f6 100644 --- a/drivers/net/wireless/ath/ath12k/dp.c +++ b/drivers/net/wireless/ath/ath12k/dp.c @@ -1522,6 +1522,19 @@ static int ath12k_dp_cmem_init(struct ath12k_base *ab, return 0; } +void ath12k_dp_partner_cc_init(struct ath12k_base *ab) +{ + struct ath12k_hw_group *ag = ab->ag; + int i; + + for (i = 0; i < ag->num_devices; i++) { + if (ag->ab[i] == ab) + continue; + + ath12k_dp_cmem_init(ab, &ag->ab[i]->dp, ATH12K_DP_RX_DESC); + } +} + static int ath12k_dp_cc_init(struct ath12k_base *ab) { struct ath12k_dp *dp = &ab->dp; diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index d3f3d39a1cd0..7700828375e3 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ 
b/drivers/net/wireless/ath/ath12k/dp.h @@ -1806,6 +1806,7 @@ void ath12k_dp_vdev_tx_attach(struct ath12k *ar, struct ath12k_link_vif *arvif); void ath12k_dp_free(struct ath12k_base *ab); int ath12k_dp_alloc(struct ath12k_base *ab); void ath12k_dp_cc_config(struct ath12k_base *ab); +void ath12k_dp_partner_cc_init(struct ath12k_base *ab); int ath12k_dp_pdev_alloc(struct ath12k_base *ab); void ath12k_dp_pdev_pre_alloc(struct ath12k *ar); void ath12k_dp_pdev_free(struct ath12k_base *ab); From 8fea0066b4b481bd604256f5359127837a5db7ce Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:16 +0200 Subject: [PATCH 0426/1386] wifi: ath12k: introduce interface combination cleanup helper Introduce a cleanup helper function to avoid redundant code for interface combination cleanup. Remove the cleanup code from ath12k_mac_hw_unregister() and ath12k_mac_hw_register() and replace it with a new cleanup helper function. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241209185421.376381-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index b946f889c4fe..808af521c57c 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10351,6 +10351,14 @@ static bool ath12k_mac_is_iface_mode_enable(struct ath12k_hw *ah, return is_enable; } +static void ath12k_mac_cleanup_iface_combinations(struct ath12k_hw *ah) +{ + struct wiphy *wiphy = ah->hw->wiphy; + + kfree(wiphy->iface_combinations[0].limits); + kfree(wiphy->iface_combinations); +} + static int ath12k_mac_setup_iface_combinations(struct 
ath12k_hw *ah) { struct wiphy *wiphy = ah->hw->wiphy; @@ -10479,7 +10487,6 @@ static void ath12k_mac_cleanup_unregister(struct ath12k *ar) static void ath12k_mac_hw_unregister(struct ath12k_hw *ah) { struct ieee80211_hw *hw = ah->hw; - struct wiphy *wiphy = hw->wiphy; struct ath12k *ar; int i; @@ -10493,8 +10500,7 @@ static void ath12k_mac_hw_unregister(struct ath12k_hw *ah) for_each_ar(ah, ar, i) ath12k_mac_cleanup_unregister(ar); - kfree(wiphy->iface_combinations[0].limits); - kfree(wiphy->iface_combinations); + ath12k_mac_cleanup_iface_combinations(ah); SET_IEEE80211_DEV(hw, NULL); } @@ -10724,13 +10730,13 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) ret = ath12k_wow_init(ar); if (ret) { ath12k_warn(ar->ab, "failed to init wow: %d\n", ret); - goto err_free_if_combs; + goto err_cleanup_if_combs; } ret = ieee80211_register_hw(hw); if (ret) { ath12k_err(ab, "ieee80211 registration failed: %d\n", ret); - goto err_free_if_combs; + goto err_cleanup_if_combs; } if (is_monitor_disable) @@ -10760,9 +10766,8 @@ err_unregister_hw: ieee80211_unregister_hw(hw); -err_free_if_combs: - kfree(wiphy->iface_combinations[0].limits); - kfree(wiphy->iface_combinations); +err_cleanup_if_combs: + ath12k_mac_cleanup_iface_combinations(ah); err_complete_cleanup_unregister: i = ah->num_radio; From 3c9bc818b8f192142280b722fa53e2389491a6d1 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:17 +0200 Subject: [PATCH 0427/1386] wifi: ath12k: Refactor radio frequency information Currently, radio stores the low frequency and high frequency information as separate variables. However, cfg80211 already provides a suitable data structure struct wiphy_radio_freq_range to store this information efficiently. Additionally, for multi radio per wiphy infrastructure, this frequency range information is essential. In future patches using struct wiphy_radio_freq_range makes the code simpler. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-6-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 3 +-- drivers/net/wireless/ath/ath12k/mac.c | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index d7caa58bb262..d09ebcdde94f 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -711,8 +711,7 @@ struct ath12k { bool monitor_started; int monitor_vdev_id; - u32 freq_low; - u32 freq_high; + struct wiphy_radio_freq_range freq_range; bool nlo_enabled; diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 808af521c57c..504ad36caca9 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -708,8 +708,8 @@ static struct ath12k *ath12k_mac_get_ar_by_chan(struct ieee80211_hw *hw, return ar; for_each_ar(ah, ar, i) { - if (channel->center_freq >= ar->freq_low && - channel->center_freq <= ar->freq_high) + if (channel->center_freq >= KHZ_TO_MHZ(ar->freq_range.start_freq) && + channel->center_freq <= KHZ_TO_MHZ(ar->freq_range.end_freq)) return ar; } return NULL; @@ -10203,8 +10203,8 @@ static void ath12k_mac_update_ch_list(struct ath12k *ar, band->channels[i].flags |= IEEE80211_CHAN_DISABLED; } - ar->freq_low = freq_low; - ar->freq_high = freq_high; + ar->freq_range.start_freq = MHZ_TO_KHZ(freq_low); + ar->freq_range.end_freq = MHZ_TO_KHZ(freq_high); } static u32 ath12k_get_phy_id(struct ath12k *ar, u32 band) From ae6b065282abd5cb097fbe96bfb96fa29a9fc321 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Mon, 9 Dec 2024 20:54:18 +0200 Subject: [PATCH 0428/1386] wifi: ath12k: 
advertise multi device interface combination The prerequisite for MLO support in cfg80211/mac80211 requires that all the links participating in MLO belong to the same wiphy/struct ieee80211_hw. The driver needs to group multiple discrete hardware components, each acting as a link in MLO, under one wiphy. Consequently, the driver advertises multi-hardware device interface combination capabilities specific to the radio, including supported frequencies. The global interface combination represents the combined interface capabilities. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-7-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 222 +++++++++++++++++++++----- 1 file changed, 185 insertions(+), 37 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 504ad36caca9..705e0b673435 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10336,14 +10336,20 @@ static bool ath12k_mac_is_iface_mode_enable(struct ath12k_hw *ah, { struct ath12k *ar; int i; - u16 interface_modes, mode; - bool is_enable = true; + u16 interface_modes, mode = 0; + bool is_enable = false; + + if (type == NL80211_IFTYPE_MESH_POINT) { + if (IS_ENABLED(CONFIG_MAC80211_MESH)) + mode = BIT(type); + } else { + mode = BIT(type); + } - mode = BIT(type); for_each_ar(ah, ar, i) { interface_modes = ar->ab->hw_params->interface_modes; - if (!(interface_modes & mode)) { - is_enable = false; + if (interface_modes & mode) { + is_enable = true; break; } } @@ -10351,31 +10357,20 @@ static bool ath12k_mac_is_iface_mode_enable(struct ath12k_hw *ah, return is_enable; } -static void ath12k_mac_cleanup_iface_combinations(struct ath12k_hw *ah) +static int 
+ath12k_mac_setup_radio_iface_comb(struct ath12k *ar, + struct ieee80211_iface_combination *comb) { - struct wiphy *wiphy = ah->hw->wiphy; - - kfree(wiphy->iface_combinations[0].limits); - kfree(wiphy->iface_combinations); -} - -static int ath12k_mac_setup_iface_combinations(struct ath12k_hw *ah) -{ - struct wiphy *wiphy = ah->hw->wiphy; - struct ieee80211_iface_combination *combinations; + u16 interface_modes = ar->ab->hw_params->interface_modes; struct ieee80211_iface_limit *limits; int n_limits, max_interfaces; bool ap, mesh, p2p; - ap = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_AP); - p2p = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_P2P_DEVICE); + ap = interface_modes & BIT(NL80211_IFTYPE_AP); + p2p = interface_modes & BIT(NL80211_IFTYPE_P2P_DEVICE); mesh = IS_ENABLED(CONFIG_MAC80211_MESH) && - ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_MESH_POINT); - - combinations = kzalloc(sizeof(*combinations), GFP_KERNEL); - if (!combinations) - return -ENOMEM; + (interface_modes & BIT(NL80211_IFTYPE_MESH_POINT)); if ((ap || mesh) && !p2p) { n_limits = 2; @@ -10392,10 +10387,8 @@ static int ath12k_mac_setup_iface_combinations(struct ath12k_hw *ah) } limits = kcalloc(n_limits, sizeof(*limits), GFP_KERNEL); - if (!limits) { - kfree(combinations); + if (!limits) return -ENOMEM; - } limits[0].max = 1; limits[0].types |= BIT(NL80211_IFTYPE_STATION); @@ -10411,26 +10404,181 @@ static int ath12k_mac_setup_iface_combinations(struct ath12k_hw *ah) if (p2p) { limits[1].types |= BIT(NL80211_IFTYPE_P2P_CLIENT) | - BIT(NL80211_IFTYPE_P2P_GO); + BIT(NL80211_IFTYPE_P2P_GO); limits[2].max = 1; limits[2].types |= BIT(NL80211_IFTYPE_P2P_DEVICE); } - combinations[0].limits = limits; - combinations[0].n_limits = n_limits; - combinations[0].max_interfaces = max_interfaces; - combinations[0].num_different_channels = 1; - combinations[0].beacon_int_infra_match = true; - combinations[0].beacon_int_min_gcd = 100; - combinations[0].radar_detect_widths = 
BIT(NL80211_CHAN_WIDTH_20_NOHT) | - BIT(NL80211_CHAN_WIDTH_20) | - BIT(NL80211_CHAN_WIDTH_40) | - BIT(NL80211_CHAN_WIDTH_80); + comb[0].limits = limits; + comb[0].n_limits = n_limits; + comb[0].max_interfaces = max_interfaces; + comb[0].num_different_channels = 1; + comb[0].beacon_int_infra_match = true; + comb[0].beacon_int_min_gcd = 100; + comb[0].radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) | + BIT(NL80211_CHAN_WIDTH_20) | + BIT(NL80211_CHAN_WIDTH_40) | + BIT(NL80211_CHAN_WIDTH_80); + return 0; +} + +static int +ath12k_mac_setup_global_iface_comb(struct ath12k_hw *ah, + struct wiphy_radio *radio, + u8 n_radio, + struct ieee80211_iface_combination *comb) +{ + const struct ieee80211_iface_combination *iter_comb; + struct ieee80211_iface_limit *limits; + int i, j, n_limits; + bool ap, mesh, p2p; + + if (!n_radio) + return 0; + + ap = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_AP); + p2p = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_P2P_DEVICE); + mesh = ath12k_mac_is_iface_mode_enable(ah, NL80211_IFTYPE_MESH_POINT); + + if ((ap || mesh) && !p2p) + n_limits = 2; + else if (p2p) + n_limits = 3; + else + n_limits = 1; + + limits = kcalloc(n_limits, sizeof(*limits), GFP_KERNEL); + if (!limits) + return -ENOMEM; + + for (i = 0; i < n_radio; i++) { + iter_comb = radio[i].iface_combinations; + for (j = 0; j < iter_comb->n_limits && j < n_limits; j++) { + limits[j].types |= iter_comb->limits[j].types; + limits[j].max += iter_comb->limits[j].max; + } + + comb->max_interfaces += iter_comb->max_interfaces; + comb->num_different_channels += iter_comb->num_different_channels; + comb->radar_detect_widths |= iter_comb->radar_detect_widths; + } + + comb->limits = limits; + comb->n_limits = n_limits; + comb->beacon_int_infra_match = true; + comb->beacon_int_min_gcd = 100; + + return 0; +} + +static +void ath12k_mac_cleanup_iface_comb(const struct ieee80211_iface_combination *iface_comb) +{ + kfree(iface_comb[0].limits); + kfree(iface_comb); +} + +static void 
ath12k_mac_cleanup_iface_combinations(struct ath12k_hw *ah) +{ + struct wiphy *wiphy = ah->hw->wiphy; + const struct wiphy_radio *radio; + int i; + + if (wiphy->n_radio > 0) { + radio = wiphy->radio; + for (i = 0; i < wiphy->n_radio; i++) + ath12k_mac_cleanup_iface_comb(radio[i].iface_combinations); + + kfree(wiphy->radio); + } + + ath12k_mac_cleanup_iface_comb(wiphy->iface_combinations); +} + +static int ath12k_mac_setup_iface_combinations(struct ath12k_hw *ah) +{ + struct ieee80211_iface_combination *combinations, *comb; + struct wiphy *wiphy = ah->hw->wiphy; + struct wiphy_radio *radio; + struct ath12k *ar; + int i, ret; + + combinations = kzalloc(sizeof(*combinations), GFP_KERNEL); + if (!combinations) + return -ENOMEM; + + if (ah->num_radio == 1) { + ret = ath12k_mac_setup_radio_iface_comb(&ah->radio[0], + combinations); + if (ret) { + ath12k_hw_warn(ah, "failed to setup radio interface combinations for one radio: %d", + ret); + goto err_free_combinations; + } + + goto out; + } + + /* there are multiple radios */ + + radio = kcalloc(ah->num_radio, sizeof(*radio), GFP_KERNEL); + if (!radio) { + ret = -ENOMEM; + goto err_free_combinations; + } + + for_each_ar(ah, ar, i) { + comb = kzalloc(sizeof(*comb), GFP_KERNEL); + if (!comb) { + ret = -ENOMEM; + goto err_free_radios; + } + + ret = ath12k_mac_setup_radio_iface_comb(ar, comb); + if (ret) { + ath12k_hw_warn(ah, "failed to setup radio interface combinations for radio %d: %d", + i, ret); + kfree(comb); + goto err_free_radios; + } + + radio[i].freq_range = &ar->freq_range; + radio[i].n_freq_range = 1; + + radio[i].iface_combinations = comb; + radio[i].n_iface_combinations = 1; + } + + ret = ath12k_mac_setup_global_iface_comb(ah, radio, ah->num_radio, combinations); + if (ret) { + ath12k_hw_warn(ah, "failed to setup global interface combinations: %d", + ret); + goto err_free_all_radios; + } + + wiphy->radio = radio; + wiphy->n_radio = ah->num_radio; + +out: wiphy->iface_combinations = combinations; 
wiphy->n_iface_combinations = 1; return 0; + +err_free_all_radios: + i = ah->num_radio; + +err_free_radios: + while (i--) + ath12k_mac_cleanup_iface_comb(radio[i].iface_combinations); + + kfree(radio); + +err_free_combinations: + kfree(combinations); + + return ret; } static const u8 ath12k_if_types_ext_capa[] = { From 78cf6fd16572912fb3f39237fb29879ccefb5a17 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 9 Dec 2024 20:54:19 +0200 Subject: [PATCH 0429/1386] wifi: ath12k: fix ath12k_qmi_alloc_chunk() to handle too large allocations If the requested memory chunk is too large, an error message is logged, but the request continues to be processed. However, no actual memory is allocated to the firmware from this request. Instead, the firmware sends another request with smaller chunks, where memory will be allocated accordingly. Therefore, it is pointless to proceed with parsing the request if at least one of the requests cannot be fulfilled. Hence, return -EAGAIN immediately and proceed to process the new request. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-8-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index e7846aaca10a..964d350be748 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2497,7 +2497,7 @@ static int ath12k_qmi_alloc_chunk(struct ath12k_base *ab, chunk->size, chunk->type); ath12k_qmi_free_target_mem_chunk(ab); - return 0; + return -EAGAIN; } ath12k_warn(ab, "memory allocation failure for %u size: %d\n", chunk->type, chunk->size); @@ -2600,6 +2600,14 @@ err: mutex_unlock(&ag->mutex); + /* The firmware will attempt to request memory in smaller chunks + * on the next try. However, the current caller should be notified + * that this instance of request parsing was successful. + * Therefore, return 0 only. + */ + if (ret == -EAGAIN) + ret = 0; + return ret; } From 72c24b1b779d78674842012f252913c0b5beda73 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 9 Dec 2024 20:54:20 +0200 Subject: [PATCH 0430/1386] wifi: ath12k: fix ar->supports_6ghz usage during hw register In the ath12k_mac_hw_register() function's context, ar is an iterator variable, and there is no assurance that the last ar will be the one with 6 GHz enabled. Therefore, checking directly ar->supports_6ghz outside the loop is not appropriate. Additionally, 6 GHz lacks HT capabilities. To obtain the super set of HT capabilities across all grouped radios, 6 GHz should be excluded. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-9-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 705e0b673435..e3848485d029 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10723,7 +10723,10 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) if (ret) goto err_cleanup_unregister; - ht_cap &= ht_cap_info; + /* 6 GHz does not support HT Cap, hence do not consider it */ + if (!ar->supports_6ghz) + ht_cap &= ht_cap_info; + wiphy->max_ap_assoc_sta += ar->max_num_stations; /* Advertise the max antenna support of all radios, driver can handle @@ -10787,7 +10790,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) ieee80211_hw_set(hw, SUPPORTS_TX_FRAG); ieee80211_hw_set(hw, REPORTS_LOW_ACK); - if ((ht_cap & WMI_HT_CAP_ENABLED) || ar->supports_6ghz) { + if ((ht_cap & WMI_HT_CAP_ENABLED) || is_6ghz) { ieee80211_hw_set(hw, AMPDU_AGGREGATION); ieee80211_hw_set(hw, TX_AMPDU_SETUP_IN_HW); ieee80211_hw_set(hw, SUPPORTS_REORDERING_BUFFER); @@ -10803,7 +10806,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) * handle it when the ht capability different for each band. 
*/ if (ht_cap & WMI_HT_CAP_DYNAMIC_SMPS || - (ar->supports_6ghz && ab->hw_params->supports_dynamic_smps_6ghz)) + (is_6ghz && ab->hw_params->supports_dynamic_smps_6ghz)) wiphy->features |= NL80211_FEATURE_DYNAMIC_SMPS; wiphy->max_scan_ssids = WLAN_SCAN_PARAMS_MAX_SSID; From 7462d67c660f52396e0bc5b3e13cc5c3a4dc01c3 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Mon, 9 Dec 2024 20:54:21 +0200 Subject: [PATCH 0431/1386] wifi: ath12k: pass link ID during MLO while delivering skb mac80211 expects link_id in some scenarios or else the packet might get dropped. Hence, add link_id information before delivering the skb. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241209185421.376381-10-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_mon.c | 8 +++++++- drivers/net/wireless/ath/ath12k/dp_rx.c | 5 +++++ drivers/net/wireless/ath/ath12k/peer.c | 2 ++ drivers/net/wireless/ath/ath12k/peer.h | 3 +++ 4 files changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 494984133a91..2d53404095d6 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1093,8 +1093,14 @@ static void ath12k_dp_mon_rx_deliver_msdu(struct ath12k *ar, struct napi_struct decap = ath12k_dp_rx_h_decap_type(ar->ab, rxcb->rx_desc); spin_lock_bh(&ar->ab->base_lock); peer = ath12k_dp_rx_h_find_peer(ar->ab, msdu); - if (peer && peer->sta) + if (peer && peer->sta) { pubsta = peer->sta; + if (pubsta->valid_links) { + status->link_valid = 1; + status->link_id = peer->link_id; + } + } + spin_unlock_bh(&ar->ab->base_lock); ath12k_dbg(ar->ab, ATH12K_DBG_DATA, diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c 
b/drivers/net/wireless/ath/ath12k/dp_rx.c index 899a25f5994d..66367bfb4acc 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -2474,6 +2474,11 @@ static void ath12k_dp_rx_deliver_msdu(struct ath12k *ar, struct napi_struct *nap pubsta = peer ? peer->sta : NULL; + if (pubsta && pubsta->valid_links) { + status->link_valid = 1; + status->link_id = peer->link_id; + } + spin_unlock_bh(&ab->base_lock); ath12k_dbg(ab, ATH12K_DBG_DATA, diff --git a/drivers/net/wireless/ath/ath12k/peer.c b/drivers/net/wireless/ath/ath12k/peer.c index 5763c5a40cfc..792cca8a3fb1 100644 --- a/drivers/net/wireless/ath/ath12k/peer.c +++ b/drivers/net/wireless/ath/ath12k/peer.c @@ -388,6 +388,8 @@ int ath12k_peer_create(struct ath12k *ar, struct ath12k_link_vif *arvif, arsta = wiphy_dereference(ath12k_ar_to_hw(ar)->wiphy, ahsta->link[link_id]); + peer->link_id = arsta->link_id; + /* Fill ML info into created peer */ if (sta->mlo) { ml_peer_id = ahsta->ml_peer_id; diff --git a/drivers/net/wireless/ath/ath12k/peer.h b/drivers/net/wireless/ath/ath12k/peer.h index 7e6231cb2b52..5870ee11a8c7 100644 --- a/drivers/net/wireless/ath/ath12k/peer.h +++ b/drivers/net/wireless/ath/ath12k/peer.h @@ -59,6 +59,9 @@ struct ath12k_peer { /* To ensure only certain work related to dp is done once */ bool primary_link; + + /* for reference to ath12k_link_sta */ + u8 link_id; }; struct ath12k_ml_peer { From 3863f014ad23f1f966b78e8fe6f4cbed97fd4737 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:52 +0200 Subject: [PATCH 0432/1386] wifi: ath12k: symmetrize scan vdev creation and deletion during HW scan Currently, the hardware scan is initiated in the driver on scan link (15). After mapping to the appropriate radio based on the scan frequency, the vdev is created and the scan begins. However, the vdev is only deleted when channel assignment is about to occur after the scan. 
Additionally, it is also deleted if a new scan is requested on the same interface but the underlying radio differs in the new request. This imbalance leads to various hardware scan issues, especially when a non-MLO and MLO combination exists. In such cases, the latter tries to skip the scan and proceed with channel assignment while the former is still scanning, causing a firmware assert. To address this issue, symmetrize the scan vdev creation and deletion during hardware scan operations. This means creating a vdev when the scan starts and deleting it once the scan is completed or aborted. While at it, add a few debug prints in scan handling and a few empty lines for better code readability. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 3 +- drivers/net/wireless/ath/ath12k/mac.c | 120 +++++++++++++++++++------ drivers/net/wireless/ath/ath12k/wmi.c | 3 +- 3 files changed, 99 insertions(+), 27 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index d09ebcdde94f..9aed24597548 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -603,9 +603,10 @@ struct ath12k { struct delayed_work timeout; enum ath12k_scan_state state; bool is_roc; - int vdev_id; int roc_freq; bool roc_notify; + struct wiphy_work vdev_clean_wk; + struct ath12k_link_vif *arvif; } scan; struct { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index e3848485d029..1bd63b53408c 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4000,22 +4000,9 @@ void __ath12k_mac_scan_finish(struct ath12k *ar) 
ieee80211_remain_on_channel_expired(hw); fallthrough; case ATH12K_SCAN_STARTING: - if (!ar->scan.is_roc) { - struct cfg80211_scan_info info = { - .aborted = ((ar->scan.state == - ATH12K_SCAN_ABORTING) || - (ar->scan.state == - ATH12K_SCAN_STARTING)), - }; - - ieee80211_scan_completed(hw, &info); - } - - ar->scan.state = ATH12K_SCAN_IDLE; - ar->scan_channel = NULL; - ar->scan.roc_freq = 0; cancel_delayed_work(&ar->scan.timeout); complete(&ar->scan.completed); + wiphy_work_queue(ar->ah->hw->wiphy, &ar->scan.vdev_clean_wk); break; } } @@ -4056,15 +4043,15 @@ static int ath12k_scan_stop(struct ath12k *ar) } out: - /* Scan state should be updated upon scan completion but in case - * firmware fails to deliver the event (for whatever reason) it is - * desired to clean up scan state anyway. Firmware may have just - * dropped the scan completion event delivery due to transport pipe - * being overflown with data and/or it can recover on its own before - * next scan request is submitted. + /* Scan state should be updated in scan completion worker but in + * case firmware fails to deliver the event (for whatever reason) + * it is desired to clean up scan state anyway. Firmware may have + * just dropped the scan completion event delivery due to transport + * pipe being overflown with data and/or it can recover on its own + * before next scan request is submitted. */ spin_lock_bh(&ar->data_lock); - if (ar->scan.state != ATH12K_SCAN_IDLE) + if (ret) __ath12k_mac_scan_finish(ar); spin_unlock_bh(&ar->data_lock); @@ -4115,6 +4102,53 @@ static void ath12k_scan_timeout_work(struct work_struct *work) wiphy_unlock(ath12k_ar_to_hw(ar)->wiphy); } +static void ath12k_scan_vdev_clean_work(struct wiphy *wiphy, struct wiphy_work *work) +{ + struct ath12k *ar = container_of(work, struct ath12k, + scan.vdev_clean_wk); + struct ath12k_hw *ah = ar->ah; + struct ath12k_link_vif *arvif; + + lockdep_assert_wiphy(wiphy); + + arvif = ar->scan.arvif; + + /* The scan vdev has already been deleted. 
This can occur when a + * new scan request is made on the same vif with a different + * frequency, causing the scan arvif to move from one radio to + * another. Or, the scan was aborted and, via remove interface, the + * arvif is already deleted. Alternatively, if the scan arvif is + * already started (in use as an actual vdev), then do not delete it. + */ + if (!arvif || arvif->is_started) + goto work_complete; + + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac clean scan vdev (link id %u)", + arvif->link_id); + + ath12k_mac_remove_link_interface(ah->hw, arvif); + ath12k_mac_unassign_link_vif(arvif); + +work_complete: + spin_lock_bh(&ar->data_lock); + ar->scan.arvif = NULL; + if (!ar->scan.is_roc) { + struct cfg80211_scan_info info = { + .aborted = ((ar->scan.state == + ATH12K_SCAN_ABORTING) || + (ar->scan.state == + ATH12K_SCAN_STARTING)), + }; + + ieee80211_scan_completed(ar->ah->hw, &info); + } + + ar->scan.state = ATH12K_SCAN_IDLE; + ar->scan_channel = NULL; + ar->scan.roc_freq = 0; + spin_unlock_bh(&ar->data_lock); +} + static int ath12k_start_scan(struct ath12k *ar, struct ath12k_wmi_scan_req_arg *arg) { @@ -4208,6 +4242,9 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, link_id = ath12k_mac_find_link_id_by_ar(ahvif, ar); arvif = ath12k_mac_assign_link_vif(ah, vif, link_id); + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac link ID %d selected for scan", + arvif->link_id); + /* If the vif is already assigned to a specific vdev of an ar, * check whether its already started, vdev which is started * are not allowed to switch to a new radio. @@ -4231,6 +4268,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, create = false; } } + if (create) { /* Previous arvif would've been cleared in radio switch block * above, assign arvif again for create. 
@@ -4251,7 +4289,7 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, reinit_completion(&ar->scan.completed); ar->scan.state = ATH12K_SCAN_STARTING; ar->scan.is_roc = false; - ar->scan.vdev_id = arvif->vdev_id; + ar->scan.arvif = arvif; ret = 0; break; case ATH12K_SCAN_STARTING: @@ -4313,6 +4351,8 @@ static int ath12k_mac_op_hw_scan(struct ieee80211_hw *hw, spin_unlock_bh(&ar->data_lock); } + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac scan started"); + /* As per cfg80211/mac80211 scan design, it allows only one * scan at a time. Hence last_scan link id is used for * tracking the link id on which the scan is been done on @@ -4346,7 +4386,7 @@ static void ath12k_mac_op_cancel_hw_scan(struct ieee80211_hw *hw, lockdep_assert_wiphy(hw->wiphy); arvif = wiphy_dereference(hw->wiphy, ahvif->link[link_id]); - if (!arvif || !arvif->is_created) + if (!arvif || arvif->is_started) return; ar = arvif->ar; @@ -7404,6 +7444,7 @@ static void ath12k_mac_stop(struct ath12k *ar) clear_bit(ATH12K_CAC_RUNNING, &ar->dev_flags); cancel_delayed_work_sync(&ar->scan.timeout); + wiphy_work_cancel(ath12k_ar_to_hw(ar)->wiphy, &ar->scan.vdev_clean_wk); cancel_work_sync(&ar->regd_update_work); cancel_work_sync(&ar->ab->rfkill_work); @@ -8033,7 +8074,7 @@ static struct ath12k *ath12k_mac_assign_vif_to_vdev(struct ieee80211_hw *hw, scan_arvif = wiphy_dereference(hw->wiphy, ahvif->link[ATH12K_DEFAULT_SCAN_LINK]); if (scan_arvif && scan_arvif->ar == ar) { - ar->scan.vdev_id = -1; + ar->scan.arvif = NULL; ath12k_mac_remove_link_interface(hw, scan_arvif); ath12k_mac_unassign_link_vif(scan_arvif); } @@ -8234,6 +8275,7 @@ static void ath12k_mac_op_remove_interface(struct ieee80211_hw *hw, { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_link_vif *arvif; + struct ath12k *ar; u8 link_id; lockdep_assert_wiphy(hw->wiphy); @@ -8247,6 +8289,31 @@ static void ath12k_mac_op_remove_interface(struct ieee80211_hw *hw, if (!arvif || !arvif->is_created) continue; + ar = arvif->ar; + + /* 
Scan abortion is in progress since before this, cancel_hw_scan() + * is expected to be executed. Since link is anyways going to be removed + * now, just cancel the worker and send the scan aborted to user space + */ + if (ar->scan.arvif == arvif) { + wiphy_work_cancel(hw->wiphy, &ar->scan.vdev_clean_wk); + + spin_lock_bh(&ar->data_lock); + ar->scan.arvif = NULL; + if (!ar->scan.is_roc) { + struct cfg80211_scan_info info = { + .aborted = true, + }; + + ieee80211_scan_completed(ar->ah->hw, &info); + } + + ar->scan.state = ATH12K_SCAN_IDLE; + ar->scan_channel = NULL; + ar->scan.roc_freq = 0; + spin_unlock_bh(&ar->data_lock); + } + ath12k_mac_remove_link_interface(hw, arvif); ath12k_mac_unassign_link_vif(arvif); } @@ -9952,6 +10019,7 @@ static int ath12k_mac_op_cancel_remain_on_channel(struct ieee80211_hw *hw, ath12k_scan_abort(ar); cancel_delayed_work_sync(&ar->scan.timeout); + wiphy_work_cancel(hw->wiphy, &ar->scan.vdev_clean_wk); return 0; } @@ -10035,7 +10103,7 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, reinit_completion(&ar->scan.on_channel); ar->scan.state = ATH12K_SCAN_STARTING; ar->scan.is_roc = true; - ar->scan.vdev_id = arvif->vdev_id; + ar->scan.arvif = arvif; ar->scan.roc_freq = chan->center_freq; ar->scan.roc_notify = true; ret = 0; @@ -10952,6 +11020,7 @@ static void ath12k_mac_setup(struct ath12k *ar) ar->cfg_rx_chainmask = pdev->cap.rx_chain_mask; ar->num_tx_chains = hweight32(pdev->cap.tx_chain_mask); ar->num_rx_chains = hweight32(pdev->cap.rx_chain_mask); + ar->scan.arvif = NULL; spin_lock_init(&ar->data_lock); INIT_LIST_HEAD(&ar->arvifs); @@ -10969,6 +11038,7 @@ static void ath12k_mac_setup(struct ath12k *ar) init_completion(&ar->mlo_setup_done); INIT_DELAYED_WORK(&ar->scan.timeout, ath12k_scan_timeout_work); + wiphy_work_init(&ar->scan.vdev_clean_wk, ath12k_scan_vdev_clean_work); INIT_WORK(&ar->regd_update_work, ath12k_regd_update_work); wiphy_work_init(&ar->wmi_mgmt_tx_work, ath12k_mgmt_over_wmi_tx_work); diff --git 
a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 892cc4846e4f..7bec414b0358 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -6338,7 +6338,8 @@ static struct ath12k *ath12k_get_ar_on_scan_state(struct ath12k_base *ab, spin_lock_bh(&ar->data_lock); if (ar->scan.state == state && - ar->scan.vdev_id == vdev_id) { + ar->scan.arvif && + ar->scan.arvif->vdev_id == vdev_id) { spin_unlock_bh(&ar->data_lock); return ar; } From 6792b3ca14adb666a3c41628bac99227680eb871 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:53 +0200 Subject: [PATCH 0433/1386] wifi: ath12k: add can_activate_links mac operation When operating as an ML station, mac80211 initially activates only one link in the driver until the peer is authorized. Once the state changes to authorized, the driver should call mac80211 API to activate all other partner links. Before doing so, mac80211 checks if the driver supports activating links via the can_activate_links mac80211_ops. Therefore, add support for this mac80211_ops and call the API to activate the links once the state changes to authorized. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 1bd63b53408c..f7e505470f21 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -5645,6 +5645,19 @@ static int ath12k_mac_op_sta_state(struct ieee80211_hw *hw, } } + /* In the ML station scenario, activate all partner links once the + * client is transitioning to the associated state. + * + * FIXME: Ideally, this activation should occur when the client + * transitions to the authorized state. However, there are some + * issues with handling this in the firmware. Until the firmware + * can manage it properly, activate the links when the client is + * about to move to the associated state. 
+ */ + if (ieee80211_vif_is_mld(vif) && vif->type == NL80211_IFTYPE_STATION && + old_state == IEEE80211_STA_AUTH && new_state == IEEE80211_STA_ASSOC) + ieee80211_set_active_links(vif, ieee80211_vif_usable_links(vif)); + /* Handle all the other state transitions in generic way */ valid_links = ahsta->links_map; for_each_set_bit(link_id, &valid_links, IEEE80211_MLD_MAX_NUM_LINKS) { @@ -5939,6 +5952,15 @@ static int ath12k_mac_op_change_sta_links(struct ieee80211_hw *hw, return 0; } +static bool ath12k_mac_op_can_activate_links(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u16 active_links) +{ + /* TODO: Handle recovery case */ + + return true; +} + static int ath12k_conf_tx_uapsd(struct ath12k_link_vif *arvif, u16 ac, bool enable) { @@ -10249,6 +10271,7 @@ static const struct ieee80211_ops ath12k_ops = { .remain_on_channel = ath12k_mac_op_remain_on_channel, .cancel_remain_on_channel = ath12k_mac_op_cancel_remain_on_channel, .change_sta_links = ath12k_mac_op_change_sta_links, + .can_activate_links = ath12k_mac_op_can_activate_links, #ifdef CONFIG_PM .suspend = ath12k_wow_op_suspend, .resume = ath12k_wow_op_resume, From 77478788c957d1d41462890f187c71f8babbd093 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:54 +0200 Subject: [PATCH 0434/1386] wifi: ath12k: add no-op without debug print in WMI Rx event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, certain WMI events are frequently received by the host. Since the host lacks the logic to process these events, the console is flooded with ‘Unknown eventid:’ debug messages. To address this, handle these events gracefully without printing debug messages. There is already a block of event IDs that are ignored with a debug print. However, this new type of event occurs more frequently, so no debug print is necessary, and handling it should be a no-op. 
While at it, re-arrange the code so that all Unknown event IDs are towards the end of the switch block. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/wmi.c | 20 +++++++++++++------- drivers/net/wireless/ath/ath12k/wmi.h | 5 +++++ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 7bec414b0358..3bc3ed29e429 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -7505,13 +7505,6 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) case WMI_P2P_NOA_EVENTID: ath12k_wmi_p2p_noa_event(ab, skb); break; - /* add Unsupported events here */ - case WMI_TBTTOFFSET_EXT_UPDATE_EVENTID: - case WMI_PEER_OPER_MODE_CHANGE_EVENTID: - case WMI_PDEV_DMA_RING_CFG_RSP_EVENTID: - ath12k_dbg(ab, ATH12K_DBG_WMI, - "ignoring unsupported event 0x%x\n", id); - break; case WMI_PDEV_DFS_RADAR_DETECTION_EVENTID: ath12k_wmi_pdev_dfs_radar_detected_event(ab, skb); break; @@ -7533,6 +7526,19 @@ static void ath12k_wmi_op_rx(struct ath12k_base *ab, struct sk_buff *skb) case WMI_MLO_TEARDOWN_COMPLETE_EVENTID: ath12k_wmi_event_teardown_complete(ab, skb); break; + /* add Unsupported events (rare) here */ + case WMI_TBTTOFFSET_EXT_UPDATE_EVENTID: + case WMI_PEER_OPER_MODE_CHANGE_EVENTID: + case WMI_PDEV_DMA_RING_CFG_RSP_EVENTID: + ath12k_dbg(ab, ATH12K_DBG_WMI, + "ignoring unsupported event 0x%x\n", id); + break; + /* add Unsupported events (frequent) here */ + case WMI_PDEV_GET_HALPHY_CAL_STATUS_EVENTID: + case WMI_MGMT_RX_FW_CONSUMED_EVENTID: + case WMI_OBSS_COLOR_COLLISION_DETECTION_EVENTID: + /* debug might flood hence silently ignore (no-op) 
*/ + break; /* TODO: Add remaining events */ default: ath12k_dbg(ab, ATH12K_DBG_WMI, "Unknown eventid: 0x%x\n", id); diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index 270ed458302e..cd3de0a65543 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -711,6 +711,8 @@ enum wmi_tlv_event_id { WMI_PDEV_RAP_INFO_EVENTID, WMI_CHAN_RF_CHARACTERIZATION_INFO_EVENTID, WMI_SERVICE_READY_EXT2_EVENTID, + WMI_PDEV_GET_HALPHY_CAL_STATUS_EVENTID = + WMI_SERVICE_READY_EXT2_EVENTID + 4, WMI_VDEV_START_RESP_EVENTID = WMI_TLV_CMD(WMI_GRP_VDEV), WMI_VDEV_STOPPED_EVENTID, WMI_VDEV_INSTALL_KEY_COMPLETE_EVENTID, @@ -752,6 +754,7 @@ enum wmi_tlv_event_id { WMI_TBTTOFFSET_EXT_UPDATE_EVENTID, WMI_OFFCHAN_DATA_TX_COMPLETION_EVENTID, WMI_HOST_FILS_DISCOVERY_EVENTID, + WMI_MGMT_RX_FW_CONSUMED_EVENTID = WMI_HOST_FILS_DISCOVERY_EVENTID + 3, WMI_TX_DELBA_COMPLETE_EVENTID = WMI_TLV_CMD(WMI_GRP_BA_NEG), WMI_TX_ADDBA_COMPLETE_EVENTID, WMI_BA_RSP_SSN_EVENTID, @@ -850,6 +853,8 @@ enum wmi_tlv_event_id { WMI_MDNS_STATS_EVENTID = WMI_TLV_CMD(WMI_GRP_MDNS_OFL), WMI_SAP_OFL_ADD_STA_EVENTID = WMI_TLV_CMD(WMI_GRP_SAP_OFL), WMI_SAP_OFL_DEL_STA_EVENTID, + WMI_OBSS_COLOR_COLLISION_DETECTION_EVENTID = + WMI_EVT_GRP_START_ID(WMI_GRP_OBSS_OFL), WMI_OCB_SET_CONFIG_RESP_EVENTID = WMI_TLV_CMD(WMI_GRP_OCB), WMI_OCB_GET_TSF_TIMER_RESP_EVENTID, WMI_DCC_GET_STATS_RESP_EVENTID, From 2c737079493d79ac340cb2b1b14c1a49645cdf61 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:55 +0200 Subject: [PATCH 0435/1386] wifi: ath12k: remove warning print in htt mlo offset event message In the function ath12k_htt_mlo_offset_event_handler(), it is possible that the ar is not yet active (started). The function ath12k_mac_get_ar_by_pdev_id() only searches for active pdev, so a NULL return is possible. Therefore, there is no need to print a warning, instead, just silently discard the message. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_rx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_rx.c b/drivers/net/wireless/ath/ath12k/dp_rx.c index 66367bfb4acc..5c5a3aae393b 100644 --- a/drivers/net/wireless/ath/ath12k/dp_rx.c +++ b/drivers/net/wireless/ath/ath12k/dp_rx.c @@ -1697,7 +1697,11 @@ static void ath12k_htt_mlo_offset_event_handler(struct ath12k_base *ab, rcu_read_lock(); ar = ath12k_mac_get_ar_by_pdev_id(ab, pdev_id); if (!ar) { - ath12k_warn(ab, "invalid pdev id %d on htt mlo offset\n", pdev_id); + /* It is possible that the ar is not yet active (started). + * The above function will only look for the active pdev + * and hence %NULL return is possible. Just silently + * discard this message + */ goto exit; } From 043b473e3e02d4c371075956e9c72c32f17958fb Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:56 +0200 Subject: [PATCH 0436/1386] wifi: ath12k: add ATH12K_FW_FEATURE_MLO capability firmware feature To maintain backward compatibility with older firmware versions, introduce a new feature bit, ATH12K_FW_FEATURE_MLO, to identify whether the firmware supports MLO. If the firmware-X.bin does not have this bit advertised in the feature, then MLO capability in the host will be disabled. This applies only for QCN9274 chipsets. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-6-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.c | 34 +++++++++++++++++++++++--- drivers/net/wireless/ath/ath12k/fw.h | 3 +++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.c b/drivers/net/wireless/ath/ath12k/core.c index ff79cb910523..0c6b35aac96e 100644 --- a/drivers/net/wireless/ath/ath12k/core.c +++ b/drivers/net/wireless/ath/ath12k/core.c @@ -1782,6 +1782,9 @@ static int ath12k_core_hw_group_create(struct ath12k_hw_group *ag) void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) { + struct ath12k_base *ab; + int i; + lockdep_assert_held(&ag->mutex); /* If more than one devices are grouped, then inter MLO @@ -1790,10 +1793,35 @@ void ath12k_core_hw_group_set_mlo_capable(struct ath12k_hw_group *ag) * Only when there is one device, then it depends whether the * device can support intra chip MLO or not */ - if (ag->num_devices > 1) + if (ag->num_devices > 1) { ag->mlo_capable = true; - else - ag->mlo_capable = ag->ab[0]->single_chip_mlo_supp; + } else { + ab = ag->ab[0]; + ag->mlo_capable = ab->single_chip_mlo_supp; + + /* WCN chipsets do not advertise MLO in firmware features, + * hence skip checking + */ + if (ab->hw_params->def_num_link) + return; + } + + if (!ag->mlo_capable) + return; + + for (i = 0; i < ag->num_devices; i++) { + ab = ag->ab[i]; + if (!ab) + continue; + + /* even if 1 device's firmware feature indicates MLO + * unsupported, make MLO unsupported for the whole group + */ + if (!test_bit(ATH12K_FW_FEATURE_MLO, ab->fw.fw_features)) { + ag->mlo_capable = false; + return; + } + } } int ath12k_core_init(struct ath12k_base *ab) diff --git a/drivers/net/wireless/ath/ath12k/fw.h 
b/drivers/net/wireless/ath/ath12k/fw.h index 3ff041f15fa0..273c003eff3b 100644 --- a/drivers/net/wireless/ath/ath12k/fw.h +++ b/drivers/net/wireless/ath/ath12k/fw.h @@ -23,6 +23,9 @@ enum ath12k_fw_features { */ ATH12K_FW_FEATURE_MULTI_QRTR_ID = 0, + /* The firmware supports MLO capability */ + ATH12K_FW_FEATURE_MLO, + /* keep last */ ATH12K_FW_FEATURE_COUNT, }; From 02213c21fb8dfe430e25e539153865eb846f1549 Mon Sep 17 00:00:00 2001 From: Aditya Kumar Singh Date: Wed, 11 Dec 2024 17:43:57 +0200 Subject: [PATCH 0437/1386] wifi: ath12k: assign unique hardware link IDs during QMI host cap Currently, in the QMI host capability, the device index, the number of local links, and the corresponding hardware link IDs are sent. The hardware link ID assignment is based on the local variable `hw_link_id`, which starts from 0 and ranges up to `num_local_links` in the device. Starting from 0 is not ideal because it can result in the same link ID being assigned to different devices in certain scenarios (e.g., split MAC). Additionally, for multi link operations the firmware expects the hardware link IDs in the same order as the Wireless Serial Interface (WSI) connection. Hence, for MLO to function seamlessly, the hardware link IDs across devices need to be unique and should follow the order of the WSI connection. To address this, a previous change read the WSI index from the Device Tree (DT) and stored it. Use this WSI index to determine the starting hardware link IDs for each device, ensuring uniqueness and correct order across all devices. While at it, add debug prints to clearly show the MLO capability advertisement sent during QMI host capability exchange. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Aditya Kumar Singh Co-developed-by: Raj Kumar Bhagat Signed-off-by: Raj Kumar Bhagat Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-7-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 2 ++ drivers/net/wireless/ath/ath12k/qmi.c | 35 +++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index 9aed24597548..d07b54f441c3 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -863,11 +863,13 @@ struct ath12k_hw_group { struct device_node *wsi_node[ATH12K_MAX_SOCS]; struct ath12k_mlo_memory mlo_mem; struct ath12k_hw_link hw_links[ATH12K_GROUP_MAX_RADIO]; + bool hw_link_id_init_done; }; /* Holds WSI info specific to each device, excluding WSI group info */ struct ath12k_wsi_info { u32 index; + u32 hw_link_id_base; }; /* Master structure to hold the hw data which may be used in core module */ diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index 964d350be748..a8ed86a294c3 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2016,6 +2016,30 @@ static const struct qmi_elem_info qmi_wlanfw_wlan_ini_resp_msg_v01_ei[] = { }, }; +static void ath12k_host_cap_hw_link_id_init(struct ath12k_hw_group *ag) +{ + struct ath12k_base *ab, *partner_ab; + int i, j, hw_id_base; + + for (i = 0; i < ag->num_devices; i++) { + hw_id_base = 0; + ab = ag->ab[i]; + + for (j = 0; j < ag->num_devices; j++) { + partner_ab = ag->ab[j]; + + if (partner_ab->wsi_info.index >= ab->wsi_info.index) + continue; + + hw_id_base += partner_ab->qmi.num_radios; + } + + ab->wsi_info.hw_link_id_base = hw_id_base; + } + + 
ag->hw_link_id_init_done = true; +} + static int ath12k_host_cap_parse_mlo(struct ath12k_base *ab, struct qmi_wlanfw_host_cap_req_msg_v01 *req) { @@ -2059,8 +2083,14 @@ static int ath12k_host_cap_parse_mlo(struct ath12k_base *ab, req->mlo_num_chips_valid = 1; req->mlo_num_chips = ag->num_devices; + ath12k_dbg(ab, ATH12K_DBG_QMI, "mlo capability advertisement device_id %d group_id %d num_devices %d", + req->mlo_chip_id, req->mlo_group_id, req->mlo_num_chips); + mutex_lock(&ag->mutex); + if (!ag->hw_link_id_init_done) + ath12k_host_cap_hw_link_id_init(ag); + for (i = 0; i < ag->num_devices; i++) { info = &req->mlo_chip_info[i]; partner_ab = ag->ab[i]; @@ -2078,9 +2108,12 @@ static int ath12k_host_cap_parse_mlo(struct ath12k_base *ab, info->chip_id, info->num_local_links); for (j = 0; j < info->num_local_links; j++) { - info->hw_link_id[j] = hw_link_id; + info->hw_link_id[j] = partner_ab->wsi_info.hw_link_id_base + j; info->valid_mlo_link_id[j] = 1; + ath12k_dbg(ab, ATH12K_DBG_QMI, "mlo hw_link_id %d\n", + info->hw_link_id[j]); + hw_link_id++; } } From d33bc467e8325be66b7209250e9829d199034ffe Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Wed, 11 Dec 2024 17:43:58 +0200 Subject: [PATCH 0438/1386] wifi: ath12k: advertise MLO support and capabilities Now everything in ath12k is in place and we can enable Multi-Link Operation (MLO) in the driver. For now it's only enabled for QCN9274 with firmware having ATH12K_FW_FEATURE_MLO feature bit set. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Rameshkumar Sundaram Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241211154358.776279-8-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 2 ++ drivers/net/wireless/ath/ath12k/mac.c | 13 ++++++++++++- drivers/net/wireless/ath/ath12k/wmi.c | 3 +++ drivers/net/wireless/ath/ath12k/wmi.h | 2 ++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index d07b54f441c3..ec61ad3d82c3 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -773,6 +773,8 @@ struct ath12k_pdev_cap { u32 tx_chain_mask_shift; u32 rx_chain_mask_shift; struct ath12k_band_cap band[NUM_NL80211_BANDS]; + u32 eml_cap; + u32 mld_cap; }; struct mlo_timestamp { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index f7e505470f21..186765fa95f5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10693,7 +10693,7 @@ static const u8 ath12k_if_types_ext_capa_ap[] = { [10] = WLAN_EXT_CAPA11_EMA_SUPPORT, }; -static const struct wiphy_iftype_ext_capab ath12k_iftypes_ext_capa[] = { +static struct wiphy_iftype_ext_capab ath12k_iftypes_ext_capa[] = { { .extended_capabilities = ath12k_if_types_ext_capa, .extended_capabilities_mask = ath12k_if_types_ext_capa, @@ -10710,6 +10710,8 @@ static const struct wiphy_iftype_ext_capab ath12k_iftypes_ext_capa[] = { .extended_capabilities_mask = ath12k_if_types_ext_capa_ap, .extended_capabilities_len = sizeof(ath12k_if_types_ext_capa_ap), + .eml_capabilities = 0, + .mld_capa_and_ops = 0, }, }; @@ -10919,6 +10921,15 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) */ wiphy->flags |= WIPHY_FLAG_DISABLE_WEXT; + /* Copy over MLO related 
capabilities received from + * WMI_SERVICE_READY_EXT2_EVENT if the hardware group is MLO capable. + */ + if (ab->ag->mlo_capable) { + ath12k_iftypes_ext_capa[2].eml_capabilities = cap->eml_cap; + ath12k_iftypes_ext_capa[2].mld_capa_and_ops = cap->mld_cap; + wiphy->flags |= WIPHY_FLAG_SUPPORTS_MLO; + } + hw->queues = ATH12K_HW_MAX_QUEUES; wiphy->tx_queue_len = ATH12K_QUEUE_LEN; hw->offchannel_tx_hw_queue = ATH12K_HW_MAX_QUEUES - 1; diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c index 3bc3ed29e429..562b0615ed06 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.c +++ b/drivers/net/wireless/ath/ath12k/wmi.c @@ -4662,6 +4662,9 @@ ath12k_wmi_tlv_mac_phy_caps_ext_parse(struct ath12k_base *ab, caps->eht_cap_info_internal); } + pdev->cap.eml_cap = le32_to_cpu(caps->eml_capability); + pdev->cap.mld_cap = le32_to_cpu(caps->mld_capability); + return 0; } diff --git a/drivers/net/wireless/ath/ath12k/wmi.h b/drivers/net/wireless/ath/ath12k/wmi.h index cd3de0a65543..b6a197389277 100644 --- a/drivers/net/wireless/ath/ath12k/wmi.h +++ b/drivers/net/wireless/ath/ath12k/wmi.h @@ -2716,6 +2716,8 @@ struct ath12k_wmi_caps_ext_params { __le32 eht_cap_info_internal; __le32 eht_supp_mcs_ext_2ghz[WMI_MAX_EHT_SUPP_MCS_2G_SIZE]; __le32 eht_supp_mcs_ext_5ghz[WMI_MAX_EHT_SUPP_MCS_5G_SIZE]; + __le32 eml_capability; + __le32 mld_capability; } __packed; /* 2 word representation of MAC addr */ From 8dccbecbb9692a96cf477eb826352a7c556a31e2 Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Fri, 13 Dec 2024 16:06:20 +0100 Subject: [PATCH 0439/1386] selftests/bpf: test_xdp_meta: Rename BPF sections SEC("t") and SEC("x") can't be loaded by the __load() helper. Rename these sections to SEC("tc") and SEC("xdp") so they can be interpreted by the __load() helper in an upcoming patch. Update the test_xdp_meta.sh to fit these new names. 
Signed-off-by: Bastien Curutchet Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20241213-xdp_meta-v2-1-634582725b90@bootlin.com --- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 4 ++-- tools/testing/selftests/bpf/test_xdp_meta.sh | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index a7c4a7d49fe6..fe2d71ae0e71 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -8,7 +8,7 @@ #define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1) #define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem -SEC("t") +SEC("tc") int ing_cls(struct __sk_buff *ctx) { __u8 *data, *data_meta, *data_end; @@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx) return diff ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("x") +SEC("xdp") int ing_xdp(struct xdp_md *ctx) { __u8 *data, *data_meta, *data_end; diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh index 2740322c1878..6039b92f1094 100755 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ b/tools/testing/selftests/bpf/test_xdp_meta.sh @@ -43,11 +43,11 @@ ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 ip netns exec ${NS1} tc qdisc add dev veth1 clsact ip netns exec ${NS2} tc qdisc add dev veth2 clsact -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t +ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec tc +ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec tc -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x +ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec xdp +ip netns exec ${NS2} ip link set 
dev veth2 xdp obj ${BPF_FILE} sec xdp ip netns exec ${NS1} ip link set dev veth1 up ip netns exec ${NS2} ip link set dev veth2 up From df539cefb0abbd16be9fbcc6ec46a5a35495800f Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Fri, 13 Dec 2024 16:06:21 +0100 Subject: [PATCH 0440/1386] selftests/bpf: Migrate test_xdp_meta.sh into xdp_context_test_run.c test_xdp_meta.sh can't be used by the BPF CI. Migrate test_xdp_meta.sh into a new test case in xdp_context_test_run.c. It uses the same BPF programs located in progs/test_xdp_meta.c and the same network topology. Remove test_xdp_meta.sh and its Makefile entry. Signed-off-by: Bastien Curutchet Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20241213-xdp_meta-v2-2-634582725b90@bootlin.com --- tools/testing/selftests/bpf/Makefile | 1 - .../bpf/prog_tests/xdp_context_test_run.c | 87 +++++++++++++++++++ tools/testing/selftests/bpf/test_xdp_meta.sh | 58 ------------- 3 files changed, 87 insertions(+), 59 deletions(-) delete mode 100755 tools/testing/selftests/bpf/test_xdp_meta.sh diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 6ad3b1ba1920..772bfc6b63fa 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -129,7 +129,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c) TEST_PROGS := test_kmod.sh \ test_xdp_redirect.sh \ test_xdp_redirect_multi.sh \ - test_xdp_meta.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index e6a783c7f5db..937da9b7532a 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -2,6 +2,14 @@ #include #include #include "test_xdp_context_test_run.skel.h" +#include "test_xdp_meta.skel.h" + +#define TX_ADDR "10.0.0.1" +#define RX_ADDR 
"10.0.0.2" +#define RX_NAME "veth0" +#define TX_NAME "veth1" +#define TX_NETNS "xdp_context_tx" +#define RX_NETNS "xdp_context_rx" void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts, __u32 data_meta, __u32 data, __u32 data_end, @@ -103,3 +111,82 @@ void test_xdp_context_test_run(void) test_xdp_context_test_run__destroy(skel); } + +void test_xdp_context_functional(void) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *rx_ns = NULL, *tx_ns = NULL; + struct bpf_program *tc_prog, *xdp_prog; + struct test_xdp_meta *skel = NULL; + struct nstoken *nstoken = NULL; + int rx_ifindex; + int ret; + + tx_ns = netns_new(TX_NETNS, false); + if (!ASSERT_OK_PTR(tx_ns, "create tx_ns")) + return; + + rx_ns = netns_new(RX_NETNS, false); + if (!ASSERT_OK_PTR(rx_ns, "create rx_ns")) + goto close; + + SYS(close, "ip link add " RX_NAME " netns " RX_NETNS + " type veth peer name " TX_NAME " netns " TX_NETNS); + + nstoken = open_netns(RX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns rx_ns")) + goto close; + + SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME); + SYS(close, "ip link set dev " RX_NAME " up"); + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + goto close; + + rx_ifindex = if_nametoindex(RX_NAME); + if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx")) + goto close; + + tc_hook.ifindex = rx_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls"); + if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp"); + if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog")) + goto close; 
+ + ret = bpf_xdp_attach(rx_ifindex, + bpf_program__fd(xdp_prog), + 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + close_netns(nstoken); + + nstoken = open_netns(TX_NETNS); + if (!ASSERT_OK_PTR(nstoken, "setns tx_ns")) + goto close; + + SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME); + SYS(close, "ip link set dev " TX_NAME " up"); + ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping"); + +close: + close_netns(nstoken); + test_xdp_meta__destroy(skel); + netns_free(rx_ns); + netns_free(tx_ns); +} + diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh deleted file mode 100755 index 6039b92f1094..000000000000 --- a/tools/testing/selftests/bpf/test_xdp_meta.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/sh - -BPF_FILE="test_xdp_meta.bpf.o" -# Kselftest framework requirement - SKIP code is 4. -readonly KSFT_SKIP=4 -readonly NS1="ns1-$(mktemp -u XXXXXX)" -readonly NS2="ns2-$(mktemp -u XXXXXX)" - -cleanup() -{ - if [ "$?" = "0" ]; then - echo "selftests: test_xdp_meta [PASS]"; - else - echo "selftests: test_xdp_meta [FAILED]"; - fi - - set +e - ip link del veth1 2> /dev/null - ip netns del ${NS1} 2> /dev/null - ip netns del ${NS2} 2> /dev/null -} - -ip link set dev lo xdp off 2>/dev/null > /dev/null -if [ $? 
-ne 0 ];then - echo "selftests: [SKIP] Could not run test without the ip xdp support" - exit $KSFT_SKIP -fi -set -e - -ip netns add ${NS1} -ip netns add ${NS2} - -trap cleanup 0 2 3 6 9 - -ip link add veth1 type veth peer name veth2 - -ip link set veth1 netns ${NS1} -ip link set veth2 netns ${NS2} - -ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1 -ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2 - -ip netns exec ${NS1} tc qdisc add dev veth1 clsact -ip netns exec ${NS2} tc qdisc add dev veth2 clsact - -ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec tc -ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec tc - -ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec xdp -ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec xdp - -ip netns exec ${NS1} ip link set dev veth1 up -ip netns exec ${NS2} ip link set dev veth2 up - -ip netns exec ${NS1} ping -c 1 10.1.1.22 -ip netns exec ${NS2} ping -c 1 10.1.1.11 - -exit 0 From d920270a6dbf756384b125ce39c17666a7c0c9f4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Dec 2024 20:58:15 +0000 Subject: [PATCH 0441/1386] rxrpc: Disable IRQ, not BH, to take the lock for ->attend_link Use spin_lock_irq(), not spin_lock_bh(), to take the lock when accessing ->attend_link, to stop a delay in the I/O thread due to an interrupt being taken in the app thread whilst that holds the lock and vice versa.
Fixes: a2ea9a907260 ("rxrpc: Use irq-disabling spinlocks between app and I/O thread") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2870146.1734037095@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/io_thread.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 2925c7fc82cf..64f8d77b8731 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -508,9 +508,9 @@ int rxrpc_io_thread(void *data) while ((conn = list_first_entry_or_null(&conn_attend_q, struct rxrpc_connection, attend_link))) { - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); list_del_init(&conn->attend_link); - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); rxrpc_input_conn_event(conn, NULL); rxrpc_put_connection(conn, rxrpc_conn_put_poke); } @@ -527,9 +527,9 @@ int rxrpc_io_thread(void *data) while ((call = list_first_entry_or_null(&call_attend_q, struct rxrpc_call, attend_link))) { - spin_lock_bh(&local->lock); + spin_lock_irq(&local->lock); list_del_init(&call->attend_link); - spin_unlock_bh(&local->lock); + spin_unlock_irq(&local->lock); trace_rxrpc_call_poked(call); rxrpc_input_call_event(call); rxrpc_put_call(call, rxrpc_call_put_poke); From ae4f899894792c436d792c17d3f3e6a2affb787f Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Dec 2024 21:04:22 +0000 Subject: [PATCH 0442/1386] rxrpc: Fix ability to add more data to a call once MSG_MORE deasserted When userspace is adding data to an RPC call for transmission, it must pass MSG_MORE to sendmsg() if it intends to add more data in future calls to sendmsg(). Calling sendmsg() without MSG_MORE being asserted closes the transmission phase of the call (assuming sendmsg() adds all the data presented) and further attempts to add more data should be rejected. However, this is no longer the case. 
The change of call state that was previously the guard got bumped over to the I/O thread, which leaves a window for a repeat sendmsg() to insert more data. This previously went unnoticed, but the more recent patch that changed the structures behind the Tx queue added a warning: WARNING: CPU: 3 PID: 6639 at net/rxrpc/sendmsg.c:296 rxrpc_send_data+0x3f2/0x860 and rejected the additional data, returning error EPROTO. Fix this by adding a guard flag to the call, setting the flag when we queue the final packet and then rejecting further attempts to add data with EPROTO. Fixes: 2d689424b618 ("rxrpc: Move call state changes from sendmsg to I/O thread") Reported-by: syzbot+ff11be94dfcd7a5af8da@syzkaller.appspotmail.com Closes: https://lore.kernel.org/r/6757fb68.050a0220.2477f.005f.GAE@google.com/ Signed-off-by: David Howells Tested-by: syzbot+ff11be94dfcd7a5af8da@syzkaller.appspotmail.com cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/2870480.1734037462@warthog.procyon.org.uk Signed-off-by: Jakub Kicinski --- net/rxrpc/ar-internal.h | 1 + net/rxrpc/sendmsg.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 0c0a3c89dba3..718193df9d2e 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -571,6 +571,7 @@ enum rxrpc_call_flag { RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ RXRPC_CALL_TX_ALL_ACKED, /* Last packet has been hard-acked */ + RXRPC_CALL_TX_NO_MORE, /* No more data to transmit (MSG_MORE deasserted) */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index c4c8b718cafa..0e8da909d4f2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -266,6 +266,7 @@ static void 
rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, /* Order send_top after the queue->next pointer and txb content. */ smp_store_release(&call->send_top, seq); if (last) { + set_bit(RXRPC_CALL_TX_NO_MORE, &call->flags); rxrpc_notify_end_tx(rx, call, notify_end_tx); call->send_queue = NULL; } @@ -329,6 +330,13 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, bool more = msg->msg_flags & MSG_MORE; int ret, copied = 0; + if (test_bit(RXRPC_CALL_TX_NO_MORE, &call->flags)) { + trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send, + call->cid, call->call_id, call->rx_consumed, + 0, -EPROTO); + return -EPROTO; + } + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); ret = rxrpc_wait_to_be_connected(call, &timeo); From 4fe205539c46ab0add34675ab037f49caa30607c Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 13 Dec 2024 11:25:50 +0000 Subject: [PATCH 0443/1386] netlink: specs: add phys-binding attr to rt_link spec Add the missing phys-binding attr to the mctp-attrs in the rt_link spec. This fixes commit 580db513b4a9 ("net: mctp: Expose transport binding identifier via IFLA attribute"). Note that enum mctp_phys_binding is not currently uapi, but perhaps it should be? 
Signed-off-by: Donald Hunter Link: https://patch.msgid.link/20241213112551.33557-1-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 9ffa13b77dcf..96465376d6fe 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -2086,6 +2086,9 @@ attribute-sets: - name: mctp-net type: u32 + - + name: phys-binding + type: u8 - name: stats-attrs name-prefix: ifla-stats- From 77ec16be758ea65de641833149963bec39f311da Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:54 +0100 Subject: [PATCH 0444/1386] sock: Introduce sk_set_prio_allowed helper function Simplify priority setting permissions with the 'sk_set_prio_allowed' function, centralizing the validation logic. This change is made in anticipation of a second caller in a following patch. No functional changes. 
Reviewed-by: Willem de Bruijn Reviewed-by: Eric Dumazet Suggested-by: Willem de Bruijn Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-2-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- net/core/sock.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 74729d20cd00..9016f984d44e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -454,6 +454,13 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen, return 0; } +static bool sk_set_prio_allowed(const struct sock *sk, int val) +{ + return ((val >= TC_PRIO_BESTEFFORT && val <= TC_PRIO_INTERACTIVE) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || + sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)); +} + static bool sock_needs_netstamp(const struct sock *sk) { switch (sk->sk_family) { @@ -1193,9 +1200,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname, /* handle options which do not require locking the socket. */ switch (optname) { case SO_PRIORITY: - if ((val >= 0 && val <= 6) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) || - sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) { + if (sk_set_prio_allowed(sk, val)) { sock_set_priority(sk, val); return 0; } From a32f3e9d1ed146f81162702605d65447a319eb76 Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:55 +0100 Subject: [PATCH 0445/1386] sock: support SO_PRIORITY cmsg The Linux socket API currently allows setting SO_PRIORITY at the socket level, applying a uniform priority to all packets sent through that socket. The exception to this is IP_TOS, when the priority value is calculated during the handling of ancillary data, as implemented in commit f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data"). However, this is a computed value, and there is currently no mechanism to set a custom priority via control messages prior to this patch. 
According to this patch, if SO_PRIORITY is specified as ancillary data, the packet is sent with the priority value set through sockc->priority, overriding the socket-level values set via the traditional setsockopt() method. This is analogous to the existing support for SO_MARK, as implemented in commit c6af0c227a22 ("ip: support SO_MARK cmsg"). If both cmsg SO_PRIORITY and IP_TOS are passed, then the one that takes precedence is the last one in the cmsg list. This patch has the side effect that raw_send_hdrinc now interprets cmsg IP_TOS. Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-3-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 2 +- include/net/ip.h | 2 +- include/net/sock.h | 4 +++- net/can/raw.c | 2 +- net/core/sock.c | 7 +++++++ net/ipv4/ip_output.c | 4 ++-- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv6/ip6_output.c | 3 ++- net/ipv6/ping.c | 1 + net/ipv6/raw.c | 3 ++- net/ipv6/udp.c | 1 + net/packet/af_packet.c | 2 +- 13 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 56d8bc5593d3..3ccbad881d74 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -172,7 +172,7 @@ struct inet_cork { u8 tx_flags; __u8 ttl; __s16 tos; - char priority; + u32 priority; __u16 gso_size; u32 ts_opt_id; u64 transmit_time; diff --git a/include/net/ip.h b/include/net/ip.h index 0e548c1f2a0e..9f5e33e371fc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -81,7 +81,6 @@ struct ipcm_cookie { __u8 protocol; __u8 ttl; __s16 tos; - char priority; __u16 gso_size; }; @@ -96,6 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm_init(ipcm); ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); + ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority); ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = 
READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; diff --git a/include/net/sock.h b/include/net/sock.h index 7464e9f9f47c..316a34d6c48b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1814,13 +1814,15 @@ struct sockcm_cookie { u32 mark; u32 tsflags; u32 ts_opt_id; + u32 priority; }; static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { *sockc = (struct sockcm_cookie) { - .tsflags = READ_ONCE(sk->sk_tsflags) + .tsflags = READ_ONCE(sk->sk_tsflags), + .priority = READ_ONCE(sk->sk_priority), }; } diff --git a/net/can/raw.c b/net/can/raw.c index 255c0a8f39d6..46e8ed9d64da 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -962,7 +962,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc.priority; skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; diff --git a/net/core/sock.c b/net/core/sock.c index 9016f984d44e..a3d9941c1d32 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2947,6 +2947,13 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, case SCM_RIGHTS: case SCM_CREDENTIALS: break; + case SO_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg))) + return -EPERM; + sockc->priority = *(u32 *)CMSG_DATA(cmsg); + break; default: return -EINVAL; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a59204a8d850..f45a083f2c13 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1333,7 +1333,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->ttl = ipc->ttl; cork->tos = ipc->tos; cork->mark = ipc->sockc.mark; - cork->priority = ipc->priority; + cork->priority = ipc->sockc.priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); @@ -1470,7 +1470,7 @@ struct sk_buff 
*__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt); } - skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); + skb->priority = cork->priority; skb->mark = cork->mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cf377377b52d..f6a03b418dde 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -315,7 +315,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; - ipc->priority = rt_tos2priority(ipc->tos); + ipc->sockc.priority = rt_tos2priority(ipc->tos); break; case IP_PROTOCOL: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0e9e01967ec9..4304a68d1db0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -358,7 +358,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IP); - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3d672dea9f56..993106876604 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,6 +1401,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; + cork->base.priority = ipc6->sockc.priority; sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { cork->base.flags |= IPCORK_TS_OPT_ID; @@ -1942,7 +1943,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = 
cork->base.priority; skb->mark = cork->base.mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 88b3fcacd4f9..46b8adf6e7f8 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -119,6 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; ipcm6_init_sk(&ipc6, sk); + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8476a3944a88..a45aba090aa4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); @@ -780,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = fl6.flowi6_mark; + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d766fd798ecf..7c14c449804c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1448,6 +1448,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); /* destination address check */ if (sin6) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 886c0dd47b66..f8d87d622699 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3126,7 +3126,7 @@ static int packet_snd(struct socket *sock, struct msghdr 
*msg, size_t len) skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); From cda7d5abe089cc8bd6d623cd6577627d8125d155 Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:56 +0100 Subject: [PATCH 0446/1386] selftests: net: test SO_PRIORITY ancillary data with cmsg_sender Extend cmsg_sender.c with a new option '-Q' to send SO_PRIORITY ancillary data. cmsg_so_priority.sh script added to validate SO_PRIORITY behavior by creating VLAN device with egress QoS mapping and testing packet priorities using flower filters. Verify that packets with different priorities are correctly matched and counted by filters for multiple protocols and IP versions. Reviewed-by: Willem de Bruijn Acked-by: Willem de Bruijn Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Suggested-by: Ido Schimmel Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-4-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + tools/testing/selftests/net/cmsg_sender.c | 11 +- .../testing/selftests/net/cmsg_so_priority.sh | 151 ++++++++++++++++++ 3 files changed, 162 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/cmsg_so_priority.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index cb2fc601de66..f09bd96cc978 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh TEST_PROGS += cmsg_so_mark.sh +TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += netns-name.sh TEST_PROGS += nl_netdev.py diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 876c2db02a63..bc314382e4e1 100644 
--- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -59,6 +59,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via cmsg\n" @@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -252,6 +259,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_SOCKET, SO_PRIORITY, &opt.priority); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 
+IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priority.sh test: jq is not installed." >&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else +
TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi From e45469e594b255ef8d750ed5576698743450d2ac Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:57 +0100 Subject: [PATCH 0447/1386] sock: Introduce SO_RCVPRIORITY socket option Add new socket option, SO_RCVPRIORITY, to include SO_PRIORITY in the ancillary data returned by recvmsg(). This is analogous to the existing support for SO_RCVMARK, as implemented in commit 6fd1d51cfa253 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()"). 
Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-5-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 8 ++++++++ net/socket.c | 11 +++++++++++ tools/include/uapi/asm-generic/socket.h | 2 ++ 9 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 302507bf9b5d..3df5f2dd4c0f 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -148,6 +148,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d118d4731580..22fa8f19924a 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -159,6 +159,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index d268d69bfcd2..aa9cd4b951fe 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SCM_TS_OPT_ID 0x404C +#define SO_RCVPRIORITY 0x404D + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 113cd9f353e3..5b464a568664 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -141,6 +141,8 @@ #define SCM_TS_OPT_ID 0x005a +#define SO_RCVPRIORITY 0x005b + #if !defined(__KERNEL__) diff --git a/include/net/sock.h 
b/include/net/sock.h index 316a34d6c48b..d4bdd3286e03 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -953,6 +953,7 @@ enum sock_flags { SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ + SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2660,7 +2661,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_RCVMARK)) + (1UL << SOCK_RCVMARK) |\ + (1UL << SOCK_RCVPRIORITY)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index deacfd6dd197..aa5016ff3d91 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -143,6 +143,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index a3d9941c1d32..e7bcc8952248 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1519,6 +1519,10 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; + case SO_RCVPRIORITY: + sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool); + break; + case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; @@ -1947,6 +1951,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = sock_flag(sk, SOCK_RCVMARK); break; + case SO_RCVPRIORITY: + v.val = sock_flag(sk, SOCK_RCVPRIORITY); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; diff --git a/net/socket.c b/net/socket.c index 9a117248f18f..16402b8be5a7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1008,12 +1008,23 @@ static void 
sock_recv_mark(struct msghdr *msg, struct sock *sk, } } +static void sock_recv_priority(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) { + __u32 priority = skb->priority; + + put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority); + } +} + void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); sock_recv_mark(msg, sk, skb); + sock_recv_priority(msg, sk, skb); } EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) From b299ea0069284186b0d3d54aebe87f0d195d457a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 13 Dec 2024 20:01:41 +0100 Subject: [PATCH 0448/1386] r8169: adjust version numbering for RTL8126 Adjust version numbering for RTL8126, so that it doesn't overlap with new RTL8125 versions. 
Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/6a354364-20e9-48ad-a198-468264288757@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169.h | 4 +- drivers/net/ethernet/realtek/r8169_main.c | 62 +++++++++---------- .../net/ethernet/realtek/r8169_phy_config.c | 4 +- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 8904aae41aca..00d74e76c6f2 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -69,8 +69,8 @@ enum mac_version { RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_64, - RTL_GIGA_MAC_VER_65, - RTL_GIGA_MAC_VER_66, + RTL_GIGA_MAC_VER_70, + RTL_GIGA_MAC_VER_71, RTL_GIGA_MAC_NONE }; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 6934bdee2a91..d153fa5598d5 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -140,8 +140,8 @@ static const struct { /* reserve 62 for CFG_METHOD_4 in the vendor driver */ [RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2}, [RTL_GIGA_MAC_VER_64] = {"RTL8125D", FIRMWARE_8125D_1}, - [RTL_GIGA_MAC_VER_65] = {"RTL8126A", FIRMWARE_8126A_2}, - [RTL_GIGA_MAC_VER_66] = {"RTL8126A", FIRMWARE_8126A_3}, + [RTL_GIGA_MAC_VER_70] = {"RTL8126A", FIRMWARE_8126A_2}, + [RTL_GIGA_MAC_VER_71] = {"RTL8126A", FIRMWARE_8126A_3}, }; static const struct pci_device_id rtl8169_pci_tbl[] = { @@ -1228,7 +1228,7 @@ static void rtl_writephy(struct rtl8169_private *tp, int location, int val) case RTL_GIGA_MAC_VER_31: r8168dp_2_mdio_write(tp, location, val); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_40 ... 
RTL_GIGA_MAC_VER_71: r8168g_mdio_write(tp, location, val); break; default: @@ -1243,7 +1243,7 @@ static int rtl_readphy(struct rtl8169_private *tp, int location) case RTL_GIGA_MAC_VER_28: case RTL_GIGA_MAC_VER_31: return r8168dp_2_mdio_read(tp, location); - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: return r8168g_mdio_read(tp, location); default: return r8169_mdio_read(tp, location); @@ -1574,7 +1574,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) break; case RTL_GIGA_MAC_VER_34: case RTL_GIGA_MAC_VER_37: - case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_71: r8169_mod_reg8_cond(tp, Config2, PME_SIGNAL, wolopts); break; default: @@ -2047,7 +2047,7 @@ static void rtl_set_eee_txidle_timer(struct rtl8169_private *tp) tp->tx_lpi_timer = timer_val; r8168_mac_ocp_write(tp, 0xe048, timer_val); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: tp->tx_lpi_timer = timer_val; RTL_W16(tp, EEE_TXIDLE_TIMER_8125, timer_val); break; @@ -2255,8 +2255,8 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) enum mac_version ver; } mac_info[] = { /* 8126A family. */ - { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_66 }, - { 0x7cf, 0x649, RTL_GIGA_MAC_VER_65 }, + { 0x7cf, 0x64a, RTL_GIGA_MAC_VER_71 }, + { 0x7cf, 0x649, RTL_GIGA_MAC_VER_70 }, /* 8125D family. */ { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64 }, @@ -2526,7 +2526,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_61: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST); break; - case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_71: RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST | RX_PAUSE_SLOT_ON); break; @@ -2658,7 +2658,7 @@ static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_61 ... 
RTL_GIGA_MAC_VER_61: rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); break; - case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_63 ... RTL_GIGA_MAC_VER_71: RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42); rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond_2, 100, 42); @@ -2901,7 +2901,7 @@ static void rtl_enable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_37 ... RTL_GIGA_MAC_VER_38: rtl_eri_set_bits(tp, 0xd4, 0x0c00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: r8168_mac_ocp_modify(tp, 0xc0ac, 0, 0x1f80); break; default: @@ -2915,7 +2915,7 @@ static void rtl_disable_exit_l1(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38: rtl_eri_clear_bits(tp, 0xd4, 0x1f00); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: r8168_mac_ocp_modify(tp, 0xc0ac, 0x1f80, 0); break; default: @@ -2941,8 +2941,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) rtl_mod_config5(tp, 0, ASPM_en); switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_65: - case RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_70: + case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) | INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -2953,7 +2953,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: /* reset ephy tx/rx disable timer */ r8168_mac_ocp_modify(tp, 0xe094, 0xff00, 0); /* chip can trigger L1.2 */ @@ -2965,7 +2965,7 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) } else { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_46 ... 
RTL_GIGA_MAC_VER_48: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: r8168_mac_ocp_modify(tp, 0xe092, 0x00ff, 0); break; default: @@ -2973,8 +2973,8 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable) } switch (tp->mac_version) { - case RTL_GIGA_MAC_VER_65: - case RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_70: + case RTL_GIGA_MAC_VER_71: val8 = RTL_R8(tp, INT_CFG0_8125) & ~INT_CFG0_CLKREQEN; RTL_W8(tp, INT_CFG0_8125, val8); break; @@ -3694,12 +3694,12 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) /* disable new tx descriptor format */ r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000); - if (tp->mac_version == RTL_GIGA_MAC_VER_65 || - tp->mac_version == RTL_GIGA_MAC_VER_66) + if (tp->mac_version == RTL_GIGA_MAC_VER_70 || + tp->mac_version == RTL_GIGA_MAC_VER_71) RTL_W8(tp, 0xD8, RTL_R8(tp, 0xD8) & ~0x02); - if (tp->mac_version == RTL_GIGA_MAC_VER_65 || - tp->mac_version == RTL_GIGA_MAC_VER_66) + if (tp->mac_version == RTL_GIGA_MAC_VER_70 || + tp->mac_version == RTL_GIGA_MAC_VER_71) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400); else if (tp->mac_version == RTL_GIGA_MAC_VER_63) r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200); @@ -3717,8 +3717,8 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030); r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000); r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001); - if (tp->mac_version == RTL_GIGA_MAC_VER_65 || - tp->mac_version == RTL_GIGA_MAC_VER_66) + if (tp->mac_version == RTL_GIGA_MAC_VER_70 || + tp->mac_version == RTL_GIGA_MAC_VER_71) r8168_mac_ocp_modify(tp, 0xea1c, 0x0300, 0x0000); else r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000); @@ -3837,8 +3837,8 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d, - 
[RTL_GIGA_MAC_VER_65] = rtl_hw_start_8126a, - [RTL_GIGA_MAC_VER_66] = rtl_hw_start_8126a, + [RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a, + [RTL_GIGA_MAC_VER_71] = rtl_hw_start_8126a, }; if (hw_configs[tp->mac_version]) @@ -3859,8 +3859,8 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) RTL_W32(tp, i, 0); break; case RTL_GIGA_MAC_VER_63: - case RTL_GIGA_MAC_VER_65: - case RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_70: + case RTL_GIGA_MAC_VER_71: for (i = 0xa00; i < 0xa80; i += 4) RTL_W32(tp, i, 0); RTL_W16(tp, INT_CFG1_8125, 0x0000); @@ -4092,7 +4092,7 @@ static void rtl8169_cleanup(struct rtl8169_private *tp) RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq); rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666); break; - case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_71: rtl_enable_rxdvgate(tp); fsleep(2000); break; @@ -4249,7 +4249,7 @@ static unsigned int rtl_quirk_packet_padto(struct rtl8169_private *tp, switch (tp->mac_version) { case RTL_GIGA_MAC_VER_34: - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_71: padto = max_t(unsigned int, padto, ETH_ZLEN); break; default: @@ -5267,7 +5267,7 @@ static void rtl_hw_initialize(struct rtl8169_private *tp) case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_48: rtl_hw_init_8168g(tp); break; - case RTL_GIGA_MAC_VER_61 ... RTL_GIGA_MAC_VER_66: + case RTL_GIGA_MAC_VER_61 ... 
RTL_GIGA_MAC_VER_71: rtl_hw_init_8125(tp); break; default: diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index b28b30390e84..bc498ea78034 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1162,8 +1162,8 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config, - [RTL_GIGA_MAC_VER_65] = rtl8126a_hw_phy_config, - [RTL_GIGA_MAC_VER_66] = rtl8126a_hw_phy_config, + [RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config, + [RTL_GIGA_MAC_VER_71] = rtl8126a_hw_phy_config, }; if (phy_configs[ver]) From b3593df26ab19f114d613693fa8a92ab202803d0 Mon Sep 17 00:00:00 2001 From: ChunHao Lin Date: Fri, 13 Dec 2024 20:02:58 +0100 Subject: [PATCH 0449/1386] r8169: add support for RTL8125D rev.b Add support for RTL8125D rev.b. Its XID is 0x689. It is basically based on the one with XID 0x688, but with a different firmware file. 
Signed-off-by: ChunHao Lin [hkallweit1@gmail.com: rebased after adjusted version numbering] Signed-off-by: Heiner Kallweit Reviewed-by: Simon Horman Link: https://patch.msgid.link/75e5e9ec-d01f-43ac-b0f4-e7456baf18d1@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/realtek/r8169.h | 1 + drivers/net/ethernet/realtek/r8169_main.c | 6 ++++++ drivers/net/ethernet/realtek/r8169_phy_config.c | 1 + 3 files changed, 8 insertions(+) diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h index 00d74e76c6f2..e0817f2a311a 100644 --- a/drivers/net/ethernet/realtek/r8169.h +++ b/drivers/net/ethernet/realtek/r8169.h @@ -69,6 +69,7 @@ enum mac_version { RTL_GIGA_MAC_VER_61, RTL_GIGA_MAC_VER_63, RTL_GIGA_MAC_VER_64, + RTL_GIGA_MAC_VER_65, RTL_GIGA_MAC_VER_70, RTL_GIGA_MAC_VER_71, RTL_GIGA_MAC_NONE diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index d153fa5598d5..5724f650f9c6 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -57,6 +57,7 @@ #define FIRMWARE_8125A_3 "rtl_nic/rtl8125a-3.fw" #define FIRMWARE_8125B_2 "rtl_nic/rtl8125b-2.fw" #define FIRMWARE_8125D_1 "rtl_nic/rtl8125d-1.fw" +#define FIRMWARE_8125D_2 "rtl_nic/rtl8125d-2.fw" #define FIRMWARE_8126A_2 "rtl_nic/rtl8126a-2.fw" #define FIRMWARE_8126A_3 "rtl_nic/rtl8126a-3.fw" @@ -140,6 +141,7 @@ static const struct { /* reserve 62 for CFG_METHOD_4 in the vendor driver */ [RTL_GIGA_MAC_VER_63] = {"RTL8125B", FIRMWARE_8125B_2}, [RTL_GIGA_MAC_VER_64] = {"RTL8125D", FIRMWARE_8125D_1}, + [RTL_GIGA_MAC_VER_65] = {"RTL8125D", FIRMWARE_8125D_2}, [RTL_GIGA_MAC_VER_70] = {"RTL8126A", FIRMWARE_8126A_2}, [RTL_GIGA_MAC_VER_71] = {"RTL8126A", FIRMWARE_8126A_3}, }; @@ -706,6 +708,7 @@ MODULE_FIRMWARE(FIRMWARE_8107E_2); MODULE_FIRMWARE(FIRMWARE_8125A_3); MODULE_FIRMWARE(FIRMWARE_8125B_2); MODULE_FIRMWARE(FIRMWARE_8125D_1); +MODULE_FIRMWARE(FIRMWARE_8125D_2); 
MODULE_FIRMWARE(FIRMWARE_8126A_2); MODULE_FIRMWARE(FIRMWARE_8126A_3); @@ -2259,6 +2262,7 @@ static enum mac_version rtl8169_get_mac_version(u16 xid, bool gmii) { 0x7cf, 0x649, RTL_GIGA_MAC_VER_70 }, /* 8125D family. */ + { 0x7cf, 0x689, RTL_GIGA_MAC_VER_65 }, { 0x7cf, 0x688, RTL_GIGA_MAC_VER_64 }, /* 8125B family. */ @@ -3837,6 +3841,7 @@ static void rtl_hw_config(struct rtl8169_private *tp) [RTL_GIGA_MAC_VER_61] = rtl_hw_start_8125a_2, [RTL_GIGA_MAC_VER_63] = rtl_hw_start_8125b, [RTL_GIGA_MAC_VER_64] = rtl_hw_start_8125d, + [RTL_GIGA_MAC_VER_65] = rtl_hw_start_8125d, [RTL_GIGA_MAC_VER_70] = rtl_hw_start_8126a, [RTL_GIGA_MAC_VER_71] = rtl_hw_start_8126a, }; @@ -3855,6 +3860,7 @@ static void rtl_hw_start_8125(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_64: + case RTL_GIGA_MAC_VER_65: for (i = 0xa00; i < 0xb00; i += 4) RTL_W32(tp, i, 0); break; diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c index bc498ea78034..968c8a2185a4 100644 --- a/drivers/net/ethernet/realtek/r8169_phy_config.c +++ b/drivers/net/ethernet/realtek/r8169_phy_config.c @@ -1162,6 +1162,7 @@ void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev, [RTL_GIGA_MAC_VER_61] = rtl8125a_2_hw_phy_config, [RTL_GIGA_MAC_VER_63] = rtl8125b_hw_phy_config, [RTL_GIGA_MAC_VER_64] = rtl8125d_hw_phy_config, + [RTL_GIGA_MAC_VER_65] = rtl8125d_hw_phy_config, [RTL_GIGA_MAC_VER_70] = rtl8126a_hw_phy_config, [RTL_GIGA_MAC_VER_71] = rtl8126a_hw_phy_config, }; From bf2a5a622a50b47560e4b29387b2fd4cfb33881a Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Mon, 16 Dec 2024 19:37:12 +0100 Subject: [PATCH 0450/1386] batman-adv: Map VID 0 to untagged TT VLAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VID 0 is not a valid VLAN according to "802.1Q-2011" "Table 9-2—Reserved VID values". 
It is only used to indicate "priority tag" frames which only contain priority information and no VID. The 8021q is also redirecting the priority tagged frames to the underlying interface since commit ad1afb003939 ("vlan_dev: VLAN 0 should be treated as "no vlan tag" (802.1p packet)"). But at the same time, it automatically adds the VID 0 to all devices to ensure that VID 0 is in the allowed list of the HW filter. This resulted in a VLAN 0 which was always announced in OGM messages. batman-adv should therefore not create a new batadv_softif_vlan for VID 0 and handle all VID 0 related frames using the "untagged" global/local translation tables. Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Simon Wunderlich --- net/batman-adv/main.c | 7 +++++++ net/batman-adv/soft-interface.c | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 8e0f44c71696..333e947afcce 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -637,6 +637,13 @@ unsigned short batadv_get_vid(struct sk_buff *skb, size_t header_len) vhdr = (struct vlan_ethhdr *)(skb->data + header_len); vid = ntohs(vhdr->h_vlan_TCI) & VLAN_VID_MASK; + + /* VID 0 is only used to indicate "priority tag" frames which only + * contain priority information and no VID. + */ + if (vid == 0) + return BATADV_NO_FLAGS; + vid |= BATADV_VLAN_HAS_TAG; return vid; diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 5666c268cead..822d788a5f86 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -637,6 +637,14 @@ static int batadv_interface_add_vid(struct net_device *dev, __be16 proto, if (proto != htons(ETH_P_8021Q)) return -EINVAL; + /* VID 0 is only used to indicate "priority tag" frames which only + * contain priority information and no VID. No management structures + * should be created for this VID and it should be handled like an + * untagged frame. 
+ */ + if (vid == 0) + return 0; + vid |= BATADV_VLAN_HAS_TAG; /* if a new vlan is getting created and it already exists, it means that @@ -684,6 +692,12 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto, if (proto != htons(ETH_P_8021Q)) return -EINVAL; + /* "priority tag" frames are handled like "untagged" frames + * and no softif_vlan needs to be destroyed + */ + if (vid == 0) + return 0; + vlan = batadv_softif_vlan_get(bat_priv, vid | BATADV_VLAN_HAS_TAG); if (!vlan) return -ENOENT; From 34c899af6c1a9d65aa85c765b2eecb1b9a88e8b8 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:39 +0900 Subject: [PATCH 0451/1386] af_unix: Set error only when needed in unix_stream_connect(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_stream_connect(). Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 6b1762300443..23f419f561b8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1575,12 +1575,12 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto out; } - err = -ENOMEM; - /* Allocate skb for sending to listening sock */ skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); - if (skb == NULL) + if (!skb) { + err = -ENOMEM; goto out; + } restart: /* Find listening sock. 
*/ @@ -1600,16 +1600,17 @@ restart: goto restart; } - err = -ECONNREFUSED; - if (other->sk_state != TCP_LISTEN) - goto out_unlock; - if (other->sk_shutdown & RCV_SHUTDOWN) + if (other->sk_state != TCP_LISTEN || + other->sk_shutdown & RCV_SHUTDOWN) { + err = -ECONNREFUSED; goto out_unlock; + } if (unix_recvq_full_lockless(other)) { - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; goto out_unlock; + } timeo = unix_wait_for_peer(other, timeo); From e26ee0a736bd949ce6fa51829fd0a2f6381391de Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:40 +0900 Subject: [PATCH 0452/1386] af_unix: Clean up error paths in unix_stream_connect(). The label order is weird in unix_stream_connect(), and all NULL checks are unnecessary if reordered. Let's clean up the error paths to make it easy to set a drop reason for each path. While at it, a comment with the old style is updated. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 23f419f561b8..21e17e739f88 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1563,15 +1563,14 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); /* First of all allocate resources. - If we will make it after state is locked, - we will have to recheck all again in any case. + * If we will make it after state is locked, + * we will have to recheck all again in any case. 
*/ /* create new sock for complete connection */ newsk = unix_create1(net, NULL, 0, sock->type); if (IS_ERR(newsk)) { err = PTR_ERR(newsk); - newsk = NULL; goto out; } @@ -1579,7 +1578,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL); if (!skb) { err = -ENOMEM; - goto out; + goto out_free_sk; } restart: @@ -1587,8 +1586,7 @@ restart: other = unix_find_other(net, sunaddr, addr_len, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); - other = NULL; - goto out; + goto out_free_skb; } unix_state_lock(other); @@ -1613,11 +1611,12 @@ restart: } timeo = unix_wait_for_peer(other, timeo); + sock_put(other); err = sock_intr_errno(timeo); if (signal_pending(current)) - goto out; - sock_put(other); + goto out_free_skb; + goto restart; } @@ -1702,15 +1701,13 @@ restart: return 0; out_unlock: - if (other) - unix_state_unlock(other); - -out: + unix_state_unlock(other); + sock_put(other); +out_free_skb: kfree_skb(skb); - if (newsk) - unix_release_sock(newsk, 0); - if (other) - sock_put(other); +out_free_sk: + unix_release_sock(newsk, 0); +out: return err; } From 6c444255b193b5b9c5a18c3784d960e10e1833a2 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:41 +0900 Subject: [PATCH 0453/1386] af_unix: Set error only when needed in unix_stream_sendmsg(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_stream_sendmsg(). 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 21e17e739f88..660d8b8130ca 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2254,8 +2254,8 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, wait_for_unix_gc(scm.fp); - err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { + err = -EOPNOTSUPP; #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (len) len--; @@ -2268,10 +2268,11 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { - err = -ENOTCONN; other = unix_peer(sk); - if (!other) + if (!other) { + err = -ENOTCONN; goto out_err; + } } if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) From d460b04bc452cf15810b79c15381fffd9d201915 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:42 +0900 Subject: [PATCH 0454/1386] af_unix: Clean up error paths in unix_stream_sendmsg(). If we move send_sig() to the SEND_SHUTDOWN check before the while loop, then we can reuse the same kfree_skb() after the pipe_err_free label. Let's gather the scattered kfree_skb()s in error paths. While at it, some style issues are fixed, and the pipe_err_free label is renamed to out_pipe to match other label names. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 660d8b8130ca..d30bcd50527e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2275,8 +2275,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } } - if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) - goto pipe_err; + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) { + if (!(msg->msg_flags & MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + + err = -EPIPE; + goto out_err; + } while (sent < len) { size = len - sent; @@ -2305,20 +2310,18 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Only send the fds in the first buffer */ err = unix_scm_to_skb(&scm, skb, !fds_sent); - if (err < 0) { - kfree_skb(skb); - goto out_err; - } + if (err < 0) + goto out_free; + fds_sent = true; if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) { skb->ip_summed = CHECKSUM_UNNECESSARY; err = skb_splice_from_iter(skb, &msg->msg_iter, size, sk->sk_allocation); - if (err < 0) { - kfree_skb(skb); - goto out_err; - } + if (err < 0) + goto out_free; + size = err; refcount_add(size, &sk->sk_wmem_alloc); } else { @@ -2326,17 +2329,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, skb->data_len = data_len; skb->len = size; err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); - if (err) { - kfree_skb(skb); - goto out_err; - } + if (err) + goto out_free; } unix_state_lock(other); if (sock_flag(other, SOCK_DEAD) || (other->sk_shutdown & RCV_SHUTDOWN)) - goto pipe_err_free; + goto out_pipe; maybe_add_creds(skb, sock, other); scm_stat_add(other, skb); @@ -2359,13 +2360,13 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return sent; -pipe_err_free: +out_pipe: unix_state_unlock(other); - kfree_skb(skb); -pipe_err: - if (sent == 0 && 
!(msg->msg_flags&MSG_NOSIGNAL)) + if (!sent && !(msg->msg_flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); err = -EPIPE; +out_free: + kfree_skb(skb); out_err: scm_destroy(&scm); return sent ? : err; From 001a25088c35ab69bd4b2f208e47eb8acbce6353 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:43 +0900 Subject: [PATCH 0455/1386] af_unix: Set error only when needed in unix_dgram_sendmsg(). We will introduce skb drop reason for AF_UNIX, then we need to set an errno and a drop reason for each path. Let's set an error only when it's needed in unix_dgram_sendmsg(). Then, we need not (re)set 0 to err. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d30bcd50527e..07d6fba99a7c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1978,9 +1978,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, wait_for_unix_gc(scm.fp); - err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) + if (msg->msg_flags & MSG_OOB) { + err = -EOPNOTSUPP; goto out; + } if (msg->msg_namelen) { err = unix_validate_addr(sunaddr, msg->msg_namelen); @@ -1995,10 +1996,11 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } else { sunaddr = NULL; - err = -ENOTCONN; other = unix_peer_get(sk); - if (!other) + if (!other) { + err = -ENOTCONN; goto out; + } } if ((test_bit(SOCK_PASSCRED, &sock->flags) || @@ -2009,9 +2011,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - err = -EMSGSIZE; - if (len > READ_ONCE(sk->sk_sndbuf) - 32) + if (len > READ_ONCE(sk->sk_sndbuf) - 32) { + err = -EMSGSIZE; goto out; + } if (len > SKB_MAX_ALLOC) { data_len = min_t(size_t, @@ -2043,9 +2046,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - err = -ECONNRESET; - if 
(sunaddr == NULL) + if (!sunaddr) { + err = -ECONNRESET; goto out_free; + } other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); @@ -2065,9 +2069,11 @@ restart: sk_locked = 0; unix_state_lock(other); restart_locked: - err = -EPERM; - if (!unix_may_send(sk, other)) + + if (!unix_may_send(sk, other)) { + err = -EPERM; goto out_unlock; + } if (unlikely(sock_flag(other, SOCK_DEAD))) { /* @@ -2080,7 +2086,6 @@ restart_locked: if (!sk_locked) unix_state_lock(sk); - err = 0; if (sk->sk_type == SOCK_SEQPACKET) { /* We are here only when racing with unix_release_sock() * is clearing @other. Never change state to TCP_CLOSE @@ -2108,9 +2113,10 @@ restart_locked: goto restart; } - err = -EPIPE; - if (other->sk_shutdown & RCV_SHUTDOWN) + if (other->sk_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; goto out_unlock; + } if (sk->sk_type != SOCK_SEQPACKET) { err = security_unix_may_send(sk->sk_socket, other->sk_socket); From f4dd63165b08ba3b72117973d5daea456f36377d Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:44 +0900 Subject: [PATCH 0456/1386] af_unix: Move !sunaddr case in unix_dgram_sendmsg(). When other is NULL in unix_dgram_sendmsg(), we check if sunaddr is NULL before looking up a receiver socket. There are three paths going through the check, but it's always false for 2 out of the 3 paths: the first socket lookup and the second 'goto restart'. The condition can be true for the first 'goto restart' only when SOCK_DEAD is flagged for the socket found with msg->msg_name. Let's move the check to the single appropriate path. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 07d6fba99a7c..111f95384990 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2046,11 +2046,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - if (!sunaddr) { - err = -ECONNRESET; - goto out_free; - } - other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { @@ -2105,6 +2100,9 @@ restart_locked: err = -ECONNREFUSED; } else { unix_state_unlock(sk); + + if (!sunaddr) + err = -ECONNRESET; } other = NULL; From 3c05329a2abe312ed85a60a325b930063f61e817 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:45 +0900 Subject: [PATCH 0457/1386] af_unix: Use msg->{msg_name,msg_namelen} in unix_dgram_sendmsg(). In unix_dgram_sendmsg(), we use a local variable sunaddr pointing to NULL or msg->msg_name based on msg->msg_namelen. Let's remove sunaddr and simplify the usage. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 111f95384990..ae74fdcf5dcd 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1962,7 +1962,6 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); struct sock *sk = sock->sk, *other = NULL; struct unix_sock *u = unix_sk(sk); struct scm_cookie scm; @@ -1984,7 +1983,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } if (msg->msg_namelen) { - err = unix_validate_addr(sunaddr, msg->msg_namelen); + err = unix_validate_addr(msg->msg_name, msg->msg_namelen); if (err) goto out; @@ -1995,7 +1994,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (err) goto out; } else { - sunaddr = NULL; other = unix_peer_get(sk); if (!other) { err = -ENOTCONN; @@ -2046,8 +2044,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, restart: if (!other) { - other = unix_find_other(sock_net(sk), sunaddr, msg->msg_namelen, - sk->sk_type); + other = unix_find_other(sock_net(sk), msg->msg_name, + msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); other = NULL; @@ -2101,7 +2099,7 @@ restart_locked: } else { unix_state_unlock(sk); - if (!sunaddr) + if (!msg->msg_namelen) err = -ECONNRESET; } From a700b43358ccc3c5ae857eeea37ff50ce0529b1c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:46 +0900 Subject: [PATCH 0458/1386] af_unix: Split restart label in unix_dgram_sendmsg(). There are two paths jumping to the restart label in unix_dgram_sendmsg(). One requires another lookup and sk_filter(), but the other doesn't. Let's split the label to make each flow more straightforward. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ae74fdcf5dcd..513d0fd12e6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2042,8 +2042,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); -restart: if (!other) { +lookup: other = unix_find_other(sock_net(sk), msg->msg_name, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { @@ -2059,6 +2059,7 @@ restart: goto out_free; } +restart: sk_locked = 0; unix_state_lock(other); restart_locked: @@ -2106,7 +2107,8 @@ restart_locked: other = NULL; if (err) goto out_free; - goto restart; + + goto lookup; } if (other->sk_shutdown & RCV_SHUTDOWN) { From 689c398885cc27d2a5bb2ad5d70324107d4a78ec Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:47 +0900 Subject: [PATCH 0459/1386] af_unix: Defer sock_put() to clean up path in unix_dgram_sendmsg(). When other has SOCK_DEAD in unix_dgram_sendmsg(), we call sock_put() for it first and then set NULL to other before jumping to the error path. This is to skip sock_put() in the error path. Let's not set NULL to other and defer the sock_put() to the error path to clean up the labels later. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 513d0fd12e6a..b8adfb41d11b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2075,7 +2075,6 @@ restart_locked: * datagram error */ unix_state_unlock(other); - sock_put(other); if (!sk_locked) unix_state_lock(sk); @@ -2104,7 +2103,6 @@ restart_locked: err = -ECONNRESET; } - other = NULL; if (err) goto out_free; From 106d979b85e575b0ab10224fcde5c3eb94566e05 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:48 +0900 Subject: [PATCH 0460/1386] af_unix: Clean up SOCK_DEAD error paths in unix_dgram_sendmsg(). When other has SOCK_DEAD in unix_dgram_sendmsg(), we hold unix_state_lock() for the sender socket first. However, we do not need it for sk->sk_type. Let's move the lock down a bit. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index b8adfb41d11b..22c689b0044f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2070,23 +2070,23 @@ restart_locked: } if (unlikely(sock_flag(other, SOCK_DEAD))) { - /* - * Check with 1003.1g - what should - * datagram error - */ - unix_state_unlock(other); + /* Check with 1003.1g - what should datagram error */ - if (!sk_locked) - unix_state_lock(sk); + unix_state_unlock(other); if (sk->sk_type == SOCK_SEQPACKET) { /* We are here only when racing with unix_release_sock() * is clearing @other. Never change state to TCP_CLOSE * unlike SOCK_DGRAM wants. 
*/ - unix_state_unlock(sk); err = -EPIPE; - } else if (unix_peer(sk) == other) { + goto out_free; + } + + if (!sk_locked) + unix_state_lock(sk); + + if (unix_peer(sk) == other) { unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); @@ -2096,15 +2096,15 @@ restart_locked: unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; - } else { - unix_state_unlock(sk); - - if (!msg->msg_namelen) - err = -ECONNRESET; + goto out_free; } - if (err) + unix_state_unlock(sk); + + if (!msg->msg_namelen) { + err = -ECONNRESET; goto out_free; + } goto lookup; } From 62c6db251e667e8a240dc8209c00313240120fd6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:49 +0900 Subject: [PATCH 0461/1386] af_unix: Clean up error paths in unix_dgram_sendmsg(). The error path is complicated in unix_dgram_sendmsg() because there are two timings when other could be non-NULL: when it's fetched from unix_peer_get() and when it's looked up by unix_find_other(). Let's move unix_peer_get() to the else branch for unix_find_other() and clean up the error paths. 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 22c689b0044f..239ce2f77d55 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1993,12 +1993,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, NULL); if (err) goto out; - } else { - other = unix_peer_get(sk); - if (!other) { - err = -ENOTCONN; - goto out; - } } if ((test_bit(SOCK_PASSCRED, &sock->flags) || @@ -2026,7 +2020,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, skb = sock_alloc_send_pskb(sk, len - data_len, data_len, msg->msg_flags & MSG_DONTWAIT, &err, PAGE_ALLOC_COSTLY_ORDER); - if (skb == NULL) + if (!skb) goto out; err = unix_scm_to_skb(&scm, skb, true); @@ -2042,13 +2036,18 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - if (!other) { + if (msg->msg_namelen) { lookup: other = unix_find_other(sock_net(sk), msg->msg_name, msg->msg_namelen, sk->sk_type); if (IS_ERR(other)) { err = PTR_ERR(other); - other = NULL; + goto out_free; + } + } else { + other = unix_peer_get(sk); + if (!other) { + err = -ENOTCONN; goto out_free; } } @@ -2056,7 +2055,7 @@ lookup: if (sk_filter(other, skb) < 0) { /* Toss the packet but do not return any error to the sender */ err = len; - goto out_free; + goto out_sock_put; } restart: @@ -2080,7 +2079,7 @@ restart_locked: * unlike SOCK_DGRAM wants. 
*/ err = -EPIPE; - goto out_free; + goto out_sock_put; } if (!sk_locked) @@ -2096,14 +2095,14 @@ restart_locked: unix_dgram_disconnected(sk, other); sock_put(other); err = -ECONNREFUSED; - goto out_free; + goto out_sock_put; } unix_state_unlock(sk); if (!msg->msg_namelen) { err = -ECONNRESET; - goto out_free; + goto out_sock_put; } goto lookup; @@ -2132,7 +2131,7 @@ restart_locked: err = sock_intr_errno(timeo); if (signal_pending(current)) - goto out_free; + goto out_sock_put; goto restart; } @@ -2173,11 +2172,11 @@ out_unlock: if (sk_locked) unix_state_unlock(sk); unix_state_unlock(other); +out_sock_put: + sock_put(other); out_free: kfree_skb(skb); out: - if (other) - sock_put(other); scm_destroy(&scm); return err; } From bf61ffeb9cc48ee7d1945f26578291da5d9305e4 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Fri, 13 Dec 2024 20:08:50 +0900 Subject: [PATCH 0462/1386] af_unix: Remove unix_our_peer(). unix_our_peer() is used only in unix_may_send(). Let's inline it in unix_may_send(). 
Signed-off-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- net/unix/af_unix.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 239ce2f77d55..8f2b605ce5b3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -286,14 +286,9 @@ static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) } #endif /* CONFIG_SECURITY_NETWORK */ -static inline int unix_our_peer(struct sock *sk, struct sock *osk) -{ - return unix_peer(osk) == sk; -} - static inline int unix_may_send(struct sock *sk, struct sock *osk) { - return unix_peer(osk) == NULL || unix_our_peer(sk, osk); + return !unix_peer(osk) || unix_peer(osk) == sk; } static inline int unix_recvq_full_lockless(const struct sock *sk) From ff9f17ce2e53887e74fc0e72711ece42526836ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 14 Dec 2024 17:50:59 +0100 Subject: [PATCH 0463/1386] net/sched: Add drop reasons for AQM-based qdiscs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that we have generic QDISC_CONGESTED and QDISC_OVERLIMIT drop reasons, let's have all the qdiscs that contain an AQM apply them consistently when dropping packets. 
Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241214-fq-codel-drop-reasons-v1-1-2a814e884c37@redhat.com Signed-off-by: Paolo Abeni --- net/sched/sch_codel.c | 5 +++-- net/sched/sch_fq_codel.c | 3 ++- net/sched/sch_fq_pie.c | 6 ++++-- net/sched/sch_gred.c | 4 ++-- net/sched/sch_pie.c | 5 ++++- net/sched/sch_red.c | 4 +++- net/sched/sch_sfb.c | 4 +++- 7 files changed, 21 insertions(+), 10 deletions(-) diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 3e8d4fe4d91e..81189d02fee7 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -52,7 +52,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } @@ -89,7 +89,8 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } q = qdisc_priv(sch); q->drop_overlimit++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + SKB_DROP_REASON_QDISC_OVERLIMIT); } static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 4f908c11ba95..799f5397ad4c 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -168,6 +168,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, skb = dequeue_head(flow); len += qdisc_pkt_len(skb); mem += get_codel_cb(skb)->mem_usage; + tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); __qdisc_drop(skb, to_free); } while (++i < max_packets && len < threshold); @@ -274,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QDISC_CONGESTED); qdisc_qstats_drop(sch); } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index c38f33ff80bd..93c36afbf576 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -130,6 +130,7 @@ static inline 
void flow_queue_add(struct fq_pie_flow *flow, static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; int ret; @@ -161,6 +162,8 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->overmemory++; } + reason = SKB_DROP_REASON_QDISC_CONGESTED; + if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, sel_flow->backlog, skb->len)) { enqueue = true; @@ -198,8 +201,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; sel_flow->vars.accu_prob = 0; - __qdisc_drop(skb, to_free); - qdisc_qstats_drop(sch); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; } diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 7d2151c62c4a..ab6234b4fcd5 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -251,10 +251,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stats.pdrop++; drop: - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); congestion_drop: - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); return NET_XMIT_CN; } diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index b3dcb845b327..bb1fa9aa530b 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -85,6 +85,7 @@ EXPORT_SYMBOL_GPL(pie_drop_early); static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct pie_sched_data *q = qdisc_priv(sch); bool enqueue = false; @@ -93,6 +94,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, goto out; } + reason = SKB_DROP_REASON_QDISC_CONGESTED; + if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog, 
skb->len)) { enqueue = true; @@ -121,7 +124,7 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, out: q->stats.dropped++; q->vars.accu_prob = 0; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, reason); } static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 6029bc29b51e..ef8a2afed26b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -70,6 +70,7 @@ static int red_use_nodrop(struct red_sched_data *q) static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED; struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; unsigned int len; @@ -107,6 +108,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case RED_HARD_MARK: + reason = SKB_DROP_REASON_QDISC_OVERLIMIT; qdisc_qstats_overlimit(sch); if (red_use_harddrop(q) || !red_use_ecn(q)) { q->stats.forced_drop++; @@ -143,7 +145,7 @@ congestion_drop: if (!skb) return NET_XMIT_CN | ret; - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, reason); return NET_XMIT_CN; } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index b717e15a3a17..d2835f1168e1 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -280,6 +280,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { + enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; struct sfb_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; @@ -380,6 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } r = get_random_u16() & SFB_MAX_PROB; + reason = SKB_DROP_REASON_QDISC_CONGESTED; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { @@ -414,7 +416,7 @@ enqueue: return ret; drop: - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, 
sch, to_free, reason); return NET_XMIT_CN; other_drop: if (ret & __NET_XMIT_BYPASS) From d22f955cc2cb9684dd45396f974101f288869485 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Sat, 14 Dec 2024 19:43:06 +0000 Subject: [PATCH 0464/1386] rust: net::phy scope ThisModule usage in the module_phy_driver macro Similar to the use of $crate::Module, ThisModule should be referred to as $crate::ThisModule in the macro evaluation. The reason the macro previously did not cause any errors is because all the users of the macro would use kernel::prelude::*, bringing ThisModule into scope. Signed-off-by: Rahul Rameshbabu Reviewed-by: FUJITA Tomonori Reviewed-by: Alice Ryhl Link: https://patch.msgid.link/20241214194242.19505-1-sergeantsagara@protonmail.com Signed-off-by: Paolo Abeni --- rust/kernel/net/phy.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/kernel/net/phy.rs b/rust/kernel/net/phy.rs index b89c681d97c0..00c3100f5ebd 100644 --- a/rust/kernel/net/phy.rs +++ b/rust/kernel/net/phy.rs @@ -837,7 +837,7 @@ impl DeviceMask { /// [::kernel::net::phy::create_phy_driver::()]; /// /// impl ::kernel::Module for Module { -/// fn init(module: &'static ThisModule) -> Result { +/// fn init(module: &'static ::kernel::ThisModule) -> Result { /// let drivers = unsafe { &mut DRIVERS }; /// let mut reg = ::kernel::net::phy::Registration::register( /// module, @@ -903,7 +903,7 @@ macro_rules! module_phy_driver { [$($crate::net::phy::create_phy_driver::<$driver>()),+]; impl $crate::Module for Module { - fn init(module: &'static ThisModule) -> Result { + fn init(module: &'static $crate::ThisModule) -> Result { // SAFETY: The anonymous constant guarantees that nobody else can access // the `DRIVERS` static. The array is used only in the C side. 
let drivers = unsafe { &mut DRIVERS }; From 20d00cfae627f048560c46ba5849011a34515103 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 16 Dec 2024 15:15:30 +0100 Subject: [PATCH 0465/1386] checkpatch: don't complain on _Generic() use Improve CamelCase recognition logic to avoid reporting on _Generic() use. Other C keywords, such as _Bool, are intentionally omitted, as those should be rather avoided in new source code. Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Signed-off-by: Mateusz Polchlopek Acked-by: Joe Perches Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- scripts/checkpatch.pl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 9eed3683ad76..a2066a6c9dd8 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5843,6 +5843,8 @@ sub process { #CamelCase if ($var !~ /^$Constant$/ && $var =~ /[A-Z][a-z]|[a-z][A-Z]/ && +#Ignore C keywords + $var !~ /^_Generic$/ && #Ignore some autogenerated defines and enum values $var !~ /^(?:[A-Z]+_){1,5}[A-Z]{1,3}[a-z]/ && #Ignore Page variants From 346947223bacf96155f603528823a60b18b92d9a Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 16 Dec 2024 15:15:31 +0100 Subject: [PATCH 0466/1386] devlink: add devlink_fmsg_put() macro Add devlink_fmsg_put() that dispatches based on the type of the value to put, example: bool -> devlink_fmsg_bool_pair_put(). 
Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/net/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index fbb9a2668e24..b5e1427ea4d7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1261,6 +1261,17 @@ enum devlink_trap_group_generic_id { .min_burst = _min_burst, \ } +#define devlink_fmsg_put(fmsg, name, value) ( \ + _Generic((value), \ + bool : devlink_fmsg_bool_pair_put, \ + u8 : devlink_fmsg_u8_pair_put, \ + u16 : devlink_fmsg_u32_pair_put, \ + u32 : devlink_fmsg_u32_pair_put, \ + u64 : devlink_fmsg_u64_pair_put, \ + char * : devlink_fmsg_string_pair_put, \ + const char * : devlink_fmsg_string_pair_put) \ + (fmsg, name, (value))) + enum { /* device supports reload operations */ DEVLINK_F_RELOAD = 1UL << 0, From 3dbfde7f6bc7b8efff26e3e98fdd8cba20287da7 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Mon, 16 Dec 2024 15:15:32 +0100 Subject: [PATCH 0467/1386] devlink: add devlink_fmsg_dump_skb() function Add devlink_fmsg_dump_skb() function that adds some diagnostic information about skb (like length, pkt type, MAC, etc.) to the devlink fmsg mechanism using a bunch of devlink_fmsg_put() function calls.
Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/net/devlink.h | 2 ++ net/devlink/health.c | 67 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index b5e1427ea4d7..58e33959c852 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1268,6 +1268,7 @@ enum devlink_trap_group_generic_id { u16 : devlink_fmsg_u32_pair_put, \ u32 : devlink_fmsg_u32_pair_put, \ u64 : devlink_fmsg_u64_pair_put, \ + int : devlink_fmsg_u32_pair_put, \ char * : devlink_fmsg_string_pair_put, \ const char * : devlink_fmsg_string_pair_put) \ (fmsg, name, (value))) @@ -2005,6 +2006,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb); #else diff --git a/net/devlink/health.c b/net/devlink/health.c index b8d3084e6fe0..57db6799722a 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1238,3 +1238,70 @@ int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, return reporter->ops->test(reporter, info->extack); } + +/** + * devlink_fmsg_dump_skb - Dump sk_buffer structure + * @fmsg: devlink formatted message pointer + * @skb: pointer to skb + * + * Dump diagnostic information about sk_buff structure, like headroom, length, + * tailroom, MAC, etc. 
+ */ +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct sock *sk = skb->sk; + bool has_mac, has_trans; + + has_mac = skb_mac_header_was_set(skb); + has_trans = skb_transport_header_was_set(skb); + + devlink_fmsg_pair_nest_start(fmsg, "skb"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "actual len", skb->len); + devlink_fmsg_put(fmsg, "head len", skb_headlen(skb)); + devlink_fmsg_put(fmsg, "data len", skb->data_len); + devlink_fmsg_put(fmsg, "tail len", skb_tailroom(skb)); + devlink_fmsg_put(fmsg, "MAC", has_mac ? skb->mac_header : -1); + devlink_fmsg_put(fmsg, "MAC len", + has_mac ? skb_mac_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "network hdr", skb->network_header); + devlink_fmsg_put(fmsg, "network hdr len", + has_trans ? skb_network_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "transport hdr", + has_trans ? skb->transport_header : -1); + devlink_fmsg_put(fmsg, "csum", (__force u32)skb->csum); + devlink_fmsg_put(fmsg, "csum_ip_summed", (u8)skb->ip_summed); + devlink_fmsg_put(fmsg, "csum_complete_sw", !!skb->csum_complete_sw); + devlink_fmsg_put(fmsg, "csum_valid", !!skb->csum_valid); + devlink_fmsg_put(fmsg, "csum_level", (u8)skb->csum_level); + devlink_fmsg_put(fmsg, "sw_hash", !!skb->sw_hash); + devlink_fmsg_put(fmsg, "l4_hash", !!skb->l4_hash); + devlink_fmsg_put(fmsg, "proto", ntohs(skb->protocol)); + devlink_fmsg_put(fmsg, "pkt_type", (u8)skb->pkt_type); + devlink_fmsg_put(fmsg, "iif", skb->skb_iif); + + if (sk) { + devlink_fmsg_pair_nest_start(fmsg, "sk"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "family", sk->sk_type); + devlink_fmsg_put(fmsg, "type", sk->sk_type); + devlink_fmsg_put(fmsg, "proto", sk->sk_protocol); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + } + + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + devlink_fmsg_pair_nest_start(fmsg, "shinfo"); + 
devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "tx_flags", sh->tx_flags); + devlink_fmsg_put(fmsg, "nr_frags", sh->nr_frags); + devlink_fmsg_put(fmsg, "gso_size", sh->gso_size); + devlink_fmsg_put(fmsg, "gso_type", sh->gso_type); + devlink_fmsg_put(fmsg, "gso_segs", sh->gso_segs); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_dump_skb); From 2846fe5614ac15117fddaa45b86e7e77d91dd569 Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 16 Dec 2024 15:15:33 +0100 Subject: [PATCH 0468/1386] ice: rename devlink_port.[ch] to port.[ch] Drop "devlink_" prefix from files that sit in devlink/. I'm going to add more files there, and repeating "devlink" does not feel good. This is also the scheme used in most other places, most notably the devlink core files are named like that. devlink.[ch] stays as is. Reviewed-by: Kalesh AP Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 2 +- drivers/net/ethernet/intel/ice/devlink/devlink.c | 2 +- .../net/ethernet/intel/ice/devlink/{devlink_port.c => port.c} | 2 +- .../net/ethernet/intel/ice/devlink/{devlink_port.h => port.h} | 0 drivers/net/ethernet/intel/ice/ice_eswitch.h | 2 +- drivers/net/ethernet/intel/ice/ice_main.c | 2 +- drivers/net/ethernet/intel/ice/ice_repr.c | 2 +- drivers/net/ethernet/intel/ice/ice_sf_eth.c | 2 +- 8 files changed, 7 insertions(+), 7 deletions(-) rename drivers/net/ethernet/intel/ice/devlink/{devlink_port.c => port.c} (99%) rename drivers/net/ethernet/intel/ice/devlink/{devlink_port.h => port.h} (100%) diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 3307d551f431..56aa23aee472 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -32,7 +32,7 @@ ice-y := ice_main.o \ ice_parser_rt.o \ ice_idc.o \ devlink/devlink.o \ - devlink/devlink_port.o \ + devlink/port.o \ ice_sf_eth.o \ 
ice_sf_vsi_vlan_ops.o \ ice_ddp.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index 415445cefdb2..1b10682c00b8 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -6,7 +6,7 @@ #include "ice.h" #include "ice_lib.h" #include "devlink.h" -#include "devlink_port.h" +#include "port.h" #include "ice_eswitch.h" #include "ice_fw_update.h" #include "ice_dcb_lib.h" diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/port.c similarity index 99% rename from drivers/net/ethernet/intel/ice/devlink/devlink_port.c rename to drivers/net/ethernet/intel/ice/devlink/port.c index c6779d9dffff..767419a67fef 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/port.c @@ -5,7 +5,7 @@ #include "ice.h" #include "devlink.h" -#include "devlink_port.h" +#include "port.h" #include "ice_lib.h" #include "ice_fltr.h" diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h b/drivers/net/ethernet/intel/ice/devlink/port.h similarity index 100% rename from drivers/net/ethernet/intel/ice/devlink/devlink_port.h rename to drivers/net/ethernet/intel/ice/devlink/port.h diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h index ac7db100e2cd..5c7dcf21b222 100644 --- a/drivers/net/ethernet/intel/ice/ice_eswitch.h +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -5,7 +5,7 @@ #define _ICE_ESWITCH_H_ #include -#include "devlink/devlink_port.h" +#include "devlink/port.h" #ifdef CONFIG_ICE_SWITCHDEV void ice_eswitch_detach_vf(struct ice_pf *pf, struct ice_vf *vf); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 0ab35607e5d5..d641dd8b8184 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ 
-14,7 +14,7 @@ #include "ice_dcb_lib.h" #include "ice_dcb_nl.h" #include "devlink/devlink.h" -#include "devlink/devlink_port.h" +#include "devlink/port.h" #include "ice_sf_eth.h" #include "ice_hwmon.h" /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c index 970a99a52bf1..fb7a1b9a4313 100644 --- a/drivers/net/ethernet/intel/ice/ice_repr.c +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -4,7 +4,7 @@ #include "ice.h" #include "ice_eswitch.h" #include "devlink/devlink.h" -#include "devlink/devlink_port.h" +#include "devlink/port.h" #include "ice_sriov.h" #include "ice_tc_lib.h" #include "ice_dcb_lib.h" diff --git a/drivers/net/ethernet/intel/ice/ice_sf_eth.c b/drivers/net/ethernet/intel/ice/ice_sf_eth.c index 75d7147e1c01..1a2c94375ca7 100644 --- a/drivers/net/ethernet/intel/ice/ice_sf_eth.c +++ b/drivers/net/ethernet/intel/ice/ice_sf_eth.c @@ -5,8 +5,8 @@ #include "ice_txrx.h" #include "ice_fltr.h" #include "ice_sf_eth.h" -#include "devlink/devlink_port.h" #include "devlink/devlink.h" +#include "devlink/port.h" static const struct net_device_ops ice_sf_netdev_ops = { .ndo_open = ice_open, From 2a82874a3b7be3f424eb6e94cd4f225e928efe2a Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 16 Dec 2024 15:15:34 +0100 Subject: [PATCH 0469/1386] ice: add Tx hang devlink health reporter Add Tx hang devlink health reporter, see struct ice_tx_hang_event to see what exactly is reported. For now dump descriptors with little metadata and skb diagnostic information. 
Reviewed-by: Igor Bagnucki Reviewed-by: Wojciech Drewek Co-developed-by: Mateusz Polchlopek Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/Makefile | 1 + .../net/ethernet/intel/ice/devlink/health.c | 192 ++++++++++++++++++ .../net/ethernet/intel/ice/devlink/health.h | 47 +++++ drivers/net/ethernet/intel/ice/ice.h | 2 + drivers/net/ethernet/intel/ice/ice_main.c | 18 +- 5 files changed, 255 insertions(+), 5 deletions(-) create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.c create mode 100644 drivers/net/ethernet/intel/ice/devlink/health.h diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 56aa23aee472..9e0d9f710441 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -32,6 +32,7 @@ ice-y := ice_main.o \ ice_parser_rt.o \ ice_idc.o \ devlink/devlink.o \ + devlink/health.o \ devlink/port.o \ ice_sf_eth.o \ ice_sf_vsi_vlan_ops.o \ diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c new file mode 100644 index 000000000000..984d910fc41d --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024, Intel Corporation. 
*/ + +#include "health.h" +#include "ice.h" + +#define ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, obj, name) \ + devlink_fmsg_put(fmsg, #name, (obj)->name) + +/** + * ice_devlink_health_report - boilerplate to call given @reporter + * + * @reporter: devlink health reporter to call, do nothing on NULL + * @msg: message to pass up, "event name" is fine + * @priv_ctx: typically some event struct + */ +static void ice_devlink_health_report(struct devlink_health_reporter *reporter, + const char *msg, void *priv_ctx) +{ + if (!reporter) + return; + + /* We do not do auto recovering, so return value of the below function + * will always be 0, thus we do ignore it. + */ + devlink_health_report(reporter, msg, priv_ctx); +} + +/** + * ice_fmsg_put_ptr - put hex value of pointer into fmsg + * + * @fmsg: devlink fmsg under construction + * @name: name to pass + * @ptr: 64 bit value to print as hex and put into fmsg + */ +static void ice_fmsg_put_ptr(struct devlink_fmsg *fmsg, const char *name, + void *ptr) +{ + char buf[sizeof(ptr) * 3]; + + sprintf(buf, "%p", ptr); + devlink_fmsg_put(fmsg, name, buf); +} + +struct ice_tx_hang_event { + u32 head; + u32 intr; + u16 vsi_num; + u16 queue; + u16 next_to_clean; + u16 next_to_use; + struct ice_tx_ring *tx_ring; +}; + +static int ice_tx_hang_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_tx_hang_event *event = priv_ctx; + struct sk_buff *skb; + + if (!event) + return 0; + + skb = event->tx_ring->tx_buf->skb; + devlink_fmsg_obj_nest_start(fmsg); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, head); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, intr); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, vsi_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, queue); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_clean); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, event, next_to_use); + devlink_fmsg_put(fmsg, "irq-mapping", event->tx_ring->q_vector->name); + 
ice_fmsg_put_ptr(fmsg, "desc-ptr", event->tx_ring->desc); + ice_fmsg_put_ptr(fmsg, "dma-ptr", (void *)(long)event->tx_ring->dma); + ice_fmsg_put_ptr(fmsg, "skb-ptr", skb); + devlink_fmsg_binary_pair_put(fmsg, "desc", event->tx_ring->desc, + event->tx_ring->count * sizeof(struct ice_tx_desc)); + devlink_fmsg_dump_skb(fmsg, skb); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + + buf->tx_ring = tx_ring; + buf->vsi_num = vsi_num; + buf->head = head; + buf->intr = intr; +} + +void ice_report_tx_hang(struct ice_pf *pf) +{ + struct ice_health_tx_hang_buf *buf = &pf->health_reporters.tx_hang_buf; + struct ice_tx_ring *tx_ring = buf->tx_ring; + + struct ice_tx_hang_event ev = { + .head = buf->head, + .intr = buf->intr, + .vsi_num = buf->vsi_num, + .queue = tx_ring->q_index, + .next_to_clean = tx_ring->next_to_clean, + .next_to_use = tx_ring->next_to_use, + .tx_ring = tx_ring, + }; + + ice_devlink_health_report(pf->health_reporters.tx_hang, "Tx hang", &ev); +} + +static struct devlink_health_reporter * +ice_init_devlink_rep(struct ice_pf *pf, + const struct devlink_health_reporter_ops *ops) +{ + struct devlink *devlink = priv_to_devlink(pf); + struct devlink_health_reporter *rep; + const u64 graceful_period = 0; + + rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + if (IS_ERR(rep)) { + struct device *dev = ice_pf_to_dev(pf); + + dev_err(dev, "failed to create devlink %s health report er", + ops->name); + return NULL; + } + return rep; +} + +#define ICE_DEFINE_HEALTH_REPORTER_OPS(_name) \ + static const struct devlink_health_reporter_ops ice_ ## _name ## _reporter_ops = { \ + .name = #_name, \ + .dump = ice_ ## _name ## _reporter_dump, \ +} + +ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang); + +/** + * ice_health_init - allocate and init all ice devlink health 
reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_init(struct ice_pf *pf) +{ + struct ice_health *reps = &pf->health_reporters; + + reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops); +} + +/** + * ice_deinit_devl_reporter - destroy given devlink health reporter + * @reporter: reporter to destroy + */ +static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter) +{ + if (reporter) + devl_health_reporter_destroy(reporter); +} + +/** + * ice_health_deinit - deallocate all ice devlink health reporters and + * accompanied data + * + * @pf: PF struct + */ +void ice_health_deinit(struct ice_pf *pf) +{ + ice_deinit_devl_reporter(pf->health_reporters.tx_hang); +} + +static +void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter) +{ + if (reporter) + devlink_health_reporter_state_update(reporter, + DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); +} + +/** + * ice_health_clear - clear devlink health issues after a reset + * @pf: the PF device structure + * + * Mark the PF in healthy state again after a reset has completed. + */ +void ice_health_clear(struct ice_pf *pf) +{ + ice_health_assign_healthy_state(pf->health_reporters.tx_hang); +} diff --git a/drivers/net/ethernet/intel/ice/devlink/health.h b/drivers/net/ethernet/intel/ice/devlink/health.h new file mode 100644 index 000000000000..5ce601227acb --- /dev/null +++ b/drivers/net/ethernet/intel/ice/devlink/health.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024, Intel Corporation. */ + +#ifndef _HEALTH_H_ +#define _HEALTH_H_ + +#include <linux/types.h> + +/** + * DOC: health.h + * + * This header file stores everything that is needed for broadly understood + * devlink health mechanism for ice driver.
+ */ + +struct ice_pf; +struct ice_tx_ring; + +/** + * struct ice_health - stores ice devlink health reporters and accompanied data + * @tx_hang: devlink health reporter for tx_hang event + * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from + * non-sleeping context + * @tx_ring: ring that the hang occurred on + * @head: descriptor head + * @intr: interrupt register value + * @vsi_num: VSI owning the queue that the hang occurred on + */ +struct ice_health { + struct devlink_health_reporter *tx_hang; + struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf, + struct ice_tx_ring *tx_ring; + u32 head; + u32 intr; + u16 vsi_num; + ); +}; + +void ice_health_init(struct ice_pf *pf); +void ice_health_deinit(struct ice_pf *pf); +void ice_health_clear(struct ice_pf *pf); + +void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, + u16 vsi_num, u32 head, u32 intr); +void ice_report_tx_hang(struct ice_pf *pf); + +#endif /* _HEALTH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2f5d6f974185..71e05d30f0fd 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -78,6 +78,7 @@ #include "ice_irq.h" #include "ice_dpll.h" #include "ice_adapter.h" +#include "devlink/health.h" #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 @@ -665,6 +666,7 @@ struct ice_pf { struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES]; struct ice_dplls dplls; struct device *hwmon_dev; + struct ice_health health_reporters; u8 num_quanta_prof_used; }; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index d641dd8b8184..316f5109bd3f 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2364,9 +2364,11 @@ static void ice_service_task(struct work_struct *work) struct ice_pf *pf = container_of(work, struct ice_pf, serv_task); unsigned long start_time = jiffies; - /* 
subtasks */ + if (pf->health_reporters.tx_hang_buf.tx_ring) { + ice_report_tx_hang(pf); + pf->health_reporters.tx_hang_buf.tx_ring = NULL; + } - /* process reset requests first */ ice_reset_subtask(pf); /* bail if a reset/recovery cycle is pending or rebuild failed */ @@ -5087,6 +5089,7 @@ static int ice_init_devlink(struct ice_pf *pf) return err; ice_devlink_init_regions(pf); + ice_health_init(pf); ice_devlink_register(pf); return 0; @@ -5095,6 +5098,7 @@ static int ice_init_devlink(struct ice_pf *pf) static void ice_deinit_devlink(struct ice_pf *pf) { ice_devlink_unregister(pf); + ice_health_deinit(pf); ice_devlink_destroy_regions(pf); ice_devlink_unregister_params(pf); } @@ -7793,6 +7797,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) /* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + ice_health_clear(pf); + ice_plug_aux_dev(pf); if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG)) ice_lag_rebuild(pf); @@ -8283,16 +8289,18 @@ void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) if (tx_ring) { struct ice_hw *hw = &pf->hw; - u32 head, val = 0; + u32 head, intr = 0; head = FIELD_GET(QTX_COMM_HEAD_HEAD_M, rd32(hw, QTX_COMM_HEAD(vsi->txq_map[txqueue]))); /* Read interrupt register */ - val = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); + intr = rd32(hw, GLINT_DYN_CTL(tx_ring->q_vector->reg_idx)); netdev_info(netdev, "tx_timeout: VSI_num: %d, Q %u, NTC: 0x%x, HW_HEAD: 0x%x, NTU: 0x%x, INT: 0x%x\n", vsi->vsi_num, txqueue, tx_ring->next_to_clean, - head, tx_ring->next_to_use, val); + head, tx_ring->next_to_use, intr); + + ice_prep_tx_hang_report(pf, tx_ring, vsi->vsi_num, head, intr); } pf->tx_timeout_last_recovery = jiffies; From bc1027473986dbbd93f9eb41de33307f9abe1319 Mon Sep 17 00:00:00 2001 From: Ben Shelton Date: Mon, 16 Dec 2024 15:15:35 +0100 Subject: [PATCH 0470/1386] ice: Add MDD logging via devlink health Add a devlink health reporter for MDD events. 
The 'dump' handler will return the information captured in each call to ice_handle_mdd_event(). A device reset (CORER/PFR) will put the reporter back in healthy state. Signed-off-by: Ben Shelton Reviewed-by: Igor Bagnucki Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Co-developed-by: Przemek Kitszel Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- .../net/ethernet/intel/ice/devlink/health.c | 77 +++++++++++++++++++ .../net/ethernet/intel/ice/devlink/health.h | 11 +++ drivers/net/ethernet/intel/ice/ice_main.c | 6 ++ 3 files changed, 94 insertions(+) diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c index 984d910fc41d..d23ae3aafaa7 100644 --- a/drivers/net/ethernet/intel/ice/devlink/health.c +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -26,6 +26,79 @@ static void ice_devlink_health_report(struct devlink_health_reporter *reporter, devlink_health_report(reporter, msg, priv_ctx); } +struct ice_mdd_event { + enum ice_mdd_src src; + u16 vf_num; + u16 queue; + u8 pf_num; + u8 event; +}; + +static const char *ice_mdd_src_to_str(enum ice_mdd_src src) +{ + switch (src) { + case ICE_MDD_SRC_TX_PQM: + return "tx_pqm"; + case ICE_MDD_SRC_TX_TCLAN: + return "tx_tclan"; + case ICE_MDD_SRC_TX_TDPU: + return "tx_tdpu"; + case ICE_MDD_SRC_RX: + return "rx"; + default: + return "invalid"; + } +} + +static int +ice_mdd_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct ice_mdd_event *mdd_event = priv_ctx; + const char *src; + + if (!mdd_event) + return 0; + + src = ice_mdd_src_to_str(mdd_event->src); + + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "src", src); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, pf_num); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, vf_num); + 
ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, event); + ICE_DEVLINK_FMSG_PUT_FIELD(fmsg, mdd_event, queue); + devlink_fmsg_obj_nest_end(fmsg); + + return 0; +} + +/** + * ice_report_mdd_event - Report an MDD event through devlink health + * @pf: the PF device structure + * @src: the HW block that was the source of this MDD event + * @pf_num: the pf_num on which the MDD event occurred + * @vf_num: the vf_num on which the MDD event occurred + * @event: the event type of the MDD event + * @queue: the queue on which the MDD event occurred + * + * Report an MDD event that has occurred on this PF. + */ +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue) +{ + struct ice_mdd_event ev = { + .src = src, + .pf_num = pf_num, + .vf_num = vf_num, + .event = event, + .queue = queue, + }; + + ice_devlink_health_report(pf->health_reporters.mdd, "MDD event", &ev); +} + /** * ice_fmsg_put_ptr - put hex value of pointer into fmsg * @@ -136,6 +209,7 @@ ice_init_devlink_rep(struct ice_pf *pf, .dump = ice_ ## _name ## _reporter_dump, \ } +ICE_DEFINE_HEALTH_REPORTER_OPS(mdd); ICE_DEFINE_HEALTH_REPORTER_OPS(tx_hang); /** @@ -148,6 +222,7 @@ void ice_health_init(struct ice_pf *pf) { struct ice_health *reps = &pf->health_reporters; + reps->mdd = ice_init_devlink_rep(pf, &ice_mdd_reporter_ops); reps->tx_hang = ice_init_devlink_rep(pf, &ice_tx_hang_reporter_ops); } @@ -169,6 +244,7 @@ static void ice_deinit_devl_reporter(struct devlink_health_reporter *reporter) */ void ice_health_deinit(struct ice_pf *pf) { + ice_deinit_devl_reporter(pf->health_reporters.mdd); ice_deinit_devl_reporter(pf->health_reporters.tx_hang); } @@ -188,5 +264,6 @@ void ice_health_assign_healthy_state(struct devlink_health_reporter *reporter) */ void ice_health_clear(struct ice_pf *pf) { + ice_health_assign_healthy_state(pf->health_reporters.mdd); ice_health_assign_healthy_state(pf->health_reporters.tx_hang); } diff --git 
a/drivers/net/ethernet/intel/ice/devlink/health.h b/drivers/net/ethernet/intel/ice/devlink/health.h index 5ce601227acb..532277fc57d7 100644 --- a/drivers/net/ethernet/intel/ice/devlink/health.h +++ b/drivers/net/ethernet/intel/ice/devlink/health.h @@ -16,9 +16,17 @@ struct ice_pf; struct ice_tx_ring; +enum ice_mdd_src { + ICE_MDD_SRC_TX_PQM, + ICE_MDD_SRC_TX_TCLAN, + ICE_MDD_SRC_TX_TDPU, + ICE_MDD_SRC_RX, +}; + /** * struct ice_health - stores ice devlink health reporters and accompanied data * @tx_hang: devlink health reporter for tx_hang event + * @mdd: devlink health reporter for MDD detection event * @tx_hang_buf: pre-allocated place to put info for Tx hang reporter from * non-sleeping context * @tx_ring: ring that the hang occurred on @@ -27,6 +35,7 @@ struct ice_tx_ring; * @vsi_num: VSI owning the queue that the hang occurred on */ struct ice_health { + struct devlink_health_reporter *mdd; struct devlink_health_reporter *tx_hang; struct_group_tagged(ice_health_tx_hang_buf, tx_hang_buf, struct ice_tx_ring *tx_ring; @@ -42,6 +51,8 @@ void ice_health_clear(struct ice_pf *pf); void ice_prep_tx_hang_report(struct ice_pf *pf, struct ice_tx_ring *tx_ring, u16 vsi_num, u32 head, u32 intr); +void ice_report_mdd_event(struct ice_pf *pf, enum ice_mdd_src src, u8 pf_num, + u16 vf_num, u8 event, u16 queue); void ice_report_tx_hang(struct ice_pf *pf); #endif /* _HEALTH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 316f5109bd3f..1701f7143f24 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1816,6 +1816,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_PQM, pf_num, vf_num, + event, queue); wr32(hw, GL_MDET_TX_PQM, 0xffffffff); } @@ -1829,6 +1831,8 @@ static void 
ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_tx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_TX_TCLAN, pf_num, vf_num, + event, queue); wr32(hw, GL_MDET_TX_TCLAN_BY_MAC(hw), U32_MAX); } @@ -1842,6 +1846,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf) if (netif_msg_rx_err(pf)) dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", event, queue, pf_num, vf_num); + ice_report_mdd_event(pf, ICE_MDD_SRC_RX, pf_num, vf_num, event, + queue); wr32(hw, GL_MDET_RX, 0xffffffff); } From d3c9510dc900e9ff3ea330189c0465c9f00fba18 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 15 Dec 2024 13:29:38 -0800 Subject: [PATCH 0471/1386] net: page_pool: rename page_pool_is_last_ref() page_pool_is_last_ref() releases a reference while the name, to me at least, suggests it just checks if the refcount is 1. The semantics of the function are the same as those of atomic_dec_and_test() and refcount_dec_and_test(), so just use the _and_test() suffix. 
Reviewed-by: Alexander Lobakin Reviewed-by: Ilias Apalodimas Acked-by: Jesper Dangaard Brouer Link: https://patch.msgid.link/20241215212938.99210-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 4 ++-- net/core/page_pool.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index e555921e5233..776a3008ac28 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -306,7 +306,7 @@ static inline void page_pool_ref_page(struct page *page) page_pool_ref_netmem(page_to_netmem(page)); } -static inline bool page_pool_is_last_ref(netmem_ref netmem) +static inline bool page_pool_unref_and_test(netmem_ref netmem) { /* If page_pool_unref_page() returns 0, we were the last user */ return page_pool_unref_netmem(netmem, 1) == 0; @@ -321,7 +321,7 @@ static inline void page_pool_put_netmem(struct page_pool *pool, * allow registering MEM_TYPE_PAGE_POOL, but shield linker. 
*/ #ifdef CONFIG_PAGE_POOL - if (!page_pool_is_last_ref(netmem)) + if (!page_pool_unref_and_test(netmem)) return; page_pool_put_unrefed_netmem(pool, netmem, dma_sync_size, allow_direct); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e07ad7315955..9733206d6406 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -897,7 +897,7 @@ void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); - if (page_pool_is_last_ref(netmem)) + if (page_pool_unref_and_test(netmem)) data[bulk_len++] = netmem; } From a2558b410de3b0b6c38222ac4858188a55bc52ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:30:08 +0100 Subject: [PATCH 0472/1386] net: bridge: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Simon Horman Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20241216-sysfs-const-bin_attr-net-v1-1-ec460b91f274@weissschuh.net Signed-off-by: Jakub Kicinski --- net/bridge/br_sysfs_br.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index ea733542244c..c1176a5e02c4 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -1002,7 +1002,7 @@ static const struct attribute_group bridge_group = { * Returns the number of bytes read. 
*/ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -1023,10 +1023,10 @@ static ssize_t brforward_read(struct file *filp, struct kobject *kobj, return n; } -static struct bin_attribute bridge_forward = { +static const struct bin_attribute bridge_forward = { .attr = { .name = SYSFS_BRIDGE_FDB, .mode = 0444, }, - .read = brforward_read, + .read_new = brforward_read, }; /* From 2d7b422fa7952e3f15fc0912b12530af1d265193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:30:09 +0100 Subject: [PATCH 0473/1386] net: phy: ks8995: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241216-sysfs-const-bin_attr-net-v1-2-ec460b91f274@weissschuh.net Signed-off-by: Jakub Kicinski --- drivers/net/phy/spi_ks8995.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/phy/spi_ks8995.c b/drivers/net/phy/spi_ks8995.c index 7196e927c2cd..076a370be849 100644 --- a/drivers/net/phy/spi_ks8995.c +++ b/drivers/net/phy/spi_ks8995.c @@ -289,7 +289,7 @@ static int ks8995_reset(struct ks8995_switch *ks) } static ssize_t ks8995_registers_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev; struct ks8995_switch *ks8995; @@ -301,7 +301,7 @@ static ssize_t ks8995_registers_read(struct file *filp, struct kobject *kobj, } static ssize_t ks8995_registers_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct device *dev; struct ks8995_switch *ks8995; @@ -401,8 +401,8 @@ static const struct bin_attribute ks8995_registers_attr = { .mode = 0600, }, .size = KS8995_REGS_SIZE, - .read = ks8995_registers_read, - .write = ks8995_registers_write, + .read_new = ks8995_registers_read, + .write_new = ks8995_registers_write, }; /* ------------------------------------------------------------------------ */ From ae026eae08e7a0a118abc31192041e49bcda3a8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:30:11 +0100 Subject: [PATCH 0474/1386] netxen_nic: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. 
Make use of that to protect them against accidental or malicious modifications. Signed-off-by: Thomas Weißschuh Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216-sysfs-const-bin_attr-net-v1-4-ec460b91f274@weissschuh.net Signed-off-by: Jakub Kicinski --- .../ethernet/qlogic/netxen/netxen_nic_main.c | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 9cff0a8ffb2c..3383ee1dad14 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -2832,7 +2832,7 @@ netxen_sysfs_validate_crb(struct netxen_adapter *adapter, static ssize_t netxen_sysfs_read_crb(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -2860,7 +2860,7 @@ netxen_sysfs_read_crb(struct file *filp, struct kobject *kobj, static ssize_t netxen_sysfs_write_crb(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -2901,7 +2901,7 @@ netxen_sysfs_validate_mem(struct netxen_adapter *adapter, static ssize_t netxen_sysfs_read_mem(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -2922,7 +2922,7 @@ netxen_sysfs_read_mem(struct file *filp, struct kobject *kobj, } static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -2946,20 +2946,20 @@ static ssize_t netxen_sysfs_write_mem(struct file *filp, 
struct kobject *kobj, static const struct bin_attribute bin_attr_crb = { .attr = { .name = "crb", .mode = 0644 }, .size = 0, - .read = netxen_sysfs_read_crb, - .write = netxen_sysfs_write_crb, + .read_new = netxen_sysfs_read_crb, + .write_new = netxen_sysfs_write_crb, }; static const struct bin_attribute bin_attr_mem = { .attr = { .name = "mem", .mode = 0644 }, .size = 0, - .read = netxen_sysfs_read_mem, - .write = netxen_sysfs_write_mem, + .read_new = netxen_sysfs_read_mem, + .write_new = netxen_sysfs_write_mem, }; static ssize_t netxen_sysfs_read_dimm(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -3082,7 +3082,7 @@ out: static const struct bin_attribute bin_attr_dimm = { .attr = { .name = "dimm", .mode = 0644 }, .size = sizeof(struct netxen_dimm_cfg), - .read = netxen_sysfs_read_dimm, + .read_new = netxen_sysfs_read_dimm, }; From 661cd8fc8e9039819ca0c22e0add52b632240a9e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2024 17:56:26 +0000 Subject: [PATCH 0475/1386] inetpeer: remove create argument of inet_getpeer_v[46]() All callers of inet_getpeer_v4() and inet_getpeer_v6() want to create an inetpeer. 
Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241215175629.1248773-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inetpeer.h | 9 ++++----- net/ipv4/icmp.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/route.c | 4 ++-- net/ipv6/icmp.c | 2 +- net/ipv6/ip6_output.c | 2 +- net/ipv6/ndisc.c | 2 +- 7 files changed, 11 insertions(+), 12 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 74ff688568a0..6f51f81d6cb1 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -101,25 +101,24 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, - int vif, int create) + int vif) { struct inetpeer_addr daddr; daddr.a4.addr = v4daddr; daddr.a4.vif = vif; daddr.family = AF_INET; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr, 1); } static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, - const struct in6_addr *v6daddr, - int create) + const struct in6_addr *v6daddr) { struct inetpeer_addr daddr; daddr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(base, &daddr, create); + return inet_getpeer(base, &daddr, 1); } static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 963a89ae9c26..5eeb9f569a70 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -322,7 +322,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, goto out; vif = l3mdev_master_ifindex(dst->dev); - peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1); + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); if (peer) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 07036a2943c1..46e1171299f2 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -89,7 +89,7 @@ static void 
ip4_frag_init(struct inet_frag_queue *q, const void *a) q->key.v4 = *key; qp->ecn = 0; qp->peer = q->fqdir->max_dist ? - inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : + inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) : NULL; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0fbec3509618..297a9939c6e7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -873,7 +873,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) rcu_read_unlock(); net = dev_net(rt->dst.dev); - peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); + peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); @@ -976,7 +976,7 @@ static int ip_error(struct sk_buff *skb) } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - l3mdev_master_ifindex(skb->dev), 1); + l3mdev_master_ifindex(skb->dev)); send = true; if (peer) { diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 071b0bc1179d..4593e3992c67 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -222,7 +222,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); - peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); if (peer) inet_putpeer(peer); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 993106876604..cdcbb3b6c5da 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -613,7 +613,7 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index aba94a348673..f113554d1332 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ 
-1731,7 +1731,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); From 7a596a50c4a4eab946aec149171c72321b4934aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2024 17:56:27 +0000 Subject: [PATCH 0476/1386] inetpeer: remove create argument of inet_getpeer() All callers of inet_getpeer() want to create an inetpeer. Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241215175629.1248773-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/inetpeer.h | 7 +++---- net/ipv4/inetpeer.c | 11 ++--------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 6f51f81d6cb1..f475757daafb 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -96,8 +96,7 @@ static inline struct in6_addr *inetpeer_get_addr_v6(struct inetpeer_addr *iaddr) /* can be called with or without local BH being disabled */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, - const struct inetpeer_addr *daddr, - int create); + const struct inetpeer_addr *daddr); static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, __be32 v4daddr, @@ -108,7 +107,7 @@ static inline struct inet_peer *inet_getpeer_v4(struct inet_peer_base *base, daddr.a4.addr = v4daddr; daddr.a4.vif = vif; daddr.family = AF_INET; - return inet_getpeer(base, &daddr, 1); + return inet_getpeer(base, &daddr); } static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, @@ -118,7 +117,7 @@ static inline struct inet_peer *inet_getpeer_v6(struct inet_peer_base *base, daddr.a6 = *v6daddr; daddr.family = AF_INET6; - return inet_getpeer(base, &daddr, 1); + return inet_getpeer(base, &daddr); } 
static inline int inetpeer_addr_cmp(const struct inetpeer_addr *a, diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 5ab56f4cb529..bc79cc9d13eb 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -169,13 +169,11 @@ static void inet_peer_gc(struct inet_peer_base *base, } struct inet_peer *inet_getpeer(struct inet_peer_base *base, - const struct inetpeer_addr *daddr, - int create) + const struct inetpeer_addr *daddr) { struct inet_peer *p, *gc_stack[PEER_MAX_GC]; struct rb_node **pp, *parent; unsigned int gc_cnt, seq; - int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. @@ -183,16 +181,11 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, rcu_read_lock(); seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - invalidated = read_seqretry(&base->lock, seq); rcu_read_unlock(); if (p) return p; - /* If no writer did a change during our lookup, we can return early. */ - if (!create && !invalidated) - return NULL; - /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ @@ -201,7 +194,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, gc_cnt = 0; p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); - if (!p && create) { + if (!p) { p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); if (p) { p->daddr = *daddr; From 50b362f21d6c10b0f7939c1482c6a1b43da82f1a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2024 17:56:28 +0000 Subject: [PATCH 0477/1386] inetpeer: update inetpeer timestamp in inet_getpeer() inet_putpeer() will be removed in the following patch, because we will no longer use refcounts. Update inetpeer timestamp (p->dtime) at lookup time. 
Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241215175629.1248773-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inetpeer.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index bc79cc9d13eb..28c3ae5bc4a0 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -95,6 +95,7 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, { struct rb_node **pp, *parent, *next; struct inet_peer *p; + u32 now; pp = &base->rb_root.rb_node; parent = NULL; @@ -110,6 +111,9 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, if (cmp == 0) { if (!refcount_inc_not_zero(&p->refcnt)) break; + now = jiffies; + if (READ_ONCE(p->dtime) != now) + WRITE_ONCE(p->dtime, now); return p; } if (gc_stack) { @@ -150,9 +154,6 @@ static void inet_peer_gc(struct inet_peer_base *base, for (i = 0; i < gc_cnt; i++) { p = gc_stack[i]; - /* The READ_ONCE() pairs with the WRITE_ONCE() - * in inet_putpeer() - */ delta = (__u32)jiffies - READ_ONCE(p->dtime); if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) @@ -224,11 +225,6 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { - /* The WRITE_ONCE() pairs with itself (we run lockless) - * and the READ_ONCE() in inet_peer_gc() - */ - WRITE_ONCE(p->dtime, (__u32)jiffies); - if (refcount_dec_and_test(&p->refcnt)) kfree_rcu(p, rcu); } From a853c609504e2d1d83e71285e3622fda1f1451d8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 15 Dec 2024 17:56:29 +0000 Subject: [PATCH 0478/1386] inetpeer: do not get a refcount in inet_getpeer() All inet_getpeer() callers except ip4_frag_init() don't need to acquire a permanent refcount on the inetpeer. They can switch to full RCU protection. Move the refcount_inc_not_zero() into ip4_frag_init(), so that all the other callers no longer have to perform a pair of expensive atomic operations on a possibly contended cache line. 
inet_putpeer() no longer needs to be exported. After this patch, my DUT can receive 8,400,000 UDP packets per second targeting closed ports, using 50% fewer CPU cycles than before. Also replace two calls to l3mdev_master_ifindex() with l3mdev_master_ifindex_rcu() (Ido's idea) Fixes: 8c2bd38b95f7 ("icmp: change the order of rate limits") Signed-off-by: Eric Dumazet Link: https://patch.msgid.link/20241215175629.1248773-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/icmp.c | 9 ++++----- net/ipv4/inetpeer.c | 8 ++------ net/ipv4/ip_fragment.c | 15 ++++++++++----- net/ipv4/route.c | 15 ++++++++------- net/ipv6/icmp.c | 4 ++-- net/ipv6/ip6_output.c | 4 ++-- net/ipv6/ndisc.c | 6 ++++-- 7 files changed, 32 insertions(+), 29 deletions(-) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 5eeb9f569a70..094084b61bff 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -312,7 +312,6 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; - int vif; if (!apply_ratelimit) return true; @@ -321,12 +320,12 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) goto out; - vif = l3mdev_master_ifindex(dst->dev); - peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif); + rcu_read_lock(); + peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, + l3mdev_master_ifindex_rcu(dst->dev)); rc = inet_peer_xrlim_allow(peer, READ_ONCE(net->ipv4.sysctl_icmp_ratelimit)); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 28c3ae5bc4a0..e02484f4d22b 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -109,8 +109,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr, p = rb_entry(parent, struct inet_peer, rb_node); cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - if
(!refcount_inc_not_zero(&p->refcnt)) - break; now = jiffies; if (READ_ONCE(p->dtime) != now) WRITE_ONCE(p->dtime, now); @@ -169,6 +167,7 @@ static void inet_peer_gc(struct inet_peer_base *base, } } +/* Must be called under RCU : No refcount change is done here. */ struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr) { @@ -179,10 +178,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ - rcu_read_lock(); seq = read_seqbegin(&base->lock); p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); - rcu_read_unlock(); if (p) return p; @@ -200,7 +197,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, if (p) { p->daddr = *daddr; p->dtime = (__u32)jiffies; - refcount_set(&p->refcnt, 2); + refcount_set(&p->refcnt, 1); atomic_set(&p->rid, 0); p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; @@ -228,7 +225,6 @@ void inet_putpeer(struct inet_peer *p) if (refcount_dec_and_test(&p->refcnt)) kfree_rcu(p, rcu); } -EXPORT_SYMBOL_GPL(inet_putpeer); /* * Check transmit rate limitation for given message. diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 46e1171299f2..7a435746a22d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); - struct net *net = q->fqdir->net; - const struct frag_v4_compare_key *key = a; + struct net *net = q->fqdir->net; + struct inet_peer *p = NULL; q->key.v4 = *key; qp->ecn = 0; - qp->peer = q->fqdir->max_dist ? 
- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) : - NULL; + if (q->fqdir->max_dist) { + rcu_read_lock(); + p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif); + if (p && !refcount_inc_not_zero(&p->refcnt)) + p = NULL; + rcu_read_unlock(); + } + qp->peer = p; } static void ip4_frag_free(struct inet_frag_queue *q) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 297a9939c6e7..9f9d4e6ea1b9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -870,11 +870,11 @@ void ip_rt_send_redirect(struct sk_buff *skb) } log_martians = IN_DEV_LOG_MARTIANS(in_dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev); - rcu_read_unlock(); net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif); if (!peer) { + rcu_read_unlock(); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); return; @@ -893,7 +893,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) */ if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; - goto out_put_peer; + goto out_unlock; } /* Check for load limit; set rate_last to the latest sent @@ -914,8 +914,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); } -out_put_peer: - inet_putpeer(peer); +out_unlock: + rcu_read_unlock(); } static int ip_error(struct sk_buff *skb) @@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb) break; } + rcu_read_lock(); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, - l3mdev_master_ifindex(skb->dev)); - + l3mdev_master_ifindex_rcu(skb->dev)); send = true; if (peer) { now = jiffies; @@ -989,8 +989,9 @@ static int ip_error(struct sk_buff *skb) peer->rate_tokens -= ip_rt_error_cost; else send = false; - inet_putpeer(peer); } + rcu_read_unlock(); + if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4593e3992c67..a6984a29fdb9 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -222,10 +222,10 @@ 
static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, if (rt->rt6i_dst.plen < 128) tmo >>= ((128 - rt->rt6i_dst.plen)>>5); + rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr); res = inet_peer_xrlim_allow(peer, tmo); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); } if (!res) __ICMP6_INC_STATS(net, ip6_dst_idev(dst), diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cdcbb3b6c5da..d577bf2f3053 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -613,6 +613,7 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; + rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); /* Limit redirects both by destination (here) @@ -620,8 +621,7 @@ int ip6_forward(struct sk_buff *skb) */ if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); } else { int addrtype = ipv6_addr_type(&hdr->saddr); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f113554d1332..d044c67019de 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1731,10 +1731,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } + + rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr); ret = inet_peer_xrlim_allow(peer, 1*HZ); - if (peer) - inet_putpeer(peer); + rcu_read_unlock(); + if (!ret) goto release; From 1ba06ca96ca255c079ce5ea6a75cc0bfd5e97921 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 14:18:44 +0100 Subject: [PATCH 0479/1386] mlxsw: Switch to napi_gro_receive() Benefit from the recent conversion of the driver to NAPI and enable GRO support through the use of napi_gro_receive(). Pass the NAPI pointer from the bus driver (mlxsw_pci) to the switch driver (mlxsw_spectrum) through the skb control block where various packet metadata is already encoded. 
The main motivation is to improve forwarding performance through the use of GRO fraglist [1]. In my testing, when the forwarding data path is simple (routing between two ports) there is not much difference in forwarding performance between GRO disabled and GRO enabled with fraglist. The improvement becomes more noticeable as the data path becomes more complex since it is traversed fewer times with GRO enabled. For example, with 10 ingress and 10 egress flower filters with different priorities on the two ports between which routing is performed, there is an improvement of about 140% in forwarded bandwidth. [1] https://lore.kernel.org/netdev/20200125102645.4782-1-steffen.klassert@secunet.com/ Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Reviewed-by: Amit Cohen Signed-off-by: Petr Machata Reviewed-by: Alexander Lobakin Link: https://patch.msgid.link/21258fe55f608ccf1ee2783a5a4534220af28903.1734354812.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlxsw/core.h | 1 + drivers/net/ethernet/mellanox/mlxsw/pci.c | 4 +++- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +- drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 2 +- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index 6d11225594dd..24c3ff6fcf71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -73,6 +73,7 @@ struct mlxsw_tx_info { }; struct mlxsw_rx_md_info { + struct napi_struct *napi; u32 cookie_index; u32 latency; u32 tx_congestion; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index d6f37456fb31..0863dca2fc0b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -737,6 +737,7 @@ static void mlxsw_pci_cqe_rdq_md_init(struct sk_buff *skb, const char *cqe) } static void mlxsw_pci_cqe_rdq_handle(struct
mlxsw_pci *mlxsw_pci, + struct napi_struct *napi, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, enum mlxsw_pci_cqe_v cqe_v, char *cqe) @@ -807,6 +808,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, } mlxsw_pci_skb_cb_ts_set(mlxsw_pci, skb, cqe_v, cqe); + mlxsw_skb_cb(skb)->rx_md_info.napi = napi; mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); @@ -869,7 +871,7 @@ static int mlxsw_pci_napi_poll_cq_rx(struct napi_struct *napi, int budget) continue; } - mlxsw_pci_cqe_rdq_handle(mlxsw_pci, rdq, + mlxsw_pci_cqe_rdq_handle(mlxsw_pci, napi, rdq, wqe_counter, q->u.cq.v, cqe); if (++work_done == budget) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 3f5e5d99251b..aa71993daf28 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2449,7 +2449,7 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb, u64_stats_update_end(&pcpu_stats->syncp); skb->protocol = eth_type_trans(skb, skb->dev); - netif_receive_skb(skb); + napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb); } static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u16 local_port, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c index 899c954e0e5f..1f9c1c86839f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c @@ -173,7 +173,7 @@ static void mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u16 local_port, if (err) return; - netif_receive_skb(skb); + napi_gro_receive(mlxsw_skb_cb(skb)->rx_md_info.napi, skb); } static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u16 local_port, From 33d06d1d28124b042178894584b727fdf83660b1 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 16 Dec 2024 15:51:22 +0000 Subject: [PATCH 0480/1386] niu: Use page->private instead of 
page->index We are close to removing page->index. Use page->private instead, which is least likely to be removed. Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20241216155124.3114-1-willy@infradead.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sun/niu.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index df6d35d41b97..d7459866d24c 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3303,7 +3303,7 @@ static struct page *niu_find_rxpage(struct rx_ring_info *rp, u64 addr, addr &= PAGE_MASK; pp = &rp->rxhash[h]; for (; (p = *pp) != NULL; pp = &niu_next_page(p)) { - if (p->index == addr) { + if (p->private == addr) { *link = pp; goto found; } @@ -3318,7 +3318,7 @@ static void niu_hash_page(struct rx_ring_info *rp, struct page *page, u64 base) { unsigned int h = niu_hash_rxaddr(rp, base); - page->index = base; + page->private = base; niu_next_page(page) = rp->rxhash[h]; rp->rxhash[h] = page; } @@ -3400,11 +3400,11 @@ static int niu_rx_pkt_ignore(struct niu *np, struct rx_ring_info *rp) rcr_size = rp->rbr_sizes[(val & RCR_ENTRY_PKTBUFSZ) >> RCR_ENTRY_PKTBUFSZ_SHIFT]; - if ((page->index + PAGE_SIZE) - rcr_size == addr) { + if ((page->private + PAGE_SIZE) - rcr_size == addr) { *link = niu_next_page(page); - np->ops->unmap_page(np->device, page->index, + np->ops->unmap_page(np->device, page->private, PAGE_SIZE, DMA_FROM_DEVICE); - page->index = 0; + page->private = 0; niu_next_page(page) = NULL; __free_page(page); rp->rbr_refill_pending++; @@ -3469,11 +3469,11 @@ static int niu_process_rx_pkt(struct napi_struct *napi, struct niu *np, append_size = append_size - skb->len; niu_rx_skb_append(skb, page, off, append_size, rcr_size); - if ((page->index + rp->rbr_block_size) - rcr_size == addr) { + if ((page->private + rp->rbr_block_size) - rcr_size == addr) { *link = niu_next_page(page); - 
np->ops->unmap_page(np->device, page->index, + np->ops->unmap_page(np->device, page->private, PAGE_SIZE, DMA_FROM_DEVICE); - page->index = 0; + page->private = 0; niu_next_page(page) = NULL; rp->rbr_refill_pending++; } else @@ -3538,11 +3538,11 @@ static void niu_rbr_free(struct niu *np, struct rx_ring_info *rp) page = rp->rxhash[i]; while (page) { struct page *next = niu_next_page(page); - u64 base = page->index; + u64 base = page->private; np->ops->unmap_page(np->device, base, PAGE_SIZE, DMA_FROM_DEVICE); - page->index = 0; + page->private = 0; niu_next_page(page) = NULL; __free_page(page); @@ -6460,7 +6460,7 @@ static void niu_reset_buffers(struct niu *np) page = rp->rxhash[j]; while (page) { struct page *next = niu_next_page(page); - u64 base = page->index; + u64 base = page->private; base = base >> RBR_DESCR_ADDR_SHIFT; rp->rbr[k++] = cpu_to_le32(base); page = next; From 30c63abaee9024ed7524325b3eeb7f2d26727c31 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:36 +0100 Subject: [PATCH 0481/1386] net: usb: lan78xx: Add error handling to lan78xx_get_regs Update `lan78xx_get_regs` to handle errors during register and PHY reads. Log warnings for failed reads and exit the function early if an error occurs. Drop all previously logged registers to signal inconsistent readings to the user space. This ensures that invalid data is not returned to users. 
Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241216120941.1690908-2-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 4661d131b190..270345fcad65 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2108,20 +2108,44 @@ static void lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *buf) { - u32 *data = buf; - int i, j; struct lan78xx_net *dev = netdev_priv(netdev); + unsigned int data_count = 0; + u32 *data = buf; + int i, j, ret; /* Read Device/MAC registers */ - for (i = 0; i < ARRAY_SIZE(lan78xx_regs); i++) - lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]); + for (i = 0; i < ARRAY_SIZE(lan78xx_regs); i++) { + ret = lan78xx_read_reg(dev, lan78xx_regs[i], &data[i]); + if (ret < 0) { + netdev_warn(dev->net, + "failed to read register 0x%08x\n", + lan78xx_regs[i]); + goto clean_data; + } + + data_count++; + } if (!netdev->phydev) return; /* Read PHY registers */ - for (j = 0; j < 32; i++, j++) - data[i] = phy_read(netdev->phydev, j); + for (j = 0; j < 32; i++, j++) { + ret = phy_read(netdev->phydev, j); + if (ret < 0) { + netdev_warn(dev->net, + "failed to read PHY register 0x%02x\n", j); + goto clean_data; + } + + data[i] = ret; + data_count++; + } + + return; + +clean_data: + memset(data, 0, data_count * sizeof(u32)); } static const struct ethtool_ops lan78xx_ethtool_ops = { From 18bdefe62439c75227021ddbbf6510aa2f2f4e54 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:37 +0100 Subject: [PATCH 0482/1386] net: usb: lan78xx: Use ETIMEDOUT instead of ETIME in lan78xx_stop_hw Update lan78xx_stop_hw to return -ETIMEDOUT instead of -ETIME when a timeout occurs. 
While -ETIME indicates a general timer expiration, -ETIMEDOUT is more commonly used for signaling operation timeouts and provides better consistency with standard error handling in the driver. The -ETIME checks in tx_complete() and rx_complete() are unrelated to this error handling change. In these functions, the error values are derived from urb->status, which reflects USB transfer errors. The error value from lan78xx_stop_hw will be exposed in the following cases: - usb_driver::suspend - net_device_ops::ndo_stop (potentially, though currently the return value is not used). Signed-off-by: Oleksij Rempel Reviewed-by: Mateusz Polchlopek Link: https://patch.msgid.link/20241216120941.1690908-3-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 270345fcad65..4674051f5c9c 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -844,9 +844,7 @@ static int lan78xx_stop_hw(struct lan78xx_net *dev, u32 reg, u32 hw_enabled, } while (!stopped && !time_after(jiffies, timeout)); } - ret = stopped ? 0 : -ETIME; - - return ret; + return stopped ? 0 : -ETIMEDOUT; } static int lan78xx_flush_fifo(struct lan78xx_net *dev, u32 reg, u32 fifo_flush) From 7433d022b915977a0e361a036aa06a0d382a9630 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:38 +0100 Subject: [PATCH 0483/1386] net: usb: lan78xx: Use action-specific label in lan78xx_mac_reset Rename the generic `done` label to the action-specific `exit_unlock` label in `lan78xx_mac_reset`. This improves clarity by indicating the specific cleanup action (mutex unlock) and aligns with best practices for error handling and cleanup labels. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Reviewed-by: Mateusz Polchlopek Link: https://patch.msgid.link/20241216120941.1690908-4-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 4674051f5c9c..30301af29ab2 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1604,16 +1604,16 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev) */ ret = lan78xx_mdiobus_wait_not_busy(dev); if (ret < 0) - goto done; + goto exit_unlock; ret = lan78xx_read_reg(dev, MAC_CR, &val); if (ret < 0) - goto done; + goto exit_unlock; val |= MAC_CR_RST_; ret = lan78xx_write_reg(dev, MAC_CR, val); if (ret < 0) - goto done; + goto exit_unlock; /* Wait for the reset to complete before allowing any further * MAC register accesses otherwise the MAC may lock up. @@ -1621,16 +1621,16 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev) do { ret = lan78xx_read_reg(dev, MAC_CR, &val); if (ret < 0) - goto done; + goto exit_unlock; if (!(val & MAC_CR_RST_)) { ret = 0; - goto done; + goto exit_unlock; } } while (!time_after(jiffies, start_time + HZ)); ret = -ETIMEDOUT; -done: +exit_unlock: mutex_unlock(&dev->phy_mutex); return ret; From 3a59437ed9072fa812e3e30bf0637ca94a239652 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:39 +0100 Subject: [PATCH 0484/1386] net: usb: lan78xx: rename phy_mutex to mdiobus_mutex Rename `phy_mutex` to `mdiobus_mutex` for clarity, as the mutex protects MDIO bus access rather than PHY-specific operations. Update all references to ensure consistency. 
Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241216120941.1690908-5-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 30301af29ab2..78c75599b8f1 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -439,7 +439,7 @@ struct lan78xx_net { struct usb_anchor deferred; struct mutex dev_mutex; /* serialise open/stop wrt suspend/resume */ - struct mutex phy_mutex; /* for phy access */ + struct mutex mdiobus_mutex; /* for MDIO bus access */ unsigned int pipe_in, pipe_out, pipe_intr; unsigned int bulk_in_delay; @@ -952,7 +952,7 @@ static int lan78xx_flush_rx_fifo(struct lan78xx_net *dev) return lan78xx_flush_fifo(dev, FCT_RX_CTL, FCT_RX_CTL_RST_); } -/* Loop until the read is completed with timeout called with phy_mutex held */ +/* Loop until the read is completed with timeout called with mdiobus_mutex held */ static int lan78xx_mdiobus_wait_not_busy(struct lan78xx_net *dev) { unsigned long start_time = jiffies; @@ -1596,7 +1596,7 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev) u32 val; int ret; - mutex_lock(&dev->phy_mutex); + mutex_lock(&dev->mdiobus_mutex); /* Resetting the device while there is activity on the MDIO * bus can result in the MAC interface locking up and not @@ -1631,7 +1631,7 @@ static int lan78xx_mac_reset(struct lan78xx_net *dev) ret = -ETIMEDOUT; exit_unlock: - mutex_unlock(&dev->phy_mutex); + mutex_unlock(&dev->mdiobus_mutex); return ret; } @@ -2249,7 +2249,7 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) if (ret < 0) return ret; - mutex_lock(&dev->phy_mutex); + mutex_lock(&dev->mdiobus_mutex); /* confirm MII not busy */ ret = lan78xx_mdiobus_wait_not_busy(dev); @@ -2273,7 +2273,7 @@ static int lan78xx_mdiobus_read(struct mii_bus *bus, int phy_id, int idx) ret = (int)(val & 0xFFFF); done: - 
mutex_unlock(&dev->phy_mutex); + mutex_unlock(&dev->mdiobus_mutex); usb_autopm_put_interface(dev->intf); return ret; @@ -2290,7 +2290,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, if (ret < 0) return ret; - mutex_lock(&dev->phy_mutex); + mutex_lock(&dev->mdiobus_mutex); /* confirm MII not busy */ ret = lan78xx_mdiobus_wait_not_busy(dev); @@ -2313,7 +2313,7 @@ static int lan78xx_mdiobus_write(struct mii_bus *bus, int phy_id, int idx, goto done; done: - mutex_unlock(&dev->phy_mutex); + mutex_unlock(&dev->mdiobus_mutex); usb_autopm_put_interface(dev->intf); return ret; } @@ -4476,7 +4476,7 @@ static int lan78xx_probe(struct usb_interface *intf, skb_queue_head_init(&dev->rxq_done); skb_queue_head_init(&dev->txq_pend); skb_queue_head_init(&dev->rxq_overflow); - mutex_init(&dev->phy_mutex); + mutex_init(&dev->mdiobus_mutex); mutex_init(&dev->dev_mutex); ret = lan78xx_urb_config_init(dev); From d09de7ebd4abf26d9aee072b82a514c372d278b5 Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:40 +0100 Subject: [PATCH 0485/1386] net: usb: lan78xx: remove PHY register access from ethtool get_regs Remove PHY register handling from `lan78xx_get_regs` and `lan78xx_get_regs_len`. Since the controller can have different PHYs attached, the first 32 registers are not universally relevant or the most interesting. Simplify the implementation to focus on MAC and device registers. 
Signed-off-by: Oleksij Rempel Link: https://patch.msgid.link/20241216120941.1690908-6-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 78c75599b8f1..6c9dab290f3f 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -2096,10 +2096,7 @@ exit: static int lan78xx_get_regs_len(struct net_device *netdev) { - if (!netdev->phydev) - return (sizeof(lan78xx_regs)); - else - return (sizeof(lan78xx_regs) + PHY_REG_SIZE); + return sizeof(lan78xx_regs); } static void @@ -2109,7 +2106,7 @@ lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs, struct lan78xx_net *dev = netdev_priv(netdev); unsigned int data_count = 0; u32 *data = buf; - int i, j, ret; + int i, ret; /* Read Device/MAC registers */ for (i = 0; i < ARRAY_SIZE(lan78xx_regs); i++) { @@ -2124,22 +2121,6 @@ lan78xx_get_regs(struct net_device *netdev, struct ethtool_regs *regs, data_count++; } - if (!netdev->phydev) - return; - - /* Read PHY registers */ - for (j = 0; j < 32; i++, j++) { - ret = phy_read(netdev->phydev, j); - if (ret < 0) { - netdev_warn(dev->net, - "failed to read PHY register 0x%02x\n", j); - goto clean_data; - } - - data[i] = ret; - data_count++; - } - return; clean_data: From 01e2f4d55bda0e24548e1458e77975898683a2cd Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Mon, 16 Dec 2024 13:09:41 +0100 Subject: [PATCH 0486/1386] net: usb: lan78xx: Improve error handling in WoL operations Enhance error handling in Wake-on-LAN (WoL) operations: - Log a warning in `lan78xx_get_wol` if `lan78xx_read_reg` fails. - Check and handle errors from `device_set_wakeup_enable` and `phy_ethtool_set_wol` in `lan78xx_set_wol`. - Ensure proper cleanup with a unified error handling path. 
Signed-off-by: Oleksij Rempel Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241216120941.1690908-7-o.rempel@pengutronix.de Signed-off-by: Jakub Kicinski --- drivers/net/usb/lan78xx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6c9dab290f3f..a91bf9c7e31d 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1857,6 +1857,7 @@ static void lan78xx_get_wol(struct net_device *netdev, ret = lan78xx_read_reg(dev, USB_CFG0, &buf); if (unlikely(ret < 0)) { + netdev_warn(dev->net, "failed to get WoL %pe", ERR_PTR(ret)); wol->supported = 0; wol->wolopts = 0; } else { @@ -1888,10 +1889,13 @@ static int lan78xx_set_wol(struct net_device *netdev, pdata->wol = wol->wolopts; - device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); + ret = device_set_wakeup_enable(&dev->udev->dev, (bool)wol->wolopts); + if (ret < 0) + goto exit_pm_put; - phy_ethtool_set_wol(netdev->phydev, wol); + ret = phy_ethtool_set_wol(netdev->phydev, wol); +exit_pm_put: usb_autopm_put_interface(dev->intf); return ret; From 86331b510260bdb4b4b0dcac2eeb81a82eb161c3 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:26 +0800 Subject: [PATCH 0487/1386] net: hibmcge: Add debugfs supported in this module This patch initializes debugfs and creates root directory for each device. The tx_ring and rx_ring debugfs files are implemented together. 
Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216040532.1566229-2-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hibmcge/Makefile | 3 +- .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 94 +++++++++++++++++++ .../ethernet/hisilicon/hibmcge/hbg_debugfs.h | 12 +++ .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 29 +++++- 4 files changed, 135 insertions(+), 3 deletions(-) create mode 100644 drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c create mode 100644 drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.h diff --git a/drivers/net/ethernet/hisilicon/hibmcge/Makefile b/drivers/net/ethernet/hisilicon/hibmcge/Makefile index ae58ac38c206..1a0ec2fb8c24 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/Makefile +++ b/drivers/net/ethernet/hisilicon/hibmcge/Makefile @@ -5,4 +5,5 @@ obj-$(CONFIG_HIBMCGE) += hibmcge.o -hibmcge-objs = hbg_main.o hbg_hw.o hbg_mdio.o hbg_irq.o hbg_txrx.o hbg_ethtool.o +hibmcge-objs = hbg_main.o hbg_hw.o hbg_mdio.o hbg_irq.o hbg_txrx.o hbg_ethtool.o \ + hbg_debugfs.o diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c new file mode 100644 index 000000000000..773a6434b114 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include +#include +#include +#include "hbg_common.h" +#include "hbg_debugfs.h" +#include "hbg_hw.h" +#include "hbg_irq.h" +#include "hbg_txrx.h" + +static struct dentry *hbg_dbgfs_root; + +struct hbg_dbg_info { + const char *name; + int (*read)(struct seq_file *seq, void *data); +}; + +static void hbg_dbg_ring(struct hbg_priv *priv, struct hbg_ring *ring, + struct seq_file *s) +{ + u32 irq_mask = ring->dir == HBG_DIR_TX ? 
HBG_INT_MSK_TX_B : + HBG_INT_MSK_RX_B; + + seq_printf(s, "ring used num: %u\n", + hbg_get_queue_used_num(ring)); + seq_printf(s, "ring max num: %u\n", ring->len); + seq_printf(s, "ring head: %u, tail: %u\n", ring->head, ring->tail); + seq_printf(s, "fifo used num: %u\n", + hbg_hw_get_fifo_used_num(priv, ring->dir)); + seq_printf(s, "fifo max num: %u\n", + hbg_get_spec_fifo_max_num(priv, ring->dir)); + seq_printf(s, "irq enabled: %s\n", + str_true_false(hbg_hw_irq_is_enabled(priv, irq_mask))); +} + +static int hbg_dbg_tx_ring(struct seq_file *s, void *unused) +{ + struct net_device *netdev = dev_get_drvdata(s->private); + struct hbg_priv *priv = netdev_priv(netdev); + + hbg_dbg_ring(priv, &priv->tx_ring, s); + return 0; +} + +static int hbg_dbg_rx_ring(struct seq_file *s, void *unused) +{ + struct net_device *netdev = dev_get_drvdata(s->private); + struct hbg_priv *priv = netdev_priv(netdev); + + hbg_dbg_ring(priv, &priv->rx_ring, s); + return 0; +} + +static const struct hbg_dbg_info hbg_dbg_infos[] = { + { "tx_ring", hbg_dbg_tx_ring }, + { "rx_ring", hbg_dbg_rx_ring }, +}; + +static void hbg_debugfs_uninit(void *data) +{ + debugfs_remove_recursive((struct dentry *)data); +} + +void hbg_debugfs_init(struct hbg_priv *priv) +{ + const char *name = pci_name(priv->pdev); + struct device *dev = &priv->pdev->dev; + struct dentry *root; + u32 i; + + root = debugfs_create_dir(name, hbg_dbgfs_root); + + for (i = 0; i < ARRAY_SIZE(hbg_dbg_infos); i++) + debugfs_create_devm_seqfile(dev, hbg_dbg_infos[i].name, + root, hbg_dbg_infos[i].read); + + /* Ignore the failure because debugfs is not a key feature. 
*/ + devm_add_action_or_reset(dev, hbg_debugfs_uninit, root); +} + +void hbg_debugfs_register(void) +{ + hbg_dbgfs_root = debugfs_create_dir("hibmcge", NULL); +} + +void hbg_debugfs_unregister(void) +{ + debugfs_remove_recursive(hbg_dbgfs_root); + hbg_dbgfs_root = NULL; +} diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.h new file mode 100644 index 000000000000..80670d66bbeb --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2024 Hisilicon Limited. */ + +#ifndef __HBG_DEBUGFS_H +#define __HBG_DEBUGFS_H + +void hbg_debugfs_register(void); +void hbg_debugfs_unregister(void); + +void hbg_debugfs_init(struct hbg_priv *priv); + +#endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 75505fb5cc4a..7a03fdfa32a7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -11,6 +11,7 @@ #include "hbg_irq.h" #include "hbg_mdio.h" #include "hbg_txrx.h" +#include "hbg_debugfs.h" static void hbg_change_mtu(struct hbg_priv *priv, int new_mtu); @@ -160,7 +161,12 @@ static int hbg_init(struct hbg_priv *priv) if (ret) return ret; - return hbg_mdio_init(priv); + ret = hbg_mdio_init(priv); + if (ret) + return ret; + + hbg_debugfs_init(priv); + return 0; } static int hbg_pci_init(struct pci_dev *pdev) @@ -245,7 +251,26 @@ static struct pci_driver hbg_driver = { .id_table = hbg_pci_tbl, .probe = hbg_probe, }; -module_pci_driver(hbg_driver); + +static int __init hbg_module_init(void) +{ + int ret; + + hbg_debugfs_register(); + ret = pci_register_driver(&hbg_driver); + if (ret) + hbg_debugfs_unregister(); + + return ret; +} +module_init(hbg_module_init); + +static void __exit hbg_module_exit(void) +{ + pci_unregister_driver(&hbg_driver); + hbg_debugfs_unregister(); +} 
+module_exit(hbg_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Huawei Tech. Co., Ltd."); From df491c419bcb37af6e250d15a872218673141fb2 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:27 +0800 Subject: [PATCH 0488/1386] net: hibmcge: Add irq_info file to debugfs The driver requested three interrupts: "tx", "rx", "err". The err interrupt is a summary interrupt. We distinguish different errors based on the status register and mask. With "cat /proc/interrupts | grep hibmcge", we can't distinguish the detailed cause of the error, so we added this file to debugfs. The following effects are achieved: [root@localhost sjj]# cat /sys/kernel/debug/hibmcge/0000\:83\:00.1/irq_info RX : enabled: true , logged: false, count: 0 TX : enabled: true , logged: false, count: 0 MAC_MII_FIFO_ERR : enabled: false, logged: true , count: 0 MAC_PCS_RX_FIFO_ERR : enabled: false, logged: true , count: 0 MAC_PCS_TX_FIFO_ERR : enabled: false, logged: true , count: 0 MAC_APP_RX_FIFO_ERR : enabled: false, logged: true , count: 0 MAC_APP_TX_FIFO_ERR : enabled: false, logged: true , count: 0 SRAM_PARITY_ERR : enabled: true , logged: true , count: 0 TX_AHB_ERR : enabled: true , logged: true , count: 0 RX_BUF_AVL : enabled: true , logged: false, count: 0 REL_BUF_ERR : enabled: true , logged: true , count: 0 TXCFG_AVL : enabled: true , logged: false, count: 0 TX_DROP : enabled: true , logged: false, count: 0 RX_DROP : enabled: true , logged: false, count: 0 RX_AHB_ERR : enabled: true , logged: true , count: 0 MAC_FIFO_ERR : enabled: true , logged: false, count: 0 RBREQ_ERR : enabled: true , logged: false, count: 0 WE_ERR : enabled: true , logged: false, count: 0 The irq framework of the hibmcge driver also includes tx/rx interrupts. Therefore, TX and RX are not moved separately from this file.
Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216040532.1566229-3-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 773a6434b114..56d8599563c0 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -55,9 +55,31 @@ static int hbg_dbg_rx_ring(struct seq_file *s, void *unused) return 0; } +static int hbg_dbg_irq_info(struct seq_file *s, void *unused) +{ + struct net_device *netdev = dev_get_drvdata(s->private); + struct hbg_priv *priv = netdev_priv(netdev); + struct hbg_irq_info *info; + u32 i; + + for (i = 0; i < priv->vectors.info_array_len; i++) { + info = &priv->vectors.info_array[i]; + seq_printf(s, + "%-20s: enabled: %-5s, logged: %-5s, count: %llu\n", + info->name, + str_true_false(hbg_hw_irq_is_enabled(priv, + info->mask)), + str_true_false(info->need_print), + info->count); + } + + return 0; +} + static const struct hbg_dbg_info hbg_dbg_infos[] = { { "tx_ring", hbg_dbg_tx_ring }, { "rx_ring", hbg_dbg_rx_ring }, + { "irq_info", hbg_dbg_irq_info }, }; static void hbg_debugfs_uninit(void *data) From 37b367d60d0f91260cd787ffbfba8e71e8f6fc7c Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:28 +0800 Subject: [PATCH 0489/1386] net: hibmcge: Add unicast frame filter supported in this module MAC supports filtering unmatched unicast packets according to the MAC address table. This patch adds the support for unicast frame filtering. To support automatic restoration of MAC entries after reset, the driver saves a copy of MAC entries in the driver. 
Signed-off-by: Jijie Shao Reviewed-by: Hariprasad Kelam Link: https://patch.msgid.link/20241216040532.1566229-4-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_common.h | 12 ++ .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 22 +++ .../net/ethernet/hisilicon/hibmcge/hbg_hw.c | 17 ++- .../net/ethernet/hisilicon/hibmcge/hbg_hw.h | 3 +- .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 136 +++++++++++++++++- .../net/ethernet/hisilicon/hibmcge/hbg_reg.h | 3 + 6 files changed, 187 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index 96daf058d387..9bb3abe88377 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -84,6 +84,7 @@ struct hbg_dev_specs { u32 vlan_layers; u32 max_mtu; u32 min_mtu; + u32 uc_mac_num; u32 max_frame_len; u32 rx_buf_size; @@ -116,6 +117,16 @@ struct hbg_mac { u32 link_status; }; +struct hbg_mac_table_entry { + u8 addr[ETH_ALEN]; +}; + +struct hbg_mac_filter { + struct hbg_mac_table_entry *mac_table; + u32 table_max_len; + bool enabled; +}; + struct hbg_priv { struct net_device *netdev; struct pci_dev *pdev; @@ -126,6 +137,7 @@ struct hbg_priv { struct hbg_vector vectors; struct hbg_ring tx_ring; struct hbg_ring rx_ring; + struct hbg_mac_filter filter; }; #endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 56d8599563c0..616b86333eec 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -76,10 +76,32 @@ static int hbg_dbg_irq_info(struct seq_file *s, void *unused) return 0; } +static int hbg_dbg_mac_table(struct seq_file *s, void *unused) +{ + struct net_device *netdev = dev_get_drvdata(s->private); + struct hbg_priv *priv = netdev_priv(netdev); + struct hbg_mac_filter *filter; + u32 i; 
+ + filter = &priv->filter; + seq_printf(s, "mac addr max count: %u\n", filter->table_max_len); + seq_printf(s, "filter enabled: %s\n", str_true_false(filter->enabled)); + + for (i = 0; i < filter->table_max_len; i++) { + if (is_zero_ether_addr(filter->mac_table[i].addr)) + continue; + + seq_printf(s, "[%u] %pM\n", i, filter->mac_table[i].addr); + } + + return 0; +} + static const struct hbg_dbg_info hbg_dbg_infos[] = { { "tx_ring", hbg_dbg_tx_ring }, { "rx_ring", hbg_dbg_rx_ring }, { "irq_info", hbg_dbg_irq_info }, + { "mac_table", hbg_dbg_mac_table }, }; static void hbg_debugfs_uninit(void *data) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 05295c2ad439..29d66a0ea0a6 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -67,6 +67,8 @@ static int hbg_hw_dev_specs_init(struct hbg_priv *priv) specs->vlan_layers = hbg_reg_read(priv, HBG_REG_VLAN_LAYERS_ADDR); specs->rx_fifo_num = hbg_reg_read(priv, HBG_REG_RX_FIFO_NUM_ADDR); specs->tx_fifo_num = hbg_reg_read(priv, HBG_REG_TX_FIFO_NUM_ADDR); + specs->uc_mac_num = hbg_reg_read(priv, HBG_REG_UC_MAC_NUM_ADDR); + mac_addr = hbg_reg_read64(priv, HBG_REG_MAC_ADDR_ADDR); u64_to_ether_addr(mac_addr, (u8 *)specs->mac_addr.sa_data); @@ -135,9 +137,13 @@ void hbg_hw_irq_enable(struct hbg_priv *priv, u32 mask, bool enable) hbg_reg_write(priv, HBG_REG_CF_INTRPT_MSK_ADDR, value); } -void hbg_hw_set_uc_addr(struct hbg_priv *priv, u64 mac_addr) +void hbg_hw_set_uc_addr(struct hbg_priv *priv, u64 mac_addr, u32 index) { - hbg_reg_write64(priv, HBG_REG_STATION_ADDR_LOW_2_ADDR, mac_addr); + u32 addr; + + /* mac addr is u64, so the addr offset is 0x8 */ + addr = HBG_REG_STATION_ADDR_LOW_2_ADDR + (index * 0x8); + hbg_reg_write64(priv, addr, mac_addr); } static void hbg_hw_set_pcu_max_frame_len(struct hbg_priv *priv, @@ -207,6 +213,13 @@ void hbg_hw_adjust_link(struct hbg_priv *priv, u32 speed, u32 duplex) 
HBG_REG_DUPLEX_B, duplex); } +/* only support uc filter */ +void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable) +{ + hbg_reg_write_field(priv, HBG_REG_REC_FILT_CTRL_ADDR, + HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B, enable); +} + static void hbg_hw_init_transmit_ctrl(struct hbg_priv *priv) { u32 ctrl = 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h index 14fb39241c93..6eb4b7d2cba8 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h @@ -51,9 +51,10 @@ bool hbg_hw_irq_is_enabled(struct hbg_priv *priv, u32 mask); void hbg_hw_irq_enable(struct hbg_priv *priv, u32 mask, bool enable); void hbg_hw_set_mtu(struct hbg_priv *priv, u16 mtu); void hbg_hw_mac_enable(struct hbg_priv *priv, u32 enable); -void hbg_hw_set_uc_addr(struct hbg_priv *priv, u64 mac_addr); +void hbg_hw_set_uc_addr(struct hbg_priv *priv, u64 mac_addr, u32 index); u32 hbg_hw_get_fifo_used_num(struct hbg_priv *priv, enum hbg_dir dir); void hbg_hw_set_tx_desc(struct hbg_priv *priv, struct hbg_tx_desc *tx_desc); void hbg_hw_fill_buffer(struct hbg_priv *priv, u32 buffer_dma_addr); +void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable); #endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 7a03fdfa32a7..578ba8ee409b 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -57,7 +57,7 @@ static int hbg_hw_txrx_clear(struct hbg_priv *priv) /* After reset, regs need to be reconfigured */ hbg_hw_init(priv); - hbg_hw_set_uc_addr(priv, ether_addr_to_u64(priv->netdev->dev_addr)); + hbg_hw_set_uc_addr(priv, ether_addr_to_u64(priv->netdev->dev_addr), 0); hbg_change_mtu(priv, priv->netdev->mtu); return 0; @@ -75,19 +75,123 @@ static int hbg_net_stop(struct net_device *netdev) return hbg_hw_txrx_clear(priv); } +static void 
hbg_update_promisc_mode(struct net_device *netdev, bool overflow) +{ + struct hbg_priv *priv = netdev_priv(netdev); + + /* Only when not table_overflow, and netdev->flags not set IFF_PROMISC, + * The MAC filter will be enabled. + * Otherwise the filter will be disabled. + */ + priv->filter.enabled = !(overflow || (netdev->flags & IFF_PROMISC)); + hbg_hw_set_mac_filter_enable(priv, priv->filter.enabled); +} + +static void hbg_set_mac_to_mac_table(struct hbg_priv *priv, + u32 index, const u8 *addr) +{ + if (addr) { + ether_addr_copy(priv->filter.mac_table[index].addr, addr); + hbg_hw_set_uc_addr(priv, ether_addr_to_u64(addr), index); + } else { + eth_zero_addr(priv->filter.mac_table[index].addr); + hbg_hw_set_uc_addr(priv, 0, index); + } +} + +static int hbg_get_index_from_mac_table(struct hbg_priv *priv, + const u8 *addr, u32 *index) +{ + u32 i; + + for (i = 0; i < priv->filter.table_max_len; i++) + if (ether_addr_equal(priv->filter.mac_table[i].addr, addr)) { + *index = i; + return 0; + } + + return -EINVAL; +} + +static int hbg_add_mac_to_filter(struct hbg_priv *priv, const u8 *addr) +{ + u32 index; + + /* already exists */ + if (!hbg_get_index_from_mac_table(priv, addr, &index)) + return 0; + + for (index = 0; index < priv->filter.table_max_len; index++) + if (is_zero_ether_addr(priv->filter.mac_table[index].addr)) { + hbg_set_mac_to_mac_table(priv, index, addr); + return 0; + } + + return -ENOSPC; +} + +static void hbg_del_mac_from_filter(struct hbg_priv *priv, const u8 *addr) +{ + u32 index; + + /* not exists */ + if (hbg_get_index_from_mac_table(priv, addr, &index)) + return; + + hbg_set_mac_to_mac_table(priv, index, NULL); +} + +static int hbg_uc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct hbg_priv *priv = netdev_priv(netdev); + + return hbg_add_mac_to_filter(priv, addr); +} + +static int hbg_uc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + struct hbg_priv *priv = netdev_priv(netdev); + + if 
(ether_addr_equal(netdev->dev_addr, (u8 *)addr)) + return 0; + + hbg_del_mac_from_filter(priv, addr); + return 0; +} + +static void hbg_net_set_rx_mode(struct net_device *netdev) +{ + int ret; + + ret = __dev_uc_sync(netdev, hbg_uc_sync, hbg_uc_unsync); + + /* If ret != 0, overflow has occurred */ + hbg_update_promisc_mode(netdev, !!ret); +} + static int hbg_net_set_mac_address(struct net_device *netdev, void *addr) { struct hbg_priv *priv = netdev_priv(netdev); u8 *mac_addr; + bool exists; + u32 index; mac_addr = ((struct sockaddr *)addr)->sa_data; if (!is_valid_ether_addr(mac_addr)) return -EADDRNOTAVAIL; - hbg_hw_set_uc_addr(priv, ether_addr_to_u64(mac_addr)); - dev_addr_set(netdev, mac_addr); + /* The index of host mac is always 0. + * If new mac address already exists, + * delete the existing mac address and + * add it to the position with index 0. + */ + exists = !hbg_get_index_from_mac_table(priv, mac_addr, &index); + hbg_set_mac_to_mac_table(priv, 0, mac_addr); + if (exists) + hbg_set_mac_to_mac_table(priv, index, NULL); + dev_addr_set(netdev, mac_addr); return 0; } @@ -143,8 +247,28 @@ static const struct net_device_ops hbg_netdev_ops = { .ndo_set_mac_address = hbg_net_set_mac_address, .ndo_change_mtu = hbg_net_change_mtu, .ndo_tx_timeout = hbg_net_tx_timeout, + .ndo_set_rx_mode = hbg_net_set_rx_mode, }; +static int hbg_mac_filter_init(struct hbg_priv *priv) +{ + struct hbg_dev_specs *dev_specs = &priv->dev_specs; + struct hbg_mac_filter *filter = &priv->filter; + struct hbg_mac_table_entry *tmp_table; + + tmp_table = devm_kcalloc(&priv->pdev->dev, dev_specs->uc_mac_num, + sizeof(*tmp_table), GFP_KERNEL); + if (!tmp_table) + return -ENOMEM; + + filter->mac_table = tmp_table; + filter->table_max_len = dev_specs->uc_mac_num; + filter->enabled = true; + + hbg_hw_set_mac_filter_enable(priv, filter->enabled); + return 0; +} + static int hbg_init(struct hbg_priv *priv) { int ret; @@ -165,6 +289,10 @@ static int hbg_init(struct hbg_priv *priv) if (ret) return 
ret; + ret = hbg_mac_filter_init(priv); + if (ret) + return ret; + hbg_debugfs_init(priv); return 0; } @@ -222,6 +350,8 @@ static int hbg_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) return ret; + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netdev->max_mtu = priv->dev_specs.max_mtu; netdev->min_mtu = priv->dev_specs.min_mtu; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index 57d81c6d7633..8993f57ecea4 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -10,6 +10,7 @@ #define HBG_REG_MAC_ID_ADDR 0x0008 #define HBG_REG_PHY_ID_ADDR 0x000C #define HBG_REG_MAC_ADDR_ADDR 0x0010 +#define HBG_REG_UC_MAC_NUM_ADDR 0x0018 #define HBG_REG_MDIO_FREQ_ADDR 0x0024 #define HBG_REG_MAX_MTU_ADDR 0x0028 #define HBG_REG_MIN_MTU_ADDR 0x002C @@ -47,6 +48,8 @@ #define HBG_REG_TRANSMIT_CTRL_PAD_EN_B BIT(7) #define HBG_REG_TRANSMIT_CTRL_CRC_ADD_B BIT(6) #define HBG_REG_TRANSMIT_CTRL_AN_EN_B BIT(5) +#define HBG_REG_REC_FILT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0064) +#define HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B BIT(0) #define HBG_REG_CF_CRC_STRIP_ADDR (HBG_REG_SGMII_BASE + 0x01B0) #define HBG_REG_CF_CRC_STRIP_B BIT(0) #define HBG_REG_MODE_CHANGE_EN_ADDR (HBG_REG_SGMII_BASE + 0x01B4) From 51574da8dce3c08f388893d727292364a1db8cc0 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:29 +0800 Subject: [PATCH 0490/1386] net: hibmcge: Add register dump supported in this module The dump register is an effective way to analyze problems. To ensure code flexibility, each register contains the type, offset, and value information. The ethtool does the pretty print based on these information. The driver can dynamically add or delete registers that need to be dumped in the future because information such as type and offset is contained. ethtool always can do pretty print. 
With the ethtool of a specific version, the following effects are achieved: [root@localhost sjj]# ./ethtool -d enp131s0f1 [SPEC] VALID [0x0000]: 0x00000001 [SPEC] EVENT_REQ [0x0004]: 0x00000000 [SPEC] MAC_ID [0x0008]: 0x00000002 [SPEC] PHY_ADDR [0x000c]: 0x00000002 [SPEC] MAC_ADDR_L [0x0010]: 0x00000808 [SPEC] MAC_ADDR_H [0x0014]: 0x08080802 [SPEC] UC_MAX_NUM [0x0018]: 0x00000004 [SPEC] MAX_MTU [0x0028]: 0x00000fc2 [SPEC] MIN_MTU [0x002c]: 0x00000100 [SPEC] TX_FIFO_NUM [0x0030]: 0x00000040 [SPEC] RX_FIFO_NUM [0x0034]: 0x0000007f [SPEC] VLAN_LAYERS [0x0038]: 0x00000002 [MDIO] COMMAND_REG [0x0000]: 0x0000185f [MDIO] ADDR_REG [0x0004]: 0x00000000 [MDIO] WDATA_REG [0x0008]: 0x0000a000 [MDIO] RDATA_REG [0x000c]: 0x00000000 [MDIO] STA_REG [0x0010]: 0x00000000 [GMAC] DUPLEX_TYPE [0x0008]: 0x00000001 [GMAC] FD_FC_TYPE [0x000c]: 0x00008808 [GMAC] FC_TX_TIMER [0x001c]: 0x000000ff [GMAC] FD_FC_ADDR_LOW [0x0020]: 0xc2000001 [GMAC] FD_FC_ADDR_HIGH [0x0024]: 0x00000180 [GMAC] MAX_FRM_SIZE [0x003c]: 0x000005f6 [GMAC] PORT_MODE [0x0040]: 0x00000002 [GMAC] PORT_EN [0x0044]: 0x00000006 ... 
Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216040532.1566229-5-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_ethtool.c | 140 ++++++++++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_reg.h | 34 +++++ 2 files changed, 174 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index c3370114aef3..e7f169d2abb7 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -3,12 +3,152 @@ #include #include +#include "hbg_common.h" #include "hbg_ethtool.h" +#include "hbg_hw.h" + +enum hbg_reg_dump_type { + HBG_DUMP_REG_TYPE_SPEC = 0, + HBG_DUMP_REG_TYPE_MDIO, + HBG_DUMP_REG_TYPE_GMAC, + HBG_DUMP_REG_TYPE_PCU, +}; + +struct hbg_reg_info { + u32 type; + u32 offset; + u32 val; +}; + +#define HBG_DUMP_SPEC_I(offset) {HBG_DUMP_REG_TYPE_SPEC, offset, 0} +#define HBG_DUMP_MDIO_I(offset) {HBG_DUMP_REG_TYPE_MDIO, offset, 0} +#define HBG_DUMP_GMAC_I(offset) {HBG_DUMP_REG_TYPE_GMAC, offset, 0} +#define HBG_DUMP_PCU_I(offset) {HBG_DUMP_REG_TYPE_PCU, offset, 0} + +static const struct hbg_reg_info hbg_dump_reg_infos[] = { + /* dev specs */ + HBG_DUMP_SPEC_I(HBG_REG_SPEC_VALID_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_EVENT_REQ_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MAC_ID_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_PHY_ID_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MAC_ADDR_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MAC_ADDR_HIGH_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_UC_MAC_NUM_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MDIO_FREQ_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MAX_MTU_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_MIN_MTU_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_TX_FIFO_NUM_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_RX_FIFO_NUM_ADDR), + HBG_DUMP_SPEC_I(HBG_REG_VLAN_LAYERS_ADDR), + + /* mdio */ + HBG_DUMP_MDIO_I(HBG_REG_MDIO_COMMAND_ADDR), + HBG_DUMP_MDIO_I(HBG_REG_MDIO_ADDR_ADDR), + HBG_DUMP_MDIO_I(HBG_REG_MDIO_WDATA_ADDR), + 
HBG_DUMP_MDIO_I(HBG_REG_MDIO_RDATA_ADDR), + HBG_DUMP_MDIO_I(HBG_REG_MDIO_STA_ADDR), + + /* gmac */ + HBG_DUMP_GMAC_I(HBG_REG_DUPLEX_TYPE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_FD_FC_TYPE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_FC_TX_TIMER_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_FD_FC_ADDR_LOW_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_FD_FC_ADDR_HIGH_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_MAX_FRAME_SIZE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_PORT_MODE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_PORT_ENABLE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_PAUSE_ENABLE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_AN_NEG_STATE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_TRANSMIT_CTRL_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_REC_FILT_CTRL_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_LINE_LOOP_BACK_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_CF_CRC_STRIP_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_MODE_CHANGE_EN_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_LOOP_REG_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_RECV_CTRL_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_VLAN_CODE_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_0_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_0_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_1_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_1_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_2_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_2_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_3_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_3_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_4_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_4_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_LOW_5_ADDR), + HBG_DUMP_GMAC_I(HBG_REG_STATION_ADDR_HIGH_5_ADDR), + + /* pcu */ + HBG_DUMP_PCU_I(HBG_REG_TX_FIFO_THRSLD_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_FIFO_THRSLD_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CFG_FIFO_THRSLD_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_INTRPT_MSK_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_INTRPT_STAT_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_INTRPT_CLR_ADDR), + HBG_DUMP_PCU_I(HBG_REG_TX_BUS_ERR_ADDR_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_BUS_ERR_ADDR_ADDR), + HBG_DUMP_PCU_I(HBG_REG_MAX_FRAME_LEN_ADDR), + 
HBG_DUMP_PCU_I(HBG_REG_DEBUG_ST_MCH_ADDR), + HBG_DUMP_PCU_I(HBG_REG_FIFO_CURR_STATUS_ADDR), + HBG_DUMP_PCU_I(HBG_REG_FIFO_HIST_STATUS_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_CFF_DATA_NUM_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_TX_PAUSE_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_CFF_ADDR_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_BUF_SIZE_ADDR), + HBG_DUMP_PCU_I(HBG_REG_BUS_CTRL_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_CTRL_ADDR), + HBG_DUMP_PCU_I(HBG_REG_RX_PKT_MODE_ADDR), + HBG_DUMP_PCU_I(HBG_REG_DBG_ST0_ADDR), + HBG_DUMP_PCU_I(HBG_REG_DBG_ST1_ADDR), + HBG_DUMP_PCU_I(HBG_REG_DBG_ST2_ADDR), + HBG_DUMP_PCU_I(HBG_REG_BUS_RST_EN_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_TXINT_MSK_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_TXINT_STAT_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_TXINT_CLR_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_RXINT_MSK_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_RXINT_STAT_ADDR), + HBG_DUMP_PCU_I(HBG_REG_CF_IND_RXINT_CLR_ADDR), +}; + +static const u32 hbg_dump_type_base_array[] = { + [HBG_DUMP_REG_TYPE_SPEC] = 0, + [HBG_DUMP_REG_TYPE_MDIO] = HBG_REG_MDIO_BASE, + [HBG_DUMP_REG_TYPE_GMAC] = HBG_REG_SGMII_BASE, + [HBG_DUMP_REG_TYPE_PCU] = HBG_REG_SGMII_BASE, +}; + +static int hbg_ethtool_get_regs_len(struct net_device *netdev) +{ + return ARRAY_SIZE(hbg_dump_reg_infos) * sizeof(struct hbg_reg_info); +} + +static void hbg_ethtool_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *data) +{ + struct hbg_priv *priv = netdev_priv(netdev); + struct hbg_reg_info *info; + u32 i, offset = 0; + + regs->version = 0; + for (i = 0; i < ARRAY_SIZE(hbg_dump_reg_infos); i++) { + info = data + offset; + + *info = hbg_dump_reg_infos[i]; + info->val = hbg_reg_read(priv, info->offset); + info->offset -= hbg_dump_type_base_array[info->type]; + + offset += sizeof(*info); + } +} static const struct ethtool_ops hbg_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_regs_len = 
hbg_ethtool_get_regs_len, + .get_regs = hbg_ethtool_get_regs, }; void hbg_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index 8993f57ecea4..665666712c7c 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -10,6 +10,7 @@ #define HBG_REG_MAC_ID_ADDR 0x0008 #define HBG_REG_PHY_ID_ADDR 0x000C #define HBG_REG_MAC_ADDR_ADDR 0x0010 +#define HBG_REG_MAC_ADDR_HIGH_ADDR 0x0014 #define HBG_REG_UC_MAC_NUM_ADDR 0x0018 #define HBG_REG_MDIO_FREQ_ADDR 0x0024 #define HBG_REG_MAX_MTU_ADDR 0x0028 @@ -29,6 +30,7 @@ #define HBG_REG_MDIO_COMMAND_OP_M GENMASK(11, 10) #define HBG_REG_MDIO_COMMAND_PRTAD_M GENMASK(9, 5) #define HBG_REG_MDIO_COMMAND_DEVAD_M GENMASK(4, 0) +#define HBG_REG_MDIO_ADDR_ADDR (HBG_REG_MDIO_BASE + 0x0004) #define HBG_REG_MDIO_WDATA_ADDR (HBG_REG_MDIO_BASE + 0x0008) #define HBG_REG_MDIO_WDATA_M GENMASK(15, 0) #define HBG_REG_MDIO_RDATA_ADDR (HBG_REG_MDIO_BASE + 0x000C) @@ -37,6 +39,10 @@ /* GMAC */ #define HBG_REG_SGMII_BASE 0x10000 #define HBG_REG_DUPLEX_TYPE_ADDR (HBG_REG_SGMII_BASE + 0x0008) +#define HBG_REG_FD_FC_TYPE_ADDR (HBG_REG_SGMII_BASE + 0x000C) +#define HBG_REG_FC_TX_TIMER_ADDR (HBG_REG_SGMII_BASE + 0x001C) +#define HBG_REG_FD_FC_ADDR_LOW_ADDR (HBG_REG_SGMII_BASE + 0x0020) +#define HBG_REG_FD_FC_ADDR_HIGH_ADDR (HBG_REG_SGMII_BASE + 0x0024) #define HBG_REG_DUPLEX_B BIT(0) #define HBG_REG_MAX_FRAME_SIZE_ADDR (HBG_REG_SGMII_BASE + 0x003C) #define HBG_REG_PORT_MODE_ADDR (HBG_REG_SGMII_BASE + 0x0040) @@ -44,22 +50,40 @@ #define HBG_REG_PORT_ENABLE_ADDR (HBG_REG_SGMII_BASE + 0x0044) #define HBG_REG_PORT_ENABLE_RX_B BIT(1) #define HBG_REG_PORT_ENABLE_TX_B BIT(2) +#define HBG_REG_PAUSE_ENABLE_ADDR (HBG_REG_SGMII_BASE + 0x0048) +#define HBG_REG_AN_NEG_STATE_ADDR (HBG_REG_SGMII_BASE + 0x0058) #define HBG_REG_TRANSMIT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0060) #define 
HBG_REG_TRANSMIT_CTRL_PAD_EN_B BIT(7) #define HBG_REG_TRANSMIT_CTRL_CRC_ADD_B BIT(6) #define HBG_REG_TRANSMIT_CTRL_AN_EN_B BIT(5) #define HBG_REG_REC_FILT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0064) #define HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B BIT(0) +#define HBG_REG_LINE_LOOP_BACK_ADDR (HBG_REG_SGMII_BASE + 0x01A8) #define HBG_REG_CF_CRC_STRIP_ADDR (HBG_REG_SGMII_BASE + 0x01B0) #define HBG_REG_CF_CRC_STRIP_B BIT(0) #define HBG_REG_MODE_CHANGE_EN_ADDR (HBG_REG_SGMII_BASE + 0x01B4) #define HBG_REG_MODE_CHANGE_EN_B BIT(0) +#define HBG_REG_LOOP_REG_ADDR (HBG_REG_SGMII_BASE + 0x01DC) #define HBG_REG_RECV_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x01E0) #define HBG_REG_RECV_CTRL_STRIP_PAD_EN_B BIT(3) +#define HBG_REG_VLAN_CODE_ADDR (HBG_REG_SGMII_BASE + 0x01E8) +#define HBG_REG_STATION_ADDR_LOW_0_ADDR (HBG_REG_SGMII_BASE + 0x0200) +#define HBG_REG_STATION_ADDR_HIGH_0_ADDR (HBG_REG_SGMII_BASE + 0x0204) +#define HBG_REG_STATION_ADDR_LOW_1_ADDR (HBG_REG_SGMII_BASE + 0x0208) +#define HBG_REG_STATION_ADDR_HIGH_1_ADDR (HBG_REG_SGMII_BASE + 0x020C) #define HBG_REG_STATION_ADDR_LOW_2_ADDR (HBG_REG_SGMII_BASE + 0x0210) #define HBG_REG_STATION_ADDR_HIGH_2_ADDR (HBG_REG_SGMII_BASE + 0x0214) +#define HBG_REG_STATION_ADDR_LOW_3_ADDR (HBG_REG_SGMII_BASE + 0x0218) +#define HBG_REG_STATION_ADDR_HIGH_3_ADDR (HBG_REG_SGMII_BASE + 0x021C) +#define HBG_REG_STATION_ADDR_LOW_4_ADDR (HBG_REG_SGMII_BASE + 0x0220) +#define HBG_REG_STATION_ADDR_HIGH_4_ADDR (HBG_REG_SGMII_BASE + 0x0224) +#define HBG_REG_STATION_ADDR_LOW_5_ADDR (HBG_REG_SGMII_BASE + 0x0228) +#define HBG_REG_STATION_ADDR_HIGH_5_ADDR (HBG_REG_SGMII_BASE + 0x022C) /* PCU */ +#define HBG_REG_TX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0420) +#define HBG_REG_RX_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0424) +#define HBG_REG_CFG_FIFO_THRSLD_ADDR (HBG_REG_SGMII_BASE + 0x0428) #define HBG_REG_CF_INTRPT_MSK_ADDR (HBG_REG_SGMII_BASE + 0x042C) #define HBG_INT_MSK_WE_ERR_B BIT(31) #define HBG_INT_MSK_RBREQ_ERR_B BIT(30) @@ -81,11 +105,17 @@ #define 
HBG_INT_MSK_RX_B BIT(0) /* just used in driver */ #define HBG_REG_CF_INTRPT_STAT_ADDR (HBG_REG_SGMII_BASE + 0x0434) #define HBG_REG_CF_INTRPT_CLR_ADDR (HBG_REG_SGMII_BASE + 0x0438) +#define HBG_REG_TX_BUS_ERR_ADDR_ADDR (HBG_REG_SGMII_BASE + 0x043C) +#define HBG_REG_RX_BUS_ERR_ADDR_ADDR (HBG_REG_SGMII_BASE + 0x0440) #define HBG_REG_MAX_FRAME_LEN_ADDR (HBG_REG_SGMII_BASE + 0x0444) #define HBG_REG_MAX_FRAME_LEN_M GENMASK(15, 0) +#define HBG_REG_DEBUG_ST_MCH_ADDR (HBG_REG_SGMII_BASE + 0x0450) +#define HBG_REG_FIFO_CURR_STATUS_ADDR (HBG_REG_SGMII_BASE + 0x0454) +#define HBG_REG_FIFO_HIST_STATUS_ADDR (HBG_REG_SGMII_BASE + 0x0458) #define HBG_REG_CF_CFF_DATA_NUM_ADDR (HBG_REG_SGMII_BASE + 0x045C) #define HBG_REG_CF_CFF_DATA_NUM_ADDR_TX_M GENMASK(8, 0) #define HBG_REG_CF_CFF_DATA_NUM_ADDR_RX_M GENMASK(24, 16) +#define HBG_REG_CF_TX_PAUSE_ADDR (HBG_REG_SGMII_BASE + 0x0470) #define HBG_REG_TX_CFF_ADDR_0_ADDR (HBG_REG_SGMII_BASE + 0x0488) #define HBG_REG_TX_CFF_ADDR_1_ADDR (HBG_REG_SGMII_BASE + 0x048C) #define HBG_REG_TX_CFF_ADDR_2_ADDR (HBG_REG_SGMII_BASE + 0x0490) @@ -104,6 +134,10 @@ #define HBG_REG_RX_CTRL_RXBUF_1ST_SKIP_SIZE2_M GENMASK(3, 0) #define HBG_REG_RX_PKT_MODE_ADDR (HBG_REG_SGMII_BASE + 0x04F4) #define HBG_REG_RX_PKT_MODE_PARSE_MODE_M GENMASK(22, 21) +#define HBG_REG_DBG_ST0_ADDR (HBG_REG_SGMII_BASE + 0x05E4) +#define HBG_REG_DBG_ST1_ADDR (HBG_REG_SGMII_BASE + 0x05E8) +#define HBG_REG_DBG_ST2_ADDR (HBG_REG_SGMII_BASE + 0x05EC) +#define HBG_REG_BUS_RST_EN_ADDR (HBG_REG_SGMII_BASE + 0x0688) #define HBG_REG_CF_IND_TXINT_MSK_ADDR (HBG_REG_SGMII_BASE + 0x0694) #define HBG_REG_IND_INTR_MASK_B BIT(0) #define HBG_REG_CF_IND_TXINT_STAT_ADDR (HBG_REG_SGMII_BASE + 0x0698) From 3a03763f38769707a4dd0ca44474806fed3a7f81 Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:30 +0800 Subject: [PATCH 0491/1386] net: hibmcge: Add pauseparam supported in this module The MAC can automatically send or respond to pause frames. 
This patch supports the function of enabling pause frames by using ethtool. Signed-off-by: Jijie Shao Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241216040532.1566229-6-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hibmcge/hbg_common.h | 1 + .../ethernet/hisilicon/hibmcge/hbg_ethtool.c | 25 +++++++++++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_hw.c | 21 ++++++++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_hw.h | 3 +++ .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 1 + .../net/ethernet/hisilicon/hibmcge/hbg_mdio.c | 15 +++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_reg.h | 2 ++ 7 files changed, 68 insertions(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index 9bb3abe88377..cc143a536713 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -115,6 +115,7 @@ struct hbg_mac { u32 duplex; u32 autoneg; u32 link_status; + u32 pause_autoneg; }; struct hbg_mac_table_entry { diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index e7f169d2abb7..a821a92db43d 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -143,12 +143,37 @@ static void hbg_ethtool_get_regs(struct net_device *netdev, } } +static void hbg_ethtool_get_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *param) +{ + struct hbg_priv *priv = netdev_priv(net_dev); + + param->autoneg = priv->mac.pause_autoneg; + hbg_hw_get_pause_enable(priv, ¶m->tx_pause, ¶m->rx_pause); +} + +static int hbg_ethtool_set_pauseparam(struct net_device *net_dev, + struct ethtool_pauseparam *param) +{ + struct hbg_priv *priv = netdev_priv(net_dev); + + priv->mac.pause_autoneg = param->autoneg; + phy_set_asym_pause(priv->mac.phydev, param->rx_pause, param->tx_pause); + + if 
(!param->autoneg) + hbg_hw_set_pause_enable(priv, param->tx_pause, param->rx_pause); + + return 0; +} + static const struct ethtool_ops hbg_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_regs_len = hbg_ethtool_get_regs_len, .get_regs = hbg_ethtool_get_regs, + .get_pauseparam = hbg_ethtool_get_pauseparam, + .set_pauseparam = hbg_ethtool_set_pauseparam, }; void hbg_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 29d66a0ea0a6..0cbe9f7229b3 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -220,6 +220,27 @@ void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable) HBG_REG_REC_FILT_CTRL_UC_MATCH_EN_B, enable); } +void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en) +{ + hbg_reg_write_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, + HBG_REG_PAUSE_ENABLE_TX_B, tx_en); + hbg_reg_write_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, + HBG_REG_PAUSE_ENABLE_RX_B, rx_en); +} + +void hbg_hw_get_pause_enable(struct hbg_priv *priv, u32 *tx_en, u32 *rx_en) +{ + *tx_en = hbg_reg_read_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, + HBG_REG_PAUSE_ENABLE_TX_B); + *rx_en = hbg_reg_read_field(priv, HBG_REG_PAUSE_ENABLE_ADDR, + HBG_REG_PAUSE_ENABLE_RX_B); +} + +void hbg_hw_set_rx_pause_mac_addr(struct hbg_priv *priv, u64 mac_addr) +{ + hbg_reg_write64(priv, HBG_REG_FD_FC_ADDR_LOW_ADDR, mac_addr); +} + static void hbg_hw_init_transmit_ctrl(struct hbg_priv *priv) { u32 ctrl = 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h index 6eb4b7d2cba8..a4a049b5121d 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.h @@ -56,5 +56,8 @@ u32 hbg_hw_get_fifo_used_num(struct 
hbg_priv *priv, enum hbg_dir dir); void hbg_hw_set_tx_desc(struct hbg_priv *priv, struct hbg_tx_desc *tx_desc); void hbg_hw_fill_buffer(struct hbg_priv *priv, u32 buffer_dma_addr); void hbg_hw_set_mac_filter_enable(struct hbg_priv *priv, u32 enable); +void hbg_hw_set_pause_enable(struct hbg_priv *priv, u32 tx_en, u32 rx_en); +void hbg_hw_get_pause_enable(struct hbg_priv *priv, u32 *tx_en, u32 *rx_en); +void hbg_hw_set_rx_pause_mac_addr(struct hbg_priv *priv, u64 mac_addr); #endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 578ba8ee409b..8a2d63c0c196 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -191,6 +191,7 @@ static int hbg_net_set_mac_address(struct net_device *netdev, void *addr) if (exists) hbg_set_mac_to_mac_table(priv, index, NULL); + hbg_hw_set_rx_pause_mac_addr(priv, ether_addr_to_u64(mac_addr)); dev_addr_set(netdev, mac_addr); return 0; } diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index a3479fba8501..db6bc4cfb971 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -114,6 +114,19 @@ static void hbg_mdio_init_hw(struct hbg_priv *priv) hbg_mdio_set_command(mac, cmd); } +static void hbg_flowctrl_cfg(struct hbg_priv *priv) +{ + struct phy_device *phydev = priv->mac.phydev; + bool rx_pause; + bool tx_pause; + + if (!priv->mac.pause_autoneg) + return; + + phy_get_pause(phydev, &tx_pause, &rx_pause); + hbg_hw_set_pause_enable(priv, tx_pause, rx_pause); +} + static void hbg_phy_adjust_link(struct net_device *netdev) { struct hbg_priv *priv = netdev_priv(netdev); @@ -140,6 +153,7 @@ static void hbg_phy_adjust_link(struct net_device *netdev) priv->mac.duplex = phydev->duplex; priv->mac.autoneg = phydev->autoneg; hbg_hw_adjust_link(priv, speed, phydev->duplex); + 
hbg_flowctrl_cfg(priv); } priv->mac.link_status = phydev->link; @@ -168,6 +182,7 @@ static int hbg_phy_connect(struct hbg_priv *priv) return ret; phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT); + phy_support_asym_pause(phydev); phy_attached_info(phydev); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h index 665666712c7c..f12efc12f3c5 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_reg.h @@ -51,6 +51,8 @@ #define HBG_REG_PORT_ENABLE_RX_B BIT(1) #define HBG_REG_PORT_ENABLE_TX_B BIT(2) #define HBG_REG_PAUSE_ENABLE_ADDR (HBG_REG_SGMII_BASE + 0x0048) +#define HBG_REG_PAUSE_ENABLE_RX_B BIT(0) +#define HBG_REG_PAUSE_ENABLE_TX_B BIT(1) #define HBG_REG_AN_NEG_STATE_ADDR (HBG_REG_SGMII_BASE + 0x0058) #define HBG_REG_TRANSMIT_CTRL_ADDR (HBG_REG_SGMII_BASE + 0x0060) #define HBG_REG_TRANSMIT_CTRL_PAD_EN_B BIT(7) From 3f5a61f6d504f55ed1a36cce044d5123d508721f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:31 +0800 Subject: [PATCH 0492/1386] net: hibmcge: Add reset supported in this module Sometimes, if the port doesn't work, we can try to fix it by resetting it. This patch supports reset triggered by ethtool or FLR of PCIe, For example: ethtool --reset eth0 dedicated echo 1 > /sys/bus/pci/devices/0000\:83\:00.1/reset We hope that the reset can be performed only when the port is down, and the port cannot be up during the reset. Therefore, the entire reset process is protected by the rtnl lock. After the reset is complete, the hardware registers are restored to their default values. Therefore, some rebuild operations are required to rewrite the user configuration to the registers. 
Signed-off-by: Jijie Shao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241216040532.1566229-7-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/hisilicon/hibmcge/Makefile | 2 +- .../ethernet/hisilicon/hibmcge/hbg_common.h | 16 +++ .../ethernet/hisilicon/hibmcge/hbg_debugfs.c | 22 +++ .../net/ethernet/hisilicon/hibmcge/hbg_err.c | 134 ++++++++++++++++++ .../net/ethernet/hisilicon/hibmcge/hbg_err.h | 13 ++ .../ethernet/hisilicon/hibmcge/hbg_ethtool.c | 15 ++ .../net/ethernet/hisilicon/hibmcge/hbg_hw.c | 10 +- .../net/ethernet/hisilicon/hibmcge/hbg_main.c | 35 +++-- 8 files changed, 226 insertions(+), 21 deletions(-) create mode 100644 drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c create mode 100644 drivers/net/ethernet/hisilicon/hibmcge/hbg_err.h diff --git a/drivers/net/ethernet/hisilicon/hibmcge/Makefile b/drivers/net/ethernet/hisilicon/hibmcge/Makefile index 1a0ec2fb8c24..7ea15f9ef849 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/Makefile +++ b/drivers/net/ethernet/hisilicon/hibmcge/Makefile @@ -6,4 +6,4 @@ obj-$(CONFIG_HIBMCGE) += hibmcge.o hibmcge-objs = hbg_main.o hbg_hw.o hbg_mdio.o hbg_irq.o hbg_txrx.o hbg_ethtool.o \ - hbg_debugfs.o + hbg_debugfs.o hbg_err.o diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h index cc143a536713..b4300d8ea4ad 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_common.h @@ -4,6 +4,7 @@ #ifndef __HBG_COMMON_H #define __HBG_COMMON_H +#include #include #include #include "hbg_reg.h" @@ -33,6 +34,14 @@ enum hbg_tx_state { enum hbg_nic_state { HBG_NIC_STATE_EVENT_HANDLING = 0, + HBG_NIC_STATE_RESETTING, + HBG_NIC_STATE_RESET_FAIL, +}; + +enum hbg_reset_type { + HBG_RESET_TYPE_NONE = 0, + HBG_RESET_TYPE_FLR, + HBG_RESET_TYPE_FUNCTION, }; struct hbg_buffer { @@ -128,6 +137,11 @@ struct hbg_mac_filter { bool enabled; }; +/* saved for restore after rest */ +struct 
hbg_user_def { + struct ethtool_pauseparam pause_param; +}; + struct hbg_priv { struct net_device *netdev; struct pci_dev *pdev; @@ -139,6 +153,8 @@ struct hbg_priv { struct hbg_ring tx_ring; struct hbg_ring rx_ring; struct hbg_mac_filter filter; + enum hbg_reset_type reset_type; + struct hbg_user_def user_def; }; #endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c index 616b86333eec..8473c43d171a 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_debugfs.c @@ -19,6 +19,8 @@ struct hbg_dbg_info { int (*read)(struct seq_file *seq, void *data); }; +#define state_str_true_false(p, s) str_true_false(test_bit(s, &(p)->state)) + static void hbg_dbg_ring(struct hbg_priv *priv, struct hbg_ring *ring, struct seq_file *s) { @@ -97,11 +99,31 @@ static int hbg_dbg_mac_table(struct seq_file *s, void *unused) return 0; } +static const char * const reset_type_str[] = {"None", "FLR", "Function"}; + +static int hbg_dbg_nic_state(struct seq_file *s, void *unused) +{ + struct net_device *netdev = dev_get_drvdata(s->private); + struct hbg_priv *priv = netdev_priv(netdev); + + seq_printf(s, "event handling state: %s\n", + state_str_true_false(priv, HBG_NIC_STATE_EVENT_HANDLING)); + seq_printf(s, "resetting state: %s\n", + state_str_true_false(priv, HBG_NIC_STATE_RESETTING)); + seq_printf(s, "reset fail state: %s\n", + state_str_true_false(priv, HBG_NIC_STATE_RESET_FAIL)); + seq_printf(s, "last reset type: %s\n", + reset_type_str[priv->reset_type]); + + return 0; +} + static const struct hbg_dbg_info hbg_dbg_infos[] = { { "tx_ring", hbg_dbg_tx_ring }, { "rx_ring", hbg_dbg_rx_ring }, { "irq_info", hbg_dbg_irq_info }, { "mac_table", hbg_dbg_mac_table }, + { "nic_state", hbg_dbg_nic_state }, }; static void hbg_debugfs_uninit(void *data) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c new 
file mode 100644 index 000000000000..4d1f4a33391a --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0+ +// Copyright (c) 2024 Hisilicon Limited. + +#include +#include +#include +#include +#include "hbg_common.h" +#include "hbg_err.h" +#include "hbg_hw.h" + +static void hbg_restore_mac_table(struct hbg_priv *priv) +{ + struct hbg_mac_filter *filter = &priv->filter; + u64 addr; + u32 i; + + for (i = 0; i < filter->table_max_len; i++) + if (!is_zero_ether_addr(filter->mac_table[i].addr)) { + addr = ether_addr_to_u64(filter->mac_table[i].addr); + hbg_hw_set_uc_addr(priv, addr, i); + } + + hbg_hw_set_mac_filter_enable(priv, priv->filter.enabled); +} + +static void hbg_restore_user_def_settings(struct hbg_priv *priv) +{ + struct ethtool_pauseparam *pause_param = &priv->user_def.pause_param; + + hbg_restore_mac_table(priv); + hbg_hw_set_mtu(priv, priv->netdev->mtu); + hbg_hw_set_pause_enable(priv, pause_param->tx_pause, + pause_param->rx_pause); +} + +int hbg_rebuild(struct hbg_priv *priv) +{ + int ret; + + ret = hbg_hw_init(priv); + if (ret) + return ret; + + hbg_restore_user_def_settings(priv); + return 0; +} + +static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type) +{ + int ret; + + ASSERT_RTNL(); + + if (netif_running(priv->netdev)) { + dev_warn(&priv->pdev->dev, + "failed to reset because port is up\n"); + return -EBUSY; + } + + priv->reset_type = type; + set_bit(HBG_NIC_STATE_RESETTING, &priv->state); + clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + ret = hbg_hw_event_notify(priv, HBG_HW_EVENT_RESET); + if (ret) { + set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); + } + + return ret; +} + +static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type) +{ + int ret; + + if (!test_bit(HBG_NIC_STATE_RESETTING, &priv->state) || + type != priv->reset_type) + return 0; + + ASSERT_RTNL(); + + 
clear_bit(HBG_NIC_STATE_RESETTING, &priv->state); + ret = hbg_rebuild(priv); + if (ret) { + set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); + dev_err(&priv->pdev->dev, "failed to rebuild after reset\n"); + return ret; + } + + dev_info(&priv->pdev->dev, "reset done\n"); + return ret; +} + +/* must be protected by rtnl lock */ +int hbg_reset(struct hbg_priv *priv) +{ + int ret; + + ASSERT_RTNL(); + ret = hbg_reset_prepare(priv, HBG_RESET_TYPE_FUNCTION); + if (ret) + return ret; + + return hbg_reset_done(priv, HBG_RESET_TYPE_FUNCTION); +} + +static void hbg_pci_err_reset_prepare(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct hbg_priv *priv = netdev_priv(netdev); + + rtnl_lock(); + hbg_reset_prepare(priv, HBG_RESET_TYPE_FLR); +} + +static void hbg_pci_err_reset_done(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct hbg_priv *priv = netdev_priv(netdev); + + hbg_reset_done(priv, HBG_RESET_TYPE_FLR); + rtnl_unlock(); +} + +static const struct pci_error_handlers hbg_pci_err_handler = { + .reset_prepare = hbg_pci_err_reset_prepare, + .reset_done = hbg_pci_err_reset_done, +}; + +void hbg_set_pci_err_handler(struct pci_driver *pdrv) +{ + pdrv->err_handler = &hbg_pci_err_handler; +} diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.h b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.h new file mode 100644 index 000000000000..d7828e446308 --- /dev/null +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* Copyright (c) 2024 Hisilicon Limited. 
*/ + +#ifndef __HBG_ERR_H +#define __HBG_ERR_H + +#include + +void hbg_set_pci_err_handler(struct pci_driver *pdrv); +int hbg_reset(struct hbg_priv *priv); +int hbg_rebuild(struct hbg_priv *priv); + +#endif diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index a821a92db43d..326228b7b801 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -3,7 +3,9 @@ #include #include +#include #include "hbg_common.h" +#include "hbg_err.h" #include "hbg_ethtool.h" #include "hbg_hw.h" @@ -163,9 +165,21 @@ static int hbg_ethtool_set_pauseparam(struct net_device *net_dev, if (!param->autoneg) hbg_hw_set_pause_enable(priv, param->tx_pause, param->rx_pause); + priv->user_def.pause_param = *param; return 0; } +static int hbg_ethtool_reset(struct net_device *netdev, u32 *flags) +{ + struct hbg_priv *priv = netdev_priv(netdev); + + if (*flags != ETH_RESET_DEDICATED) + return -EOPNOTSUPP; + + *flags = 0; + return hbg_reset(priv); +} + static const struct ethtool_ops hbg_ethtool_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, @@ -174,6 +188,7 @@ static const struct ethtool_ops hbg_ethtool_ops = { .get_regs = hbg_ethtool_get_regs, .get_pauseparam = hbg_ethtool_get_pauseparam, .set_pauseparam = hbg_ethtool_set_pauseparam, + .reset = hbg_ethtool_reset, }; void hbg_ethtool_set_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c index 0cbe9f7229b3..e7798f213645 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_hw.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include "hbg_common.h" @@ -167,8 +168,13 @@ static void hbg_hw_set_mac_max_frame_len(struct hbg_priv *priv, void hbg_hw_set_mtu(struct hbg_priv *priv, u16 mtu) { - 
hbg_hw_set_pcu_max_frame_len(priv, mtu); - hbg_hw_set_mac_max_frame_len(priv, mtu); + u32 frame_len; + + frame_len = mtu + VLAN_HLEN * priv->dev_specs.vlan_layers + + ETH_HLEN + ETH_FCS_LEN; + + hbg_hw_set_pcu_max_frame_len(priv, frame_len); + hbg_hw_set_mac_max_frame_len(priv, frame_len); } void hbg_hw_mac_enable(struct hbg_priv *priv, u32 enable) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 8a2d63c0c196..bb0f25ac9760 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -6,6 +6,7 @@ #include #include #include "hbg_common.h" +#include "hbg_err.h" #include "hbg_ethtool.h" #include "hbg_hw.h" #include "hbg_irq.h" @@ -13,8 +14,6 @@ #include "hbg_txrx.h" #include "hbg_debugfs.h" -static void hbg_change_mtu(struct hbg_priv *priv, int new_mtu); - static void hbg_all_irq_enable(struct hbg_priv *priv, bool enabled) { struct hbg_irq_info *info; @@ -56,11 +55,7 @@ static int hbg_hw_txrx_clear(struct hbg_priv *priv) return ret; /* After reset, regs need to be reconfigured */ - hbg_hw_init(priv); - hbg_hw_set_uc_addr(priv, ether_addr_to_u64(priv->netdev->dev_addr), 0); - hbg_change_mtu(priv, priv->netdev->mtu); - - return 0; + return hbg_rebuild(priv); } static int hbg_net_stop(struct net_device *netdev) @@ -196,15 +191,6 @@ static int hbg_net_set_mac_address(struct net_device *netdev, void *addr) return 0; } -static void hbg_change_mtu(struct hbg_priv *priv, int new_mtu) -{ - u32 frame_len; - - frame_len = new_mtu + VLAN_HLEN * priv->dev_specs.vlan_layers + - ETH_HLEN + ETH_FCS_LEN; - hbg_hw_set_mtu(priv, frame_len); -} - static int hbg_net_change_mtu(struct net_device *netdev, int new_mtu) { struct hbg_priv *priv = netdev_priv(netdev); @@ -212,7 +198,7 @@ static int hbg_net_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) return -EBUSY; - hbg_change_mtu(priv, new_mtu); + hbg_hw_set_mtu(priv, new_mtu); 
WRITE_ONCE(netdev->mtu, new_mtu); dev_dbg(&priv->pdev->dev, @@ -270,6 +256,17 @@ static int hbg_mac_filter_init(struct hbg_priv *priv) return 0; } +static void hbg_init_user_def(struct hbg_priv *priv) +{ + struct ethtool_pauseparam *pause_param = &priv->user_def.pause_param; + + priv->mac.pause_autoneg = HBG_STATUS_ENABLE; + + pause_param->autoneg = priv->mac.pause_autoneg; + hbg_hw_get_pause_enable(priv, &pause_param->tx_pause, + &pause_param->rx_pause); +} + static int hbg_init(struct hbg_priv *priv) { int ret; @@ -295,6 +292,7 @@ static int hbg_init(struct hbg_priv *priv) return ret; hbg_debugfs_init(priv); + hbg_init_user_def(priv); return 0; } @@ -359,7 +357,7 @@ static int hbg_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->netdev_ops = &hbg_netdev_ops; netdev->watchdog_timeo = 5 * HZ; - hbg_change_mtu(priv, ETH_DATA_LEN); + hbg_hw_set_mtu(priv, ETH_DATA_LEN); hbg_net_set_mac_address(priv->netdev, &priv->dev_specs.mac_addr); hbg_ethtool_set_ops(netdev); @@ -388,6 +386,7 @@ static int __init hbg_module_init(void) int ret; hbg_debugfs_register(); + hbg_set_pci_err_handler(&hbg_driver); ret = pci_register_driver(&hbg_driver); if (ret) hbg_debugfs_unregister(); From adb42b1e0ef32f80f6f02374342aa5c223e9d17f Mon Sep 17 00:00:00 2001 From: Jijie Shao Date: Mon, 16 Dec 2024 12:05:32 +0800 Subject: [PATCH 0493/1386] net: hibmcge: Add nway_reset supported in this module Add nway_reset supported in this module Signed-off-by: Jijie Shao Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241216040532.1566229-8-shaojijie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c index 326228b7b801..00364a438ec2 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_ethtool.c @@ -189,6 +189,7 
@@ static const struct ethtool_ops hbg_ethtool_ops = { .get_pauseparam = hbg_ethtool_get_pauseparam, .set_pauseparam = hbg_ethtool_set_pauseparam, .reset = hbg_ethtool_reset, + .nway_reset = phy_ethtool_nway_reset, }; void hbg_ethtool_set_ops(struct net_device *netdev) From 5a49edec44f638952da8dc8d754e76f462c19034 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sun, 15 Dec 2024 17:43:55 +0000 Subject: [PATCH 0494/1386] net: dsa: qca8k: Fix inconsistent use of jiffies vs milliseconds wait_for_complete_timeout() expects a timeout in jiffies. With the driver, some call sites converted QCA8K_ETHERNET_TIMEOUT to jiffies, others did not. Make the code consistent by changes the #define to include a call to msecs_to_jiffies, and remove all other calls to msecs_to_jiffies. Signed-off-by: Andrew Lunn Tested-by: from Christian would be very welcome. Signed-off-by: David S. Miller --- drivers/net/dsa/qca/qca8k-8xxx.c | 4 ++-- drivers/net/dsa/qca/qca8k.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c index ec74e3c2b0e9..90e24bc00b99 100644 --- a/drivers/net/dsa/qca/qca8k-8xxx.c +++ b/drivers/net/dsa/qca/qca8k-8xxx.c @@ -342,7 +342,7 @@ static int qca8k_read_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) dev_queue_xmit(skb); ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, - msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT)); + QCA8K_ETHERNET_TIMEOUT); *val = mgmt_eth_data->data[0]; if (len > QCA_HDR_MGMT_DATA1_LEN) @@ -394,7 +394,7 @@ static int qca8k_write_eth(struct qca8k_priv *priv, u32 reg, u32 *val, int len) dev_queue_xmit(skb); ret = wait_for_completion_timeout(&mgmt_eth_data->rw_done, - msecs_to_jiffies(QCA8K_ETHERNET_TIMEOUT)); + QCA8K_ETHERNET_TIMEOUT); ack = mgmt_eth_data->ack; diff --git a/drivers/net/dsa/qca/qca8k.h b/drivers/net/dsa/qca/qca8k.h index 3664a2e2f1f6..24962a395754 100644 --- a/drivers/net/dsa/qca/qca8k.h +++ b/drivers/net/dsa/qca/qca8k.h @@ -16,7 +16,7 
@@ #define QCA8K_ETHERNET_MDIO_PRIORITY 7 #define QCA8K_ETHERNET_PHY_PRIORITY 6 -#define QCA8K_ETHERNET_TIMEOUT 5 +#define QCA8K_ETHERNET_TIMEOUT msecs_to_jiffies(5) #define QCA8K_NUM_PORTS 7 #define QCA8K_NUM_CPU_PORTS 2 From c1bad69f8baf562b1d522740dc76e48f2a2a1918 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 16 Dec 2024 16:56:05 +0000 Subject: [PATCH 0495/1386] net: Remove bouncing hippi list linux-hippi is bouncing with: : Sorry, no mailbox here by that name. (#5.1.1) Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 6cced90772fc..907b379af010 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10287,7 +10287,6 @@ F: drivers/input/touchscreen/himax_hx83112b.c HIPPI M: Jes Sorensen -L: linux-hippi@sunsite.dk S: Maintained F: drivers/net/hippi/ F: include/linux/hippidevice.h From aeda9245c7ce6afbf0bf1be164ecef7552384c29 Mon Sep 17 00:00:00 2001 From: Alex Shumsky Date: Fri, 13 Dec 2024 11:14:02 +0300 Subject: [PATCH 0496/1386] wifi: brcmfmac: clarify unmodifiable headroom log message Replace misleading log "insufficient headroom (0)" with more clear "unmodifiable headroom". 
Signed-off-by: Alex Shumsky Acked-by: Arend van Spriel Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241213081402.625003-1-alexthreed@gmail.com --- drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index da72fd2d541f..c3a57e30c855 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -327,8 +327,8 @@ static netdev_tx_t brcmf_netdev_start_xmit(struct sk_buff *skb, if (skb_headroom(skb) < drvr->hdrlen || skb_header_cloned(skb)) { head_delta = max_t(int, drvr->hdrlen - skb_headroom(skb), 0); - brcmf_dbg(INFO, "%s: insufficient headroom (%d)\n", - brcmf_ifname(ifp), head_delta); + brcmf_dbg(INFO, "%s: %s headroom\n", brcmf_ifname(ifp), + head_delta ? "insufficient" : "unmodifiable"); atomic_inc(&drvr->bus_if->stats.pktcowed); ret = pskb_expand_head(skb, ALIGN(head_delta, NET_SKB_PAD), 0, GFP_ATOMIC); From 8ab3bf4764136e8ad8d1064c304be50297bcf9ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 16 Dec 2024 12:30:10 +0100 Subject: [PATCH 0497/1386] wifi: wlcore: sysfs: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241216-sysfs-const-bin_attr-net-v1-3-ec460b91f274@weissschuh.net --- drivers/net/wireless/ti/wlcore/sysfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/sysfs.c b/drivers/net/wireless/ti/wlcore/sysfs.c index c07acfcbbd9c..7c57d4c8744a 100644 --- a/drivers/net/wireless/ti/wlcore/sysfs.c +++ b/drivers/net/wireless/ti/wlcore/sysfs.c @@ -88,7 +88,7 @@ static ssize_t hw_pg_ver_show(struct device *dev, static DEVICE_ATTR_RO(hw_pg_ver); static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -121,7 +121,7 @@ static ssize_t wl1271_sysfs_read_fwlog(struct file *filp, struct kobject *kobj, static const struct bin_attribute fwlog_attr = { .attr = { .name = "fwlog", .mode = 0400 }, - .read = wl1271_sysfs_read_fwlog, + .read_new = wl1271_sysfs_read_fwlog, }; int wlcore_sysfs_init(struct wl1271 *wl) From 88395c071f08d9ea2314045230206cc5a3f82ef0 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:51:58 +0000 Subject: [PATCH 0498/1386] selftests/net: packetdrill: import tcp/ecn, tcp/close, tcp/sack, tcp/tcp_info Same as initial tests, import verbatim from github.com/google/packetdrill, aside from: - update `source ./defaults.sh` path to adjust for flat dir - add SPDX headers - remove author statements if any - drop blank lines at EOF Same test process as previous tests. Both with and without debug mode. Recording the steps once: make mrproper vng --build \ --config tools/testing/selftests/net/packetdrill/config \ --config kernel/configs/debug.config vng -v --run . 
--user root --cpus 4 -- \ make -C tools/testing/selftests TARGETS=net/packetdrill run_tests Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-2-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- ...lose_close-local-close-then-remote-fin.pkt | 23 +++++++ .../tcp_close_close-on-syn-sent.pkt | 21 ++++++ .../tcp_close_close-remote-fin-then-close.pkt | 36 ++++++++++ .../net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt | 21 ++++++ .../tcp_sack_sack-route-refresh-ip-tos.pkt | 37 +++++++++++ ...ack_sack-shift-sacked-2-6-8-3-9-nofack.pkt | 64 ++++++++++++++++++ ..._sack_sack-shift-sacked-7-3-4-8-9-fack.pkt | 66 +++++++++++++++++++ ..._sack_sack-shift-sacked-7-5-6-8-9-fack.pkt | 62 +++++++++++++++++ .../tcp_tcp_info_tcp-info-last_data_recv.pkt | 20 ++++++ .../tcp_tcp_info_tcp-info-rwnd-limited.pkt | 54 +++++++++++++++ .../tcp_tcp_info_tcp-info-sndbuf-limited.pkt | 38 +++++++++++ 11 files changed, 442 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt create mode 
100644 tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decideds to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). 
+ +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). 
+// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) ++.002 < SE. 0:0(0) ack 1 win 32767 + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 65535 + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 
1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 ++.001 < . 1:1(0) ack 1 win 257 ++.001 < . 1:1(0) ack 1 win 257 + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 
1:1(0) ack 1001 win 257 + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 
1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 
1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) ++.030 < S. 
0:0(0) ack 1 win 10000 ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 
1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 10000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 
1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% From eab35989cc37e168550b7bfa690905ea2d1ae603 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:51:59 +0000 Subject: [PATCH 0499/1386] selftests/net: packetdrill: import tcp/fast_recovery, tcp/nagle, tcp/timestamping Use the standard import and testing method, as described in the import of tcp/ecn , tcp/close , tcp/sack , tcp/tcp_info. Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-3-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- .../tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt | 72 +++++++++ ...t_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt | 50 ++++++ ...tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt | 43 ++++++ ...ecovery_prr-ss-ack-below-snd_una-cubic.pkt | 41 +++++ .../packetdrill/tcp_nagle_https_client.pkt | 40 +++++ .../tcp_nagle_sendmsg_msg_more.pkt | 66 ++++++++ .../tcp_nagle_sockopt_cork_nodelay.pkt | 43 ++++++ ...tcp_timestamping_client-only-last-byte.pkt | 92 +++++++++++ .../packetdrill/tcp_timestamping_partial.pkt | 91 +++++++++++ .../packetdrill/tcp_timestamping_server.pkt | 145 ++++++++++++++++++ 10 files changed, 683 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt create mode 100644 
tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 + +.01 < . 1:1(0) ack 1 win 320 + +.01 < . 1:1(0) ack 1 win 320 +// Enter fast recovery. + +0 > . 
1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. 
+ +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P.
1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 ++.002 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 + +0 < . 1:1(0) ack 2001 win 320 + +0 < . 1:1(0) ack 3001 win 320 + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 < . 1:1(0) ack 1 win 320 + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 + +0 > . 
4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.1 < S. 0:0(0) ack 1 win 5792 + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 
1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that the MSG_MORE flag correctly corks tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overrides TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 1:1(0) ack 2801 win 257 +// Err...
we release the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < .
1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 20000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 
10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) + +.01 < S. 0:0(0) ack 1 win 2000 + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 + +0 > P. 989:1977(988) ack 1 + +.01 < . 1:1(0) ack 1977 win 92 + +0 > P. 1977:2965(988) ack 1 + +.01 < . 1:1(0) ack 2965 win 92 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. 
+ +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. 
+ +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 From 6f6692053939038f48c2f9f404fe414038a44431 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:52:00 +0000 Subject: [PATCH 0500/1386] selftests/net: packetdrill: import tcp/eor, tcp/splice, tcp/ts_recent, tcp/blocking Use the standard import and testing method, as described in the import of tcp/ecn and tcp/close , tcp/sack , tcp/tcp_info. 
Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-4-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- .../tcp_blocking_blocking-accept.pkt | 18 +++++ .../tcp_blocking_blocking-connect.pkt | 13 ++++ .../tcp_blocking_blocking-read.pkt | 29 ++++++++ .../tcp_blocking_blocking-write.pkt | 35 +++++++++ .../packetdrill/tcp_eor_no-coalesce-large.pkt | 38 ++++++++++ .../tcp_eor_no-coalesce-retrans.pkt | 72 +++++++++++++++++++ .../packetdrill/tcp_eor_no-coalesce-small.pkt | 36 ++++++++++ .../tcp_eor_no-coalesce-subsequent.pkt | 66 +++++++++++++++++ .../tcp_splice_tcp_splice_loop_test.pkt | 20 ++++++ .../packetdrill/tcp_ts_recent_fin_tsval.pkt | 23 ++++++ .../packetdrill/tcp_ts_recent_invalid_ack.pkt | 25 +++++++ .../packetdrill/tcp_ts_recent_reset_tsval.pkt | 25 +++++++ 12 files changed, 400 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt diff --git 
a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) + +.1 < S. 0:0(0) ack 1 win 5792 + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. 
+`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from subsequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10400B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// for the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 
1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from subsequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesced into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 
12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 
1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 
10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 
1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 + + + +0 < . 1:1001(1000) ack 1 win 20000 + +0 > . 
1:1(0) ack 1001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 + +0 > . 1:1(0) ack 1001 + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 1:1(0) ack 1001 From 5d4cadef52f29eea779a0b44e09f59657c1b46d8 Mon Sep 17 00:00:00 2001 From: Soham Chakradeo Date: Tue, 17 Dec 2024 18:52:01 +0000 Subject: [PATCH 0501/1386] selftests/net: packetdrill: import tcp/user_timeout, tcp/validate, tcp/sendfile, tcp/limited-transmit, tcp/syscall_bad_arg Use the standard import and testing method, as described in the import of tcp/ecn, tcp/close, tcp/sack and tcp/tcp_info. 
Signed-off-by: Willem de Bruijn Signed-off-by: Soham Chakradeo Link: https://patch.msgid.link/20241217185203.297935-5-sohamch.kernel@gmail.com Signed-off-by: Jakub Kicinski --- ...ited_transmit_limited-transmit-no-sack.pkt | 53 +++++++++++++++++++ ...limited_transmit_limited-transmit-sack.pkt | 50 +++++++++++++++++ .../tcp_sendfile_sendfile-simple.pkt | 26 +++++++++ ...scall_bad_arg_fastopen-invalid-buf-ptr.pkt | 42 +++++++++++++++ .../tcp_syscall_bad_arg_sendmsg-empty-iov.pkt | 30 +++++++++++ ...yscall_bad_arg_syscall-invalid-buf-ptr.pkt | 25 +++++++++ .../tcp_user_timeout_user-timeout-probe.pkt | 37 +++++++++++++ .../tcp_user_timeout_user_timeout.pkt | 32 +++++++++++ ...validate_validate-established-no-flags.pkt | 24 +++++++++ 9 files changed, 319 insertions(+) create mode 100644 tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt create mode 100644 tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ 
b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 
15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 
15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) 
= -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) ++0 < S. 123:123(0) ack 1 win 14600 ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) = 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. +`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) 
= 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) 
= -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 + +0 > S. 0:0(0) ack 1 + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. 
+ // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 
1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 + +0 > S. 0:0(0) ack 1 + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 From c9cfced17365b1df8c6ae6cd5db56aebd7ed9b57 Mon Sep 17 00:00:00 2001 From: Yafang Shao Date: Tue, 10 Dec 2024 10:27:06 +0800 Subject: [PATCH 0502/1386] net/mlx5e: Report rx_discards_phy via rx_dropped MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We noticed a high number of rx_discards_phy events on certain servers while running `ethtool -S`. However, this critical counter is not currently included in the standard /proc/net/dev statistics file, making it difficult to monitor effectively—especially given the diversity of vendors across a large fleet of servers. Let's report it via the standard rx_dropped metric. 
Suggested-by: Jakub Kicinski Signed-off-by: Yafang Shao Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: Gal Pressman Reviewed-by: Simon Horman Reviewed-by: Tariq Toukan Link: https://patch.msgid.link/20241210022706.6665-1-laoar.shao@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index dd16d73000c3..15e765a41d72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3946,6 +3946,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } stats->rx_missed_errors = priv->stats.qcnt.rx_out_of_buffer; + stats->rx_dropped = PPORT_2863_GET(pstats, if_in_discards); stats->rx_length_errors = PPORT_802_3_GET(pstats, a_in_range_length_errors) + From 65c233d8e329c152e88fe796155702fd21028883 Mon Sep 17 00:00:00 2001 From: shunlizhou Date: Mon, 16 Dec 2024 13:54:46 +0000 Subject: [PATCH 0503/1386] docs: net: bonding: fix typos The bonding documentation had several "insure" which is not properly used in the context. Suggest to change to "ensure" to improve readability. Signed-off-by: shunlizhou Link: https://patch.msgid.link/20241216135447.57681-1-shunlizhou@aliyun.com Signed-off-by: Jakub Kicinski --- Documentation/networking/bonding.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 7c8d22d68682..a4c1291d2561 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -1963,7 +1963,7 @@ obtain its hardware address from the first slave, which might not match the hardware address of the VLAN interfaces (which was ultimately copied from an earlier slave). 
-There are two methods to insure that the VLAN device operates +There are two methods to ensure that the VLAN device operates with the correct hardware address if all slaves are removed from a bond interface: @@ -2078,7 +2078,7 @@ as an unsolicited ARP reply (because ARP matches replies on an interface basis), and is discarded. The MII monitor is not affected by the state of the routing table. -The solution here is simply to insure that slaves do not have +The solution here is simply to ensure that slaves do not have routes of their own, and if for some reason they must, those routes do not supersede routes of their master. This should generally be the case, but unusual configurations or errant manual or automatic static @@ -2295,7 +2295,7 @@ active-backup: the switches have an ISL and play together well. If the network configuration is such that one switch is specifically a backup switch (e.g., has lower capacity, higher cost, etc), - then the primary option can be used to insure that the + then the primary option can be used to ensure that the preferred link is always used when it is available. broadcast: @@ -2322,7 +2322,7 @@ monitor can provide a higher level of reliability in detecting end to end connectivity failures (which may be caused by the failure of any individual component to pass traffic for any reason). Additionally, the ARP monitor should be configured with multiple targets (at least -one for each switch in the network). This will insure that, +one for each switch in the network). This will ensure that, regardless of which switch is active, the ARP monitor has a suitable target to query. From a126061c80d5efb4baef4bcf346094139cd81df6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 17 Dec 2024 13:51:21 +0000 Subject: [PATCH 0504/1386] ptr_ring: do not block hard interrupts in ptr_ring_resize_multiple() Jakub added a lockdep_assert_no_hardirq() check in __page_pool_put_page() to increase test coverage. 
syzbot found a splat caused by hard irq blocking in ptr_ring_resize_multiple() [1] As current users of ptr_ring_resize_multiple() do not require hard irqs being masked, replace it to only block BH. Rename helpers to better reflect they are safe against BH only. - ptr_ring_resize_multiple() to ptr_ring_resize_multiple_bh() - skb_array_resize_multiple() to skb_array_resize_multiple_bh() [1] WARNING: CPU: 1 PID: 9150 at net/core/page_pool.c:709 __page_pool_put_page net/core/page_pool.c:709 [inline] WARNING: CPU: 1 PID: 9150 at net/core/page_pool.c:709 page_pool_put_unrefed_netmem+0x157/0xa40 net/core/page_pool.c:780 Modules linked in: CPU: 1 UID: 0 PID: 9150 Comm: syz.1.1052 Not tainted 6.11.0-rc3-syzkaller-00202-gf8669d7b5f5d #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/06/2024 RIP: 0010:__page_pool_put_page net/core/page_pool.c:709 [inline] RIP: 0010:page_pool_put_unrefed_netmem+0x157/0xa40 net/core/page_pool.c:780 Code: 74 0e e8 7c aa fb f7 eb 43 e8 75 aa fb f7 eb 3c 65 8b 1d 38 a8 6a 76 31 ff 89 de e8 a3 ae fb f7 85 db 74 0b e8 5a aa fb f7 90 <0f> 0b 90 eb 1d 65 8b 1d 15 a8 6a 76 31 ff 89 de e8 84 ae fb f7 85 RSP: 0018:ffffc9000bda6b58 EFLAGS: 00010083 RAX: ffffffff8997e523 RBX: 0000000000000000 RCX: 0000000000040000 RDX: ffffc9000fbd0000 RSI: 0000000000001842 RDI: 0000000000001843 RBP: 0000000000000000 R08: ffffffff8997df2c R09: 1ffffd40003a000d R10: dffffc0000000000 R11: fffff940003a000e R12: ffffea0001d00040 R13: ffff88802e8a4000 R14: dffffc0000000000 R15: 00000000ffffffff FS: 00007fb7aaf716c0(0000) GS:ffff8880b9300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa15a0d4b72 CR3: 00000000561b0000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tun_ptr_free drivers/net/tun.c:617 [inline] __ptr_ring_swap_queue include/linux/ptr_ring.h:571 [inline] 
ptr_ring_resize_multiple_noprof include/linux/ptr_ring.h:643 [inline] tun_queue_resize drivers/net/tun.c:3694 [inline] tun_device_event+0xaaf/0x1080 drivers/net/tun.c:3714 notifier_call_chain+0x19f/0x3e0 kernel/notifier.c:93 call_netdevice_notifiers_extack net/core/dev.c:2032 [inline] call_netdevice_notifiers net/core/dev.c:2046 [inline] dev_change_tx_queue_len+0x158/0x2a0 net/core/dev.c:9024 do_setlink+0xff6/0x41f0 net/core/rtnetlink.c:2923 rtnl_setlink+0x40d/0x5a0 net/core/rtnetlink.c:3201 rtnetlink_rcv_msg+0x73f/0xcf0 net/core/rtnetlink.c:6647 netlink_rcv_skb+0x1e3/0x430 net/netlink/af_netlink.c:2550 Fixes: ff4e538c8c3e ("page_pool: add a lockdep check for recycling in hardirq") Reported-by: syzbot+f56a5c5eac2b28439810@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/671e10df.050a0220.2b8c0f.01cf.GAE@google.com/T/ Signed-off-by: Eric Dumazet Acked-by: Michael S. Tsirkin Acked-by: Jason Wang Link: https://patch.msgid.link/20241217135121.326370-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/tap.c | 6 +++--- drivers/net/tun.c | 6 +++--- include/linux/ptr_ring.h | 21 ++++++++++----------- include/linux/skb_array.h | 17 +++++++++-------- net/sched/sch_generic.c | 4 ++-- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 5aa41d5f7765..5ca6ecf0ce5f 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1329,9 +1329,9 @@ int tap_queue_resize(struct tap_dev *tap) list_for_each_entry(q, &tap->queue_list, next) rings[i++] = &q->ring; - ret = ptr_ring_resize_multiple(rings, n, - dev->tx_queue_len, GFP_KERNEL, - __skb_array_destroy_skb); + ret = ptr_ring_resize_multiple_bh(rings, n, + dev->tx_queue_len, GFP_KERNEL, + __skb_array_destroy_skb); kfree(rings); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 8e94df88392c..41e3eeac06fd 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3701,9 +3701,9 @@ static int tun_queue_resize(struct tun_struct *tun) 
list_for_each_entry(tfile, &tun->disabled, next) rings[i++] = &tfile->tx_ring; - ret = ptr_ring_resize_multiple(rings, n, - dev->tx_queue_len, GFP_KERNEL, - tun_ptr_free); + ret = ptr_ring_resize_multiple_bh(rings, n, + dev->tx_queue_len, GFP_KERNEL, + tun_ptr_free); kfree(rings); return ret; diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index fd037c127bb0..551329220e4f 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -615,15 +615,14 @@ static inline int ptr_ring_resize_noprof(struct ptr_ring *r, int size, gfp_t gfp /* * Note: producer lock is nested within consumer lock, so if you * resize you must make sure all uses nest correctly. - * In particular if you consume ring in interrupt or BH context, you must - * disable interrupts/BH when doing so. + * In particular if you consume ring in BH context, you must + * disable BH when doing so. */ -static inline int ptr_ring_resize_multiple_noprof(struct ptr_ring **rings, - unsigned int nrings, - int size, - gfp_t gfp, void (*destroy)(void *)) +static inline int ptr_ring_resize_multiple_bh_noprof(struct ptr_ring **rings, + unsigned int nrings, + int size, gfp_t gfp, + void (*destroy)(void *)) { - unsigned long flags; void ***queues; int i; @@ -638,12 +637,12 @@ static inline int ptr_ring_resize_multiple_noprof(struct ptr_ring **rings, } for (i = 0; i < nrings; ++i) { - spin_lock_irqsave(&(rings[i])->consumer_lock, flags); + spin_lock_bh(&(rings[i])->consumer_lock); spin_lock(&(rings[i])->producer_lock); queues[i] = __ptr_ring_swap_queue(rings[i], queues[i], size, gfp, destroy); spin_unlock(&(rings[i])->producer_lock); - spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags); + spin_unlock_bh(&(rings[i])->consumer_lock); } for (i = 0; i < nrings; ++i) @@ -662,8 +661,8 @@ nomem: noqueues: return -ENOMEM; } -#define ptr_ring_resize_multiple(...) \ - alloc_hooks(ptr_ring_resize_multiple_noprof(__VA_ARGS__)) +#define ptr_ring_resize_multiple_bh(...) 
\ + alloc_hooks(ptr_ring_resize_multiple_bh_noprof(__VA_ARGS__)) static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *)) { diff --git a/include/linux/skb_array.h b/include/linux/skb_array.h index 926496c9cc9c..bf178238a308 100644 --- a/include/linux/skb_array.h +++ b/include/linux/skb_array.h @@ -199,17 +199,18 @@ static inline int skb_array_resize(struct skb_array *a, int size, gfp_t gfp) return ptr_ring_resize(&a->ring, size, gfp, __skb_array_destroy_skb); } -static inline int skb_array_resize_multiple_noprof(struct skb_array **rings, - int nrings, unsigned int size, - gfp_t gfp) +static inline int skb_array_resize_multiple_bh_noprof(struct skb_array **rings, + int nrings, + unsigned int size, + gfp_t gfp) { BUILD_BUG_ON(offsetof(struct skb_array, ring)); - return ptr_ring_resize_multiple_noprof((struct ptr_ring **)rings, - nrings, size, gfp, - __skb_array_destroy_skb); + return ptr_ring_resize_multiple_bh_noprof((struct ptr_ring **)rings, + nrings, size, gfp, + __skb_array_destroy_skb); } -#define skb_array_resize_multiple(...) \ - alloc_hooks(skb_array_resize_multiple_noprof(__VA_ARGS__)) +#define skb_array_resize_multiple_bh(...) 
\ + alloc_hooks(skb_array_resize_multiple_bh_noprof(__VA_ARGS__)) static inline void skb_array_cleanup(struct skb_array *a) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 38ec18f73de4..8874ae668095 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -911,8 +911,8 @@ static int pfifo_fast_change_tx_queue_len(struct Qdisc *sch, bands[prio] = q; } - return skb_array_resize_multiple(bands, PFIFO_FAST_BANDS, new_len, - GFP_KERNEL); + return skb_array_resize_multiple_bh(bands, PFIFO_FAST_BANDS, new_len, + GFP_KERNEL); } struct Qdisc_ops pfifo_fast_ops __read_mostly = { From 206112fa65790221ca3ebbc43092911bb8836a19 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 16 Dec 2024 12:19:53 +0500 Subject: [PATCH 0505/1386] net: renesas: rswitch: do not write to MPSM register at init time MPSM register is used to execute mdio bus transactions. There is no need to initialize it early. Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241216071957.2587354-2-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 5fc8c94d1e4b..12efee9f75d8 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1166,7 +1166,6 @@ static void rswitch_etha_enable_mii(struct rswitch_etha *etha) { rswitch_modify(etha->addr, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, MPIC_PSMCS(etha->psmcs) | MPIC_PSMHT(0x06)); - rswitch_modify(etha->addr, MPSM, 0, MPSM_MFF_C45); } static int rswitch_etha_hw_init(struct rswitch_etha *etha, const u8 *mac) From da75ba93e3383fc10af71e5029b5a57378a57576 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 16 Dec 2024 12:19:54 +0500 Subject: [PATCH 0506/1386] net: renesas: rswitch: use FIELD_PREP for remaining MPIC 
register fields Commit fb9e6039c325 ("net: renesas: rswitch: fix initial MPIC register setting") converted setting some MPIC fields to FIELD_PREP. To keep common style, do the same with mii bus related fields of the same register. Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241216071957.2587354-3-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 5 +++-- drivers/net/ethernet/renesas/rswitch.h | 10 ++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 12efee9f75d8..e1541a206687 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1164,8 +1164,9 @@ static void rswitch_rmac_setting(struct rswitch_etha *etha, const u8 *mac) static void rswitch_etha_enable_mii(struct rswitch_etha *etha) { - rswitch_modify(etha->addr, MPIC, MPIC_PSMCS_MASK | MPIC_PSMHT_MASK, - MPIC_PSMCS(etha->psmcs) | MPIC_PSMHT(0x06)); + rswitch_modify(etha->addr, MPIC, MPIC_PSMCS | MPIC_PSMHT, + FIELD_PREP(MPIC_PSMCS, etha->psmcs) | + FIELD_PREP(MPIC_PSMHT, 0x06)); } static int rswitch_etha_hw_init(struct rswitch_etha *etha, const u8 *mac) diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 4b1489100330..78c0325cdf30 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -732,6 +732,8 @@ enum rswitch_etha_mode { #define MPIC_LSC_100M 1 #define MPIC_LSC_1G 2 #define MPIC_LSC_2_5G 3 +#define MPIC_PSMCS GENMASK(22, 16) +#define MPIC_PSMHT GENMASK(26, 24) #define MDIO_READ_C45 0x03 #define MDIO_WRITE_C45 0x01 @@ -747,14 +749,6 @@ enum rswitch_etha_mode { #define MMIS1_PRACS BIT(0) /* Read */ #define MMIS1_CLEAR_FLAGS 0xf -#define MPIC_PSMCS_SHIFT 16 -#define MPIC_PSMCS_MASK GENMASK(22, MPIC_PSMCS_SHIFT) -#define MPIC_PSMCS(val) 
((val) << MPIC_PSMCS_SHIFT) - -#define MPIC_PSMHT_SHIFT 24 -#define MPIC_PSMHT_MASK GENMASK(26, MPIC_PSMHT_SHIFT) -#define MPIC_PSMHT(val) ((val) << MPIC_PSMHT_SHIFT) - #define MLVC_PLV BIT(16) /* GWCA */ From 1ced1b8cacf396d6ff979f594ba40ace42087797 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 16 Dec 2024 12:19:55 +0500 Subject: [PATCH 0507/1386] net: renesas: rswitch: align mdio C45 operations with datasheet Per rswitch datasheet, software can know that mdio operation completed either by polling MPSM.PSME bit, or via interrupt. Instead, the driver currently polls for interrupt status bit. Although this still provides correct result, it requires additional register operations to clean the interrupt status bits, and generally looks wrong. Fix it to poll MPSM.PSME bit, as the datasheet suggests. Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241216071957.2587354-4-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 12 +++--------- drivers/net/ethernet/renesas/rswitch.h | 6 ------ 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index e1541a206687..6e3f162ae3b3 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1205,32 +1205,26 @@ static int rswitch_etha_set_access(struct rswitch_etha *etha, bool read, if (devad == 0xffffffff) return -ENODEV; - writel(MMIS1_CLEAR_FLAGS, etha->addr + MMIS1); - val = MPSM_PSME | MPSM_MFF_C45; iowrite32((regad << 16) | (devad << 8) | (phyad << 3) | val, etha->addr + MPSM); - ret = rswitch_reg_wait(etha->addr, MMIS1, MMIS1_PAACS, MMIS1_PAACS); + ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); if (ret) return ret; - rswitch_modify(etha->addr, MMIS1, MMIS1_PAACS, MMIS1_PAACS); - if (read) { writel((pop << 13) | (devad << 8) | (phyad << 3) | 
val, etha->addr + MPSM); - ret = rswitch_reg_wait(etha->addr, MMIS1, MMIS1_PRACS, MMIS1_PRACS); + ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); if (ret) return ret; ret = (ioread32(etha->addr + MPSM) & MPSM_PRD_MASK) >> 16; - - rswitch_modify(etha->addr, MMIS1, MMIS1_PRACS, MMIS1_PRACS); } else { iowrite32((data << 16) | (pop << 13) | (devad << 8) | (phyad << 3) | val, etha->addr + MPSM); - ret = rswitch_reg_wait(etha->addr, MMIS1, MMIS1_PWACS, MMIS1_PWACS); + ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); } return ret; diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 78c0325cdf30..2cb66f3f4716 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -743,12 +743,6 @@ enum rswitch_etha_mode { #define MPSM_PRD_SHIFT 16 #define MPSM_PRD_MASK GENMASK(31, MPSM_PRD_SHIFT) -/* Completion flags */ -#define MMIS1_PAACS BIT(2) /* Address */ -#define MMIS1_PWACS BIT(1) /* Write */ -#define MMIS1_PRACS BIT(0) /* Read */ -#define MMIS1_CLEAR_FLAGS 0xf - #define MLVC_PLV BIT(16) /* GWCA */ From 2aa722b6d81c3118d33dcb8eea9aac49f56af790 Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 16 Dec 2024 12:19:56 +0500 Subject: [PATCH 0508/1386] net: renesas: rswitch: use generic MPSM operation for mdio C45 Introduce rswitch_etha_mpsm_op() that accepts values for MPSM register fields and executes the transaction. This avoids some code duplication, and can be used both for C45 and C22. Convert C45 read and write operations to use that.
Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241216071957.2587354-5-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 51 +++++++++++++++----------- drivers/net/ethernet/renesas/rswitch.h | 17 ++++++--- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 6e3f162ae3b3..a3ba2a91c0ab 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1195,36 +1195,29 @@ static int rswitch_etha_hw_init(struct rswitch_etha *etha, const u8 *mac) return rswitch_etha_change_mode(etha, EAMC_OPC_OPERATION); } -static int rswitch_etha_set_access(struct rswitch_etha *etha, bool read, - int phyad, int devad, int regad, int data) +static int rswitch_etha_mpsm_op(struct rswitch_etha *etha, bool read, + unsigned int mmf, unsigned int pda, + unsigned int pra, unsigned int pop, + unsigned int prd) { - int pop = read ? 
MDIO_READ_C45 : MDIO_WRITE_C45; u32 val; int ret; - if (devad == 0xffffffff) - return -ENODEV; - - val = MPSM_PSME | MPSM_MFF_C45; - iowrite32((regad << 16) | (devad << 8) | (phyad << 3) | val, etha->addr + MPSM); + val = MPSM_PSME | + FIELD_PREP(MPSM_MFF, mmf) | + FIELD_PREP(MPSM_PDA, pda) | + FIELD_PREP(MPSM_PRA, pra) | + FIELD_PREP(MPSM_POP, pop) | + FIELD_PREP(MPSM_PRD, prd); + iowrite32(val, etha->addr + MPSM); ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); if (ret) return ret; if (read) { - writel((pop << 13) | (devad << 8) | (phyad << 3) | val, etha->addr + MPSM); - - ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); - if (ret) - return ret; - - ret = (ioread32(etha->addr + MPSM) & MPSM_PRD_MASK) >> 16; - } else { - iowrite32((data << 16) | (pop << 13) | (devad << 8) | (phyad << 3) | val, - etha->addr + MPSM); - - ret = rswitch_reg_wait(etha->addr, MPSM, MPSM_PSME, 0); + val = ioread32(etha->addr + MPSM); + ret = FIELD_GET(MPSM_PRD, val); } return ret; @@ -1234,16 +1227,30 @@ static int rswitch_etha_mii_read_c45(struct mii_bus *bus, int addr, int devad, int regad) { struct rswitch_etha *etha = bus->priv; + int ret; - return rswitch_etha_set_access(etha, true, addr, devad, regad, 0); + ret = rswitch_etha_mpsm_op(etha, false, MPSM_MMF_C45, addr, devad, + MPSM_POP_ADDRESS, regad); + if (ret) + return ret; + + return rswitch_etha_mpsm_op(etha, true, MPSM_MMF_C45, addr, devad, + MPSM_POP_READ_C45, 0); } static int rswitch_etha_mii_write_c45(struct mii_bus *bus, int addr, int devad, int regad, u16 val) { struct rswitch_etha *etha = bus->priv; + int ret; - return rswitch_etha_set_access(etha, false, addr, devad, regad, val); + ret = rswitch_etha_mpsm_op(etha, false, MPSM_MMF_C45, addr, devad, + MPSM_POP_ADDRESS, regad); + if (ret) + return ret; + + return rswitch_etha_mpsm_op(etha, false, MPSM_MMF_C45, addr, devad, + MPSM_POP_WRITE, val); } /* Call of_node_put(port) after done */ diff --git a/drivers/net/ethernet/renesas/rswitch.h 
b/drivers/net/ethernet/renesas/rswitch.h index 2cb66f3f4716..35ee73766396 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -735,13 +735,18 @@ enum rswitch_etha_mode { #define MPIC_PSMCS GENMASK(22, 16) #define MPIC_PSMHT GENMASK(26, 24) -#define MDIO_READ_C45 0x03 -#define MDIO_WRITE_C45 0x01 - #define MPSM_PSME BIT(0) -#define MPSM_MFF_C45 BIT(2) -#define MPSM_PRD_SHIFT 16 -#define MPSM_PRD_MASK GENMASK(31, MPSM_PRD_SHIFT) +#define MPSM_MFF BIT(2) +#define MPSM_MMF_C22 0 +#define MPSM_MMF_C45 1 +#define MPSM_PDA GENMASK(7, 3) +#define MPSM_PRA GENMASK(12, 8) +#define MPSM_POP GENMASK(14, 13) +#define MPSM_POP_ADDRESS 0 +#define MPSM_POP_WRITE 1 +#define MPSM_POP_READ_C22 2 +#define MPSM_POP_READ_C45 3 +#define MPSM_PRD GENMASK(31, 16) #define MLVC_PLV BIT(16) From db48fe905d8ae90d0c35238ddd90e816d543316c Mon Sep 17 00:00:00 2001 From: Nikita Yushchenko Date: Mon, 16 Dec 2024 12:19:57 +0500 Subject: [PATCH 0509/1386] net: renesas: rswitch: add mdio C22 support The generic MPSM operation added by the previous patch can be used both for C45 and C22. Add handlers for C22 operations. 
Signed-off-by: Nikita Yushchenko Reviewed-by: Yoshihiro Shimoda Tested-by: Yoshihiro Shimoda Link: https://patch.msgid.link/20241216071957.2587354-6-nikita.yoush@cogentembedded.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/renesas/rswitch.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index a3ba2a91c0ab..aae26098bc0c 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1253,6 +1253,23 @@ static int rswitch_etha_mii_write_c45(struct mii_bus *bus, int addr, int devad, MPSM_POP_WRITE, val); } +static int rswitch_etha_mii_read_c22(struct mii_bus *bus, int phyad, int regad) +{ + struct rswitch_etha *etha = bus->priv; + + return rswitch_etha_mpsm_op(etha, true, MPSM_MMF_C22, phyad, regad, + MPSM_POP_READ_C22, 0); +} + +static int rswitch_etha_mii_write_c22(struct mii_bus *bus, int phyad, + int regad, u16 val) +{ + struct rswitch_etha *etha = bus->priv; + + return rswitch_etha_mpsm_op(etha, false, MPSM_MMF_C22, phyad, regad, + MPSM_POP_WRITE, val); +} + /* Call of_node_put(port) after done */ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev) { @@ -1335,6 +1352,8 @@ static int rswitch_mii_register(struct rswitch_device *rdev) mii_bus->priv = rdev->etha; mii_bus->read_c45 = rswitch_etha_mii_read_c45; mii_bus->write_c45 = rswitch_etha_mii_write_c45; + mii_bus->read = rswitch_etha_mii_read_c22; + mii_bus->write = rswitch_etha_mii_write_c22; mii_bus->parent = &rdev->priv->pdev->dev; mdio_np = of_get_child_by_name(rdev->np_port, "mdio"); From d1d761b3012e99d55d288d435384be606302cb2c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:53 +0200 Subject: [PATCH 0510/1386] net: fib_rules: Add flow label selector attributes Add new FIB rule attributes which will allow user space to match on the IPv6 flow label with a mask. 
Temporarily set the type of the attributes to 'NLA_REJECT' while support is being added in the IPv6 code. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/uapi/linux/fib_rules.h | 2 ++ net/core/fib_rules.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h index a6924dd3aff1..00e9890ca3c0 100644 --- a/include/uapi/linux/fib_rules.h +++ b/include/uapi/linux/fib_rules.h @@ -68,6 +68,8 @@ enum { FRA_SPORT_RANGE, /* sport */ FRA_DPORT_RANGE, /* dport */ FRA_DSCP, /* dscp */ + FRA_FLOWLABEL, /* flowlabel */ + FRA_FLOWLABEL_MASK, /* flowlabel mask */ __FRA_MAX }; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 34185d138c95..153b14aade42 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -770,6 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), + [FRA_FLOWLABEL] = { .type = NLA_REJECT }, + [FRA_FLOWLABEL_MASK] = { .type = NLA_REJECT }, }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, From f0c898d8c279e6cfdf5e25dc04424d518dec1aa4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:54 +0200 Subject: [PATCH 0511/1386] ipv4: fib_rules: Reject flow label attributes IPv4 FIB rules cannot match on flow label so reject requests that try to add such rules. Do that in the IPv4 configure callback as the netlink policy resides in the core and used by both IPv4 and IPv6. 
Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- net/ipv4/fib_rules.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8325224ef072..9517b8667e00 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -249,6 +249,12 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, int err = -EINVAL; struct fib4_rule *rule4 = (struct fib4_rule *) rule; + if (tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) { + NL_SET_ERR_MSG(extack, + "Flow label cannot be specified for IPv4 FIB rules"); + goto errout; + } + if (!inet_validate_dscp(frh->tos)) { NL_SET_ERR_MSG(extack, "Invalid dsfield (tos): ECN bits must be 0"); From 9aa77531a1314dd46d3694eac5dc469a6690fca7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:55 +0200 Subject: [PATCH 0512/1386] ipv6: fib_rules: Add flow label support Implement support for the new flow label selector which allows IPv6 FIB rules to match on the flow label with a mask. Ensure that both flow label attributes are specified (or none) and that the mask is valid. 
Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- net/ipv6/fib6_rules.c | 57 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index c85c1627cb16..67d39114d9a6 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -26,6 +26,8 @@ struct fib6_rule { struct fib_rule common; struct rt6key src; struct rt6key dst; + __be32 flowlabel; + __be32 flowlabel_mask; dscp_t dscp; u8 dscp_full:1; /* DSCP or TOS selector */ }; @@ -34,7 +36,7 @@ static bool fib6_rule_matchall(const struct fib_rule *rule) { struct fib6_rule *r = container_of(rule, struct fib6_rule, common); - if (r->dst.plen || r->src.plen || r->dscp) + if (r->dst.plen || r->src.plen || r->dscp || r->flowlabel_mask) return false; return fib_rule_matchall(rule); } @@ -332,6 +334,9 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_match(struct fib_rule *rule, if (r->dscp && r->dscp != ip6_dscp(fl6->flowlabel)) return 0; + if ((r->flowlabel ^ flowi6_get_flowlabel(fl6)) & r->flowlabel_mask) + return 0; + if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto)) return 0; @@ -360,6 +365,35 @@ static int fib6_nl2rule_dscp(const struct nlattr *nla, struct fib6_rule *rule6, return 0; } +static int fib6_nl2rule_flowlabel(struct nlattr **tb, struct fib6_rule *rule6, + struct netlink_ext_ack *extack) +{ + __be32 flowlabel, flowlabel_mask; + + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL) || + NL_REQ_ATTR_CHECK(extack, NULL, tb, FRA_FLOWLABEL_MASK)) + return -EINVAL; + + flowlabel = nla_get_be32(tb[FRA_FLOWLABEL]); + flowlabel_mask = nla_get_be32(tb[FRA_FLOWLABEL_MASK]); + + if (flowlabel_mask & ~IPV6_FLOWLABEL_MASK) { + NL_SET_ERR_MSG_ATTR(extack, tb[FRA_FLOWLABEL_MASK], + "Invalid flow label mask"); + return -EINVAL; + } + + if (flowlabel & ~flowlabel_mask) { + NL_SET_ERR_MSG(extack, "Flow label and mask do not match"); + return -EINVAL; + } + + 
rule6->flowlabel = flowlabel; + rule6->flowlabel_mask = flowlabel_mask; + + return 0; +} + static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, struct fib_rule_hdr *frh, struct nlattr **tb, @@ -379,6 +413,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_DSCP] && fib6_nl2rule_dscp(tb[FRA_DSCP], rule6, extack) < 0) goto errout; + if ((tb[FRA_FLOWLABEL] || tb[FRA_FLOWLABEL_MASK]) && + fib6_nl2rule_flowlabel(tb, rule6, extack) < 0) + goto errout; + if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) { if (rule->table == RT6_TABLE_UNSPEC) { NL_SET_ERR_MSG(extack, "Invalid table"); @@ -444,6 +482,14 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, return 0; } + if (tb[FRA_FLOWLABEL] && + nla_get_be32(tb[FRA_FLOWLABEL]) != rule6->flowlabel) + return 0; + + if (tb[FRA_FLOWLABEL_MASK] && + nla_get_be32(tb[FRA_FLOWLABEL_MASK]) != rule6->flowlabel_mask) + return 0; + if (frh->src_len && nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) return 0; @@ -472,6 +518,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = inet_dscp_to_dsfield(rule6->dscp); } + if (rule6->flowlabel_mask && + (nla_put_be32(skb, FRA_FLOWLABEL, rule6->flowlabel) || + nla_put_be32(skb, FRA_FLOWLABEL_MASK, rule6->flowlabel_mask))) + goto nla_put_failure; + if ((rule6->dst.plen && nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && @@ -487,7 +538,9 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule) { return nla_total_size(16) /* dst */ + nla_total_size(16) /* src */ - + nla_total_size(1); /* dscp */ + + nla_total_size(1) /* dscp */ + + nla_total_size(4) /* flowlabel */ + + nla_total_size(4); /* flowlabel mask */ } static void fib6_rule_flush_cache(struct fib_rules_ops *ops) From 4c25f3f0519486382644c76ee11b127026095c61 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:56 +0200 Subject: [PATCH 0513/1386] 
net: fib_rules: Enable flow label selector usage Now that both IPv4 and IPv6 correctly handle the new flow label attributes, enable user space to configure FIB rules that make use of the flow label by changing the policy to stop rejecting them and instead accept 32-bit values in big-endian byte order. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- net/core/fib_rules.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 153b14aade42..e684ba3ebb38 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -770,8 +770,8 @@ static const struct nla_policy fib_rule_policy[FRA_MAX + 1] = { [FRA_SPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DPORT_RANGE] = { .len = sizeof(struct fib_rule_port_range) }, [FRA_DSCP] = NLA_POLICY_MAX(NLA_U8, INET_DSCP_MASK >> 2), - [FRA_FLOWLABEL] = { .type = NLA_REJECT }, - [FRA_FLOWLABEL_MASK] = { .type = NLA_REJECT }, + [FRA_FLOWLABEL] = { .type = NLA_BE32 }, + [FRA_FLOWLABEL_MASK] = { .type = NLA_BE32 }, }; int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, From c72004aac60a9ffdf4bc29b1e7ff0798a7eab3c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:57 +0200 Subject: [PATCH 0514/1386] netlink: specs: Add FIB rule flow label attributes Add the new flow label attributes to the spec. 
Example: # ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_rule.yaml \ --do newrule \ --json '{"family": 10, "flowlabel": 1, "flowlabel-mask": 1, "action": 1, "table": 1}' None $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_rule.yaml \ --dump getrule --json '{"family": 10}' --output-json \ | jq '.[] | select(.flowlabel == "0x1")' { "table": 1, "suppress-prefixlen": "0xffffffff", "protocol": 0, "priority": 32765, "flowlabel": "0x1", "flowlabel-mask": "0x1", "family": 10, "dst-len": 0, "src-len": 0, "tos": 0, "action": "to-tbl", "flags": 0 } Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/rt_rule.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Documentation/netlink/specs/rt_rule.yaml b/Documentation/netlink/specs/rt_rule.yaml index 03a8eef7952e..a9debac3058a 100644 --- a/Documentation/netlink/specs/rt_rule.yaml +++ b/Documentation/netlink/specs/rt_rule.yaml @@ -172,6 +172,16 @@ attribute-sets: - name: dscp type: u8 + - + name: flowlabel + type: u32 + byte-order: big-endian + display-hint: hex + - + name: flowlabel-mask + type: u32 + byte-order: big-endian + display-hint: hex operations: enum-model: directional @@ -203,6 +213,8 @@ operations: - sport-range - dport-range - dscp + - flowlabel + - flowlabel-mask - name: newrule-ntf doc: Notify a rule creation From ba4138032ae3b5b8e2b68d2f2647cdc0817b05a6 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:58 +0200 Subject: [PATCH 0515/1386] ipv6: Add flow label to route get requests The default IPv6 multipath hash policy takes the flow label into account when calculating a multipath hash and previous patches added a flow label selector to IPv6 FIB rules. Allow user space to specify a flow label in route get requests by adding a new netlink attribute and using its value to populate the "flowlabel" field in the IPv6 flow info structure prior to a route lookup. 
Deny the attribute in RTM_{NEW,DEL}ROUTE requests by checking for it in rtm_to_fib6_config() and returning an error if present. A subsequent patch will use this capability to test the new flow label selector in IPv6 FIB rules. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/uapi/linux/rtnetlink.h | 1 + net/ipv6/route.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index eccc0e7dcb7d..5ee94c511a28 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -393,6 +393,7 @@ enum rtattr_type_t { RTA_SPORT, RTA_DPORT, RTA_NH_ID, + RTA_FLOWLABEL, __RTA_MAX }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 67ff16c04718..78362822b907 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -5005,6 +5005,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_SPORT] = { .type = NLA_U16 }, [RTA_DPORT] = { .type = NLA_U16 }, [RTA_NH_ID] = { .type = NLA_U32 }, + [RTA_FLOWLABEL] = { .type = NLA_BE32 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -5030,6 +5031,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, goto errout; } + if (tb[RTA_FLOWLABEL]) { + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], + "Flow label cannot be specified for this operation"); + goto errout; + } + *cfg = (struct fib6_config){ .fc_table = rtm->rtm_table, .fc_dst_len = rtm->rtm_dst_len, @@ -6013,6 +6020,13 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, return -EINVAL; } + if (tb[RTA_FLOWLABEL] && + (nla_get_be32(tb[RTA_FLOWLABEL]) & ~IPV6_FLOWLABEL_MASK)) { + NL_SET_ERR_MSG_ATTR(extack, tb[RTA_FLOWLABEL], + "Invalid flow label"); + return -EINVAL; + } + for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; @@ -6027,6 +6041,7 @@ static int inet6_rtm_valid_getroute_req(struct sk_buff *skb, case 
RTA_SPORT: case RTA_DPORT: case RTA_IP_PROTO: + case RTA_FLOWLABEL: break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request"); @@ -6049,6 +6064,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct sk_buff *skb; struct rtmsg *rtm; struct flowi6 fl6 = {}; + __be32 flowlabel; bool fibmatch; err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack); @@ -6057,7 +6073,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = -EINVAL; rtm = nlmsg_data(nlh); - fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, 0); fibmatch = !!(rtm->rtm_flags & RTM_F_FIB_MATCH); if (tb[RTA_SRC]) { @@ -6103,6 +6118,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout; } + flowlabel = nla_get_be32_default(tb[RTA_FLOWLABEL], 0); + fl6.flowlabel = ip6_make_flowinfo(rtm->rtm_tos, flowlabel); + if (iif) { struct net_device *dev; int flags = 0; From d26b8267d9e02b02c8d1aeb38d7730b5efab3b64 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:11:59 +0200 Subject: [PATCH 0516/1386] netlink: specs: Add route flow label attribute Add the new flow label attribute to the spec. 
Example: # ip link add name dummy1 up type dummy # ip -6 route add default table 254 dev dummy1 # ip -6 route add default table 10 dev dummy1 # ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_rule.yaml \ --do newrule \ --json '{"family": 10, "priority": 1, "flowlabel": 10, "flowlabel-mask": 255, "action": 1, "table": 10}' None $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_route.yaml \ --do getroute \ --json '{"rtm-family": 10, "rta-flowlabel": 1}' --output-json \ | jq '.["rta-table"]' 254 $ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_route.yaml \ --do getroute \ --json '{"rtm-family": 10, "rta-flowlabel": 10}' --output-json \ | jq '.["rta-table"]' 10 Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/rt_route.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Documentation/netlink/specs/rt_route.yaml b/Documentation/netlink/specs/rt_route.yaml index f4368be0caed..a674103e5bc4 100644 --- a/Documentation/netlink/specs/rt_route.yaml +++ b/Documentation/netlink/specs/rt_route.yaml @@ -177,6 +177,11 @@ attribute-sets: - name: rta-nh-id type: u32 + - + name: rta-flowlabel + type: u32 + byte-order: big-endian + display-hint: hex - name: rta-metrics attributes: @@ -260,6 +265,7 @@ operations: - rta-dport - rta-mark - rta-uid + - rta-flowlabel reply: value: 24 attributes: &all-route-attrs @@ -299,6 +305,7 @@ operations: - rta-sport - rta-dport - rta-nh-id + - rta-flowlabel dump: request: value: 26 From 002bf68a3b3e5f90ce61ea8fd11b8b62fd0765ce Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:12:00 +0200 Subject: [PATCH 0517/1386] tracing: ipv6: Add flow label to fib6_table_lookup tracepoint The different parameters affecting the IPv6 route lookup are printed to the trace buffer by the fib6_table_lookup tracepoint. 
Add the IPv6 flow label for better observability as it can affect the route lookup both in terms of multipath hash calculation and policy based routing (FIB rules). Example: # echo 1 > /sys/kernel/tracing/events/fib6/fib6_table_lookup/enable # ip -6 route get ::1 flowlabel 0x12345 ipproto udp sport 12345 dport 54321 &> /dev/null # cat /sys/kernel/tracing/trace_pipe ip-358 [010] ..... 44.897484: fib6_table_lookup: table 255 oif 0 iif 1 proto 17 ::/12345 -> ::1/54321 flowlabel 0x12345 tos 0 scope 0 flags 0 ==> dev lo gw :: err 0 Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- include/trace/events/fib6.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index 5d7ee2610728..8d22b2e98d48 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -22,6 +22,7 @@ TRACE_EVENT(fib6_table_lookup, __field( int, err ) __field( int, oif ) __field( int, iif ) + __field( u32, flowlabel ) __field( __u8, tos ) __field( __u8, scope ) __field( __u8, flags ) @@ -42,6 +43,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->err = ip6_rt_type_to_error(res->fib6_type); __entry->oif = flp->flowi6_oif; __entry->iif = flp->flowi6_iif; + __entry->flowlabel = ntohl(flowi6_get_flowlabel(flp)); __entry->tos = ip6_tclass(flp->flowlabel); __entry->scope = flp->flowi6_scope; __entry->flags = flp->flowi6_flags; @@ -76,11 +78,11 @@ TRACE_EVENT(fib6_table_lookup, } ), - TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", + TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u flowlabel %#x tos %d scope %d flags %x ==> dev %s gw %pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, - __entry->tos, __entry->scope, __entry->flags, - __entry->name, __entry->gw, __entry->err) + 
__entry->flowlabel, __entry->tos, __entry->scope, + __entry->flags, __entry->name, __entry->gw, __entry->err) ); #endif /* _TRACE_FIB6_H */ From 5760711e198d86bd0d0b9270a54a494ae9a501e0 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 16 Dec 2024 19:12:01 +0200 Subject: [PATCH 0518/1386] selftests: fib_rule_tests: Add flow label selector match tests Add tests for the new FIB rule flow label selector. Test both good and bad flows and with both input and output routes. # ./fib_rule_tests.sh IPv6 FIB rule tests [...] TEST: rule6 check: flowlabel redirect to table [ OK ] TEST: rule6 check: flowlabel no redirect to table [ OK ] TEST: rule6 del by pref: flowlabel redirect to table [ OK ] TEST: rule6 check: iif flowlabel redirect to table [ OK ] TEST: rule6 check: iif flowlabel no redirect to table [ OK ] TEST: rule6 del by pref: iif flowlabel redirect to table [ OK ] TEST: rule6 check: flowlabel masked redirect to table [ OK ] TEST: rule6 check: flowlabel masked no redirect to table [ OK ] TEST: rule6 del by pref: flowlabel masked redirect to table [ OK ] TEST: rule6 check: iif flowlabel masked redirect to table [ OK ] TEST: rule6 check: iif flowlabel masked no redirect to table [ OK ] TEST: rule6 del by pref: iif flowlabel masked redirect to table [ OK ] [...] Tests passed: 268 Tests failed: 0 Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Reviewed-by: Guillaume Nault Signed-off-by: Paolo Abeni --- tools/testing/selftests/net/fib_rule_tests.sh | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 1d58b3b87465..847936363a12 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -291,6 +291,37 @@ fib_rule6_test() "$getnomatch" "iif dscp redirect to table" \ "iif dscp no redirect to table" fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? 
-eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi } fib_rule6_vrf_test() From 2a7e02fa9116d9b077983257774e6644af064857 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 9 Dec 2024 18:50:25 +0300 Subject: [PATCH 0519/1386] wifi: ath9k: cleanup ath_txq_skb_done() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 'txq' argument of 'ath_txq_skb_done()' is actually (mis|un)used, convert the former to local variable and adjust all related users. Compile tested only. Found by Linux Verification Center (linuxtesting.org) with SVACE. 
Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241209155027.636400-1-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/xmit.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 0a24439dd30d..f41f03f55b07 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -208,10 +208,10 @@ static void ath_set_rates(struct ieee80211_vif *vif, struct ieee80211_sta *sta, ARRAY_SIZE(bf->rates)); } -static void ath_txq_skb_done(struct ath_softc *sc, struct ath_txq *txq, - struct sk_buff *skb) +static void ath_txq_skb_done(struct ath_softc *sc, struct sk_buff *skb) { struct ath_frame_info *fi = get_frame_info(skb); + struct ath_txq *txq; int q = fi->txq; if (q < 0) @@ -294,7 +294,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) fi = get_frame_info(skb); bf = fi->bf; if (!bf) { - ath_txq_skb_done(sc, txq, skb); + ath_txq_skb_done(sc, skb); ieee80211_free_txskb(sc->hw, skb); continue; } @@ -962,7 +962,7 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, bf->bf_state.stale = false; if (!bf) { - ath_txq_skb_done(sc, txq, skb); + ath_txq_skb_done(sc, skb); ieee80211_free_txskb(sc->hw, skb); continue; } @@ -2379,7 +2379,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, bf = ath_tx_setup_buffer(sc, txq, tid, skb); if (!bf) { - ath_txq_skb_done(sc, txq, skb); + ath_txq_skb_done(sc, skb); if (txctl->paprd) dev_kfree_skb_any(skb); else @@ -2514,7 +2514,7 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, } spin_unlock_irqrestore(&sc->sc_pm_lock, flags); - ath_txq_skb_done(sc, txq, skb); + ath_txq_skb_done(sc, skb); tx_info->status.status_driver_data[0] = sta; __skb_queue_tail(&txq->complete_q, skb); } From d19ac7ef6ee997298a42335d0dd09b67c6cb19bf Mon Sep 17 00:00:00 2001 
From: Dmitry Antipov Date: Mon, 9 Dec 2024 18:50:26 +0300 Subject: [PATCH 0520/1386] wifi: ath9k: cleanup a few (mostly) TX-related routines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove unused 'struct ath_softc *' argument of 'ath_pkt_duration()', 'ath_tx_update_baw()', 'ath_get_skb_tid()', 'ath_tx_addto_baw()' and 'ath_tx_count_frames()', adjust related users. Compile tested only. Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241209155027.636400-2-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/ath9k.h | 4 +-- drivers/net/wireless/ath/ath9k/recv.c | 4 +-- drivers/net/wireless/ath/ath9k/xmit.c | 40 ++++++++++++-------------- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index bcfc8df0efe5..3e06a7e85941 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -592,8 +592,8 @@ void ath_txq_schedule_all(struct ath_softc *sc); int ath_tx_init(struct ath_softc *sc, int nbufs); int ath_txq_update(struct ath_softc *sc, int qnum, struct ath9k_tx_queue_info *q); -u32 ath_pkt_duration(struct ath_softc *sc, u8 rix, int pktlen, - int width, int half_gi, bool shortPreamble); +u32 ath_pkt_duration(u8 rix, int pktlen, int width, + int half_gi, bool shortPreamble); void ath_update_max_aggr_framelen(struct ath_softc *sc, int queue, int txop); void ath_assign_seq(struct ath_common *common, struct sk_buff *skb); int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 0c0624a3b40d..34c74ed99b7b 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -1042,8 +1042,8 @@ static void ath_rx_count_airtime(struct ath_softc *sc, if (!!(rxs->encoding == RX_ENC_HT)) { /* MCS 
rates */ - airtime += ath_pkt_duration(sc, rxs->rate_idx, len, - is_40, is_sgi, is_sp); + airtime += ath_pkt_duration(rxs->rate_idx, len, + is_40, is_sgi, is_sp); } else { phy = IS_CCK_RATE(rs->rs_rate) ? WLAN_RC_PHY_CCK : WLAN_RC_PHY_OFDM; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index f41f03f55b07..db07ce6dbc08 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -67,8 +67,7 @@ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, static void ath_tx_rc_status(struct ath_softc *sc, struct ath_buf *bf, struct ath_tx_status *ts, int nframes, int nbad, int txok); -static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid, - struct ath_buf *bf); +static void ath_tx_update_baw(struct ath_atx_tid *tid, struct ath_buf *bf); static struct ath_buf *ath_tx_setup_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, @@ -224,7 +223,7 @@ static void ath_txq_skb_done(struct ath_softc *sc, struct sk_buff *skb) } static struct ath_atx_tid * -ath_get_skb_tid(struct ath_softc *sc, struct ath_node *an, struct sk_buff *skb) +ath_get_skb_tid(struct ath_node *an, struct sk_buff *skb) { u8 tidno = skb->priority & IEEE80211_QOS_CTL_TID_MASK; return ATH_AN_2_TID(an, tidno); @@ -300,7 +299,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) } if (fi->baw_tracked) { - ath_tx_update_baw(sc, tid, bf); + ath_tx_update_baw(tid, bf); sendbar = true; } @@ -315,8 +314,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) } } -static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid, - struct ath_buf *bf) +static void ath_tx_update_baw(struct ath_atx_tid *tid, struct ath_buf *bf) { struct ath_frame_info *fi = get_frame_info(bf->bf_mpdu); u16 seqno = bf->bf_state.seqno; @@ -338,8 +336,7 @@ static void ath_tx_update_baw(struct ath_softc *sc, struct ath_atx_tid *tid, } } -static 
void ath_tx_addto_baw(struct ath_softc *sc, struct ath_atx_tid *tid, - struct ath_buf *bf) +static void ath_tx_addto_baw(struct ath_atx_tid *tid, struct ath_buf *bf) { struct ath_frame_info *fi = get_frame_info(bf->bf_mpdu); u16 seqno = bf->bf_state.seqno; @@ -452,9 +449,8 @@ static struct ath_buf* ath_clone_txbuf(struct ath_softc *sc, struct ath_buf *bf) return tbf; } -static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, - struct ath_tx_status *ts, int txok, - int *nframes, int *nbad) +static void ath_tx_count_frames(struct ath_buf *bf, struct ath_tx_status *ts, + int txok, int *nframes, int *nbad) { u16 seq_st = 0; u32 ba[WME_BA_BMP_SIZE >> 5]; @@ -568,7 +564,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, __skb_queue_head_init(&bf_pending); - ath_tx_count_frames(sc, bf, ts, txok, &nframes, &nbad); + ath_tx_count_frames(bf, ts, txok, &nframes, &nbad); while (bf) { u16 seqno = bf->bf_state.seqno; @@ -621,7 +617,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, * complete the acked-ones/xretried ones; update * block-ack window */ - ath_tx_update_baw(sc, tid, bf); + ath_tx_update_baw(tid, bf); if (rc_update && (acked_cnt == 1 || txfail_cnt == 1)) { memcpy(tx_info->control.rates, rates, sizeof(rates)); @@ -651,7 +647,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, * run out of tx buf. 
*/ if (!tbf) { - ath_tx_update_baw(sc, tid, bf); + ath_tx_update_baw(tid, bf); ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, @@ -752,7 +748,7 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); if (sta) { struct ath_node *an = (struct ath_node *)sta->drv_priv; - tid = ath_get_skb_tid(sc, an, bf->bf_mpdu); + tid = ath_get_skb_tid(an, bf->bf_mpdu); ath_tx_count_airtime(sc, sta, bf, ts, tid->tidno); if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY)) tid->clear_ps_filter = true; @@ -1012,13 +1008,13 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, INIT_LIST_HEAD(&bf_head); list_add(&bf->list, &bf_head); - ath_tx_update_baw(sc, tid, bf); + ath_tx_update_baw(tid, bf); ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); continue; } if (bf_isampdu(bf)) - ath_tx_addto_baw(sc, tid, bf); + ath_tx_addto_baw(tid, bf); break; } @@ -1114,8 +1110,8 @@ finish: * width - 0 for 20 MHz, 1 for 40 MHz * half_gi - to use 4us v/s 3.6 us for symbol time */ -u32 ath_pkt_duration(struct ath_softc *sc, u8 rix, int pktlen, - int width, int half_gi, bool shortPreamble) +u32 ath_pkt_duration(u8 rix, int pktlen, int width, + int half_gi, bool shortPreamble) { u32 nbits, nsymbits, duration, nsymbols; int streams; @@ -1327,7 +1323,7 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf, info->rates[i].Rate = rix | 0x80; info->rates[i].ChSel = ath_txchainmask_reduction(sc, ah->txchainmask, info->rates[i].Rate); - info->rates[i].PktDuration = ath_pkt_duration(sc, rix, len, + info->rates[i].PktDuration = ath_pkt_duration(rix, len, is_40, is_sgi, is_sp); if (rix < 8 && (tx_info->flags & IEEE80211_TX_CTL_STBC)) info->rates[i].RateFlags |= ATH9K_RATESERIES_STBC; @@ -2122,7 +2118,7 @@ static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, bf->bf_state.bf_type = 0; if (tid && (tx_info->flags & IEEE80211_TX_CTL_AMPDU)) { 
bf->bf_state.bf_type = BUF_AMPDU; - ath_tx_addto_baw(sc, tid, bf); + ath_tx_addto_baw(tid, bf); } bf->bf_next = NULL; @@ -2368,7 +2364,7 @@ int ath_tx_start(struct ieee80211_hw *hw, struct sk_buff *skb, if (txctl->sta) { an = (struct ath_node *) sta->drv_priv; - tid = ath_get_skb_tid(sc, an, skb); + tid = ath_get_skb_tid(an, skb); } ath_txq_lock(sc, txq); From 0cc6510ca4639a20c8921f223f05faa485795204 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Mon, 9 Dec 2024 18:50:27 +0300 Subject: [PATCH 0521/1386] wifi: ath9k: simplify internal time management MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prefer 'ktime_t' over 'struct timespec64' for 'struct ath_chanctx' and 'struct ath_softc' timestamps, choose the standard kernel time API over ad-hoc math in 'chanctx_event_delta()' and 'ath9k_hw_get_tsf_offset()', adjust related users. Compile tested only. Signed-off-by: Dmitry Antipov Acked-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241209155027.636400-3-dmantipov@yandex.ru Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath9k/ath9k.h | 4 ++-- drivers/net/wireless/ath/ath9k/beacon.c | 2 +- drivers/net/wireless/ath/ath9k/channel.c | 29 ++++++++++-------------- drivers/net/wireless/ath/ath9k/hw.c | 25 +++++++------------- drivers/net/wireless/ath/ath9k/hw.h | 2 +- drivers/net/wireless/ath/ath9k/main.c | 9 ++++---- 6 files changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 3e06a7e85941..a728cc0387df 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -338,7 +338,7 @@ struct ath_chanctx { struct ath_beacon_config beacon; struct ath9k_hw_cal_data caldata; - struct timespec64 tsf_ts; + ktime_t tsf_ts; u64 tsf_val; u32 last_beacon; @@ -1011,7 +1011,7 @@ struct ath_softc { struct ath_offchannel offchannel; struct ath_chanctx *next_chan; struct completion go_beacon; - 
struct timespec64 last_event_time; + ktime_t last_event_time; #endif unsigned long driver_data; diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index b399a7926ef5..4a27e3753c03 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -293,7 +293,7 @@ void ath9k_beacon_ensure_primary_slot(struct ath_softc *sc) /* Modify TSF as required and update the HW. */ avp->chanctx->tsf_val += tsfadjust; if (sc->cur_chan == avp->chanctx) { - offset = ath9k_hw_get_tsf_offset(&avp->chanctx->tsf_ts, NULL); + offset = ath9k_hw_get_tsf_offset(avp->chanctx->tsf_ts, 0); ath9k_hw_settsf64(sc->sc_ah, avp->chanctx->tsf_val + offset); } diff --git a/drivers/net/wireless/ath/ath9k/channel.c b/drivers/net/wireless/ath/ath9k/channel.c index 02237d106f8c..bae24e3d3168 100644 --- a/drivers/net/wireless/ath/ath9k/channel.c +++ b/drivers/net/wireless/ath/ath9k/channel.c @@ -232,16 +232,11 @@ static const char *chanctx_state_string(enum ath_chanctx_state state) static u32 chanctx_event_delta(struct ath_softc *sc) { - u64 ms; - struct timespec64 ts, *old; + ktime_t ts = ktime_get_raw(); + s64 ms = ktime_ms_delta(ts, sc->last_event_time); - ktime_get_raw_ts64(&ts); - old = &sc->last_event_time; - ms = ts.tv_sec * 1000 + ts.tv_nsec / 1000000; - ms -= old->tv_sec * 1000 + old->tv_nsec / 1000000; sc->last_event_time = ts; - - return (u32)ms; + return ms; } void ath_chanctx_check_active(struct ath_softc *sc, struct ath_chanctx *ctx) @@ -334,8 +329,8 @@ ath_chanctx_get_next(struct ath_softc *sc, struct ath_chanctx *ctx) static void ath_chanctx_adjust_tbtt_delta(struct ath_softc *sc) { struct ath_chanctx *prev, *cur; - struct timespec64 ts; u32 cur_tsf, prev_tsf, beacon_int; + ktime_t ts; s32 offset; beacon_int = TU_TO_USEC(sc->cur_chan->beacon.beacon_interval); @@ -346,12 +341,12 @@ static void ath_chanctx_adjust_tbtt_delta(struct ath_softc *sc) if (!prev->switch_after_beacon) return; - ktime_get_raw_ts64(&ts); + 
ts = ktime_get_raw(); cur_tsf = (u32) cur->tsf_val + - ath9k_hw_get_tsf_offset(&cur->tsf_ts, &ts); + ath9k_hw_get_tsf_offset(cur->tsf_ts, ts); prev_tsf = prev->last_beacon - (u32) prev->tsf_val + cur_tsf; - prev_tsf -= ath9k_hw_get_tsf_offset(&prev->tsf_ts, &ts); + prev_tsf -= ath9k_hw_get_tsf_offset(prev->tsf_ts, ts); /* Adjust the TSF time of the AP chanctx to keep its beacons * at half beacon interval offset relative to the STA chanctx. @@ -691,7 +686,7 @@ void ath_chanctx_event(struct ath_softc *sc, struct ieee80211_vif *vif, */ tsf_time = sc->sched.switch_start_time; tsf_time -= (u32) sc->cur_chan->tsf_val + - ath9k_hw_get_tsf_offset(&sc->cur_chan->tsf_ts, NULL); + ath9k_hw_get_tsf_offset(sc->cur_chan->tsf_ts, 0); tsf_time += ath9k_hw_gettsf32(ah); sc->sched.beacon_adjust = false; @@ -1230,10 +1225,10 @@ void ath_chanctx_set_next(struct ath_softc *sc, bool force) { struct ath_common *common = ath9k_hw_common(sc->sc_ah); struct ath_chanctx *old_ctx; - struct timespec64 ts; bool measure_time = false; bool send_ps = false; bool queues_stopped = false; + ktime_t ts; spin_lock_bh(&sc->chan_lock); if (!sc->next_chan) { @@ -1260,7 +1255,7 @@ void ath_chanctx_set_next(struct ath_softc *sc, bool force) spin_unlock_bh(&sc->chan_lock); if (sc->next_chan == &sc->offchannel.chan) { - ktime_get_raw_ts64(&ts); + ts = ktime_get_raw(); measure_time = true; } @@ -1277,7 +1272,7 @@ void ath_chanctx_set_next(struct ath_softc *sc, bool force) spin_lock_bh(&sc->chan_lock); if (sc->cur_chan != &sc->offchannel.chan) { - ktime_get_raw_ts64(&sc->cur_chan->tsf_ts); + sc->cur_chan->tsf_ts = ktime_get_raw(); sc->cur_chan->tsf_val = ath9k_hw_gettsf64(sc->sc_ah); } } @@ -1303,7 +1298,7 @@ void ath_chanctx_set_next(struct ath_softc *sc, bool force) ath_set_channel(sc); if (measure_time) sc->sched.channel_switch_time = - ath9k_hw_get_tsf_offset(&ts, NULL); + ath9k_hw_get_tsf_offset(ts, 0); /* * A reset will ensure that all queues are woken up, * so there is no need to awaken them again. 
diff --git a/drivers/net/wireless/ath/ath9k/hw.c b/drivers/net/wireless/ath/ath9k/hw.c index a25eacabc664..f9a774bd0e13 100644 --- a/drivers/net/wireless/ath/ath9k/hw.c +++ b/drivers/net/wireless/ath/ath9k/hw.c @@ -1847,20 +1847,11 @@ fail: return -EINVAL; } -u32 ath9k_hw_get_tsf_offset(struct timespec64 *last, struct timespec64 *cur) +u32 ath9k_hw_get_tsf_offset(ktime_t last, ktime_t cur) { - struct timespec64 ts; - s64 usec; - - if (!cur) { - ktime_get_raw_ts64(&ts); - cur = &ts; - } - - usec = cur->tv_sec * 1000000ULL + cur->tv_nsec / 1000; - usec -= last->tv_sec * 1000000ULL + last->tv_nsec / 1000; - - return (u32) usec; + if (cur == 0) + cur = ktime_get_raw(); + return ktime_us_delta(cur, last); } EXPORT_SYMBOL(ath9k_hw_get_tsf_offset); @@ -1871,7 +1862,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, u32 saveLedState; u32 saveDefAntenna; u32 macStaId1; - struct timespec64 tsf_ts; + ktime_t tsf_ts; u32 tsf_offset; u64 tsf = 0; int r; @@ -1917,7 +1908,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, macStaId1 = REG_READ(ah, AR_STA_ID1) & AR_STA_ID1_BASE_RATE_11B; /* Save TSF before chip reset, a cold reset clears it */ - ktime_get_raw_ts64(&tsf_ts); + tsf_ts = ktime_get_raw(); tsf = ath9k_hw_gettsf64(ah); saveLedState = REG_READ(ah, AR_CFG_LED) & @@ -1951,7 +1942,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, } /* Restore TSF */ - tsf_offset = ath9k_hw_get_tsf_offset(&tsf_ts, NULL); + tsf_offset = ath9k_hw_get_tsf_offset(tsf_ts, 0); ath9k_hw_settsf64(ah, tsf + tsf_offset); if (AR_SREV_9280_20_OR_LATER(ah)) @@ -1975,7 +1966,7 @@ int ath9k_hw_reset(struct ath_hw *ah, struct ath9k_channel *chan, * value after the initvals have been applied. 
*/ if (AR_SREV_9100(ah) && (ath9k_hw_gettsf64(ah) < tsf)) { - tsf_offset = ath9k_hw_get_tsf_offset(&tsf_ts, NULL); + tsf_offset = ath9k_hw_get_tsf_offset(tsf_ts, 0); ath9k_hw_settsf64(ah, tsf + tsf_offset); } diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h index e2cbf3f00da0..eaa07d6dbde0 100644 --- a/drivers/net/wireless/ath/ath9k/hw.h +++ b/drivers/net/wireless/ath/ath9k/hw.h @@ -1066,7 +1066,7 @@ u32 ath9k_hw_gettsf32(struct ath_hw *ah); u64 ath9k_hw_gettsf64(struct ath_hw *ah); void ath9k_hw_settsf64(struct ath_hw *ah, u64 tsf64); void ath9k_hw_reset_tsf(struct ath_hw *ah); -u32 ath9k_hw_get_tsf_offset(struct timespec64 *last, struct timespec64 *cur); +u32 ath9k_hw_get_tsf_offset(ktime_t last, ktime_t cur); void ath9k_hw_set_tsfadjust(struct ath_hw *ah, bool set); void ath9k_hw_init_global_settings(struct ath_hw *ah); u32 ar9003_get_pll_sqsum_dvc(struct ath_hw *ah); diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 812e0c6bde3e..a70c94564814 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -249,8 +249,7 @@ static bool ath_complete_reset(struct ath_softc *sc, bool start) if (sc->cur_chan->tsf_val) { u32 offset; - offset = ath9k_hw_get_tsf_offset(&sc->cur_chan->tsf_ts, - NULL); + offset = ath9k_hw_get_tsf_offset(sc->cur_chan->tsf_ts, 0); ath9k_hw_settsf64(ah, sc->cur_chan->tsf_val + offset); } @@ -1956,7 +1955,7 @@ static u64 ath9k_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) tsf = ath9k_hw_gettsf64(sc->sc_ah); } else { tsf = sc->cur_chan->tsf_val + - ath9k_hw_get_tsf_offset(&sc->cur_chan->tsf_ts, NULL); + ath9k_hw_get_tsf_offset(sc->cur_chan->tsf_ts, 0); } tsf += le64_to_cpu(avp->tsf_adjust); ath9k_ps_restore(sc); @@ -1975,7 +1974,7 @@ static void ath9k_set_tsf(struct ieee80211_hw *hw, mutex_lock(&sc->mutex); ath9k_ps_wakeup(sc); tsf -= le64_to_cpu(avp->tsf_adjust); - ktime_get_raw_ts64(&avp->chanctx->tsf_ts); + 
avp->chanctx->tsf_ts = ktime_get_raw(); if (sc->cur_chan == avp->chanctx) ath9k_hw_settsf64(sc->sc_ah, tsf); avp->chanctx->tsf_val = tsf; @@ -1991,7 +1990,7 @@ static void ath9k_reset_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mutex_lock(&sc->mutex); ath9k_ps_wakeup(sc); - ktime_get_raw_ts64(&avp->chanctx->tsf_ts); + avp->chanctx->tsf_ts = ktime_get_raw(); if (sc->cur_chan == avp->chanctx) ath9k_hw_reset_tsf(sc->sc_ah); avp->chanctx->tsf_val = 0; From be8d47f181fd4f341b8beee1ca11a96d296d2df2 Mon Sep 17 00:00:00 2001 From: P Praneesh Date: Tue, 17 Dec 2024 15:20:58 +0530 Subject: [PATCH 0522/1386] wifi: ath12k: Add support for parsing 64-bit TLVs There is a mismatch between the format of the monitor destination TLVs received and the format expected by the current implementation. The received TLVs are in 64-bit format, while the implementation is designed to handle 32-bit TLVs. This leads to incorrect parsing. Fix it by adding support for parsing 64-bit TLVs. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: P Praneesh Acked-by: Kalle Valo Acked-by: Jeff Johnson Link: https://patch.msgid.link/20241217095058.2725755-1-quic_ppranees@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_mon.c | 14 +++++++------- drivers/net/wireless/ath/ath12k/hal_desc.h | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index 2d53404095d6..c6cc4a1a5230 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -1205,19 +1205,19 @@ ath12k_dp_mon_parse_rx_dest(struct ath12k_base *ab, struct ath12k_mon_data *pmon struct sk_buff *skb) { struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; - struct hal_tlv_hdr *tlv; + struct hal_tlv_64_hdr *tlv; enum hal_rx_mon_status hal_status; - u32
tlv_userid = 0; + u32 tlv_userid; u16 tlv_tag, tlv_len; u8 *ptr = skb->data; memset(ppdu_info, 0, sizeof(struct hal_rx_mon_ppdu_info)); do { - tlv = (struct hal_tlv_hdr *)ptr; - tlv_tag = le32_get_bits(tlv->tl, HAL_TLV_HDR_TAG); - tlv_len = le32_get_bits(tlv->tl, HAL_TLV_HDR_LEN); - tlv_userid = le32_get_bits(tlv->tl, HAL_TLV_USR_ID); + tlv = (struct hal_tlv_64_hdr *)ptr; + tlv_tag = le64_get_bits(tlv->tl, HAL_TLV_64_HDR_TAG); + tlv_len = le64_get_bits(tlv->tl, HAL_TLV_64_HDR_LEN); + tlv_userid = le64_get_bits(tlv->tl, HAL_TLV_64_USR_ID); ptr += sizeof(*tlv); /* The actual length of PPDU_END is the combined length of many PHY @@ -1232,7 +1232,7 @@ ath12k_dp_mon_parse_rx_dest(struct ath12k_base *ab, struct ath12k_mon_data *pmon hal_status = ath12k_dp_mon_rx_parse_status_tlv(ab, pmon, tlv_tag, ptr, tlv_userid); ptr += tlv_len; - ptr = PTR_ALIGN(ptr, HAL_TLV_ALIGN); + ptr = PTR_ALIGN(ptr, HAL_TLV_64_ALIGN); if ((ptr - skb->data) >= DP_RX_BUFFER_SIZE) break; diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index a460d432288f..b90a6da72e29 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -579,6 +579,8 @@ struct hal_tlv_hdr { #define HAL_TLV_64_HDR_TAG GENMASK(9, 1) #define HAL_TLV_64_HDR_LEN GENMASK(21, 10) +#define HAL_TLV_64_USR_ID GENMASK(31, 26) +#define HAL_TLV_64_ALIGN 8 struct hal_tlv_64_hdr { __le64 tl; From aa21668ab3c7c479998be11393e1a1c3c2624fce Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 17 Dec 2024 22:26:15 +0200 Subject: [PATCH 0523/1386] wifi: ath12k: Decrease ath12k_mac_op_remain_on_channel() stack usage Building the ath12k driver with llvm-18.1.7-x86_64 produces the warning: drivers/net/wireless/ath/ath12k/mac.c:10028:12: warning: stack frame size (1080) exceeds limit (1024) in 'ath12k_mac_op_remain_on_channel' [-Wframe-larger-than] A major contributor to the stack usage in this function is: struct ath12k_wmi_scan_req_arg arg; Avoid 
the excess stack usage by dynamically allocating arg instead of declaring it on the stack. As part of the effort use __free() for both this new allocation as well as the existing chan_list allocation, and since then no central cleanup is required, replace all cleanup gotos with returns. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241217202618.1329312-2-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 76 ++++++++++++--------------- 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 186765fa95f5..05d2c75e5df2 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -10054,7 +10054,6 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, { struct ath12k_vif *ahvif = ath12k_vif_to_ahvif(vif); struct ath12k_hw *ah = ath12k_hw_to_ah(hw); - struct ath12k_wmi_scan_req_arg arg; struct ath12k_link_vif *arvif; struct ath12k *ar; u32 scan_time_msec; @@ -10065,10 +10064,8 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, lockdep_assert_wiphy(hw->wiphy); ar = ath12k_mac_select_scan_device(hw, vif, chan->center_freq); - if (!ar) { - ret = -EINVAL; - goto exit; - } + if (!ar) + return -EINVAL; /* check if any of the links of ML VIF is already started on * radio(ar) correpsondig to given scan frequency and use it, @@ -10087,15 +10084,11 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, * always on the same band for the vif */ if (arvif->is_created) { - if (WARN_ON(!arvif->ar)) { - ret = -EINVAL; - goto exit; - } + if (WARN_ON(!arvif->ar)) + return -EINVAL; - if (ar != arvif->ar && arvif->is_started) { - ret = -EBUSY; - goto exit; - } + if (ar != arvif->ar && 
arvif->is_started) + return -EBUSY; if (ar != arvif->ar) { ath12k_mac_remove_link_interface(hw, arvif); @@ -10112,7 +10105,7 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, if (ret) { ath12k_warn(ar->ab, "unable to create scan vdev for roc: %d\n", ret); - goto exit; + return ret; } } @@ -10140,37 +10133,41 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, spin_unlock_bh(&ar->data_lock); if (ret) - goto exit; + return ret; scan_time_msec = hw->wiphy->max_remain_on_channel_duration * 2; - memset(&arg, 0, sizeof(arg)); - ath12k_wmi_start_scan_init(ar, &arg); - arg.num_chan = 1; - arg.chan_list = kcalloc(arg.num_chan, sizeof(*arg.chan_list), - GFP_KERNEL); - if (!arg.chan_list) { - ret = -ENOMEM; - goto exit; - } + struct ath12k_wmi_scan_req_arg *arg __free(kfree) = + kzalloc(sizeof(*arg), GFP_KERNEL); + if (!arg) + return -ENOMEM; - arg.vdev_id = arvif->vdev_id; - arg.scan_id = ATH12K_SCAN_ID; - arg.chan_list[0] = chan->center_freq; - arg.dwell_time_active = scan_time_msec; - arg.dwell_time_passive = scan_time_msec; - arg.max_scan_time = scan_time_msec; - arg.scan_f_passive = 1; - arg.burst_duration = duration; + ath12k_wmi_start_scan_init(ar, arg); + arg->num_chan = 1; - ret = ath12k_start_scan(ar, &arg); + u32 *chan_list __free(kfree) = kcalloc(arg->num_chan, sizeof(*chan_list), + GFP_KERNEL); + if (!chan_list) + return -ENOMEM; + + arg->chan_list = chan_list; + arg->vdev_id = arvif->vdev_id; + arg->scan_id = ATH12K_SCAN_ID; + arg->chan_list[0] = chan->center_freq; + arg->dwell_time_active = scan_time_msec; + arg->dwell_time_passive = scan_time_msec; + arg->max_scan_time = scan_time_msec; + arg->scan_f_passive = 1; + arg->burst_duration = duration; + + ret = ath12k_start_scan(ar, arg); if (ret) { ath12k_warn(ar->ab, "failed to start roc scan: %d\n", ret); spin_lock_bh(&ar->data_lock); ar->scan.state = ATH12K_SCAN_IDLE; spin_unlock_bh(&ar->data_lock); - goto free_chan_list; + return ret; } ret = 
wait_for_completion_timeout(&ar->scan.on_channel, 3 * HZ); @@ -10179,20 +10176,13 @@ static int ath12k_mac_op_remain_on_channel(struct ieee80211_hw *hw, ret = ath12k_scan_stop(ar); if (ret) ath12k_warn(ar->ab, "failed to stop scan: %d\n", ret); - ret = -ETIMEDOUT; - goto free_chan_list; + return -ETIMEDOUT; } ieee80211_queue_delayed_work(hw, &ar->scan.timeout, msecs_to_jiffies(duration)); - ret = 0; - -free_chan_list: - kfree(arg.chan_list); - -exit: - return ret; + return 0; } static void ath12k_mac_op_set_rekey_data(struct ieee80211_hw *hw, From 445718c9958c8c160654068014c0e72505f59d63 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 17 Dec 2024 22:26:16 +0200 Subject: [PATCH 0524/1386] wifi: ath12k: Decrease ath12k_bss_assoc() stack usage Currently when building ath12k with gcc-14.2.0 the following warning is observed: drivers/net/wireless/ath/ath12k/mac.c: In function 'ath12k_bss_assoc': drivers/net/wireless/ath/ath12k/mac.c:3080:1: warning: the frame size of 1040 bytes is larger than 1024 bytes [-Wframe-larger-than=] A major contributor to the stack usage in this function is: struct ath12k_wmi_peer_assoc_arg peer_arg; Avoid the excess stack usage by dynamically allocating peer_arg instead of declaring it on the stack. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241217202618.1329312-3-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 05d2c75e5df2..2a30a11903c5 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -3133,7 +3133,6 @@ static void ath12k_bss_assoc(struct ath12k *ar, struct ath12k_vif *ahvif = arvif->ahvif; struct ieee80211_vif *vif = ath12k_ahvif_to_vif(ahvif); struct ath12k_wmi_vdev_up_params params = {}; - struct ath12k_wmi_peer_assoc_arg peer_arg = {}; struct ieee80211_link_sta *link_sta; u8 link_id = bss_conf->link_id; struct ath12k_link_sta *arsta; @@ -3145,6 +3144,11 @@ static void ath12k_bss_assoc(struct ath12k *ar, lockdep_assert_wiphy(ath12k_ar_to_hw(ar)->wiphy); + struct ath12k_wmi_peer_assoc_arg *peer_arg __free(kfree) = + kzalloc(sizeof(*peer_arg), GFP_KERNEL); + if (!peer_arg) + return; + ath12k_dbg(ar->ab, ATH12K_DBG_MAC, "mac vdev %i link id %u assoc bssid %pM aid %d\n", arvif->vdev_id, link_id, arvif->bssid, ahvif->aid); @@ -3177,11 +3181,11 @@ static void ath12k_bss_assoc(struct ath12k *ar, return; } - ath12k_peer_assoc_prepare(ar, arvif, arsta, &peer_arg, false); + ath12k_peer_assoc_prepare(ar, arvif, arsta, peer_arg, false); rcu_read_unlock(); - ret = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); + ret = ath12k_wmi_send_peer_assoc_cmd(ar, peer_arg); if (ret) { ath12k_warn(ar->ab, "failed to run peer assoc for %pM vdev %i: %d\n", bss_conf->bssid, arvif->vdev_id, ret); From 6ff412420e5ea1635385038a0bb4c77420862bc9 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 17 Dec 2024 22:26:17 +0200 Subject: [PATCH 
0525/1386] wifi: ath12k: Decrease ath12k_sta_rc_update_wk() stack usage Currently when building ath12k with llvm-18.1.7-x86_64 the following warning is observed: drivers/net/wireless/ath/ath12k/mac.c:4946:13: warning: stack frame size (1112) exceeds limit (1024) in 'ath12k_sta_rc_update_wk' [-Wframe-larger-than] A major contributor to the stack usage in this function is: struct ath12k_wmi_peer_assoc_arg peer_arg; Avoid the excess stack usage by dynamically allocating peer_arg instead of declaring it on the stack. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241217202618.1329312-4-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 2a30a11903c5..10293e9c1d49 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4956,7 +4956,6 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) u32 changed, bw, nss, smps, bw_prev; int err, num_vht_rates; const struct cfg80211_bitrate_mask *mask; - struct ath12k_wmi_peer_assoc_arg peer_arg; enum wmi_phy_mode peer_phymode; struct ath12k_link_sta *arsta; struct ieee80211_vif *vif; @@ -4992,9 +4991,14 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) nss = min(nss, max(ath12k_mac_max_ht_nss(ht_mcs_mask), ath12k_mac_max_vht_nss(vht_mcs_mask))); + struct ath12k_wmi_peer_assoc_arg *peer_arg __free(kfree) = + kzalloc(sizeof(*peer_arg), GFP_KERNEL); + if (!peer_arg) + return; + if (changed & IEEE80211_RC_BW_CHANGED) { - ath12k_peer_assoc_h_phymode(ar, arvif, arsta, &peer_arg); - peer_phymode = peer_arg.peer_phymode; + 
ath12k_peer_assoc_h_phymode(ar, arvif, arsta, peer_arg); + peer_phymode = peer_arg->peer_phymode; if (bw > bw_prev) { /* Phymode shows maximum supported channel width, if we @@ -5096,9 +5100,9 @@ static void ath12k_sta_rc_update_wk(struct wiphy *wiphy, struct wiphy_work *wk) * other rates using peer_assoc command. */ ath12k_peer_assoc_prepare(ar, arvif, arsta, - &peer_arg, true); + peer_arg, true); - err = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); + err = ath12k_wmi_send_peer_assoc_cmd(ar, peer_arg); if (err) ath12k_warn(ar->ab, "failed to run peer assoc for STA %pM vdev %i: %d\n", arsta->addr, arvif->vdev_id, err); From bf2da5c4f5b576d45f5f0cc0f508b8255f7ab015 Mon Sep 17 00:00:00 2001 From: Jeff Johnson Date: Tue, 17 Dec 2024 22:26:18 +0200 Subject: [PATCH 0526/1386] wifi: ath12k: Decrease ath12k_mac_station_assoc() stack usage Building the ath12k driver with llvm-18.1.7-x86_64 produces the warning: drivers/net/wireless/ath/ath12k/mac.c:5606:12: warning: stack frame size (1176) exceeds limit (1024) in 'ath12k_mac_op_sta_state' [-Wframe-larger-than] ath12k_mac_op_sta_state() itself does not consume much stack, but it calls ath12k_mac_handle_link_sta_state() which in turn calls ath12k_mac_station_add(). Since those are both static functions with only one caller, it is suspected that these both get inlined, and their stack usage is reported for ath12k_mac_op_sta_state(). A major contributor to the ath12k_mac_station_assoc() stack usage is: struct ath12k_wmi_peer_assoc_arg peer_arg; Avoid the excess stack usage by dynamically allocating peer_arg instead of declaring it on the stack. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Jeff Johnson Signed-off-by: Kalle Valo Link: https://patch.msgid.link/20241217202618.1329312-5-kvalo@kernel.org Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/mac.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 10293e9c1d49..6f10813d9378 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -4834,7 +4834,6 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, { struct ieee80211_vif *vif = ath12k_ahvif_to_vif(arvif->ahvif); struct ieee80211_sta *sta = ath12k_ahsta_to_sta(arsta->ahsta); - struct ath12k_wmi_peer_assoc_arg peer_arg; struct ieee80211_link_sta *link_sta; int ret; struct cfg80211_chan_def def; @@ -4854,14 +4853,19 @@ static int ath12k_mac_station_assoc(struct ath12k *ar, band = def.chan->band; mask = &arvif->bitrate_mask; - ath12k_peer_assoc_prepare(ar, arvif, arsta, &peer_arg, reassoc); + struct ath12k_wmi_peer_assoc_arg *peer_arg __free(kfree) = + kzalloc(sizeof(*peer_arg), GFP_KERNEL); + if (!peer_arg) + return -ENOMEM; - if (peer_arg.peer_nss < 1) { + ath12k_peer_assoc_prepare(ar, arvif, arsta, peer_arg, reassoc); + + if (peer_arg->peer_nss < 1) { ath12k_warn(ar->ab, - "invalid peer NSS %d\n", peer_arg.peer_nss); + "invalid peer NSS %d\n", peer_arg->peer_nss); return -EINVAL; } - ret = ath12k_wmi_send_peer_assoc_cmd(ar, &peer_arg); + ret = ath12k_wmi_send_peer_assoc_cmd(ar, peer_arg); if (ret) { ath12k_warn(ar->ab, "failed to run peer assoc for STA %pM vdev %i: %d\n", arsta->addr, arvif->vdev_id, ret); From d506e55fe39bcd6a78bd1f23210cbcd8cee4f844 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:04 +0530 Subject: [PATCH 0527/1386] wifi: ath12k: Add documentation 
HTT_H2T_MSG_TYPE_RX_RING_SELECTION_CFG Add missing field documentation for HTT_H2T_MSG_TYPE_RX_RING_SELECTION_CFG command with indentation alignment. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-2-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp.h b/drivers/net/wireless/ath/ath12k/dp.h index 7700828375e3..b178921aaf1d 100644 --- a/drivers/net/wireless/ath/ath12k/dp.h +++ b/drivers/net/wireless/ath/ath12k/dp.h @@ -696,9 +696,9 @@ enum htt_stats_internal_ppdu_frametype { * * The message would appear as follows: * - * |31 26|25|24|23 16|15 8|7 0| - * |-----------------+----------------+----------------+---------------| - * | rsvd1 |PS|SS| ring_id | pdev_id | msg_type | + * |31 29|28|27|26|25|24|23 16|15 8|7 0| + * |-------+--+--+--+--+--+-----------+----------------+---------------| + * | rsvd1 |ED|DT|OV|PS|SS| ring_id | pdev_id | msg_type | * |-------------------------------------------------------------------| * | rsvd2 | ring_buffer_size | * |-------------------------------------------------------------------| @@ -725,7 +725,13 @@ enum htt_stats_internal_ppdu_frametype { * More details can be got from enum htt_srng_ring_id * b'24 - status_swap: 1 is to swap status TLV * b'25 - pkt_swap: 1 is to swap packet TLV - * b'26:31 - rsvd1: reserved for future use + * b'26 - rx_offset_valid (OV): flag to indicate rx offsets + * configuration fields are valid + * b'27 - drop_thresh_valid (DT): flag to indicate if the + * rx_drop_threshold field is valid + * b'28 - rx_mon_global_en: Enable/Disable global register + * configuration in Rx monitor module. 
+ * b'29:31 - rsvd1: reserved for future use * dword1 - b'0:16 - ring_buffer_size: size of buffers referenced by rx ring, * in byte units. * Valid only for HW_TO_SW_RING and SW_TO_HW_RING From 61a0d9a879c3682391f88855220dd766bb9d6542 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:05 +0530 Subject: [PATCH 0528/1386] wifi: ath12k: Refactor monitor status TLV structure The following TLV structures and bitmask definitions were inherited from ath11k but were not updated for the ath12k 802.11be hardware. These data structures and bitmasks will be used to parse the monitor status TLV data in the Rx path. 1. hal_rx_ppdu_end_user_stats_ext structure 2. hal_rx_ppdu_end_duration structure 3. HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_TXBF bitmask 4. HAL_RX_MPDU_START_INFO1_PEERID bitmask 5. HAL_INVALID_PEERID 6. hal_rx_ppdu_end_user_stats bitmask Currently, there is no issue since the monitor status Rx path is not enabled. However, in the future, the monitor status Rx path will be enabled. Therefore, update the above TLV structures and bitmasks to align with the ath12k 802.11be hardware.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-3-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal_rx.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 2de7b0eba9f2..5cf3c5787ab7 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -19,7 +19,7 @@ struct hal_rx_wbm_rel_info { bool hw_cc_done; }; -#define HAL_INVALID_PEERID 0xffff +#define HAL_INVALID_PEERID 0x3fff #define VHT_SIG_SU_NSS_MASK 0x7 #define HAL_RX_MAX_MCS 12 @@ -245,6 +245,8 @@ struct hal_rx_ppdu_start { __le32 rsvd[2]; } __packed; +#define HAL_RX_PPDU_END_USER_STATS_INFO0_PEER_ID GENMASK(13, 0) +#define HAL_RX_PPDU_END_USER_STATS_INFO0_DEVICE_ID GENMASK(15, 14) #define HAL_RX_PPDU_END_USER_STATS_INFO0_MPDU_CNT_FCS_ERR GENMASK(26, 16) #define HAL_RX_PPDU_END_USER_STATS_INFO1_MPDU_CNT_FCS_OK GENMASK(10, 0) @@ -299,6 +301,7 @@ struct hal_rx_ppdu_end_user_stats_ext { __le32 info4; __le32 info5; __le32 info6; + __le32 rsvd; } __packed; #define HAL_RX_HT_SIG_INFO_INFO0_MCS GENMASK(6, 0) @@ -425,7 +428,7 @@ struct hal_rx_he_sig_b2_mu_info { #define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_ID GENMASK(10, 0) #define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_NSTS GENMASK(13, 11) -#define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_TXBF BIT(19) +#define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_TXBF BIT(14) #define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_MCS GENMASK(18, 15) #define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_DCM BIT(19) #define HAL_RX_HE_SIG_B2_OFDMA_INFO_INFO0_STA_CODING BIT(20) @@ -453,7 +456,8 @@ struct hal_rx_phyrx_rssi_legacy_info { } __packed; #define 
HAL_RX_MPDU_START_INFO0_PPDU_ID GENMASK(31, 16) -#define HAL_RX_MPDU_START_INFO1_PEERID GENMASK(31, 16) +#define HAL_RX_MPDU_START_INFO1_PEERID GENMASK(29, 16) +#define HAL_RX_MPDU_START_INFO1_DEVICE_ID GENMASK(31, 30) #define HAL_RX_MPDU_START_INFO2_MPDU_LEN GENMASK(13, 0) struct hal_rx_mpdu_start { __le32 rsvd0[9]; @@ -468,7 +472,7 @@ struct hal_rx_mpdu_start { struct hal_rx_ppdu_end_duration { __le32 rsvd0[9]; __le32 info0; - __le32 rsvd1[4]; + __le32 rsvd1[18]; } __packed; struct hal_rx_rxpcu_classification_overview { From 6a6d941a39947c359ed245fca490dcdb09551235 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:06 +0530 Subject: [PATCH 0529/1386] wifi: ath12k: cleanup Rx peer statistics structure Currently, unused fields are present in the Rx peer statistics structure. These fields are already present in the same structure under the ath12k_rx_peer_rate_stats container structure. Therefore, remove the unused fields from the Rx peer statistics structure. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-4-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/core.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/core.h b/drivers/net/wireless/ath/ath12k/core.h index ec61ad3d82c3..b789b375b891 100644 --- a/drivers/net/wireless/ath/ath12k/core.h +++ b/drivers/net/wireless/ath/ath12k/core.h @@ -381,10 +381,6 @@ struct ath12k_rx_peer_stats { u64 non_ampdu_msdu_count; u64 stbc_count; u64 beamformed_count; - u64 mcs_count[HAL_RX_MAX_MCS + 1]; - u64 nss_count[HAL_RX_MAX_NSS]; - u64 bw_count[HAL_RX_BW_MAX]; - u64 gi_count[HAL_RX_GI_MAX]; u64 coding_count[HAL_RX_SU_MU_CODING_MAX]; u64 tid_count[IEEE80211_NUM_TIDS + 1]; u64 pream_cnt[HAL_RX_PREAMBLE_MAX]; From b79462532cd56119fb409f81f50dc74b12724b5e Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:07 +0530 Subject: [PATCH 0530/1386] wifi: ath12k: Fix the misspelled of hal TLV tag HAL_PHYRX_GENERICHT_SIG There is "HAL_PHYRX_GENERIC_EHT_SIG" misspelled as "HAL_PHYRX_GENERICHT_SIG" in the comments. Fix the spelling.
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-5-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal_desc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/hal_desc.h b/drivers/net/wireless/ath/ath12k/hal_desc.h index b90a6da72e29..7b0403d245e5 100644 --- a/drivers/net/wireless/ath/ath12k/hal_desc.h +++ b/drivers/net/wireless/ath/ath12k/hal_desc.h @@ -522,7 +522,7 @@ enum hal_tlv_tag { HAL_PHYRXHT_SIG_USR_SU = 468 /* 0x1d4 */, HAL_PHYRXHT_SIG_USR_MU_MIMO = 469 /* 0x1d5 */, HAL_PHYRX_GENERIC_U_SIG = 470 /* 0x1d6 */, - HAL_PHYRX_GENERICHT_SIG = 471 /* 0x1d7 */, + HAL_PHYRX_GENERIC_EHT_SIG = 471 /* 0x1d7 */, HAL_OVERWRITE_RESP_START = 472 /* 0x1d8 */, HAL_OVERWRITE_RESP_PREAMBLE_INFO = 473 /* 0x1d9 */, HAL_OVERWRITE_RESP_FRAME_INFO = 474 /* 0x1da */, From ebee84cc961cd3947015efbf4a5dbea63b11c5d3 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:08 +0530 Subject: [PATCH 0531/1386] wifi: ath12k: fix incorrect TID updation in DP monitor status path Currently, an incorrect TID value gets populated in the monitor status Rx path due to an incorrect bitmap value given to the ffs() built-in helper function. Therefore, avoid the decrement and directly provide the TID bitmap to the ffs() built-in helper function for the correct TID update in the monitor status Rx path. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-6-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_mon.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index c6cc4a1a5230..d9b0087a7a25 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -617,6 +617,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, case HAL_RX_PPDU_END_USER_STATS: { struct hal_rx_ppdu_end_user_stats *eu_stats = (struct hal_rx_ppdu_end_user_stats *)tlv_data; + u32 tid_bitmap; info[0] = __le32_to_cpu(eu_stats->info0); info[1] = __le32_to_cpu(eu_stats->info1); @@ -629,10 +630,9 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, u32_get_bits(info[2], HAL_RX_PPDU_END_USER_STATS_INFO2_AST_INDEX); ppdu_info->fc_valid = u32_get_bits(info[1], HAL_RX_PPDU_END_USER_STATS_INFO1_FC_VALID); - ppdu_info->tid = - ffs(u32_get_bits(info[6], - HAL_RX_PPDU_END_USER_STATS_INFO6_TID_BITMAP) - - 1); + tid_bitmap = u32_get_bits(info[6], + HAL_RX_PPDU_END_USER_STATS_INFO6_TID_BITMAP); + ppdu_info->tid = ffs(tid_bitmap) - 1; ppdu_info->tcp_msdu_count = u32_get_bits(info[4], HAL_RX_PPDU_END_USER_STATS_INFO4_TCP_MSDU_CNT); From 0345f28a122656a4703442a2a97d2dca370c27a0 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:09 +0530 Subject: [PATCH 0532/1386] wifi: ath12k: Remove unused HAL Rx mask in DP monitor path Currently, CODING and TXBF are unused masks defined in the HAL Rx monitor status TLV parsing code path. Therefore, remove the unused masks to prevent incorrect assumptions for code readers. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-7-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal_rx.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hal_rx.h b/drivers/net/wireless/ath/ath12k/hal_rx.h index 5cf3c5787ab7..b08aa2e79f41 100644 --- a/drivers/net/wireless/ath/ath12k/hal_rx.h +++ b/drivers/net/wireless/ath/ath12k/hal_rx.h @@ -398,11 +398,9 @@ struct hal_rx_he_sig_a_su_info { #define HAL_RX_HE_SIG_A_MU_DL_INFO0_DOPPLER_INDICATION BIT(25) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_TXOP_DURATION GENMASK(6, 0) -#define HAL_RX_HE_SIG_A_MU_DL_INFO1_CODING BIT(7) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_NUM_LTF_SYMB GENMASK(10, 8) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_LDPC_EXTRA BIT(11) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_STBC BIT(12) -#define HAL_RX_HE_SIG_A_MU_DL_INFO1_TXBF BIT(10) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_PKT_EXT_FACTOR GENMASK(14, 13) #define HAL_RX_HE_SIG_A_MU_DL_INFO1_PKT_EXT_PE_DISAM BIT(15) From 61f247a06c3cdeb9093b3d90afd7d51168d089f4 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:10 +0530 Subject: [PATCH 0533/1386] wifi: ath12k: Change the Tx monitor SRNG ring ID The Tx monitor SRNG ring ID does not align with the ath12k 802.11be hardware architecture. Currently, there is no issue since the Tx monitor is not enabled. However, in the future, the Tx monitor will be enabled. Therefore, change the HAL_SRNG_RING_ID_WMAC1_SW2TXMON_BUF0 SRNG ID and assign the correct start ring ID for the ring type HAL_TX_MONITOR_BUF. 
Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-8-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/hal.c | 2 +- drivers/net/wireless/ath/ath12k/hal.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/hal.c b/drivers/net/wireless/ath/ath12k/hal.c index fd98fac16dd5..cd59ff8e6c7b 100644 --- a/drivers/net/wireless/ath/ath12k/hal.c +++ b/drivers/net/wireless/ath/ath12k/hal.c @@ -181,7 +181,7 @@ static const struct hal_srng_config hw_srng_config_template[] = { .max_size = HAL_WBM2PPE_RELEASE_RING_BASE_MSB_RING_SIZE, }, [HAL_TX_MONITOR_BUF] = { - .start_ring_id = HAL_SRNG_SW2TXMON_BUF0, + .start_ring_id = HAL_SRNG_RING_ID_WMAC1_SW2TXMON_BUF0, .max_rings = 1, .entry_size = sizeof(struct hal_mon_buf_ring) >> 2, .mac_type = ATH12K_HAL_SRNG_PMAC, diff --git a/drivers/net/wireless/ath/ath12k/hal.h b/drivers/net/wireless/ath/ath12k/hal.h index 8a78bb9a10bc..94e2e8735958 100644 --- a/drivers/net/wireless/ath/ath12k/hal.h +++ b/drivers/net/wireless/ath/ath12k/hal.h @@ -485,8 +485,8 @@ enum hal_srng_ring_id { HAL_SRNG_RING_ID_WMAC1_RXMON2SW0 = HAL_SRNG_RING_ID_WMAC1_RXDMA2SW1, HAL_SRNG_RING_ID_WMAC1_SW2RXDMA1_DESC, HAL_SRNG_RING_ID_RXDMA_DIR_BUF, - HAL_SRNG_RING_ID_WMAC1_SW2TXMON_BUF0, HAL_SRNG_RING_ID_WMAC1_TXMON2SW0_BUF0, + HAL_SRNG_RING_ID_WMAC1_SW2TXMON_BUF0, HAL_SRNG_RING_ID_PMAC1_ID_END, }; From 8534c42397ed8f05257dbddcd305a351ad40add1 Mon Sep 17 00:00:00 2001 From: Karthikeyan Periyasamy Date: Tue, 17 Dec 2024 14:15:11 +0530 Subject: [PATCH 0534/1386] wifi: ath12k: Avoid explicit type cast in monitor status parse handler Currently, monitor status parse procedure handles all the supported TLV tags. 
Each TLV tag has its own data structure for parsing. Now, this handler is passed the tlv_data as a u8 pointer, so explicit type cast conversion happens for every TLV tag parsing. Therefore, avoid the explicit type conversion by changing the tlv_data type from a u8 pointer to a const void pointer. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.3.1-00173-QCAHKSWPL_SILICONZ-1 Tested-on: WCN7850 hw2.0 PCI WLAN.HMT.1.0.c5-00481-QCAHMTSWPL_V1.0_V2.0_SILICONZ-3 Signed-off-by: Karthikeyan Periyasamy Acked-by: Jeff Johnson Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217084511.2981515-9-quic_periyasa@quicinc.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/dp_mon.c | 126 +++++++++-------------- 1 file changed, 47 insertions(+), 79 deletions(-) diff --git a/drivers/net/wireless/ath/ath12k/dp_mon.c b/drivers/net/wireless/ath/ath12k/dp_mon.c index d9b0087a7a25..5a21961cfd46 100644 --- a/drivers/net/wireless/ath/ath12k/dp_mon.c +++ b/drivers/net/wireless/ath/ath12k/dp_mon.c @@ -10,11 +10,10 @@ #include "dp_tx.h" #include "peer.h" -static void ath12k_dp_mon_rx_handle_ofdma_info(void *rx_tlv, - struct hal_rx_user_status *rx_user_status) +static void +ath12k_dp_mon_rx_handle_ofdma_info(const struct hal_rx_ppdu_end_user_stats *ppdu_end_user, + struct hal_rx_user_status *rx_user_status) { - struct hal_rx_ppdu_end_user_stats *ppdu_end_user = rx_tlv; - rx_user_status->ul_ofdma_user_v0_word0 = __le32_to_cpu(ppdu_end_user->usr_resp_ref); rx_user_status->ul_ofdma_user_v0_word1 = @@ -35,7 +34,7 @@ ath12k_dp_mon_rx_populate_byte_count(const struct hal_rx_ppdu_end_user_stats *st } static void -ath12k_dp_mon_rx_populate_mu_user_info(void *rx_tlv, +ath12k_dp_mon_rx_populate_mu_user_info(const struct hal_rx_ppdu_end_user_stats *rx_tlv, struct hal_rx_mon_ppdu_info *ppdu_info, struct hal_rx_user_status *rx_user_status) { @@ -73,11 +72,9 @@ ath12k_dp_mon_rx_populate_mu_user_info(void *rx_tlv, ath12k_dp_mon_rx_populate_byte_count(rx_tlv, ppdu_info, rx_user_status); } 
-static void ath12k_dp_mon_parse_vht_sig_a(u8 *tlv_data, +static void ath12k_dp_mon_parse_vht_sig_a(const struct hal_rx_vht_sig_a_info *vht_sig, struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_vht_sig_a_info *vht_sig = - (struct hal_rx_vht_sig_a_info *)tlv_data; u32 nsts, group_id, info0, info1; u8 gi_setting; @@ -119,11 +116,9 @@ static void ath12k_dp_mon_parse_vht_sig_a(u8 *tlv_data, u32_get_bits(info1, HAL_RX_VHT_SIG_A_INFO_INFO1_SU_MU_CODING); } -static void ath12k_dp_mon_parse_ht_sig(u8 *tlv_data, +static void ath12k_dp_mon_parse_ht_sig(const struct hal_rx_ht_sig_info *ht_sig, struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_ht_sig_info *ht_sig = - (struct hal_rx_ht_sig_info *)tlv_data; u32 info0 = __le32_to_cpu(ht_sig->info0); u32 info1 = __le32_to_cpu(ht_sig->info1); @@ -136,11 +131,9 @@ static void ath12k_dp_mon_parse_ht_sig(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_SU; } -static void ath12k_dp_mon_parse_l_sig_b(u8 *tlv_data, +static void ath12k_dp_mon_parse_l_sig_b(const struct hal_rx_lsig_b_info *lsigb, struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_lsig_b_info *lsigb = - (struct hal_rx_lsig_b_info *)tlv_data; u32 info0 = __le32_to_cpu(lsigb->info0); u8 rate; @@ -170,11 +163,9 @@ static void ath12k_dp_mon_parse_l_sig_b(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_SU; } -static void ath12k_dp_mon_parse_l_sig_a(u8 *tlv_data, +static void ath12k_dp_mon_parse_l_sig_a(const struct hal_rx_lsig_a_info *lsiga, struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_lsig_a_info *lsiga = - (struct hal_rx_lsig_a_info *)tlv_data; u32 info0 = __le32_to_cpu(lsiga->info0); u8 rate; @@ -212,14 +203,13 @@ static void ath12k_dp_mon_parse_l_sig_a(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_SU; } -static void ath12k_dp_mon_parse_he_sig_b2_ofdma(u8 *tlv_data, - struct hal_rx_mon_ppdu_info *ppdu_info) +static void +ath12k_dp_mon_parse_he_sig_b2_ofdma(const struct 
hal_rx_he_sig_b2_ofdma_info *ofdma, + struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_he_sig_b2_ofdma_info *he_sig_b2_ofdma = - (struct hal_rx_he_sig_b2_ofdma_info *)tlv_data; u32 info0, value; - info0 = __le32_to_cpu(he_sig_b2_ofdma->info0); + info0 = __le32_to_cpu(ofdma->info0); ppdu_info->he_data1 |= HE_MCS_KNOWN | HE_DCM_KNOWN | HE_CODING_KNOWN; @@ -250,11 +240,10 @@ static void ath12k_dp_mon_parse_he_sig_b2_ofdma(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_MU_OFDMA; } -static void ath12k_dp_mon_parse_he_sig_b2_mu(u8 *tlv_data, - struct hal_rx_mon_ppdu_info *ppdu_info) +static void +ath12k_dp_mon_parse_he_sig_b2_mu(const struct hal_rx_he_sig_b2_mu_info *he_sig_b2_mu, + struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_he_sig_b2_mu_info *he_sig_b2_mu = - (struct hal_rx_he_sig_b2_mu_info *)tlv_data; u32 info0, value; info0 = __le32_to_cpu(he_sig_b2_mu->info0); @@ -277,11 +266,10 @@ static void ath12k_dp_mon_parse_he_sig_b2_mu(u8 *tlv_data, ppdu_info->nss = u32_get_bits(info0, HAL_RX_HE_SIG_B2_MU_INFO_INFO0_STA_NSTS); } -static void ath12k_dp_mon_parse_he_sig_b1_mu(u8 *tlv_data, - struct hal_rx_mon_ppdu_info *ppdu_info) +static void +ath12k_dp_mon_parse_he_sig_b1_mu(const struct hal_rx_he_sig_b1_mu_info *he_sig_b1_mu, + struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_he_sig_b1_mu_info *he_sig_b1_mu = - (struct hal_rx_he_sig_b1_mu_info *)tlv_data; u32 info0 = __le32_to_cpu(he_sig_b1_mu->info0); u16 ru_tones; @@ -292,11 +280,10 @@ static void ath12k_dp_mon_parse_he_sig_b1_mu(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_MU_MIMO; } -static void ath12k_dp_mon_parse_he_sig_mu(u8 *tlv_data, - struct hal_rx_mon_ppdu_info *ppdu_info) +static void +ath12k_dp_mon_parse_he_sig_mu(const struct hal_rx_he_sig_a_mu_dl_info *he_sig_a_mu_dl, + struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_he_sig_a_mu_dl_info *he_sig_a_mu_dl = - (struct hal_rx_he_sig_a_mu_dl_info *)tlv_data; u32 info0, info1, value; u16 
he_gi = 0, he_ltf = 0; @@ -427,11 +414,9 @@ static void ath12k_dp_mon_parse_he_sig_mu(u8 *tlv_data, ppdu_info->reception_type = HAL_RX_RECEPTION_TYPE_MU_MIMO; } -static void ath12k_dp_mon_parse_he_sig_su(u8 *tlv_data, +static void ath12k_dp_mon_parse_he_sig_su(const struct hal_rx_he_sig_a_su_info *he_sig_a, struct hal_rx_mon_ppdu_info *ppdu_info) { - struct hal_rx_he_sig_a_su_info *he_sig_a = - (struct hal_rx_he_sig_a_su_info *)tlv_data; u32 info0, info1, value; u32 dcm; u8 he_dcm = 0, he_stbc = 0; @@ -580,15 +565,15 @@ static void ath12k_dp_mon_parse_he_sig_su(u8 *tlv_data, static enum hal_rx_mon_status ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, struct ath12k_mon_data *pmon, - u32 tlv_tag, u8 *tlv_data, u32 userid) + u32 tlv_tag, const void *tlv_data, + u32 userid) { struct hal_rx_mon_ppdu_info *ppdu_info = &pmon->mon_ppdu_info; u32 info[7]; switch (tlv_tag) { case HAL_RX_PPDU_START: { - struct hal_rx_ppdu_start *ppdu_start = - (struct hal_rx_ppdu_start *)tlv_data; + const struct hal_rx_ppdu_start *ppdu_start = tlv_data; u64 ppdu_ts = ath12k_le32hilo_to_u64(ppdu_start->ppdu_start_ts_63_32, ppdu_start->ppdu_start_ts_31_0); @@ -615,8 +600,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; } case HAL_RX_PPDU_END_USER_STATS: { - struct hal_rx_ppdu_end_user_stats *eu_stats = - (struct hal_rx_ppdu_end_user_stats *)tlv_data; + const struct hal_rx_ppdu_end_user_stats *eu_stats = tlv_data; u32 tid_bitmap; info[0] = __le32_to_cpu(eu_stats->info0); @@ -673,8 +657,8 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, &ppdu_info->userstats[userid]; ppdu_info->num_users += 1; - ath12k_dp_mon_rx_handle_ofdma_info(tlv_data, rxuser_stats); - ath12k_dp_mon_rx_populate_mu_user_info(tlv_data, ppdu_info, + ath12k_dp_mon_rx_handle_ofdma_info(eu_stats, rxuser_stats); + ath12k_dp_mon_rx_populate_mu_user_info(eu_stats, ppdu_info, rxuser_stats); } ppdu_info->mpdu_fcs_ok_bitmap[0] = __le32_to_cpu(eu_stats->rsvd1[0]); @@ -682,8 +666,8 @@ 
ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; } case HAL_RX_PPDU_END_USER_STATS_EXT: { - struct hal_rx_ppdu_end_user_stats_ext *eu_stats = - (struct hal_rx_ppdu_end_user_stats_ext *)tlv_data; + const struct hal_rx_ppdu_end_user_stats_ext *eu_stats = tlv_data; + ppdu_info->mpdu_fcs_ok_bitmap[2] = __le32_to_cpu(eu_stats->info1); ppdu_info->mpdu_fcs_ok_bitmap[3] = __le32_to_cpu(eu_stats->info2); ppdu_info->mpdu_fcs_ok_bitmap[4] = __le32_to_cpu(eu_stats->info3); @@ -729,8 +713,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; case HAL_PHYRX_RSSI_LEGACY: { - struct hal_rx_phyrx_rssi_legacy_info *rssi = - (struct hal_rx_phyrx_rssi_legacy_info *)tlv_data; + const struct hal_rx_phyrx_rssi_legacy_info *rssi = tlv_data; info[0] = __le32_to_cpu(rssi->info0); info[1] = __le32_to_cpu(rssi->info1); @@ -748,8 +731,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; } case HAL_RXPCU_PPDU_END_INFO: { - struct hal_rx_ppdu_end_duration *ppdu_rx_duration = - (struct hal_rx_ppdu_end_duration *)tlv_data; + const struct hal_rx_ppdu_end_duration *ppdu_rx_duration = tlv_data; info[0] = __le32_to_cpu(ppdu_rx_duration->info0); ppdu_info->rx_duration = @@ -760,8 +742,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; } case HAL_RX_MPDU_START: { - struct hal_rx_mpdu_start *mpdu_start = - (struct hal_rx_mpdu_start *)tlv_data; + const struct hal_rx_mpdu_start *mpdu_start = tlv_data; struct dp_mon_mpdu *mon_mpdu = pmon->mon_mpdu; u16 peer_id; @@ -790,8 +771,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base *ab, break; case HAL_MON_BUF_ADDR: { struct dp_rxdma_mon_ring *buf_ring = &ab->dp.rxdma_mon_buf_ring; - struct dp_mon_packet_info *packet_info = - (struct dp_mon_packet_info *)tlv_data; + const struct dp_mon_packet_info *packet_info = tlv_data; int buf_id = u32_get_bits(packet_info->cookie, DP_RXDMA_BUF_COOKIE_BUF_ID); struct sk_buff *msdu; @@ -823,8 +803,7 @@ ath12k_dp_mon_rx_parse_status_tlv(struct ath12k_base 
*ab, break; } case HAL_RX_MSDU_END: { - struct rx_msdu_end_qcn9274 *msdu_end = - (struct rx_msdu_end_qcn9274 *)tlv_data; + const struct rx_msdu_end_qcn9274 *msdu_end = tlv_data; bool is_first_msdu_in_mpdu; u16 msdu_end_info; @@ -1609,7 +1588,7 @@ ath12k_dp_mon_tx_gen_prot_frame(struct dp_mon_tx_ppdu_info *tx_ppdu_info) static enum dp_mon_tx_tlv_status ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, struct ath12k_mon_data *pmon, - u16 tlv_tag, u8 *tlv_data, u32 userid) + u16 tlv_tag, const void *tlv_data, u32 userid) { struct dp_mon_tx_ppdu_info *tx_ppdu_info; enum dp_mon_tx_tlv_status status = DP_MON_TX_STATUS_PPDU_NOT_DONE; @@ -1619,8 +1598,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, switch (tlv_tag) { case HAL_TX_FES_SETUP: { - struct hal_tx_fes_setup *tx_fes_setup = - (struct hal_tx_fes_setup *)tlv_data; + const struct hal_tx_fes_setup *tx_fes_setup = tlv_data; info[0] = __le32_to_cpu(tx_fes_setup->info0); tx_ppdu_info->ppdu_id = __le32_to_cpu(tx_fes_setup->schedule_id); @@ -1631,8 +1609,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_TX_FES_STATUS_END: { - struct hal_tx_fes_status_end *tx_fes_status_end = - (struct hal_tx_fes_status_end *)tlv_data; + const struct hal_tx_fes_status_end *tx_fes_status_end = tlv_data; u32 tst_15_0, tst_31_16; info[0] = __le32_to_cpu(tx_fes_status_end->info0); @@ -1649,8 +1626,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_RX_RESPONSE_REQUIRED_INFO: { - struct hal_rx_resp_req_info *rx_resp_req_info = - (struct hal_rx_resp_req_info *)tlv_data; + const struct hal_rx_resp_req_info *rx_resp_req_info = tlv_data; u32 addr_32; u16 addr_16; @@ -1695,8 +1671,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_PCU_PPDU_SETUP_INIT: { - struct hal_tx_pcu_ppdu_setup_init *ppdu_setup = - (struct hal_tx_pcu_ppdu_setup_init *)tlv_data; + const struct hal_tx_pcu_ppdu_setup_init *ppdu_setup = tlv_data; u32 addr_32; u16 addr_16; @@ -1742,8 +1717,7 
@@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_TX_QUEUE_EXTENSION: { - struct hal_tx_queue_exten *tx_q_exten = - (struct hal_tx_queue_exten *)tlv_data; + const struct hal_tx_queue_exten *tx_q_exten = tlv_data; info[0] = __le32_to_cpu(tx_q_exten->info0); @@ -1755,8 +1729,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_TX_FES_STATUS_START: { - struct hal_tx_fes_status_start *tx_fes_start = - (struct hal_tx_fes_status_start *)tlv_data; + const struct hal_tx_fes_status_start *tx_fes_start = tlv_data; info[0] = __le32_to_cpu(tx_fes_start->info0); @@ -1767,8 +1740,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_TX_FES_STATUS_PROT: { - struct hal_tx_fes_status_prot *tx_fes_status = - (struct hal_tx_fes_status_prot *)tlv_data; + const struct hal_tx_fes_status_prot *tx_fes_status = tlv_data; u32 start_timestamp; u32 end_timestamp; @@ -1795,8 +1767,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, case HAL_TX_FES_STATUS_START_PPDU: case HAL_TX_FES_STATUS_START_PROT: { - struct hal_tx_fes_status_start_prot *tx_fes_stat_start = - (struct hal_tx_fes_status_start_prot *)tlv_data; + const struct hal_tx_fes_status_start_prot *tx_fes_stat_start = tlv_data; u64 ppdu_ts; info[0] = __le32_to_cpu(tx_fes_stat_start->info0); @@ -1811,8 +1782,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_TX_FES_STATUS_USER_PPDU: { - struct hal_tx_fes_status_user_ppdu *tx_fes_usr_ppdu = - (struct hal_tx_fes_status_user_ppdu *)tlv_data; + const struct hal_tx_fes_status_user_ppdu *tx_fes_usr_ppdu = tlv_data; info[0] = __le32_to_cpu(tx_fes_usr_ppdu->info0); @@ -1855,8 +1825,7 @@ ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, break; case HAL_RX_FRAME_BITMAP_ACK: { - struct hal_rx_frame_bitmap_ack *fbm_ack = - (struct hal_rx_frame_bitmap_ack *)tlv_data; + const struct hal_rx_frame_bitmap_ack *fbm_ack = tlv_data; u32 addr_32; u16 addr_16; @@ -1874,8 +1843,7 @@ 
ath12k_dp_mon_tx_parse_status_tlv(struct ath12k_base *ab, } case HAL_MACTX_PHY_DESC: { - struct hal_tx_phy_desc *tx_phy_desc = - (struct hal_tx_phy_desc *)tlv_data; + const struct hal_tx_phy_desc *tx_phy_desc = tlv_data; info[0] = __le32_to_cpu(tx_phy_desc->info0); info[1] = __le32_to_cpu(tx_phy_desc->info1); From 578f6fc55c2ced5f68a7f87edbf6db3663dc6b57 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 17 Dec 2024 10:55:05 +0000 Subject: [PATCH 0535/1386] wifi: ath12k: Fix spelling mistake "requestted" -> "requested" There is a spelling mistake in an ath12k_err error message. Fix it. Signed-off-by: Colin Ian King Acked-by: Kalle Valo Link: https://patch.msgid.link/20241217105505.306047-1-colin.i.king@gmail.com Signed-off-by: Jeff Johnson --- drivers/net/wireless/ath/ath12k/qmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath12k/qmi.c b/drivers/net/wireless/ath/ath12k/qmi.c index a8ed86a294c3..5c3563383fab 100644 --- a/drivers/net/wireless/ath/ath12k/qmi.c +++ b/drivers/net/wireless/ath/ath12k/qmi.c @@ -2618,7 +2618,7 @@ static int ath12k_qmi_alloc_target_mem_chunk(struct ath12k_base *ab) if (!ag->mlo_mem.mlo_mem_size) { ag->mlo_mem.mlo_mem_size = mlo_size; } else if (ag->mlo_mem.mlo_mem_size != mlo_size) { - ath12k_err(ab, "QMI MLO memory size error, expected size is %d but requestted size is %d", + ath12k_err(ab, "QMI MLO memory size error, expected size is %d but requested size is %d", ag->mlo_mem.mlo_mem_size, mlo_size); ret = -EINVAL; goto err; From b1b66ae094cd2aa49c6841683cb7846bd46f38ca Mon Sep 17 00:00:00 2001 From: Hongguang Gao Date: Tue, 17 Dec 2024 10:26:15 -0800 Subject: [PATCH 0536/1386] bnxt_en: Use FW defined resource limits for RoCE If FW supports setting resource limits for RoCE, then just use the FW limits instead of using some fixed values in the driver. These limits will be used to allocate context memory for QP, SRQ, AH, and MR resources for RoCE. 
Reviewed-by: Damodharam Ammepalli Reviewed-by: Kalesh AP Signed-off-by: Hongguang Gao Signed-off-by: Selvin Xavier Signed-off-by: Michael Chan Link: https://patch.msgid.link/20241217182620.2454075-2-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 41 +++++++++++++------ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 ++ drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 + drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 2 + 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index b86f980fa7ea..469352ac1f7e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9117,10 +9117,18 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) ena = 0; if ((bp->flags & BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) { pg_lvl = 2; - extra_qps = min_t(u32, 65536, max_qps - l2_qps - qp1_qps); - /* allocate extra qps if fw supports RoCE fast qp destroy feature */ - extra_qps += fast_qpmd_qps; - extra_srqs = min_t(u32, 8192, max_srqs - srqs); + if (BNXT_SW_RES_LMT(bp)) { + extra_qps = max_qps - l2_qps - qp1_qps; + extra_srqs = max_srqs - srqs; + } else { + extra_qps = min_t(u32, 65536, + max_qps - l2_qps - qp1_qps); + /* allocate extra qps if fw supports RoCE fast qp + * destroy feature + */ + extra_qps += fast_qpmd_qps; + extra_srqs = min_t(u32, 8192, max_srqs - srqs); + } if (fast_qpmd_qps) ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_QP_FAST_QPMD; } @@ -9156,14 +9164,20 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) goto skip_rdma; ctxm = &ctx->ctx_arr[BNXT_CTX_MRAV]; - /* 128K extra is needed to accommodate static AH context - * allocation by f/w. 
- */ - num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); - num_ah = min_t(u32, num_mr, 1024 * 128); - ctxm->split_entry_cnt = BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1; - if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) - ctxm->mrav_av_entries = num_ah; + if (BNXT_SW_RES_LMT(bp) && + ctxm->split_entry_cnt == BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1) { + num_ah = ctxm->mrav_av_entries; + num_mr = ctxm->max_entries - num_ah; + } else { + /* 128K extra is needed to accommodate static AH context + * allocation by f/w. + */ + num_mr = min_t(u32, ctxm->max_entries / 2, 1024 * 256); + num_ah = min_t(u32, num_mr, 1024 * 128); + ctxm->split_entry_cnt = BNXT_CTX_MRAV_AV_SPLIT_ENTRY + 1; + if (!ctxm->mrav_av_entries || ctxm->mrav_av_entries > num_ah) + ctxm->mrav_av_entries = num_ah; + } rc = bnxt_setup_ctxm_pg_tbls(bp, ctxm, num_mr + num_ah, 2); if (rc) @@ -9470,6 +9484,9 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->flags |= BNXT_FLAG_UDP_GSO_CAP; if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED) bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP; + if (flags_ext2 & + FUNC_QCAPS_RESP_FLAGS_EXT2_SW_MAX_RESOURCE_LIMITS_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS; if (BNXT_PF(bp) && (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7df7a2233307..3e20d200da62 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2482,6 +2482,7 @@ struct bnxt { #define BNXT_FW_CAP_CFA_NTUPLE_RX_EXT_IP_PROTO BIT_ULL(38) #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V3 BIT_ULL(39) #define BNXT_FW_CAP_VNIC_RE_FLUSH BIT_ULL(40) + #define BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS BIT_ULL(41) u32 fw_dbg_cap; @@ -2501,6 +2502,8 @@ struct bnxt { ((bp)->fw_cap & BNXT_FW_CAP_ENABLE_RDMA_SRIOV) #define BNXT_ROCE_VF_RESC_CAP(bp) \ ((bp)->fw_cap & 
BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED) +#define BNXT_SW_RES_LMT(bp) \ + ((bp)->fw_cap & BNXT_FW_CAP_SW_MAX_RESOURCE_LIMITS) u32 hwrm_spec_code; u16 hwrm_cmd_seq; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index b771c84cdd89..94c6a0928ca0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -416,6 +416,8 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->flags |= BNXT_EN_FLAG_VF; if (BNXT_ROCE_VF_RESC_CAP(bp)) edev->flags |= BNXT_EN_FLAG_ROCE_VF_RES_MGMT; + if (BNXT_SW_RES_LMT(bp)) + edev->flags |= BNXT_EN_FLAG_SW_RES_LMT; edev->chip_num = bp->chip_num; edev->hw_ring_stats_size = bp->hw_ring_stats_size; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 5d6aac60f236..54ad9f8273d7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -65,6 +65,8 @@ struct bnxt_en_dev { #define BNXT_EN_FLAG_VF 0x10 #define BNXT_EN_VF(edev) ((edev)->flags & BNXT_EN_FLAG_VF) #define BNXT_EN_FLAG_ROCE_VF_RES_MGMT 0x20 + #define BNXT_EN_FLAG_SW_RES_LMT 0x40 +#define BNXT_EN_SW_RES_LMT(edev) ((edev)->flags & BNXT_EN_FLAG_SW_RES_LMT) struct bnxt_ulp *ulp_tbl; int l2_db_size; /* Doorbell BAR size in From fac5472fc845115ea543acbe9b183d330d6277ed Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Dec 2024 10:26:16 -0800 Subject: [PATCH 0537/1386] bnxt_en: Do not allow ethtool -m on an untrusted VF Block all ethtool module operations on an untrusted VF. The firmware won't allow it and will return error. 
Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20241217182620.2454075-3-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 6 +++++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 +++++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 12 ++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 469352ac1f7e..c0728d5ff8bc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8279,16 +8279,20 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp) if (rc) goto func_qcfg_exit; + flags = le16_to_cpu(resp->flags); #ifdef CONFIG_BNXT_SRIOV if (BNXT_VF(bp)) { struct bnxt_vf_info *vf = &bp->vf; vf->vlan = le16_to_cpu(resp->vlan) & VLAN_VID_MASK; + if (flags & FUNC_QCFG_RESP_FLAGS_TRUSTED_VF) + vf->flags |= BNXT_VF_TRUST; + else + vf->flags &= ~BNXT_VF_TRUST; } else { bp->pf.registered_vfs = le16_to_cpu(resp->registered_vfs); } #endif - flags = le16_to_cpu(resp->flags); if (flags & (FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED | FUNC_QCFG_RESP_FLAGS_FW_LLDP_AGENT_ENABLED)) { bp->fw_cap |= BNXT_FW_CAP_LLDP_AGENT; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 3e20d200da62..d5e81e008ab5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2270,6 +2270,11 @@ struct bnxt { #define BNXT_PF(bp) (!((bp)->flags & BNXT_FLAG_VF)) #define BNXT_VF(bp) ((bp)->flags & BNXT_FLAG_VF) +#ifdef CONFIG_BNXT_SRIOV +#define BNXT_VF_IS_TRUSTED(bp) ((bp)->vf.flags & BNXT_VF_TRUST) +#else +#define BNXT_VF_IS_TRUSTED(bp) 0 +#endif #define BNXT_NPAR(bp) ((bp)->port_partition_type) #define BNXT_MH(bp) ((bp)->flags & BNXT_FLAG_MULTI_HOST) #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp)) diff --git 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d87681d71106..28f2c471652c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4375,6 +4375,9 @@ static int bnxt_get_module_info(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); int rc; + if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) + return -EPERM; + /* No point in going further if phy status indicates * module is not inserted or if it is powered down or * if it is of type 10GBase-T @@ -4426,6 +4429,9 @@ static int bnxt_get_module_eeprom(struct net_device *dev, u16 start = eeprom->offset, length = eeprom->len; int rc = 0; + if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) + return -EPERM; + memset(data, 0, eeprom->len); /* Read A0 portion of the EEPROM */ @@ -4480,6 +4486,12 @@ static int bnxt_get_module_eeprom_by_page(struct net_device *dev, struct bnxt *bp = netdev_priv(dev); int rc; + if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) { + NL_SET_ERR_MSG_MOD(extack, + "Module read not permitted on untrusted VF"); + return -EPERM; + } + rc = bnxt_get_module_status(bp, extack); if (rc) return rc; From 36d1e70a90e98c59ee6635552db243d9ebc3c5ea Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Dec 2024 10:26:17 -0800 Subject: [PATCH 0538/1386] bnxt_en: Skip PHY loopback ethtool selftest if unsupported by FW Skip PHY loopback selftest if firmware advertises that it is unsupported in the HWRM_PORT_PHY_QCAPS call. Only show PHY loopback test result to be 0 if the test has run and passes. Do the same for external loopback to be consistent. 
Reviewed-by: Pavan Chebbi Reviewed-by: Kalesh AP Signed-off-by: Michael Chan Link: https://patch.msgid.link/20241217182620.2454075-4-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 28f2c471652c..8001849af879 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4914,20 +4914,26 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, buf[BNXT_MACLPBK_TEST_IDX] = 0; bnxt_hwrm_mac_loopback(bp, false); + buf[BNXT_PHYLPBK_TEST_IDX] = 1; + if (bp->phy_flags & BNXT_PHY_FL_NO_PHY_LPBK) + goto skip_phy_loopback; + bnxt_hwrm_phy_loopback(bp, true, false); msleep(1000); - if (bnxt_run_loopback(bp)) { - buf[BNXT_PHYLPBK_TEST_IDX] = 1; + if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; - } + else + buf[BNXT_PHYLPBK_TEST_IDX] = 0; +skip_phy_loopback: + buf[BNXT_EXTLPBK_TEST_IDX] = 1; if (do_ext_lpbk) { etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE; bnxt_hwrm_phy_loopback(bp, true, true); msleep(1000); - if (bnxt_run_loopback(bp)) { - buf[BNXT_EXTLPBK_TEST_IDX] = 1; + if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; - } + else + buf[BNXT_EXTLPBK_TEST_IDX] = 0; } bnxt_hwrm_phy_loopback(bp, false, false); bnxt_half_close_nic(bp); From b45a850585ca0cc45f7fe0f83be33a769ecc43ab Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Dec 2024 10:26:18 -0800 Subject: [PATCH 0539/1386] bnxt_en: Skip MAC loopback selftest if it is unsupported by FW Call the new HWRM_PORT_MAC_QCAPS to check if mac loopback is supported. Skip the MAC loopback ethtool self test if it is not supported. 
Reviewed-by: Kalesh AP Reviewed-by: Pavan Chebbi Reviewed-by: Andy Gospodarek Signed-off-by: Michael Chan Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/20241217182620.2454075-5-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 24 +++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 ++++ .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 11 +++++---- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c0728d5ff8bc..46edea75e062 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11551,6 +11551,26 @@ hwrm_phy_qcaps_exit: return rc; } +static void bnxt_hwrm_mac_qcaps(struct bnxt *bp) +{ + struct hwrm_port_mac_qcaps_output *resp; + struct hwrm_port_mac_qcaps_input *req; + int rc; + + if (bp->hwrm_spec_code < 0x10a03) + return; + + rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_QCAPS); + if (rc) + return; + + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send_silent(bp, req); + if (!rc) + bp->mac_flags = resp->flags; + hwrm_req_drop(bp, req); +} + static bool bnxt_support_dropped(u16 advertising, u16 supported) { u16 diff = advertising ^ supported; @@ -15679,6 +15699,10 @@ static int bnxt_probe_phy(struct bnxt *bp, bool fw_dflt) bp->dev->priv_flags |= IFF_SUPP_NOFCS; else bp->dev->priv_flags &= ~IFF_SUPP_NOFCS; + + bp->mac_flags = 0; + bnxt_hwrm_mac_qcaps(bp); + if (!fw_dflt) return 0; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index d5e81e008ab5..094c9e95b463 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2668,6 +2668,11 @@ struct bnxt { #define BNXT_PHY_FL_BANK_SEL (PORT_PHY_QCAPS_RESP_FLAGS2_BANK_ADDR_SUPPORTED << 8) #define BNXT_PHY_FL_SPEEDS2 (PORT_PHY_QCAPS_RESP_FLAGS2_SPEEDS2_SUPPORTED << 8) + /* copied from flags in 
hwrm_port_mac_qcaps_output */ + u8 mac_flags; +#define BNXT_MAC_FL_NO_MAC_LPBK \ + PORT_MAC_QCAPS_RESP_FLAGS_LOCAL_LPBK_NOT_SUPPORTED + u8 num_tests; struct bnxt_test_info *test_info; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8001849af879..c094abfa1ebc 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4899,21 +4899,24 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest, bnxt_close_nic(bp, true, false); bnxt_run_fw_tests(bp, test_mask, &test_results); - buf[BNXT_MACLPBK_TEST_IDX] = 1; - bnxt_hwrm_mac_loopback(bp, true); - msleep(250); rc = bnxt_half_open_nic(bp); if (rc) { - bnxt_hwrm_mac_loopback(bp, false); etest->flags |= ETH_TEST_FL_FAILED; return; } + buf[BNXT_MACLPBK_TEST_IDX] = 1; + if (bp->mac_flags & BNXT_MAC_FL_NO_MAC_LPBK) + goto skip_mac_loopback; + + bnxt_hwrm_mac_loopback(bp, true); + msleep(250); if (bnxt_run_loopback(bp)) etest->flags |= ETH_TEST_FL_FAILED; else buf[BNXT_MACLPBK_TEST_IDX] = 0; bnxt_hwrm_mac_loopback(bp, false); +skip_mac_loopback: buf[BNXT_PHYLPBK_TEST_IDX] = 1; if (bp->phy_flags & BNXT_PHY_FL_NO_PHY_LPBK) goto skip_phy_loopback; From bf2afe0f1493e992852df4a58e4aabd01ab8b384 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Dec 2024 10:26:19 -0800 Subject: [PATCH 0540/1386] bnxt_en: Skip reading PXP registers during ethtool -d if unsupported Newer firmware does not allow reading the PXP registers during ethtool -d, so skip the firmware call in that case. Userspace (bnxt.c) always expects the register block to be populated so zeroes will be returned instead. 
Reviewed-by: Ajit Khaparde Reviewed-by: Kalesh AP Reviewed-by: Somnath Kotur Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20241217182620.2454075-6-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index c094abfa1ebc..75a59dd72bce 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -2050,7 +2050,8 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, int rc; regs->version = 0; - bnxt_dbg_hwrm_rd_reg(bp, 0, BNXT_PXP_REG_LEN / 4, _p); + if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_REG_ACCESS_RESTRICTED)) + bnxt_dbg_hwrm_rd_reg(bp, 0, BNXT_PXP_REG_LEN / 4, _p); if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED)) return; From 73df38b097a608ee5d0054211e6cb479c8edad91 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Tue, 17 Dec 2024 10:26:20 -0800 Subject: [PATCH 0541/1386] MAINTAINERS: bnxt_en: Add Pavan Chebbi as co-maintainer Reviewed-by: Pavan Chebbi Signed-off-by: Michael Chan Link: https://patch.msgid.link/20241217182620.2454075-7-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index e67e0c188349..1579124ef426 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4611,6 +4611,7 @@ F: drivers/net/ethernet/broadcom/bnx2x/ BROADCOM BNXT_EN 50 GIGABIT ETHERNET DRIVER M: Michael Chan +M: Pavan Chebbi L: netdev@vger.kernel.org S: Supported F: drivers/firmware/broadcom/tee_bnxt_fw.c From 3fc87cb94f5f3224a9ea168ee935286d915d2a6a Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:03:11 -0800 Subject: [PATCH 0542/1386] net: dsa: microchip: Add suspend/resume support to KSZ DSA driver The KSZ DSA driver 
starts a timer to read MIB counters periodically to avoid count overrun. During system suspend this will give an error for not able to write to register as the SPI system returns an error when it is in suspend state. This implementation stops the timer when the system goes into suspend and restarts it when resumed. Signed-off-by: Tristram Ha Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241218020311.70628-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz9477_i2c.c | 4 +++ drivers/net/dsa/microchip/ksz_common.c | 37 +++++++++++++++++++++++++ drivers/net/dsa/microchip/ksz_common.h | 2 ++ drivers/net/dsa/microchip/ksz_spi.c | 4 +++ 4 files changed, 47 insertions(+) diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index 1c6d7fc16772..a2beb27459f1 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -127,10 +127,14 @@ static const struct of_device_id ksz9477_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, ksz9477_dt_ids); +static DEFINE_SIMPLE_DEV_PM_OPS(ksz_i2c_pm_ops, + ksz_switch_suspend, ksz_switch_resume); + static struct i2c_driver ksz9477_i2c_driver = { .driver = { .name = "ksz9477-switch", .of_match_table = ksz9477_dt_ids, + .pm = &ksz_i2c_pm_ops, }, .probe = ksz9477_i2c_probe, .remove = ksz9477_i2c_remove, diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index df314724e6a7..a8dac7ff6b81 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -4586,6 +4586,23 @@ static int ksz_hsr_leave(struct dsa_switch *ds, int port, return 0; } +static int ksz_suspend(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + + cancel_delayed_work_sync(&dev->mib_read); + return 0; +} + +static int ksz_resume(struct dsa_switch *ds) +{ + struct ksz_device *dev = ds->priv; + + if (dev->mib_read_interval) + schedule_delayed_work(&dev->mib_read, 
dev->mib_read_interval); + return 0; +} + static const struct dsa_switch_ops ksz_switch_ops = { .get_tag_protocol = ksz_get_tag_protocol, .connect_tag_protocol = ksz_connect_tag_protocol, @@ -4626,6 +4643,8 @@ static const struct dsa_switch_ops ksz_switch_ops = { .port_max_mtu = ksz_max_mtu, .get_wol = ksz_get_wol, .set_wol = ksz_set_wol, + .suspend = ksz_suspend, + .resume = ksz_resume, .get_ts_info = ksz_get_ts_info, .port_hwtstamp_get = ksz_hwtstamp_get, .port_hwtstamp_set = ksz_hwtstamp_set, @@ -5126,6 +5145,24 @@ void ksz_switch_remove(struct ksz_device *dev) } EXPORT_SYMBOL(ksz_switch_remove); +#ifdef CONFIG_PM_SLEEP +int ksz_switch_suspend(struct device *dev) +{ + struct ksz_device *priv = dev_get_drvdata(dev); + + return dsa_switch_suspend(priv->ds); +} +EXPORT_SYMBOL(ksz_switch_suspend); + +int ksz_switch_resume(struct device *dev) +{ + struct ksz_device *priv = dev_get_drvdata(dev); + + return dsa_switch_resume(priv->ds); +} +EXPORT_SYMBOL(ksz_switch_resume); +#endif + MODULE_AUTHOR("Woojung Huh "); MODULE_DESCRIPTION("Microchip KSZ Series Switch DSA Driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index b3bb75ca0796..2bc96127a447 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -444,6 +444,8 @@ struct ksz_dev_ops { struct ksz_device *ksz_switch_alloc(struct device *base, void *priv); int ksz_switch_register(struct ksz_device *dev); void ksz_switch_remove(struct ksz_device *dev); +int ksz_switch_suspend(struct device *dev); +int ksz_switch_resume(struct device *dev); void ksz_init_mib_timer(struct ksz_device *dev); bool ksz_is_port_mac_global_usable(struct dsa_switch *ds, int port); diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c index 108a958dc356..b633d263098c 100644 --- a/drivers/net/dsa/microchip/ksz_spi.c +++ b/drivers/net/dsa/microchip/ksz_spi.c @@ -239,10 +239,14 @@ static const 
struct spi_device_id ksz_spi_ids[] = { }; MODULE_DEVICE_TABLE(spi, ksz_spi_ids); +static DEFINE_SIMPLE_DEV_PM_OPS(ksz_spi_pm_ops, + ksz_switch_suspend, ksz_switch_resume); + static struct spi_driver ksz_spi_driver = { .driver = { .name = "ksz-switch", .of_match_table = ksz_dt_ids, + .pm = &ksz_spi_pm_ops, }, .id_table = ksz_spi_ids, .probe = ksz_spi_probe, From 75e2c86c7b180fd1068ad271178c2820a199e7eb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 17 Dec 2024 18:44:00 -0800 Subject: [PATCH 0543/1386] net: netlink: catch attempts to send empty messages syzbot can figure out a way to redirect a netlink message to a tap. Sending empty skbs to devices is not valid and we end up hitting a skb_assert_len() in __dev_queue_xmit(). Make catching these mistakes easier, assert the skb size directly in netlink core. Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241218024400.824355-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/netlink/af_netlink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index f4e7b5e4bb59..85311226183a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1287,6 +1287,7 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation) { int delta; + skb_assert_len(skb); WARN_ON(skb->sk != NULL); delta = skb->end - skb->tail; if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize) From b55498ff14bd14860d48dc8d2a0b6889b218c408 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Dec 2024 22:31:18 +0100 Subject: [PATCH 0544/1386] net: phy: add phy_disable_eee If a MAC driver doesn't support EEE, then the PHY shouldn't advertise it. Add phy_disable_eee() for this purpose. 
Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/fd51738c-dcd6-4d61-b8c5-faa6ac0f1026@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 16 ++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 17 insertions(+) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1a908af4175b..928dc3c509b6 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3003,6 +3003,22 @@ void phy_support_eee(struct phy_device *phydev) } EXPORT_SYMBOL(phy_support_eee); +/** + * phy_disable_eee - Disable EEE for the PHY + * @phydev: Target phy_device struct + * + * This function is used by MAC drivers for MAC's which don't support EEE. + * It disables EEE on the PHY layer. + */ +void phy_disable_eee(struct phy_device *phydev) +{ + linkmode_zero(phydev->supported_eee); + linkmode_zero(phydev->advertising_eee); + phydev->eee_cfg.tx_lpi_enabled = false; + phydev->eee_cfg.eee_enabled = false; +} +EXPORT_SYMBOL_GPL(phy_disable_eee); + /** * phy_support_sym_pause - Enable support of symmetrical pause * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index e597a32cc787..5bc71d59910c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -2071,6 +2071,7 @@ void phy_advertise_eee_all(struct phy_device *phydev); void phy_support_sym_pause(struct phy_device *phydev); void phy_support_asym_pause(struct phy_device *phydev); void phy_support_eee(struct phy_device *phydev); +void phy_disable_eee(struct phy_device *phydev); void phy_set_sym_pause(struct phy_device *phydev, bool rx, bool tx, bool autoneg); void phy_set_asym_pause(struct phy_device *phydev, bool rx, bool tx); From c9f5a5dabbf5ab53a6392d7c782d373d2c892e21 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 16 Dec 2024 22:32:25 +0100 Subject: [PATCH 0545/1386] net: ethernet: ti: cpsw: disable PHY EEE advertisement It seems the cpsw MAC doesn't support EEE. See e.g. 
the commit message of ce2899428ec0 ("ARM: dts: am335x-baltos: disable EEE for Atheros 8035 PHY"). There are cases where this causes issues if the PHY's on both sides have negotiated EEE. As a workaround EEE modes of the PHY are marked broken in DT, effectively disabling EEE advertisement. Improve this by using new function phy_disable_eee() in the MAC driver. This properly disables EEE advertisement, and allows to remove the eee-broken-xxx properties from DT. As EEE is disabled anyway, we can remove also the set_eee ethtool op. Signed-off-by: Heiner Kallweit Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/d08a798e-8565-422c-b2ed-121794db077f@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/cpsw.c | 3 ++- drivers/net/ethernet/ti/cpsw_ethtool.c | 12 ------------ drivers/net/ethernet/ti/cpsw_new.c | 3 ++- drivers/net/ethernet/ti/cpsw_priv.h | 1 - 4 files changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 4ef8cf6ea135..1e290ee8edfd 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -635,6 +635,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->phy = phy; + phy_disable_eee(slave->phy); + phy_attached_info(slave->phy); phy_start(slave->phy); @@ -1225,7 +1227,6 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .get_link_ksettings = cpsw_get_link_ksettings, .set_link_ksettings = cpsw_set_link_ksettings, .get_eee = cpsw_get_eee, - .set_eee = cpsw_set_eee, .nway_reset = cpsw_nway_reset, .get_ringparam = cpsw_get_ringparam, .set_ringparam = cpsw_set_ringparam, diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c index 21d55a180ef6..bdc4db0d169c 100644 --- a/drivers/net/ethernet/ti/cpsw_ethtool.c +++ b/drivers/net/ethernet/ti/cpsw_ethtool.c @@ -434,18 +434,6 @@ int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata) return -EOPNOTSUPP; } -int 
cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata) -{ - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_common *cpsw = priv->cpsw; - int slave_no = cpsw_slave_index(cpsw, priv); - - if (cpsw->slaves[slave_no].phy) - return phy_ethtool_set_eee(cpsw->slaves[slave_no].phy, edata); - else - return -EOPNOTSUPP; -} - int cpsw_nway_reset(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index a98bcc5eb566..be4d90c1cbe7 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -778,6 +778,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) slave->phy = phy; + phy_disable_eee(slave->phy); + phy_attached_info(slave->phy); phy_start(slave->phy); @@ -1209,7 +1211,6 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .get_link_ksettings = cpsw_get_link_ksettings, .set_link_ksettings = cpsw_set_link_ksettings, .get_eee = cpsw_get_eee, - .set_eee = cpsw_set_eee, .nway_reset = cpsw_nway_reset, .get_ringparam = cpsw_get_ringparam, .set_ringparam = cpsw_set_ringparam, diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h index 1f448290b9f4..f2fc55d9295d 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.h +++ b/drivers/net/ethernet/ti/cpsw_priv.h @@ -497,7 +497,6 @@ int cpsw_get_link_ksettings(struct net_device *ndev, int cpsw_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *ecmd); int cpsw_get_eee(struct net_device *ndev, struct ethtool_keee *edata); -int cpsw_set_eee(struct net_device *ndev, struct ethtool_keee *edata); int cpsw_nway_reset(struct net_device *ndev); void cpsw_get_ringparam(struct net_device *ndev, struct ethtool_ringparam *ering, From 0c7469ee718e1dd929f52bfb142a7f6fb68f0765 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 16 Dec 2024 18:47:33 +0100 Subject: [PATCH 0546/1386] net: airoha: Fix error 
path in airoha_probe() Do not run napi_disable() if airoha_hw_init() fails since Tx/Rx napi has not been started yet. In order to fix the issue, introduce airoha_qdma_stop_napi routine and remove napi_disable in airoha_hw_cleanup(). Fixes: 23020f049327 ("net: airoha: Introduce ethernet support for EN7581 SoC") Reviewed-by: Michal Swiatkowski Signed-off-by: Lorenzo Bianconi Link: https://patch.msgid.link/20241216-airoha_probe-error-path-fix-v2-1-6b10e04e9a5c@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mediatek/airoha_eth.c | 33 ++++++++++++++++------ 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mediatek/airoha_eth.c b/drivers/net/ethernet/mediatek/airoha_eth.c index 6c683a12d5aa..d8bfc21a5b19 100644 --- a/drivers/net/ethernet/mediatek/airoha_eth.c +++ b/drivers/net/ethernet/mediatek/airoha_eth.c @@ -2138,17 +2138,14 @@ static void airoha_hw_cleanup(struct airoha_qdma *qdma) if (!qdma->q_rx[i].ndesc) continue; - napi_disable(&qdma->q_rx[i].napi); netif_napi_del(&qdma->q_rx[i].napi); airoha_qdma_cleanup_rx_queue(&qdma->q_rx[i]); if (qdma->q_rx[i].page_pool) page_pool_destroy(qdma->q_rx[i].page_pool); } - for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) { - napi_disable(&qdma->q_tx_irq[i].napi); + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) netif_napi_del(&qdma->q_tx_irq[i].napi); - } for (i = 0; i < ARRAY_SIZE(qdma->q_tx); i++) { if (!qdma->q_tx[i].ndesc) @@ -2173,6 +2170,21 @@ static void airoha_qdma_start_napi(struct airoha_qdma *qdma) } } +static void airoha_qdma_stop_napi(struct airoha_qdma *qdma) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qdma->q_tx_irq); i++) + napi_disable(&qdma->q_tx_irq[i].napi); + + for (i = 0; i < ARRAY_SIZE(qdma->q_rx); i++) { + if (!qdma->q_rx[i].ndesc) + continue; + + napi_disable(&qdma->q_rx[i].napi); + } +} + static void airoha_update_hw_stats(struct airoha_gdm_port *port) { struct airoha_eth *eth = port->qdma->eth; @@ -2738,7 +2750,7 @@ static int airoha_probe(struct 
platform_device *pdev) err = airoha_hw_init(pdev, eth); if (err) - goto error; + goto error_hw_cleanup; for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_qdma_start_napi(&eth->qdma[i]); @@ -2753,13 +2765,16 @@ static int airoha_probe(struct platform_device *pdev) err = airoha_alloc_gdm_port(eth, np); if (err) { of_node_put(np); - goto error; + goto error_napi_stop; } } return 0; -error: +error_napi_stop: + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + airoha_qdma_stop_napi(&eth->qdma[i]); +error_hw_cleanup: for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) airoha_hw_cleanup(&eth->qdma[i]); @@ -2780,8 +2795,10 @@ static void airoha_remove(struct platform_device *pdev) struct airoha_eth *eth = platform_get_drvdata(pdev); int i; - for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) + for (i = 0; i < ARRAY_SIZE(eth->qdma); i++) { + airoha_qdma_stop_napi(&eth->qdma[i]); airoha_hw_cleanup(&eth->qdma[i]); + } for (i = 0; i < ARRAY_SIZE(eth->ports); i++) { struct airoha_gdm_port *port = eth->ports[i]; From f6038d913b13b41dcaf10ff2a89f76d7ffac9edc Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Tue, 17 Dec 2024 20:12:06 +0000 Subject: [PATCH 0547/1386] net: Document netmem driver support Document expectations from drivers looking to add support for device memory tcp or other netmem based features.
Signed-off-by: Mina Almasry Reviewed-by: Bagas Sanjaya Reviewed-by: Shannon Nelson Link: https://patch.msgid.link/20241217201206.2360389-1-almasrymina@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/index.rst | 1 + Documentation/networking/netmem.rst | 79 +++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 Documentation/networking/netmem.rst diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 46c178e564b3..058193ed2eeb 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -86,6 +86,7 @@ Contents: netdevices netfilter-sysctl netif-msg + netmem nexthop-group-resilient nf_conntrack-sysctl nf_flowtable diff --git a/Documentation/networking/netmem.rst b/Documentation/networking/netmem.rst new file mode 100644 index 000000000000..7de21ddb5412 --- /dev/null +++ b/Documentation/networking/netmem.rst @@ -0,0 +1,79 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================== +Netmem Support for Network Drivers +================================== + +This document outlines the requirements for network drivers to support netmem, +an abstract memory type that enables features like device memory TCP. By +supporting netmem, drivers can work with various underlying memory types +with little to no modification. + +Benefits of Netmem : + +* Flexibility: Netmem can be backed by different memory types (e.g., struct + page, DMA-buf), allowing drivers to support various use cases such as device + memory TCP. +* Future-proof: Drivers with netmem support are ready for upcoming + features that rely on it. +* Simplified Development: Drivers interact with a consistent API, + regardless of the underlying memory implementation. + +Driver Requirements +=================== + +1. The driver must support page_pool. + +2. The driver must support the tcp-data-split ethtool option. + +3. The driver must use the page_pool netmem APIs for payload memory. 
The netmem + APIs currently 1-to-1 correspond with page APIs. Conversion to netmem should + be achievable by switching the page APIs to netmem APIs and tracking memory + via netmem_refs in the driver rather than struct page * : + + - page_pool_alloc -> page_pool_alloc_netmem + - page_pool_get_dma_addr -> page_pool_get_dma_addr_netmem + - page_pool_put_page -> page_pool_put_netmem + + Not all page APIs have netmem equivalents at the moment. If your driver + relies on a missing netmem API, feel free to add and propose to netdev@, or + reach out to the maintainers and/or almasrymina@google.com for help adding + the netmem API. + +4. The driver must use the following PP_FLAGS: + + - PP_FLAG_DMA_MAP: netmem is not dma-mappable by the driver. The driver + must delegate the dma mapping to the page_pool, which knows when + dma-mapping is (or is not) appropriate. + - PP_FLAG_DMA_SYNC_DEV: netmem dma addr is not necessarily dma-syncable + by the driver. The driver must delegate the dma syncing to the page_pool, + which knows when dma-syncing is (or is not) appropriate. + - PP_FLAG_ALLOW_UNREADABLE_NETMEM. The driver must specify this flag iff + tcp-data-split is enabled. + +5. The driver must not assume the netmem is readable and/or backed by pages. + The netmem returned by the page_pool may be unreadable, in which case + netmem_address() will return NULL. The driver must correctly handle + unreadable netmem, i.e. don't attempt to handle its contents when + netmem_address() is NULL. + + Ideally, drivers should not have to check the underlying netmem type via + helpers like netmem_is_net_iov() or convert the netmem to any of its + underlying types via netmem_to_page() or netmem_to_net_iov(). In most cases, + netmem or page_pool helpers that abstract this complexity are provided + (and more can be added). + +6. The driver must use page_pool_dma_sync_netmem_for_cpu() in lieu of + dma_sync_single_range_for_cpu(). 
For some memory providers, dma_syncing for + CPU will be done by the page_pool, for others (particularly dmabuf memory + provider), dma syncing for CPU is the responsibility of the userspace using + dmabuf APIs. The driver must delegate the entire dma-syncing operation to + the page_pool which will do it correctly. + +7. Avoid implementing driver-specific recycling on top of the page_pool. Drivers + cannot hold onto a struct page to do their own recycling as the netmem may + not be backed by a struct page. However, you may hold onto a page_pool + reference with page_pool_fragment_netmem() or page_pool_ref_netmem() for + that purpose, but be mindful that some netmem types might have longer + circulation times, such as when userspace holds a reference in zerocopy + scenarios. From 5c98e89d96ecbf0e4ed38a20c46727c0ed2c112b Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Wed, 18 Dec 2024 16:34:07 +0800 Subject: [PATCH 0548/1386] net: stmmac: Drop useless code related to ethtool rx-copybreak After commit 2af6106ae949 ("net: stmmac: Introducing support for Page Pool"), the driver always copies frames to get a better performance, zero-copy for RX frames is no more, then these code turned to be useless and users of ethtool may get confused about the unhandled rx-copybreak parameter. 
This patch mostly reverts commit 22ad38381547 ("stmmac: do not perform zero-copy for rx frames") Signed-off-by: Furong Xu <0x1207@gmail.com> Link: https://patch.msgid.link/20241218083407.390509-1-0x1207@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac.h | 2 - .../ethernet/stmicro/stmmac/stmmac_ethtool.c | 39 ------------------- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 5 --- 3 files changed, 46 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 1d86439b8a14..b8d631e559c0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -126,7 +126,6 @@ struct stmmac_rx_queue { unsigned int cur_rx; unsigned int dirty_rx; unsigned int buf_alloc_num; - u32 rx_zeroc_thresh; dma_addr_t dma_rx_phy; u32 rx_tail_addr; unsigned int state_saved; @@ -266,7 +265,6 @@ struct stmmac_priv { int sph_cap; u32 sarc_type; - unsigned int rx_copybreak; u32 rx_riwt[MTL_MAX_TX_QUEUES]; int hwts_rx_en; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 1d77389ce953..16b4d8c21c90 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -1227,43 +1227,6 @@ static int stmmac_get_ts_info(struct net_device *dev, return ethtool_op_get_ts_info(dev, info); } -static int stmmac_get_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, void *data) -{ - struct stmmac_priv *priv = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case ETHTOOL_RX_COPYBREAK: - *(u32 *)data = priv->rx_copybreak; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static int stmmac_set_tunable(struct net_device *dev, - const struct ethtool_tunable *tuna, - const void *data) -{ - struct stmmac_priv *priv = netdev_priv(dev); - int ret = 0; - - switch (tuna->id) { - case 
ETHTOOL_RX_COPYBREAK: - priv->rx_copybreak = *(u32 *)data; - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - static int stmmac_get_mm(struct net_device *ndev, struct ethtool_mm_state *state) { @@ -1390,8 +1353,6 @@ static const struct ethtool_ops stmmac_ethtool_ops = { .set_per_queue_coalesce = stmmac_set_per_queue_coalesce, .get_channels = stmmac_get_channels, .set_channels = stmmac_set_channels, - .get_tunable = stmmac_get_tunable, - .set_tunable = stmmac_set_tunable, .get_link_ksettings = stmmac_ethtool_get_link_ksettings, .set_link_ksettings = stmmac_ethtool_set_link_ksettings, .get_mm = stmmac_get_mm, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 16b8bcfa8b11..6bc10ffe7a2b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -77,7 +77,6 @@ module_param(phyaddr, int, 0444); MODULE_PARM_DESC(phyaddr, "Physical device address"); #define STMMAC_TX_THRESH(x) ((x)->dma_conf.dma_tx_size / 4) -#define STMMAC_RX_THRESH(x) ((x)->dma_conf.dma_rx_size / 4) /* Limit to make sure XDP TX and slow path can coexist */ #define STMMAC_XSK_TX_BUDGET_MAX 256 @@ -107,8 +106,6 @@ static int buf_sz = DEFAULT_BUFSIZE; module_param(buf_sz, int, 0644); MODULE_PARM_DESC(buf_sz, "DMA buffer size"); -#define STMMAC_RX_COPYBREAK 256 - static const u32 default_msg_level = (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_TIMER); @@ -3927,8 +3924,6 @@ static int __stmmac_open(struct net_device *dev, } } - priv->rx_copybreak = STMMAC_RX_COPYBREAK; - buf_sz = dma_conf->dma_buf_sz; for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) From 455e135c3042540cf15fff629a56600c11bea396 Mon Sep 17 00:00:00 2001 From: Andy Moreton Date: Wed, 18 Dec 2024 13:59:30 +0000 Subject: [PATCH 0549/1386] sfc: remove efx_writed_page_locked From: Andy Moreton 
efx_writed_page_locked is a workaround for Siena hardware that is not needed on later adapters, and has no callers. Remove it. Signed-off-by: Andy Moreton Signed-off-by: Edward Cree Link: https://patch.msgid.link/20241218135930.2350358-1-edward.cree@amd.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/io.h | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/net/ethernet/sfc/io.h b/drivers/net/ethernet/sfc/io.h index 4cc7b501135f..ef374a8e05c3 100644 --- a/drivers/net/ethernet/sfc/io.h +++ b/drivers/net/ethernet/sfc/io.h @@ -217,28 +217,4 @@ _efx_writed_page(struct efx_nic *efx, const efx_dword_t *value, (reg) != 0xa1c), \ page) -/* Write TIMER_COMMAND. This is a page-mapped 32-bit CSR, but a bug - * in the BIU means that writes to TIMER_COMMAND[0] invalidate the - * collector register. - */ -static inline void _efx_writed_page_locked(struct efx_nic *efx, - const efx_dword_t *value, - unsigned int reg, - unsigned int page) -{ - unsigned long flags __attribute__ ((unused)); - - if (page == 0) { - spin_lock_irqsave(&efx->biu_lock, flags); - efx_writed(efx, value, efx_paged_reg(efx, page, reg)); - spin_unlock_irqrestore(&efx->biu_lock, flags); - } else { - efx_writed(efx, value, efx_paged_reg(efx, page, reg)); - } -} -#define efx_writed_page_locked(efx, value, reg, page) \ - _efx_writed_page_locked(efx, value, \ - reg + BUILD_BUG_ON_ZERO((reg) != 0x420), \ - page) - #endif /* EFX_IO_H */ From 6724bc65e59b57e64f65269da8956f8bdc12bb03 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Wed, 18 Dec 2024 09:00:18 -0500 Subject: [PATCH 0550/1386] selftests: net: remove redundant ncdevmem print Remove extraneous fprintf Signed-off-by: Jamal Hadi Salim Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20241218140018.15607-1-jhs@mojatatu.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 8e502a1f8f9b..19a6969643f4 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem) fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", page_aligned_frags, non_page_aligned_frags); - fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - cleanup: free(tmp_mem); From 5155cbcdbf03f207095f9a3794942a25aa7e5f58 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Wed, 18 Dec 2024 15:33:34 +0100 Subject: [PATCH 0551/1386] af_unix: Add a prompt to CONFIG_AF_UNIX_OOB This makes it possible to disable the MSG_OOB support in .config. Signed-off-by: Florent Revest Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241218143334.1507465-1-revest@chromium.org Signed-off-by: Jakub Kicinski --- net/unix/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/unix/Kconfig b/net/unix/Kconfig index 8b5d04210d7c..6f1783c1659b 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -17,9 +17,11 @@ config UNIX Say Y unless you know what you are doing. config AF_UNIX_OOB - bool + bool "UNIX: out-of-bound messages" depends on UNIX default y + help + Support for MSG_OOB in UNIX domain sockets. If unsure, say Y. config UNIX_DIAG tristate "UNIX: socket monitoring interface" From 29b540795b42a3e610c0d5e9d908a8d6c1333676 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 18 Dec 2024 14:17:16 +0100 Subject: [PATCH 0552/1386] gre: Drop ip_route_output_gre(). We already have enough variants of ip_route_output*() functions. We don't need a GRE specific one in the generic route.h header file. Furthermore, ip_route_output_gre() is only used once, in ipgre_open(), where it can be easily replaced by a simple call to ip_route_output_key(). 
While there, and for clarity, explicitly set .flowi4_scope to RT_SCOPE_UNIVERSE instead of relying on the implicit zero initialisation. Signed-off-by: Guillaume Nault Reviewed-by: Michal Swiatkowski Link: https://patch.msgid.link/ab7cba47b8558cd4bfe2dc843c38b622a95ee48e.1734527729.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 14 -------------- net/ipv4/ip_gre.c | 17 ++++++++++------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 84cb1e04f5cd..6947a155d501 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -185,20 +185,6 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi return ip_route_output_flow(net, fl4, sk); } -static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4 *fl4, - __be32 daddr, __be32 saddr, - __be32 gre_key, __u8 tos, int oif) -{ - memset(fl4, 0, sizeof(*fl4)); - fl4->flowi4_oif = oif; - fl4->daddr = daddr; - fl4->saddr = saddr; - fl4->flowi4_tos = tos; - fl4->flowi4_proto = IPPROTO_GRE; - fl4->fl4_gre_key = gre_key; - return ip_route_output_key(net, fl4); -} - enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f1f31ebfc793..a020342f618d 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -924,15 +924,18 @@ static int ipgre_open(struct net_device *dev) struct ip_tunnel *t = netdev_priv(dev); if (ipv4_is_multicast(t->parms.iph.daddr)) { - struct flowi4 fl4; + struct flowi4 fl4 = { + .flowi4_oif = t->parms.link, + .flowi4_tos = t->parms.iph.tos & INET_DSCP_MASK, + .flowi4_scope = RT_SCOPE_UNIVERSE, + .flowi4_proto = IPPROTO_GRE, + .saddr = t->parms.iph.saddr, + .daddr = t->parms.iph.daddr, + .fl4_gre_key = t->parms.o_key, + }; struct rtable *rt; - rt = ip_route_output_gre(t->net, &fl4, - t->parms.iph.daddr, - t->parms.iph.saddr, - t->parms.o_key, - 
t->parms.iph.tos & INET_DSCP_MASK, - t->parms.link); + rt = ip_route_output_key(t->net, &fl4); if (IS_ERR(rt)) return -EADDRNOTAVAIL; dev = rt->dst.dev; From a19d0236f466f1ce8f44a04a96c302d3023eebf4 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2024 18:44:29 +0100 Subject: [PATCH 0553/1386] page_pool: add page_pool_dev_alloc_netmem() Similarly to other _dev shorthands, add one for page_pool_alloc_netmem() to allocate a netmem using the default Rx GFP flags (ATOMIC | NOWARN) to make the page -> netmem transition of drivers easier. Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241218174435.1445282-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 776a3008ac28..543f54fa3020 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -144,6 +144,15 @@ static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool, return netmem; } +static inline netmem_ref page_pool_dev_alloc_netmem(struct page_pool *pool, + unsigned int *offset, + unsigned int *size) +{ + gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + + return page_pool_alloc_netmem(pool, offset, size, gfp); +} + static inline struct page *page_pool_alloc(struct page_pool *pool, unsigned int *offset, unsigned int *size, gfp_t gfp) From 68ddc8ae17685a8c4ac78260bde8fe4a79511aef Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2024 18:44:30 +0100 Subject: [PATCH 0554/1386] xdp: add generic xdp_buff_add_frag() The code piece which would attach a frag to &xdp_buff is almost identical across the drivers supporting XDP multi-buffer on Rx. Make it a generic elegant "oneliner". Also, I see lots of drivers calculating frags_truesize as `xdp->frame_sz * nr_frags`. 
I can't say this is fully correct, since frags might be backed by chunks of different sizes, especially with stuff like the header split. Even page_pool_alloc() can give you two different truesizes on two subsequent requests to allocate the same buffer size. Add a field to &skb_shared_info (unionized as there's no free slot currently on x86_64) to track the "true" truesize. It can be used later when updating the skb. Reviewed-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241218174435.1445282-3-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 16 +++++-- include/net/xdp.h | 96 +++++++++++++++++++++++++++++++++++++++++- net/core/xdp.c | 11 +++++ 3 files changed, 118 insertions(+), 5 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b2509cd0b930..bb2b751d274a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -608,11 +608,19 @@ struct skb_shared_info { * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; - unsigned int xdp_frags_size; - /* Intermediate layers must ensure that destructor_arg - * remains valid until skb destructor */ - void * destructor_arg; + union { + struct { + u32 xdp_frags_size; + u32 xdp_frags_truesize; + }; + + /* + * Intermediate layers must ensure that destructor_arg + * remains valid until skb destructor. 
+ */ + void *destructor_arg; + }; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; diff --git a/include/net/xdp.h b/include/net/xdp.h index d2089cfecefd..11139c210b49 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -167,6 +167,93 @@ out: return len; } +void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp); + +/** + * __xdp_buff_add_frag - attach frag to &xdp_buff + * @xdp: XDP buffer to attach the frag to + * @netmem: network memory containing the frag + * @offset: offset at which the frag starts + * @size: size of the frag + * @truesize: total memory size occupied by the frag + * @try_coalesce: whether to try coalescing the frags (not valid for XSk) + * + * Attach frag to the XDP buffer. If it currently has no frags attached, + * initialize the related fields, otherwise check that the frag number + * didn't reach the limit of ``MAX_SKB_FRAGS``. If possible, try coalescing + * the frag with the previous one. + * The function doesn't check/update the pfmemalloc bit. Please use the + * non-underscored wrapper in drivers. + * + * Return: true on success, false if there's no space for the frag in + * the shared info struct. 
+ */ +static inline bool __xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem, + u32 offset, u32 size, u32 truesize, + bool try_coalesce) +{ + struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp); + skb_frag_t *prev; + u32 nr_frags; + + if (!xdp_buff_has_frags(xdp)) { + xdp_buff_set_frags_flag(xdp); + + nr_frags = 0; + sinfo->xdp_frags_size = 0; + sinfo->xdp_frags_truesize = 0; + + goto fill; + } + + nr_frags = sinfo->nr_frags; + prev = &sinfo->frags[nr_frags - 1]; + + if (try_coalesce && netmem == skb_frag_netmem(prev) && + offset == skb_frag_off(prev) + skb_frag_size(prev)) { + skb_frag_size_add(prev, size); + /* Guaranteed to only decrement the refcount */ + xdp_return_frag(netmem, xdp); + } else if (unlikely(nr_frags == MAX_SKB_FRAGS)) { + return false; + } else { +fill: + __skb_fill_netmem_desc_noacc(sinfo, nr_frags++, netmem, + offset, size); + } + + sinfo->nr_frags = nr_frags; + sinfo->xdp_frags_size += size; + sinfo->xdp_frags_truesize += truesize; + + return true; +} + +/** + * xdp_buff_add_frag - attach frag to &xdp_buff + * @xdp: XDP buffer to attach the frag to + * @netmem: network memory containing the frag + * @offset: offset at which the frag starts + * @size: size of the frag + * @truesize: total memory size occupied by the frag + * + * Version of __xdp_buff_add_frag() which takes care of the pfmemalloc bit. + * + * Return: true on success, false if there's no space for the frag in + * the shared info struct. 
+ */ +static inline bool xdp_buff_add_frag(struct xdp_buff *xdp, netmem_ref netmem, + u32 offset, u32 size, u32 truesize) +{ + if (!__xdp_buff_add_frag(xdp, netmem, offset, size, truesize, true)) + return false; + + if (unlikely(netmem_is_pfmemalloc(netmem))) + xdp_buff_set_frag_pfmemalloc(xdp); + + return true; +} + struct xdp_frame { void *data; u32 len; @@ -230,7 +317,14 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, unsigned int size, unsigned int truesize, bool pfmemalloc) { - skb_shinfo(skb)->nr_frags = nr_frags; + struct skb_shared_info *sinfo = skb_shinfo(skb); + + sinfo->nr_frags = nr_frags; + /* + * ``destructor_arg`` is unionized with ``xdp_frags_{,true}size``, + * reset it after that these fields aren't used anymore. + */ + sinfo->destructor_arg = NULL; skb->len += size; skb->data_len += size; diff --git a/net/core/xdp.c b/net/core/xdp.c index f1165a35411b..a66a4e036f53 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -535,6 +535,17 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, } EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); +/** + * xdp_return_frag -- free one XDP frag or decrement its refcount + * @netmem: network memory reference to release + * @xdp: &xdp_buff to release the frag for + */ +void xdp_return_frag(netmem_ref netmem, const struct xdp_buff *xdp) +{ + __xdp_return(netmem, xdp->rxq->mem.type, true, NULL); +} +EXPORT_SYMBOL_GPL(xdp_return_frag); + void xdp_return_buff(struct xdp_buff *xdp) { struct skb_shared_info *sinfo; From 539c1fba1ac77184215d892eda0857f5687b7366 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2024 18:44:31 +0100 Subject: [PATCH 0555/1386] xdp: add generic xdp_build_skb_from_buff() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code which builds an skb from an &xdp_buff keeps multiplying itself around the drivers with almost no changes. Let's try to stop that by adding a generic function. 
Unlike __xdp_build_skb_from_frame(), always allocate an skbuff head using napi_build_skb() and make use of the available xdp_rxq pointer to assign the Rx queue index. In case of PP-backed buffer, mark the skb to be recycled, as every PP user's been switched to recycle skbs. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241218174435.1445282-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 1 + net/core/xdp.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/include/net/xdp.h b/include/net/xdp.h index 11139c210b49..aa24fa78cbe6 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -336,6 +336,7 @@ xdp_update_skb_shared_info(struct sk_buff *skb, u8 nr_frags, void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) +struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp); struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, diff --git a/net/core/xdp.c b/net/core/xdp.c index a66a4e036f53..704203a15a18 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -629,6 +629,61 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp) } EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk); +/** + * xdp_build_skb_from_buff - create an skb from &xdp_buff + * @xdp: &xdp_buff to convert to an skb + * + * Perform common operations to create a new skb to pass up the stack from + * &xdp_buff: allocate an skb head from the NAPI percpu cache, initialize + * skb data pointers and offsets, set the recycle bit if the buff is + * PP-backed, Rx queue index, protocol and update frags info. + * + * Return: new &sk_buff on success, %NULL on error. 
+ */ +struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp) +{ + const struct xdp_rxq_info *rxq = xdp->rxq; + const struct skb_shared_info *sinfo; + struct sk_buff *skb; + u32 nr_frags = 0; + int metalen; + + if (unlikely(xdp_buff_has_frags(xdp))) { + sinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = sinfo->nr_frags; + } + + skb = napi_build_skb(xdp->data_hard_start, xdp->frame_sz); + if (unlikely(!skb)) + return NULL; + + skb_reserve(skb, xdp->data - xdp->data_hard_start); + __skb_put(skb, xdp->data_end - xdp->data); + + metalen = xdp->data - xdp->data_meta; + if (metalen > 0) + skb_metadata_set(skb, metalen); + + if (rxq->mem.type == MEM_TYPE_PAGE_POOL) + skb_mark_for_recycle(skb); + + skb_record_rx_queue(skb, rxq->queue_index); + + if (unlikely(nr_frags)) { + u32 tsize; + + tsize = sinfo->xdp_frags_truesize ? : nr_frags * xdp->frame_sz; + xdp_update_skb_shared_info(skb, nr_frags, + sinfo->xdp_frags_size, tsize, + xdp_buff_is_frag_pfmemalloc(xdp)); + } + + skb->protocol = eth_type_trans(skb, rxq->dev); + + return skb; +} +EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff); + struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, struct net_device *dev) From 51205f841a495c78aa59d0e41683463dac23eb27 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2024 18:44:32 +0100 Subject: [PATCH 0556/1386] xsk: make xsk_buff_add_frag() really add the frag via __xdp_buff_add_frag() Currently, xsk_buff_add_frag() only adds the frag to pool's linked list, not doing anything with the &xdp_buff. The drivers do that manually and the logic is the same. Make it really add an skb frag, just like xdp_buff_add_frag() does that, and freeing frags on error if needed. This allows to remove repeating code from i40e and ice and not add the same code again and again. 
Acked-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241218174435.1445282-5-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 30 ++------------------ drivers/net/ethernet/intel/ice/ice_xsk.c | 32 ++-------------------- include/net/xdp_sock_drv.h | 18 ++++++++++-- 3 files changed, 20 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 4e885df789ef..e28f1905a4a0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -395,32 +395,6 @@ static void i40e_handle_xdp_result_zc(struct i40e_ring *rx_ring, WARN_ON_ONCE(1); } -static int -i40e_add_xsk_frag(struct i40e_ring *rx_ring, struct xdp_buff *first, - struct xdp_buff *xdp, const unsigned int size) -{ - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first); - - if (!xdp_buff_has_frags(first)) { - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(first); - } - - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - xsk_buff_free(first); - return -ENOMEM; - } - - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), - XDP_PACKET_HEADROOM, size); - sinfo->xdp_frags_size += size; - xsk_buff_add_frag(xdp); - - return 0; -} - /** * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring * @rx_ring: Rx ring @@ -486,8 +460,10 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) if (!first) first = bi; - else if (i40e_add_xsk_frag(rx_ring, first, bi, size)) + else if (!xsk_buff_add_frag(first, bi)) { + xsk_buff_free(first); break; + } if (++next_to_process == count) next_to_process = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 334ae945d640..8975d2971bc3 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ 
b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -801,35 +801,6 @@ out_failure: return result; } -static int -ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, - struct xdp_buff *xdp, const unsigned int size) -{ - struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(first); - - if (!size) - return 0; - - if (!xdp_buff_has_frags(first)) { - sinfo->nr_frags = 0; - sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(first); - } - - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - xsk_buff_free(first); - return -ENOMEM; - } - - __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, - virt_to_page(xdp->data_hard_start), - XDP_PACKET_HEADROOM, size); - sinfo->xdp_frags_size += size; - xsk_buff_add_frag(xdp); - - return 0; -} - /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring @@ -895,7 +866,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, if (!first) { first = xdp; - } else if (ice_add_xsk_frag(rx_ring, first, xdp, size)) { + } else if (likely(size) && !xsk_buff_add_frag(first, xdp)) { + xsk_buff_free(first); break; } diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index f3175a5d28f7..86620c818965 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -136,11 +136,21 @@ out: xp_free(xskb); } -static inline void xsk_buff_add_frag(struct xdp_buff *xdp) +static inline bool xsk_buff_add_frag(struct xdp_buff *head, + struct xdp_buff *xdp) { - struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp); + const void *data = xdp->data; + struct xdp_buff_xsk *frag; + if (!__xdp_buff_add_frag(head, virt_to_netmem(data), + offset_in_page(data), xdp->data_end - data, + xdp->frame_sz, false)) + return false; + + frag = container_of(xdp, struct xdp_buff_xsk, xdp); list_add_tail(&frag->list_node, &frag->pool->xskb_list); + + return true; } static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) @@ -357,8 +367,10 @@ static inline 
void xsk_buff_free(struct xdp_buff *xdp) { } -static inline void xsk_buff_add_frag(struct xdp_buff *xdp) +static inline bool xsk_buff_add_frag(struct xdp_buff *head, + struct xdp_buff *xdp) { + return false; } static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) From 560d958c6c68fa62ddb4bd6f890c363598d184b0 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 18 Dec 2024 18:44:33 +0100 Subject: [PATCH 0557/1386] xsk: add generic XSk &xdp_buff -> skb conversion Same as with converting &xdp_buff to skb on Rx, the code which allocates a new skb and copies the XSk frame there is identical across the drivers, so make it generic. This includes copying all the frags if they are present in the original buff. System percpu page_pools greatly improve XDP_PASS performance on XSk: instead of page_alloc() + page_free(), the net core recycles the same pages, so the only overhead left is memcpy()s. When the Page Pool is not compiled in, the whole function is a return-NULL (but it always gets selected when eBPF is enabled). Note that the passed buff gets freed if the conversion is done w/o any error, assuming you don't need this buffer after you convert it to an skb. 
Reviewed-by: Maciej Fijalkowski Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241218174435.1445282-6-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 1 + net/core/xdp.c | 112 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) diff --git a/include/net/xdp.h b/include/net/xdp.h index aa24fa78cbe6..6da0e746cf75 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -337,6 +337,7 @@ void xdp_warn(const char *msg, const char *func, const int line); #define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__) struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp); +struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp); struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, diff --git a/net/core/xdp.c b/net/core/xdp.c index 704203a15a18..67b53fc7191e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -684,6 +684,118 @@ struct sk_buff *xdp_build_skb_from_buff(const struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_build_skb_from_buff); +/** + * xdp_copy_frags_from_zc - copy frags from XSk buff to skb + * @skb: skb to copy frags to + * @xdp: XSk &xdp_buff from which the frags will be copied + * @pp: &page_pool backing page allocation, if available + * + * Copy all frags from XSk &xdp_buff to the skb to pass it up the stack. + * Allocate a new buffer for each frag, copy it and attach to the skb. + * + * Return: true on success, false on netmem allocation fail. 
+ */ +static noinline bool xdp_copy_frags_from_zc(struct sk_buff *skb, + const struct xdp_buff *xdp, + struct page_pool *pp) +{ + struct skb_shared_info *sinfo = skb_shinfo(skb); + const struct skb_shared_info *xinfo; + u32 nr_frags, tsize = 0; + bool pfmemalloc = false; + + xinfo = xdp_get_shared_info_from_buff(xdp); + nr_frags = xinfo->nr_frags; + + for (u32 i = 0; i < nr_frags; i++) { + u32 len = skb_frag_size(&xinfo->frags[i]); + u32 offset, truesize = len; + netmem_ref netmem; + + netmem = page_pool_dev_alloc_netmem(pp, &offset, &truesize); + if (unlikely(!netmem)) { + sinfo->nr_frags = i; + return false; + } + + memcpy(__netmem_address(netmem), + __netmem_address(xinfo->frags[i].netmem), + LARGEST_ALIGN(len)); + __skb_fill_netmem_desc_noacc(sinfo, i, netmem, offset, len); + + tsize += truesize; + pfmemalloc |= netmem_is_pfmemalloc(netmem); + } + + xdp_update_skb_shared_info(skb, nr_frags, xinfo->xdp_frags_size, + tsize, pfmemalloc); + + return true; +} + +/** + * xdp_build_skb_from_zc - create an skb from XSk &xdp_buff + * @xdp: source XSk buff + * + * Similar to xdp_build_skb_from_buff(), but for XSk frames. Allocate an skb + * head, new buffer for the head, copy the data and initialize the skb fields. + * If there are frags, allocate new buffers for them and copy. + * Buffers are allocated from the system percpu pools to try recycling them. + * If new skb was built successfully, @xdp is returned to XSk pool's freelist. + * On error, it remains untouched and the caller must take care of this. + * + * Return: new &sk_buff on success, %NULL on error. 
+ */ +struct sk_buff *xdp_build_skb_from_zc(struct xdp_buff *xdp) +{ + struct page_pool *pp = this_cpu_read(system_page_pool); + const struct xdp_rxq_info *rxq = xdp->rxq; + u32 len = xdp->data_end - xdp->data_meta; + u32 truesize = xdp->frame_sz; + struct sk_buff *skb; + int metalen; + void *data; + + if (!IS_ENABLED(CONFIG_PAGE_POOL)) + return NULL; + + data = page_pool_dev_alloc_va(pp, &truesize); + if (unlikely(!data)) + return NULL; + + skb = napi_build_skb(data, truesize); + if (unlikely(!skb)) { + page_pool_free_va(pp, data, true); + return NULL; + } + + skb_mark_for_recycle(skb); + skb_reserve(skb, xdp->data_meta - xdp->data_hard_start); + + memcpy(__skb_put(skb, len), xdp->data_meta, LARGEST_ALIGN(len)); + + metalen = xdp->data - xdp->data_meta; + if (metalen > 0) { + skb_metadata_set(skb, metalen); + __skb_pull(skb, metalen); + } + + skb_record_rx_queue(skb, rxq->queue_index); + + if (unlikely(xdp_buff_has_frags(xdp)) && + unlikely(!xdp_copy_frags_from_zc(skb, xdp, pp))) { + napi_consume_skb(skb, true); + return NULL; + } + + xsk_buff_free(xdp); + + skb->protocol = eth_type_trans(skb, rxq->dev); + + return skb; +} +EXPORT_SYMBOL_GPL(xdp_build_skb_from_zc); + struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, struct sk_buff *skb, struct net_device *dev) From 46761fd52a8868a1420f75b675caf209793b8dd1 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:43 +0100 Subject: [PATCH 0558/1386] ixgbe: Add support for E610 FW Admin Command Interface Add low level support for Admin Command Interface (ACI). ACI is the Firmware interface used by a driver to communicate with E610 adapter. 
Add the following ACI features: - data structures, macros, register definitions - commands handling - events handling Co-developed-by: Stefan Wegrzyn Signed-off-by: Stefan Wegrzyn Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/Makefile | 4 +- .../net/ethernet/intel/ixgbe/ixgbe_common.c | 6 +- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 497 ++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 19 + drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 72 +- .../ethernet/intel/ixgbe/ixgbe_type_e610.h | 1066 +++++++++++++++++ 6 files changed, 1657 insertions(+), 7 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 965e5ce1b326..b456d102655a 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Copyright(c) 1999 - 2018 Intel Corporation. +# Copyright(c) 1999 - 2024 Intel Corporation. 
# # Makefile for the Intel(R) 10GbE PCI Express ethernet driver # @@ -9,7 +9,7 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-y := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ ixgbe_mbx.o ixgbe_x540.o ixgbe_x550.o ixgbe_lib.o ixgbe_ptp.o \ - ixgbe_xsk.o + ixgbe_xsk.o ixgbe_e610.o ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe_dcb_82599.o ixgbe_dcb_nl.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 3be1bfb16498..bfab2c0ee0aa 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -660,7 +660,11 @@ int ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) hw->bus.type = ixgbe_bus_type_pci_express; /* Get the negotiated link width and speed from PCI config space */ - link_status = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_LINK_STATUS); + if (hw->mac.type == ixgbe_mac_e610) + link_status = ixgbe_read_pci_cfg_word(hw, IXGBE_PCI_LINK_STATUS_E610); + else + link_status = ixgbe_read_pci_cfg_word(hw, + IXGBE_PCI_LINK_STATUS); hw->bus.width = ixgbe_convert_bus_width(link_status); hw->bus.speed = ixgbe_convert_bus_speed(link_status); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c new file mode 100644 index 000000000000..6a26f6b4d3d5 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -0,0 +1,497 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2024 Intel Corporation. */ + +#include "ixgbe_common.h" +#include "ixgbe_e610.h" +#include "ixgbe_type.h" +#include "ixgbe_x540.h" +#include "ixgbe_phy.h" + +/** + * ixgbe_should_retry_aci_send_cmd_execute - decide if ACI command should + * be resent + * @opcode: ACI opcode + * + * Check if ACI command should be sent again depending on the provided opcode. + * It may happen when CSR is busy during link state changes. 
+ * + * Return: true if the sending command routine should be repeated, + * otherwise false. + */ +static bool ixgbe_should_retry_aci_send_cmd_execute(u16 opcode) +{ + switch (opcode) { + case ixgbe_aci_opc_disable_rxen: + case ixgbe_aci_opc_get_phy_caps: + case ixgbe_aci_opc_get_link_status: + case ixgbe_aci_opc_get_link_topo: + return true; + } + + return false; +} + +/** + * ixgbe_aci_send_cmd_execute - execute sending FW Admin Command to FW Admin + * Command Interface + * @hw: pointer to the HW struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * + * Admin Command is sent using CSR by setting descriptor and buffer in specific + * registers. + * + * Return: the exit code of the operation. + * * - 0 - success. + * * - -EIO - CSR mechanism is not enabled. + * * - -EBUSY - CSR mechanism is busy. + * * - -EINVAL - buf_size is too big or + * invalid argument buf or buf_size. + * * - -ETIME - Admin Command X command timeout. + * * - -EIO - Admin Command X invalid state of HICR register or + * Admin Command failed because of bad opcode was returned or + * Admin Command failed with error Y. 
+ */ +static int ixgbe_aci_send_cmd_execute(struct ixgbe_hw *hw, + struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size) +{ + u16 opcode, buf_tail_size = buf_size % 4; + u32 *raw_desc = (u32 *)desc; + u32 hicr, i, buf_tail = 0; + bool valid_buf = false; + + hw->aci.last_status = IXGBE_ACI_RC_OK; + + /* It's necessary to check if mechanism is enabled */ + hicr = IXGBE_READ_REG(hw, IXGBE_PF_HICR); + + if (!(hicr & IXGBE_PF_HICR_EN)) + return -EIO; + + if (hicr & IXGBE_PF_HICR_C) { + hw->aci.last_status = IXGBE_ACI_RC_EBUSY; + return -EBUSY; + } + + opcode = le16_to_cpu(desc->opcode); + + if (buf_size > IXGBE_ACI_MAX_BUFFER_SIZE) + return -EINVAL; + + if (buf) + desc->flags |= cpu_to_le16(IXGBE_ACI_FLAG_BUF); + + if (desc->flags & cpu_to_le16(IXGBE_ACI_FLAG_BUF)) { + if ((buf && !buf_size) || + (!buf && buf_size)) + return -EINVAL; + if (buf && buf_size) + valid_buf = true; + } + + if (valid_buf) { + if (buf_tail_size) + memcpy(&buf_tail, buf + buf_size - buf_tail_size, + buf_tail_size); + + if (((buf_size + 3) & ~0x3) > IXGBE_ACI_LG_BUF) + desc->flags |= cpu_to_le16(IXGBE_ACI_FLAG_LB); + + desc->datalen = cpu_to_le16(buf_size); + + if (desc->flags & cpu_to_le16(IXGBE_ACI_FLAG_RD)) { + for (i = 0; i < buf_size / 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_PF_HIBA(i), ((u32 *)buf)[i]); + if (buf_tail_size) + IXGBE_WRITE_REG(hw, IXGBE_PF_HIBA(i), buf_tail); + } + } + + /* Descriptor is written to specific registers */ + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) + IXGBE_WRITE_REG(hw, IXGBE_PF_HIDA(i), raw_desc[i]); + + /* SW has to set PF_HICR.C bit and clear PF_HICR.SV and + * PF_HICR_EV + */ + hicr = (IXGBE_READ_REG(hw, IXGBE_PF_HICR) | IXGBE_PF_HICR_C) & + ~(IXGBE_PF_HICR_SV | IXGBE_PF_HICR_EV); + IXGBE_WRITE_REG(hw, IXGBE_PF_HICR, hicr); + +#define MAX_SLEEP_RESP_US 1000 +#define MAX_TMOUT_RESP_SYNC_US 100000000 + + /* Wait for sync Admin Command response */ + read_poll_timeout(IXGBE_READ_REG, hicr, + (hicr & IXGBE_PF_HICR_SV) || + !(hicr & IXGBE_PF_HICR_C), + 
MAX_SLEEP_RESP_US, MAX_TMOUT_RESP_SYNC_US, true, hw, + IXGBE_PF_HICR); + +#define MAX_TMOUT_RESP_ASYNC_US 150000000 + + /* Wait for async Admin Command response */ + read_poll_timeout(IXGBE_READ_REG, hicr, + (hicr & IXGBE_PF_HICR_EV) || + !(hicr & IXGBE_PF_HICR_C), + MAX_SLEEP_RESP_US, MAX_TMOUT_RESP_ASYNC_US, true, hw, + IXGBE_PF_HICR); + + /* Read sync Admin Command response */ + if ((hicr & IXGBE_PF_HICR_SV)) { + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) { + raw_desc[i] = IXGBE_READ_REG(hw, IXGBE_PF_HIDA(i)); + raw_desc[i] = raw_desc[i]; + } + } + + /* Read async Admin Command response */ + if ((hicr & IXGBE_PF_HICR_EV) && !(hicr & IXGBE_PF_HICR_C)) { + for (i = 0; i < IXGBE_ACI_DESC_SIZE_IN_DWORDS; i++) { + raw_desc[i] = IXGBE_READ_REG(hw, IXGBE_PF_HIDA_2(i)); + raw_desc[i] = raw_desc[i]; + } + } + + /* Handle timeout and invalid state of HICR register */ + if (hicr & IXGBE_PF_HICR_C) + return -ETIME; + + if (!(hicr & IXGBE_PF_HICR_SV) && !(hicr & IXGBE_PF_HICR_EV)) + return -EIO; + + /* For every command other than 0x0014 treat opcode mismatch + * as an error. Response to 0x0014 command read from HIDA_2 + * is a descriptor of an event which is expected to contain + * different opcode than the command. 
+ */ + if (desc->opcode != cpu_to_le16(opcode) && + opcode != ixgbe_aci_opc_get_fw_event) + return -EIO; + + if (desc->retval) { + hw->aci.last_status = (enum ixgbe_aci_err) + le16_to_cpu(desc->retval); + return -EIO; + } + + /* Write a response values to a buf */ + if (valid_buf) { + for (i = 0; i < buf_size / 4; i++) + ((u32 *)buf)[i] = IXGBE_READ_REG(hw, IXGBE_PF_HIBA(i)); + if (buf_tail_size) { + buf_tail = IXGBE_READ_REG(hw, IXGBE_PF_HIBA(i)); + memcpy(buf + buf_size - buf_tail_size, &buf_tail, + buf_tail_size); + } + } + + return 0; +} + +/** + * ixgbe_aci_send_cmd - send FW Admin Command to FW Admin Command Interface + * @hw: pointer to the HW struct + * @desc: descriptor describing the command + * @buf: buffer to use for indirect commands (NULL for direct commands) + * @buf_size: size of buffer for indirect commands (0 for direct commands) + * + * Helper function to send FW Admin Commands to the FW Admin Command Interface. + * + * Retry sending the FW Admin Command multiple times to the FW ACI + * if the EBUSY Admin Command error is returned. + * + * Return: the exit code of the operation. 
+ */ +int ixgbe_aci_send_cmd(struct ixgbe_hw *hw, struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size) +{ + u16 opcode = le16_to_cpu(desc->opcode); + struct ixgbe_aci_desc desc_cpy; + enum ixgbe_aci_err last_status; + u8 idx = 0, *buf_cpy = NULL; + bool is_cmd_for_retry; + unsigned long timeout; + int err; + + is_cmd_for_retry = ixgbe_should_retry_aci_send_cmd_execute(opcode); + if (is_cmd_for_retry) { + if (buf) { + buf_cpy = kmalloc(buf_size, GFP_KERNEL); + if (!buf_cpy) + return -ENOMEM; + memcpy(buf_cpy, buf, buf_size); + } + desc_cpy = *desc; + } + + timeout = jiffies + msecs_to_jiffies(IXGBE_ACI_SEND_TIMEOUT_MS); + do { + mutex_lock(&hw->aci.lock); + err = ixgbe_aci_send_cmd_execute(hw, desc, buf, buf_size); + last_status = hw->aci.last_status; + mutex_unlock(&hw->aci.lock); + + if (!is_cmd_for_retry || !err || + last_status != IXGBE_ACI_RC_EBUSY) + break; + + if (buf) + memcpy(buf, buf_cpy, buf_size); + *desc = desc_cpy; + + msleep(IXGBE_ACI_SEND_DELAY_TIME_MS); + } while (++idx < IXGBE_ACI_SEND_MAX_EXECUTE && + time_before(jiffies, timeout)); + + kfree(buf_cpy); + + return err; +} + +/** + * ixgbe_aci_check_event_pending - check if there are any pending events + * @hw: pointer to the HW struct + * + * Determine if there are any pending events. + * + * Return: true if there are any currently pending events + * otherwise false. + */ +bool ixgbe_aci_check_event_pending(struct ixgbe_hw *hw) +{ + u32 ep_bit_mask = hw->bus.func ? GL_FWSTS_EP_PF1 : GL_FWSTS_EP_PF0; + u32 fwsts = IXGBE_READ_REG(hw, GL_FWSTS); + + return (fwsts & ep_bit_mask) ? true : false; +} + +/** + * ixgbe_aci_get_event - get an event from ACI + * @hw: pointer to the HW struct + * @e: event information structure + * @pending: optional flag signaling that there are more pending events + * + * Obtain an event from ACI and return its content + * through 'e' using ACI command (0x0014). + * Provide information if there are more events + * to retrieve through 'pending'. 
+ * + * Return: the exit code of the operation. + */ +int ixgbe_aci_get_event(struct ixgbe_hw *hw, struct ixgbe_aci_event *e, + bool *pending) +{ + struct ixgbe_aci_desc desc; + int err; + + if (!e || (!e->msg_buf && e->buf_len)) + return -EINVAL; + + mutex_lock(&hw->aci.lock); + + /* Check if there are any events pending */ + if (!ixgbe_aci_check_event_pending(hw)) { + err = -ENOENT; + goto aci_get_event_exit; + } + + /* Obtain pending event */ + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_fw_event); + err = ixgbe_aci_send_cmd_execute(hw, &desc, e->msg_buf, e->buf_len); + if (err) + goto aci_get_event_exit; + + /* Returned 0x0014 opcode indicates that no event was obtained */ + if (desc.opcode == cpu_to_le16(ixgbe_aci_opc_get_fw_event)) { + err = -ENOENT; + goto aci_get_event_exit; + } + + /* Determine size of event data */ + e->msg_len = min_t(u16, le16_to_cpu(desc.datalen), e->buf_len); + /* Write event descriptor to event info structure */ + memcpy(&e->desc, &desc, sizeof(e->desc)); + + /* Check if there are any further events pending */ + if (pending) + *pending = ixgbe_aci_check_event_pending(hw); + +aci_get_event_exit: + mutex_unlock(&hw->aci.lock); + + return err; +} + +/** + * ixgbe_fill_dflt_direct_cmd_desc - fill ACI descriptor with default values. + * @desc: pointer to the temp descriptor (non DMA mem) + * @opcode: the opcode can be used to decide which flags to turn off or on + * + * Helper function to fill the descriptor desc with default values + * and the provided opcode. + */ +void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode) +{ + /* Zero out the desc. 
*/ + memset(desc, 0, sizeof(*desc)); + desc->opcode = cpu_to_le16(opcode); + desc->flags = cpu_to_le16(IXGBE_ACI_FLAG_SI); +} + +/** + * ixgbe_aci_req_res - request a common resource + * @hw: pointer to the HW struct + * @res: resource ID + * @access: access type + * @sdp_number: resource number + * @timeout: the maximum time in ms that the driver may hold the resource + * + * Requests a common resource using the ACI command (0x0008). + * Specifies the maximum time the driver may hold the resource. + * If the requested resource is currently occupied by some other driver, + * a busy return value is returned and the timeout field value indicates the + * maximum time the current owner has to free it. + * + * Return: the exit code of the operation. + */ +static int ixgbe_aci_req_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, + u8 sdp_number, u32 *timeout) +{ + struct ixgbe_aci_cmd_req_res *cmd_resp; + struct ixgbe_aci_desc desc; + int err; + + cmd_resp = &desc.params.res_owner; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_req_res); + + cmd_resp->res_id = cpu_to_le16(res); + cmd_resp->access_type = cpu_to_le16(access); + cmd_resp->res_number = cpu_to_le32(sdp_number); + cmd_resp->timeout = cpu_to_le32(*timeout); + *timeout = 0; + + err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + /* If the resource is held by some other driver, the command completes + * with a busy return value and the timeout field indicates the maximum + * time the current owner of the resource has to free it. + */ + if (!err || hw->aci.last_status == IXGBE_ACI_RC_EBUSY) + *timeout = le32_to_cpu(cmd_resp->timeout); + + return err; +} + +/** + * ixgbe_aci_release_res - release a common resource using ACI + * @hw: pointer to the HW struct + * @res: resource ID + * @sdp_number: resource number + * + * Release a common resource using ACI command (0x0009). + * + * Return: the exit code of the operation. 
+ */ +static int ixgbe_aci_release_res(struct ixgbe_hw *hw, + enum ixgbe_aci_res_ids res, u8 sdp_number) +{ + struct ixgbe_aci_cmd_req_res *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.res_owner; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_release_res); + + cmd->res_id = cpu_to_le16(res); + cmd->res_number = cpu_to_le32(sdp_number); + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_acquire_res - acquire the ownership of a resource + * @hw: pointer to the HW structure + * @res: resource ID + * @access: access type (read or write) + * @timeout: timeout in milliseconds + * + * Make an attempt to acquire the ownership of a resource using + * the ixgbe_aci_req_res to utilize ACI. + * If some other driver has previously acquired the resource and + * performed any necessary updates, -EALREADY is returned, + * and the caller does not obtain the resource and has no further work to do. + * If needed, the function will poll until the current lock owner times out. + * + * Return: the exit code of the operation. + */ +int ixgbe_acquire_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, u32 timeout) +{ +#define IXGBE_RES_POLLING_DELAY_MS 10 + u32 delay = IXGBE_RES_POLLING_DELAY_MS; + u32 res_timeout = timeout; + u32 retry_timeout; + int err; + + err = ixgbe_aci_req_res(hw, res, access, 0, &res_timeout); + + /* A return code of -EALREADY means that another driver has + * previously acquired the resource and performed any necessary updates; + * in this case the caller does not obtain the resource and has no + * further work to do. + */ + if (err == -EALREADY) + return err; + + /* If necessary, poll until the current lock owner times out. + * Set retry_timeout to the timeout value reported by the FW in the + * response to the "Request Resource Ownership" (0x0008) Admin Command + * as it indicates the maximum time the current owner of the resource + * is allowed to hold it. 
+ */ + retry_timeout = res_timeout; + while (err && retry_timeout && res_timeout) { + msleep(delay); + retry_timeout = (retry_timeout > delay) ? + retry_timeout - delay : 0; + err = ixgbe_aci_req_res(hw, res, access, 0, &res_timeout); + + /* Success - lock acquired. + * -EALREADY - lock free, no work to do. + */ + if (!err || err == -EALREADY) + break; + } + + return err; +} + +/** + * ixgbe_release_res - release a common resource + * @hw: pointer to the HW structure + * @res: resource ID + * + * Release a common resource using ixgbe_aci_release_res. + */ +void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res) +{ + u32 total_delay = 0; + int err; + + err = ixgbe_aci_release_res(hw, res, 0); + + /* There are some rare cases when trying to release the resource + * results in an admin command timeout, so handle them correctly. + */ + while (err == -ETIME && + total_delay < IXGBE_ACI_RELEASE_RES_TIMEOUT) { + usleep_range(1000, 1500); + err = ixgbe_aci_release_res(hw, res, 0); + total_delay++; + } +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h new file mode 100644 index 000000000000..18b831b6797d --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2024 Intel Corporation. 
*/ + +#ifndef _IXGBE_E610_H_ +#define _IXGBE_E610_H_ + +#include "ixgbe_type.h" + +int ixgbe_aci_send_cmd(struct ixgbe_hw *hw, struct ixgbe_aci_desc *desc, + void *buf, u16 buf_size); +bool ixgbe_aci_check_event_pending(struct ixgbe_hw *hw); +int ixgbe_aci_get_event(struct ixgbe_hw *hw, struct ixgbe_aci_event *e, + bool *pending); +void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode); +int ixgbe_acquire_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, + enum ixgbe_aci_res_access_type access, u32 timeout); +void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res); + +#endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 9baccacd02a1..5fdf32d79d82 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #ifndef _IXGBE_TYPE_H_ #define _IXGBE_TYPE_H_ @@ -7,6 +7,7 @@ #include #include #include +#include "ixgbe_type_e610.h" /* Device IDs */ #define IXGBE_DEV_ID_82598 0x10B6 @@ -71,12 +72,19 @@ #define IXGBE_DEV_ID_X550EM_A_1G_T 0x15E4 #define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15E5 +#define IXGBE_DEV_ID_E610_BACKPLANE 0x57AE +#define IXGBE_DEV_ID_E610_SFP 0x57AF +#define IXGBE_DEV_ID_E610_10G_T 0x57B0 +#define IXGBE_DEV_ID_E610_2_5G_T 0x57B1 +#define IXGBE_DEV_ID_E610_SGMII 0x57B2 + /* VF Device IDs */ #define IXGBE_DEV_ID_82599_VF 0x10ED #define IXGBE_DEV_ID_X540_VF 0x1515 #define IXGBE_DEV_ID_X550_VF 0x1565 #define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 #define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 +#define IXGBE_DEV_ID_E610_VF 0x57AD #define IXGBE_CAT(r, m) IXGBE_##r##_##m @@ -1600,7 +1608,7 @@ enum { #define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */ #define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */ #define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ -#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ +#define IXGBE_EICR_FW_EVENT 0x00200000 /* Async FW event */ #define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ #define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ #define IXGBE_EICR_TIMESYNC 0x01000000 /* Timesync Event */ @@ -1636,6 +1644,7 @@ enum { #define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EICS_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EICS_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ #define IXGBE_EICS_GPI_SDP0(_hw) IXGBE_EICR_GPI_SDP0(_hw) @@ -1654,6 +1663,7 @@ enum { #define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ 
+#define IXGBE_EIMS_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermel Sensor Event */ #define IXGBE_EIMS_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ @@ -1673,6 +1683,7 @@ enum { #define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */ #define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMC_FW_EVENT IXGBE_EICR_FW_EVENT /* Async FW event */ #define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ #define IXGBE_EIMC_TIMESYNC IXGBE_EICR_TIMESYNC /* Timesync Event */ #define IXGBE_EIMC_GPI_SDP0(_hw) IXGBE_EICR_GPI_SDP0(_hw) @@ -2068,6 +2079,7 @@ enum { #define IXGBE_SAN_MAC_ADDR_PTR 0x28 #define IXGBE_DEVICE_CAPS 0x2C #define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_PCIE_MSIX_E610_CAPS 0xB2 #define IXGBE_PCIE_MSIX_82599_CAPS 0x72 #define IXGBE_MAX_MSIX_VECTORS_82599 0x40 #define IXGBE_PCIE_MSIX_82598_CAPS 0x62 @@ -2168,6 +2180,7 @@ enum { #define IXGBE_PCI_DEVICE_STATUS 0xAA #define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 #define IXGBE_PCI_LINK_STATUS 0xB2 +#define IXGBE_PCI_LINK_STATUS_E610 0x82 #define IXGBE_PCI_DEVICE_CONTROL2 0xC8 #define IXGBE_PCI_LINK_WIDTH 0x3F0 #define IXGBE_PCI_LINK_WIDTH_1 0x10 @@ -2288,6 +2301,7 @@ enum { #define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00 #define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */ +#define IXGBE_FCTRL_TPE 0x00000080 /* Tag Promiscuous Ena*/ #define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/ #define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */ #define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */ @@ -2351,6 +2365,7 @@ enum { /* Multiple Transmit Queue Command Register */ #define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */ #define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */ +#define IXGBE_MTQC_NUM_TC_OR_Q 0xC /* Number of TCs or TxQs per pool */ #define 
IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */ #define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */ #define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */ @@ -2970,6 +2985,29 @@ typedef u32 ixgbe_link_speed; IXGBE_LINK_SPEED_1GB_FULL | \ IXGBE_LINK_SPEED_10GB_FULL) +/* Physical layer type */ +typedef u64 ixgbe_physical_layer; +#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0 +#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x00001 +#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x00002 +#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x00004 +#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x00008 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x00010 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x00020 +#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x00040 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x00080 +#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x00100 +#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x00200 +#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x00400 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x00800 +#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x01000 +#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x02000 +#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x04000 +#define IXGBE_PHYSICAL_LAYER_10BASE_T 0x08000 +#define IXGBE_PHYSICAL_LAYER_2500BASE_KX 0x10000 +#define IXGBE_PHYSICAL_LAYER_2500BASE_T 0x20000 +#define IXGBE_PHYSICAL_LAYER_5000BASE_T 0x40000 + /* Flow Control Data Sheet defined values * Calculation and defines taken from 802.1bb Annex O */ @@ -3145,6 +3183,8 @@ enum ixgbe_mac_type { ixgbe_mac_X550, ixgbe_mac_X550EM_x, ixgbe_mac_x550em_a, + ixgbe_mac_e610, + ixgbe_mac_e610_vf, ixgbe_num_macs }; @@ -3224,7 +3264,9 @@ enum ixgbe_media_type { ixgbe_media_type_copper, ixgbe_media_type_backplane, ixgbe_media_type_cx4, - ixgbe_media_type_virtual + ixgbe_media_type_virtual, + ixgbe_media_type_da, + ixgbe_media_type_aui, }; /* Flow Control Settings */ @@ -3233,7 +3275,8 @@ enum ixgbe_fc_mode { ixgbe_fc_rx_pause, ixgbe_fc_tx_pause, ixgbe_fc_full, - ixgbe_fc_default + ixgbe_fc_default, + ixgbe_fc_pfc, }; 
/* Smart Speed Settings */ @@ -3533,6 +3576,9 @@ struct ixgbe_link_operations { struct ixgbe_link_info { struct ixgbe_link_operations ops; u8 addr; + struct ixgbe_link_status link_info; + struct ixgbe_link_status link_info_old; + u8 get_link_info; }; struct ixgbe_eeprom_info { @@ -3575,6 +3621,7 @@ struct ixgbe_mac_info { u8 san_mac_rar_index; struct ixgbe_thermal_sensor_data thermal_sensor_data; bool set_lben; + u32 max_link_up_time; u8 led_link_act; }; @@ -3599,6 +3646,10 @@ struct ixgbe_phy_info { bool reset_if_overtemp; bool qsfp_shared_i2c_bus; u32 nw_mng_if_sel; + u64 phy_type_low; + u64 phy_type_high; + u16 curr_user_speed_req; + struct ixgbe_aci_cmd_set_phy_cfg_data curr_user_phy_cfg; }; struct ixgbe_mbx_stats { @@ -3643,6 +3694,19 @@ struct ixgbe_hw { bool allow_unsupported_sfp; bool wol_enabled; bool need_crosstalk_fix; + u8 api_branch; + u8 api_maj_ver; + u8 api_min_ver; + u8 api_patch; + u8 fw_branch; + u8 fw_maj_ver; + u8 fw_min_ver; + u8 fw_patch; + u32 fw_build; + struct ixgbe_aci_info aci; + struct ixgbe_flash_info flash; + struct ixgbe_hw_dev_caps dev_caps; + struct ixgbe_hw_func_caps func_caps; }; struct ixgbe_info { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h new file mode 100644 index 000000000000..5978cb06f732 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -0,0 +1,1066 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2024 Intel Corporation. 
*/ + +#ifndef _IXGBE_TYPE_E610_H_ +#define _IXGBE_TYPE_E610_H_ + +#define BYTES_PER_DWORD 4 + +/* General E610 defines */ +#define IXGBE_MAX_VSI 768 + +/* Checksum and Shadow RAM pointers */ +#define E610_SR_SW_CHECKSUM_WORD 0x3F + +/* Firmware Status Register (GL_FWSTS) */ +#define GL_FWSTS 0x00083048 /* Reset Source: POR */ +#define GL_FWSTS_EP_PF0 BIT(24) +#define GL_FWSTS_EP_PF1 BIT(25) + +/* Flash Access Register */ +#define IXGBE_GLNVM_FLA 0x000B6108 /* Reset Source: POR */ +#define IXGBE_GLNVM_FLA_LOCKED_S 6 +#define IXGBE_GLNVM_FLA_LOCKED_M BIT(6) + +/* Admin Command Interface (ACI) registers */ +#define IXGBE_PF_HIDA(_i) (0x00085000 + ((_i) * 4)) +#define IXGBE_PF_HIDA_2(_i) (0x00085020 + ((_i) * 4)) +#define IXGBE_PF_HIBA(_i) (0x00084000 + ((_i) * 4)) +#define IXGBE_PF_HICR 0x00082048 + +#define IXGBE_PF_HICR_EN BIT(0) +#define IXGBE_PF_HICR_C BIT(1) +#define IXGBE_PF_HICR_SV BIT(2) +#define IXGBE_PF_HICR_EV BIT(3) + +#define IXGBE_ACI_DESC_SIZE 32 +#define IXGBE_ACI_DESC_SIZE_IN_DWORDS (IXGBE_ACI_DESC_SIZE / BYTES_PER_DWORD) + +#define IXGBE_ACI_MAX_BUFFER_SIZE 4096 /* Size in bytes */ +#define IXGBE_ACI_SEND_DELAY_TIME_MS 10 +#define IXGBE_ACI_SEND_MAX_EXECUTE 3 +#define IXGBE_ACI_SEND_TIMEOUT_MS \ + (IXGBE_ACI_SEND_MAX_EXECUTE * IXGBE_ACI_SEND_DELAY_TIME_MS) +/* [ms] timeout of waiting for sync response */ +#define IXGBE_ACI_SYNC_RESPONSE_TIMEOUT 100000 +/* [ms] timeout of waiting for async response */ +#define IXGBE_ACI_ASYNC_RESPONSE_TIMEOUT 150000 +/* [ms] timeout of waiting for resource release */ +#define IXGBE_ACI_RELEASE_RES_TIMEOUT 10000 + +/* FW defined boundary for a large buffer, 4k >= Large buffer > 512 bytes */ +#define IXGBE_ACI_LG_BUF 512 + +/* Flags sub-structure + * |0 |1 |2 |3 |4 |5 |6 |7 |8 |9 |10 |11 |12 |13 |14 |15 | + * |DD |CMP|ERR|VFE| * * RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE | + */ + +#define IXGBE_ACI_FLAG_DD BIT(0) /* 0x1 */ +#define IXGBE_ACI_FLAG_CMP BIT(1) /* 0x2 */ +#define IXGBE_ACI_FLAG_ERR BIT(2) /* 0x4 */ +#define 
IXGBE_ACI_FLAG_VFE BIT(3) /* 0x8 */ +#define IXGBE_ACI_FLAG_LB BIT(9) /* 0x200 */ +#define IXGBE_ACI_FLAG_RD BIT(10) /* 0x400 */ +#define IXGBE_ACI_FLAG_VFC BIT(11) /* 0x800 */ +#define IXGBE_ACI_FLAG_BUF BIT(12) /* 0x1000 */ +#define IXGBE_ACI_FLAG_SI BIT(13) /* 0x2000 */ +#define IXGBE_ACI_FLAG_EI BIT(14) /* 0x4000 */ +#define IXGBE_ACI_FLAG_FE BIT(15) /* 0x8000 */ + +/* Admin Command Interface (ACI) error codes */ +enum ixgbe_aci_err { + IXGBE_ACI_RC_OK = 0, /* Success */ + IXGBE_ACI_RC_EPERM = 1, /* Operation not permitted */ + IXGBE_ACI_RC_ENOENT = 2, /* No such element */ + IXGBE_ACI_RC_ESRCH = 3, /* Bad opcode */ + IXGBE_ACI_RC_EINTR = 4, /* Operation interrupted */ + IXGBE_ACI_RC_EIO = 5, /* I/O error */ + IXGBE_ACI_RC_ENXIO = 6, /* No such resource */ + IXGBE_ACI_RC_E2BIG = 7, /* Arg too long */ + IXGBE_ACI_RC_EAGAIN = 8, /* Try again */ + IXGBE_ACI_RC_ENOMEM = 9, /* Out of memory */ + IXGBE_ACI_RC_EACCES = 10, /* Permission denied */ + IXGBE_ACI_RC_EFAULT = 11, /* Bad address */ + IXGBE_ACI_RC_EBUSY = 12, /* Device or resource busy */ + IXGBE_ACI_RC_EEXIST = 13, /* Object already exists */ + IXGBE_ACI_RC_EINVAL = 14, /* Invalid argument */ + IXGBE_ACI_RC_ENOTTY = 15, /* Not a typewriter */ + IXGBE_ACI_RC_ENOSPC = 16, /* No space left or alloc failure */ + IXGBE_ACI_RC_ENOSYS = 17, /* Function not implemented */ + IXGBE_ACI_RC_ERANGE = 18, /* Parameter out of range */ + IXGBE_ACI_RC_EFLUSHED = 19, /* Cmd flushed due to prev cmd error */ + IXGBE_ACI_RC_BAD_ADDR = 20, /* Descriptor contains a bad pointer */ + IXGBE_ACI_RC_EMODE = 21, /* Op not allowed in current dev mode */ + IXGBE_ACI_RC_EFBIG = 22, /* File too big */ + IXGBE_ACI_RC_ESBCOMP = 23, /* SB-IOSF completion unsuccessful */ + IXGBE_ACI_RC_ENOSEC = 24, /* Missing security manifest */ + IXGBE_ACI_RC_EBADSIG = 25, /* Bad RSA signature */ + IXGBE_ACI_RC_ESVN = 26, /* SVN number prohibits this package */ + IXGBE_ACI_RC_EBADMAN = 27, /* Manifest hash mismatch */ + IXGBE_ACI_RC_EBADBUF = 28, /* Buffer 
hash mismatches manifest */ + IXGBE_ACI_RC_EACCES_BMCU = 29, /* BMC Update in progress */ +}; + +/* Admin Command Interface (ACI) opcodes */ +enum ixgbe_aci_opc { + ixgbe_aci_opc_get_ver = 0x0001, + ixgbe_aci_opc_driver_ver = 0x0002, + ixgbe_aci_opc_get_exp_err = 0x0005, + + /* resource ownership */ + ixgbe_aci_opc_req_res = 0x0008, + ixgbe_aci_opc_release_res = 0x0009, + + /* device/function capabilities */ + ixgbe_aci_opc_list_func_caps = 0x000A, + ixgbe_aci_opc_list_dev_caps = 0x000B, + + /* safe disable of RXEN */ + ixgbe_aci_opc_disable_rxen = 0x000C, + + /* FW events */ + ixgbe_aci_opc_get_fw_event = 0x0014, + + /* PHY commands */ + ixgbe_aci_opc_get_phy_caps = 0x0600, + ixgbe_aci_opc_set_phy_cfg = 0x0601, + ixgbe_aci_opc_restart_an = 0x0605, + ixgbe_aci_opc_get_link_status = 0x0607, + ixgbe_aci_opc_set_event_mask = 0x0613, + ixgbe_aci_opc_get_link_topo = 0x06E0, + ixgbe_aci_opc_get_link_topo_pin = 0x06E1, + ixgbe_aci_opc_read_i2c = 0x06E2, + ixgbe_aci_opc_write_i2c = 0x06E3, + ixgbe_aci_opc_read_mdio = 0x06E4, + ixgbe_aci_opc_write_mdio = 0x06E5, + ixgbe_aci_opc_set_gpio_by_func = 0x06E6, + ixgbe_aci_opc_get_gpio_by_func = 0x06E7, + ixgbe_aci_opc_set_gpio = 0x06EC, + ixgbe_aci_opc_get_gpio = 0x06ED, + ixgbe_aci_opc_sff_eeprom = 0x06EE, + ixgbe_aci_opc_prog_topo_dev_nvm = 0x06F2, + ixgbe_aci_opc_read_topo_dev_nvm = 0x06F3, + + /* NVM commands */ + ixgbe_aci_opc_nvm_read = 0x0701, + ixgbe_aci_opc_nvm_erase = 0x0702, + ixgbe_aci_opc_nvm_write = 0x0703, + ixgbe_aci_opc_nvm_cfg_read = 0x0704, + ixgbe_aci_opc_nvm_cfg_write = 0x0705, + ixgbe_aci_opc_nvm_checksum = 0x0706, + ixgbe_aci_opc_nvm_write_activate = 0x0707, + ixgbe_aci_opc_nvm_sr_dump = 0x0707, + ixgbe_aci_opc_nvm_save_factory_settings = 0x0708, + ixgbe_aci_opc_nvm_update_empr = 0x0709, + ixgbe_aci_opc_nvm_pkg_data = 0x070A, + ixgbe_aci_opc_nvm_pass_component_tbl = 0x070B, + + /* Alternate Structure Commands */ + ixgbe_aci_opc_write_alt_direct = 0x0900, + ixgbe_aci_opc_write_alt_indirect = 0x0901, + 
ixgbe_aci_opc_read_alt_direct = 0x0902, + ixgbe_aci_opc_read_alt_indirect = 0x0903, + ixgbe_aci_opc_done_alt_write = 0x0904, + ixgbe_aci_opc_clear_port_alt_write = 0x0906, + + /* debug commands */ + ixgbe_aci_opc_debug_dump_internals = 0xFF08, + + /* SystemDiagnostic commands */ + ixgbe_aci_opc_set_health_status_config = 0xFF20, + ixgbe_aci_opc_get_supported_health_status_codes = 0xFF21, + ixgbe_aci_opc_get_health_status = 0xFF22, + ixgbe_aci_opc_clear_health_status = 0xFF23, +}; + +/* Get version (direct 0x0001) */ +struct ixgbe_aci_cmd_get_ver { + __le32 rom_ver; + __le32 fw_build; + u8 fw_branch; + u8 fw_major; + u8 fw_minor; + u8 fw_patch; + u8 api_branch; + u8 api_major; + u8 api_minor; + u8 api_patch; +}; + +#define IXGBE_DRV_VER_STR_LEN_E610 32 + +/* Send driver version (indirect 0x0002) */ +struct ixgbe_aci_cmd_driver_ver { + u8 major_ver; + u8 minor_ver; + u8 build_ver; + u8 subbuild_ver; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get Expanded Error Code (0x0005, direct) */ +struct ixgbe_aci_cmd_get_exp_err { + __le32 reason; +#define IXGBE_ACI_EXPANDED_ERROR_NOT_PROVIDED 0xFFFFFFFF + __le32 identifier; + u8 rsvd[8]; +}; + +/* FW update timeout definitions are in milliseconds */ +#define IXGBE_NVM_TIMEOUT 180000 + +enum ixgbe_aci_res_access_type { + IXGBE_RES_READ = 1, + IXGBE_RES_WRITE +}; + +enum ixgbe_aci_res_ids { + IXGBE_NVM_RES_ID = 1, + IXGBE_SPD_RES_ID, + IXGBE_CHANGE_LOCK_RES_ID, + IXGBE_GLOBAL_CFG_LOCK_RES_ID +}; + +/* Request resource ownership (direct 0x0008) + * Release resource ownership (direct 0x0009) + */ +struct ixgbe_aci_cmd_req_res { + __le16 res_id; + __le16 access_type; + + /* Upon successful completion, FW writes this value and driver is + * expected to release resource before timeout. This value is provided + * in milliseconds. 
+ */ + __le32 timeout; +#define IXGBE_ACI_RES_NVM_READ_DFLT_TIMEOUT_MS 3000 +#define IXGBE_ACI_RES_NVM_WRITE_DFLT_TIMEOUT_MS 180000 +#define IXGBE_ACI_RES_CHNG_LOCK_DFLT_TIMEOUT_MS 1000 +#define IXGBE_ACI_RES_GLBL_LOCK_DFLT_TIMEOUT_MS 3000 + /* For SDP: pin ID of the SDP */ + __le32 res_number; + __le16 status; +#define IXGBE_ACI_RES_GLBL_SUCCESS 0 +#define IXGBE_ACI_RES_GLBL_IN_PROG 1 +#define IXGBE_ACI_RES_GLBL_DONE 2 + u8 reserved[2]; +}; + +/* Get function capabilities (indirect 0x000A) + * Get device capabilities (indirect 0x000B) + */ +struct ixgbe_aci_cmd_list_caps { + u8 cmd_flags; + u8 pf_index; + u8 reserved[2]; + __le32 count; + __le32 addr_high; + __le32 addr_low; +}; + +/* Device/Function buffer entry, repeated per reported capability */ +struct ixgbe_aci_cmd_list_caps_elem { + __le16 cap; +#define IXGBE_ACI_CAPS_VALID_FUNCTIONS 0x0005 +#define IXGBE_ACI_MAX_VALID_FUNCTIONS 0x8 +#define IXGBE_ACI_CAPS_SRIOV 0x0012 +#define IXGBE_ACI_CAPS_VF 0x0013 +#define IXGBE_ACI_CAPS_VMDQ 0x0014 +#define IXGBE_ACI_CAPS_VSI 0x0017 +#define IXGBE_ACI_CAPS_DCB 0x0018 +#define IXGBE_ACI_CAPS_RSS 0x0040 +#define IXGBE_ACI_CAPS_RXQS 0x0041 +#define IXGBE_ACI_CAPS_TXQS 0x0042 +#define IXGBE_ACI_CAPS_MSIX 0x0043 +#define IXGBE_ACI_CAPS_FD 0x0045 +#define IXGBE_ACI_CAPS_1588 0x0046 +#define IXGBE_ACI_CAPS_MAX_MTU 0x0047 +#define IXGBE_ACI_CAPS_NVM_VER 0x0048 +#define IXGBE_ACI_CAPS_PENDING_NVM_VER 0x0049 +#define IXGBE_ACI_CAPS_OROM_VER 0x004A +#define IXGBE_ACI_CAPS_PENDING_OROM_VER 0x004B +#define IXGBE_ACI_CAPS_PENDING_NET_VER 0x004D +#define IXGBE_ACI_CAPS_INLINE_IPSEC 0x0070 +#define IXGBE_ACI_CAPS_NUM_ENABLED_PORTS 0x0072 +#define IXGBE_ACI_CAPS_PCIE_RESET_AVOIDANCE 0x0076 +#define IXGBE_ACI_CAPS_POST_UPDATE_RESET_RESTRICT 0x0077 +#define IXGBE_ACI_CAPS_NVM_MGMT 0x0080 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0 0x0081 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG1 0x0082 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG2 0x0083 +#define IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG3 0x0084 + u8 
major_ver; + u8 minor_ver; + /* Number of resources described by this capability */ + __le32 number; + /* Only meaningful for some types of resources */ + __le32 logical_id; + /* Only meaningful for some types of resources */ + __le32 phys_id; + __le64 rsvd1; + __le64 rsvd2; +}; + +/* Disable RXEN (direct 0x000C) */ +struct ixgbe_aci_cmd_disable_rxen { + u8 lport_num; + u8 reserved[15]; +}; + +/* Get PHY capabilities (indirect 0x0600) */ +struct ixgbe_aci_cmd_get_phy_caps { + u8 lport_num; + u8 reserved; + __le16 param0; + /* 18.0 - Report qualified modules */ +#define IXGBE_ACI_GET_PHY_RQM BIT(0) + /* 18.1 - 18.3 : Report mode + * 000b - Report topology capabilities, without media + * 001b - Report topology capabilities, with media + * 010b - Report Active configuration + * 011b - Report PHY Type and FEC mode capabilities + * 100b - Report Default capabilities + */ +#define IXGBE_ACI_REPORT_MODE_M GENMASK(3, 1) +#define IXGBE_ACI_REPORT_TOPO_CAP_NO_MEDIA 0 +#define IXGBE_ACI_REPORT_TOPO_CAP_MEDIA BIT(1) +#define IXGBE_ACI_REPORT_ACTIVE_CFG BIT(2) +#define IXGBE_ACI_REPORT_DFLT_CFG BIT(3) + __le32 reserved1; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is #define of PHY type (Extended): + * The first set of defines is for phy_type_low. 
+ */ +#define IXGBE_PHY_TYPE_LOW_100BASE_TX BIT_ULL(0) +#define IXGBE_PHY_TYPE_LOW_100M_SGMII BIT_ULL(1) +#define IXGBE_PHY_TYPE_LOW_1000BASE_T BIT_ULL(2) +#define IXGBE_PHY_TYPE_LOW_1000BASE_SX BIT_ULL(3) +#define IXGBE_PHY_TYPE_LOW_1000BASE_LX BIT_ULL(4) +#define IXGBE_PHY_TYPE_LOW_1000BASE_KX BIT_ULL(5) +#define IXGBE_PHY_TYPE_LOW_1G_SGMII BIT_ULL(6) +#define IXGBE_PHY_TYPE_LOW_2500BASE_T BIT_ULL(7) +#define IXGBE_PHY_TYPE_LOW_2500BASE_X BIT_ULL(8) +#define IXGBE_PHY_TYPE_LOW_2500BASE_KX BIT_ULL(9) +#define IXGBE_PHY_TYPE_LOW_5GBASE_T BIT_ULL(10) +#define IXGBE_PHY_TYPE_LOW_5GBASE_KR BIT_ULL(11) +#define IXGBE_PHY_TYPE_LOW_10GBASE_T BIT_ULL(12) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_DA BIT_ULL(13) +#define IXGBE_PHY_TYPE_LOW_10GBASE_SR BIT_ULL(14) +#define IXGBE_PHY_TYPE_LOW_10GBASE_LR BIT_ULL(15) +#define IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1 BIT_ULL(16) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC BIT_ULL(17) +#define IXGBE_PHY_TYPE_LOW_10G_SFI_C2C BIT_ULL(18) +#define IXGBE_PHY_TYPE_LOW_25GBASE_T BIT_ULL(19) +#define IXGBE_PHY_TYPE_LOW_25GBASE_CR BIT_ULL(20) +#define IXGBE_PHY_TYPE_LOW_25GBASE_CR_S BIT_ULL(21) +#define IXGBE_PHY_TYPE_LOW_25GBASE_CR1 BIT_ULL(22) +#define IXGBE_PHY_TYPE_LOW_25GBASE_SR BIT_ULL(23) +#define IXGBE_PHY_TYPE_LOW_25GBASE_LR BIT_ULL(24) +#define IXGBE_PHY_TYPE_LOW_25GBASE_KR BIT_ULL(25) +#define IXGBE_PHY_TYPE_LOW_25GBASE_KR_S BIT_ULL(26) +#define IXGBE_PHY_TYPE_LOW_25GBASE_KR1 BIT_ULL(27) +#define IXGBE_PHY_TYPE_LOW_25G_AUI_AOC_ACC BIT_ULL(28) +#define IXGBE_PHY_TYPE_LOW_25G_AUI_C2C BIT_ULL(29) +#define IXGBE_PHY_TYPE_LOW_MAX_INDEX 29 +/* The second set of defines is for phy_type_high. 
*/ +#define IXGBE_PHY_TYPE_HIGH_10BASE_T BIT_ULL(1) +#define IXGBE_PHY_TYPE_HIGH_10M_SGMII BIT_ULL(2) +#define IXGBE_PHY_TYPE_HIGH_2500M_SGMII BIT_ULL(56) +#define IXGBE_PHY_TYPE_HIGH_100M_USXGMII BIT_ULL(57) +#define IXGBE_PHY_TYPE_HIGH_1G_USXGMII BIT_ULL(58) +#define IXGBE_PHY_TYPE_HIGH_2500M_USXGMII BIT_ULL(59) +#define IXGBE_PHY_TYPE_HIGH_5G_USXGMII BIT_ULL(60) +#define IXGBE_PHY_TYPE_HIGH_10G_USXGMII BIT_ULL(61) +#define IXGBE_PHY_TYPE_HIGH_MAX_INDEX 61 + +struct ixgbe_aci_cmd_get_phy_caps_data { + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 caps; +#define IXGBE_ACI_PHY_EN_TX_LINK_PAUSE BIT(0) +#define IXGBE_ACI_PHY_EN_RX_LINK_PAUSE BIT(1) +#define IXGBE_ACI_PHY_LOW_POWER_MODE BIT(2) +#define IXGBE_ACI_PHY_EN_LINK BIT(3) +#define IXGBE_ACI_PHY_AN_MODE BIT(4) +#define IXGBE_ACI_PHY_EN_MOD_QUAL BIT(5) +#define IXGBE_ACI_PHY_EN_LESM BIT(6) +#define IXGBE_ACI_PHY_EN_AUTO_FEC BIT(7) +#define IXGBE_ACI_PHY_CAPS_MASK GENMASK(7, 0) + u8 low_power_ctrl_an; +#define IXGBE_ACI_PHY_EN_D3COLD_LOW_POWER_AUTONEG BIT(0) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE28 BIT(1) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE73 BIT(2) +#define IXGBE_ACI_PHY_AN_EN_CLAUSE37 BIT(3) + __le16 eee_cap; +#define IXGBE_ACI_PHY_EEE_EN_100BASE_TX BIT(0) +#define IXGBE_ACI_PHY_EEE_EN_1000BASE_T BIT(1) +#define IXGBE_ACI_PHY_EEE_EN_10GBASE_T BIT(2) +#define IXGBE_ACI_PHY_EEE_EN_1000BASE_KX BIT(3) +#define IXGBE_ACI_PHY_EEE_EN_10GBASE_KR BIT(4) +#define IXGBE_ACI_PHY_EEE_EN_25GBASE_KR BIT(5) +#define IXGBE_ACI_PHY_EEE_EN_10BASE_T BIT(11) + __le16 eeer_value; + u8 phy_id_oui[4]; /* PHY/Module ID connected on the port */ + u8 phy_fw_ver[8]; + u8 link_fec_options; +#define IXGBE_ACI_PHY_FEC_10G_KR_40G_KR4_EN BIT(0) +#define IXGBE_ACI_PHY_FEC_10G_KR_40G_KR4_REQ BIT(1) +#define IXGBE_ACI_PHY_FEC_25G_RS_528_REQ BIT(2) +#define IXGBE_ACI_PHY_FEC_25G_KR_REQ BIT(3) +#define IXGBE_ACI_PHY_FEC_25G_RS_544_REQ BIT(4) +#define 
IXGBE_ACI_PHY_FEC_25G_RS_CLAUSE91_EN BIT(6) +#define IXGBE_ACI_PHY_FEC_25G_KR_CLAUSE74_EN BIT(7) +#define IXGBE_ACI_PHY_FEC_MASK 0xdf + u8 module_compliance_enforcement; +#define IXGBE_ACI_MOD_ENFORCE_STRICT_MODE BIT(0) + u8 extended_compliance_code; +#define IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE 3 + u8 module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE]; +#define IXGBE_ACI_MOD_TYPE_BYTE0_SFP_PLUS 0xA0 +#define IXGBE_ACI_MOD_TYPE_BYTE0_QSFP_PLUS 0x80 +#define IXGBE_ACI_MOD_TYPE_IDENT 1 +#define IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE BIT(0) +#define IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE BIT(1) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_SR BIT(4) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LR BIT(5) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LRM BIT(6) +#define IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_ER BIT(7) +#define IXGBE_ACI_MOD_TYPE_BYTE2_SFP_PLUS 0xA0 +#define IXGBE_ACI_MOD_TYPE_BYTE2_QSFP_PLUS 0x86 + u8 qualified_module_count; + u8 rsvd2[7]; /* Bytes 47:41 reserved */ +#define IXGBE_ACI_QUAL_MOD_COUNT_MAX 16 + struct { + u8 v_oui[3]; + u8 rsvd3; + u8 v_part[16]; + __le32 v_rev; + __le64 rsvd4; + } qual_modules[IXGBE_ACI_QUAL_MOD_COUNT_MAX]; +}; + +/* Set PHY capabilities (direct 0x0601) + * NOTE: This command must be followed by setup link and restart auto-neg + */ +struct ixgbe_aci_cmd_set_phy_cfg { + u8 lport_num; + u8 reserved[7]; + __le32 addr_high; + __le32 addr_low; +}; + +/* Set PHY config command data structure */ +struct ixgbe_aci_cmd_set_phy_cfg_data { + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 caps; +#define IXGBE_ACI_PHY_ENA_VALID_MASK 0xef +#define IXGBE_ACI_PHY_ENA_TX_PAUSE_ABILITY BIT(0) +#define IXGBE_ACI_PHY_ENA_RX_PAUSE_ABILITY BIT(1) +#define IXGBE_ACI_PHY_ENA_LOW_POWER BIT(2) +#define IXGBE_ACI_PHY_ENA_LINK BIT(3) +#define IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT BIT(5) +#define IXGBE_ACI_PHY_ENA_LESM BIT(6) +#define IXGBE_ACI_PHY_ENA_AUTO_FEC BIT(7) + u8 
low_power_ctrl_an; + __le16 eee_cap; /* Value from ixgbe_aci_get_phy_caps */ + __le16 eeer_value; /* Use defines from ixgbe_aci_get_phy_caps */ + u8 link_fec_opt; /* Use defines from ixgbe_aci_get_phy_caps */ + u8 module_compliance_enforcement; +}; + +/* Restart AN command data structure (direct 0x0605) + * Also used for response, with only the lport_num field present. + */ +struct ixgbe_aci_cmd_restart_an { + u8 lport_num; + u8 reserved; + u8 cmd_flags; +#define IXGBE_ACI_RESTART_AN_LINK_RESTART BIT(1) +#define IXGBE_ACI_RESTART_AN_LINK_ENABLE BIT(2) + u8 reserved2[13]; +}; + +/* Get link status (indirect 0x0607), also used for Link Status Event */ +struct ixgbe_aci_cmd_get_link_status { + u8 lport_num; + u8 reserved; + __le16 cmd_flags; +#define IXGBE_ACI_LSE_M GENMASK(1, 0) +#define IXGBE_ACI_LSE_NOP 0x0 +#define IXGBE_ACI_LSE_DIS 0x2 +#define IXGBE_ACI_LSE_ENA 0x3 + /* only response uses this flag */ +#define IXGBE_ACI_LSE_IS_ENABLED 0x1 + __le32 reserved2; + __le32 addr_high; + __le32 addr_low; +}; + +/* Get link status response data structure, also used for Link Status Event */ +struct ixgbe_aci_cmd_get_link_status_data { + u8 topo_media_conflict; +#define IXGBE_ACI_LINK_TOPO_CONFLICT BIT(0) +#define IXGBE_ACI_LINK_MEDIA_CONFLICT BIT(1) +#define IXGBE_ACI_LINK_TOPO_CORRUPT BIT(2) +#define IXGBE_ACI_LINK_TOPO_UNREACH_PRT BIT(4) +#define IXGBE_ACI_LINK_TOPO_UNDRUTIL_PRT BIT(5) +#define IXGBE_ACI_LINK_TOPO_UNDRUTIL_MEDIA BIT(6) +#define IXGBE_ACI_LINK_TOPO_UNSUPP_MEDIA BIT(7) + u8 link_cfg_err; +#define IXGBE_ACI_LINK_CFG_ERR BIT(0) +#define IXGBE_ACI_LINK_CFG_COMPLETED BIT(1) +#define IXGBE_ACI_LINK_ACT_PORT_OPT_INVAL BIT(2) +#define IXGBE_ACI_LINK_FEAT_ID_OR_CONFIG_ID_INVAL BIT(3) +#define IXGBE_ACI_LINK_TOPO_CRITICAL_SDP_ERR BIT(4) +#define IXGBE_ACI_LINK_MODULE_POWER_UNSUPPORTED BIT(5) +#define IXGBE_ACI_LINK_EXTERNAL_PHY_LOAD_FAILURE BIT(6) +#define IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT BIT(7) + u8 link_info; +#define IXGBE_ACI_LINK_UP BIT(0) /* Link Status 
*/ +#define IXGBE_ACI_LINK_FAULT BIT(1) +#define IXGBE_ACI_LINK_FAULT_TX BIT(2) +#define IXGBE_ACI_LINK_FAULT_RX BIT(3) +#define IXGBE_ACI_LINK_FAULT_REMOTE BIT(4) +#define IXGBE_ACI_LINK_UP_PORT BIT(5) /* External Port Link Status */ +#define IXGBE_ACI_MEDIA_AVAILABLE BIT(6) +#define IXGBE_ACI_SIGNAL_DETECT BIT(7) + u8 an_info; +#define IXGBE_ACI_AN_COMPLETED BIT(0) +#define IXGBE_ACI_LP_AN_ABILITY BIT(1) +#define IXGBE_ACI_PD_FAULT BIT(2) /* Parallel Detection Fault */ +#define IXGBE_ACI_FEC_EN BIT(3) +#define IXGBE_ACI_PHY_LOW_POWER BIT(4) /* Low Power State */ +#define IXGBE_ACI_LINK_PAUSE_TX BIT(5) +#define IXGBE_ACI_LINK_PAUSE_RX BIT(6) +#define IXGBE_ACI_QUALIFIED_MODULE BIT(7) + u8 ext_info; +#define IXGBE_ACI_LINK_PHY_TEMP_ALARM BIT(0) +#define IXGBE_ACI_LINK_EXCESSIVE_ERRORS BIT(1) /* Excessive Link Errors */ + /* Port Tx Suspended */ +#define IXGBE_ACI_LINK_TX_ACTIVE 0 +#define IXGBE_ACI_LINK_TX_DRAINED 1 +#define IXGBE_ACI_LINK_TX_FLUSHED 3 + u8 lb_status; +#define IXGBE_ACI_LINK_LB_PHY_LCL BIT(0) +#define IXGBE_ACI_LINK_LB_PHY_RMT BIT(1) +#define IXGBE_ACI_LINK_LB_MAC_LCL BIT(2) + __le16 max_frame_size; + u8 cfg; +#define IXGBE_ACI_LINK_25G_KR_FEC_EN BIT(0) +#define IXGBE_ACI_LINK_25G_RS_528_FEC_EN BIT(1) +#define IXGBE_ACI_LINK_25G_RS_544_FEC_EN BIT(2) +#define IXGBE_ACI_FEC_MASK GENMASK(2, 0) + /* Pacing Config */ +#define IXGBE_ACI_CFG_PACING_M GENMASK(6, 3) +#define IXGBE_ACI_CFG_PACING_TYPE_M BIT(7) +#define IXGBE_ACI_CFG_PACING_TYPE_AVG 0 +#define IXGBE_ACI_CFG_PACING_TYPE_FIXED IXGBE_ACI_CFG_PACING_TYPE_M + /* External Device Power Ability */ + u8 power_desc; +#define IXGBE_ACI_PWR_CLASS_M GENMASK(5, 0) +#define IXGBE_ACI_LINK_PWR_BASET_LOW_HIGH 0 +#define IXGBE_ACI_LINK_PWR_BASET_HIGH 1 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_1 0 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_2 1 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_3 2 +#define IXGBE_ACI_LINK_PWR_QSFP_CLASS_4 3 + __le16 link_speed; +#define IXGBE_ACI_LINK_SPEED_M GENMASK(10, 0) +#define 
IXGBE_ACI_LINK_SPEED_10MB BIT(0) +#define IXGBE_ACI_LINK_SPEED_100MB BIT(1) +#define IXGBE_ACI_LINK_SPEED_1000MB BIT(2) +#define IXGBE_ACI_LINK_SPEED_2500MB BIT(3) +#define IXGBE_ACI_LINK_SPEED_5GB BIT(4) +#define IXGBE_ACI_LINK_SPEED_10GB BIT(5) +#define IXGBE_ACI_LINK_SPEED_20GB BIT(6) +#define IXGBE_ACI_LINK_SPEED_25GB BIT(7) +#define IXGBE_ACI_LINK_SPEED_40GB BIT(8) +#define IXGBE_ACI_LINK_SPEED_50GB BIT(9) +#define IXGBE_ACI_LINK_SPEED_100GB BIT(10) +#define IXGBE_ACI_LINK_SPEED_200GB BIT(11) +#define IXGBE_ACI_LINK_SPEED_UNKNOWN BIT(15) + __le16 reserved3; + u8 ext_fec_status; +#define IXGBE_ACI_LINK_RS_272_FEC_EN BIT(0) /* RS 272 FEC enabled */ + u8 reserved4; + __le64 phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + /* Get link status version 2 link partner data */ + __le64 lp_phy_type_low; /* Use values from IXGBE_PHY_TYPE_LOW_* */ + __le64 lp_phy_type_high; /* Use values from IXGBE_PHY_TYPE_HIGH_* */ + u8 lp_fec_adv; +#define IXGBE_ACI_LINK_LP_10G_KR_FEC_CAP BIT(0) +#define IXGBE_ACI_LINK_LP_25G_KR_FEC_CAP BIT(1) +#define IXGBE_ACI_LINK_LP_RS_528_FEC_CAP BIT(2) +#define IXGBE_ACI_LINK_LP_50G_KR_272_FEC_CAP BIT(3) +#define IXGBE_ACI_LINK_LP_100G_KR_272_FEC_CAP BIT(4) +#define IXGBE_ACI_LINK_LP_200G_KR_272_FEC_CAP BIT(5) + u8 lp_fec_req; +#define IXGBE_ACI_LINK_LP_10G_KR_FEC_REQ BIT(0) +#define IXGBE_ACI_LINK_LP_25G_KR_FEC_REQ BIT(1) +#define IXGBE_ACI_LINK_LP_RS_528_FEC_REQ BIT(2) +#define IXGBE_ACI_LINK_LP_KR_272_FEC_REQ BIT(3) + u8 lp_flowcontrol; +#define IXGBE_ACI_LINK_LP_PAUSE_ADV BIT(0) +#define IXGBE_ACI_LINK_LP_ASM_DIR_ADV BIT(1) + u8 reserved5[5]; +} __packed; + +/* Set event mask command (direct 0x0613) */ +struct ixgbe_aci_cmd_set_event_mask { + u8 lport_num; + u8 reserved[7]; + __le16 event_mask; +#define IXGBE_ACI_LINK_EVENT_UPDOWN BIT(1) +#define IXGBE_ACI_LINK_EVENT_MEDIA_NA BIT(2) +#define IXGBE_ACI_LINK_EVENT_LINK_FAULT BIT(3) +#define IXGBE_ACI_LINK_EVENT_PHY_TEMP_ALARM
BIT(4) +#define IXGBE_ACI_LINK_EVENT_EXCESSIVE_ERRORS BIT(5) +#define IXGBE_ACI_LINK_EVENT_SIGNAL_DETECT BIT(6) +#define IXGBE_ACI_LINK_EVENT_AN_COMPLETED BIT(7) +#define IXGBE_ACI_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) +#define IXGBE_ACI_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) +#define IXGBE_ACI_LINK_EVENT_TOPO_CONFLICT BIT(10) +#define IXGBE_ACI_LINK_EVENT_MEDIA_CONFLICT BIT(11) +#define IXGBE_ACI_LINK_EVENT_PHY_FW_LOAD_FAIL BIT(12) + u8 reserved1[6]; +}; + +struct ixgbe_aci_cmd_link_topo_params { + u8 lport_num; + u8 lport_num_valid; +#define IXGBE_ACI_LINK_TOPO_PORT_NUM_VALID BIT(0) + u8 node_type_ctx; +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_M GENMASK(3, 0) +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_PHY 0 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_GPIO_CTRL 1 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_MUX_CTRL 2 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_LED_CTRL 3 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_LED 4 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_THERMAL 5 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE 6 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_MEZZ 7 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_ID_EEPROM 8 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_CLK_CTRL 9 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_CLK_MUX 10 +#define IXGBE_ACI_LINK_TOPO_NODE_TYPE_GPS 11 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_S 4 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_GLOBAL 0 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_BOARD 1 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT 2 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE 3 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE_HANDLE 4 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_DIRECT_BUS_ACCESS 5 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_NODE_HANDLE_BUS_ADDRESS 6 + u8 index; +}; + +struct ixgbe_aci_cmd_link_topo_addr { + struct ixgbe_aci_cmd_link_topo_params topo_params; + __le16 handle; +/* Used to decode the handle field */ +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_M BIT(9) +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_LOM BIT(9) +#define IXGBE_ACI_LINK_TOPO_HANDLE_BRD_TYPE_MEZZ 0 +}; + +/* Get Link Topology 
Handle (direct, 0x06E0) */ +struct ixgbe_aci_cmd_get_link_topo { + struct ixgbe_aci_cmd_link_topo_addr addr; + u8 node_part_num; +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_ZL30632_80032 0x24 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_SI5384 0x25 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_C827 0x31 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_GEN_CLK_MUX 0x47 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_GEN_GPS 0x48 +#define IXGBE_ACI_GET_LINK_TOPO_NODE_NR_E610_PTC 0x49 + u8 rsvd[9]; +}; + +/* Get Link Topology Pin (direct, 0x06E1) */ +struct ixgbe_aci_cmd_get_link_topo_pin { + struct ixgbe_aci_cmd_link_topo_addr addr; + u8 input_io_params; +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_GPIO 0 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_RESET_N 1 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_INT_N 2 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_PRESENT_N 3 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_TX_DIS 4 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_MODSEL_N 5 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_LPMODE 6 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_TX_FAULT 7 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_RX_LOSS 8 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_RS0 9 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_RS1 10 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_EEPROM_WP 11 +/* 12 repeats intentionally due to two different uses depending on context */ +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_LED 12 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_RED_LED 12 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_GREEN_LED 13 +#define IXGBE_ACI_LINK_TOPO_IO_FUNC_BLUE_LED 14 +#define IXGBE_ACI_LINK_TOPO_INPUT_IO_TYPE_GPIO 3 +/* Use IXGBE_ACI_LINK_TOPO_NODE_TYPE_* for the type values */ + u8 output_io_params; +/* Use IXGBE_ACI_LINK_TOPO_NODE_TYPE_* for the type values */ + u8 output_io_flags; +#define IXGBE_ACI_LINK_TOPO_OUTPUT_POLARITY BIT(5) +#define IXGBE_ACI_LINK_TOPO_OUTPUT_VALUE BIT(6) +#define IXGBE_ACI_LINK_TOPO_OUTPUT_DRIVEN BIT(7) + u8 rsvd[7]; +}; + +/* Read/Write SFF EEPROM command (indirect 0x06EE) */ +struct ixgbe_aci_cmd_sff_eeprom { + 
u8 lport_num; + u8 lport_num_valid; +#define IXGBE_ACI_SFF_PORT_NUM_VALID BIT(0) + __le16 i2c_bus_addr; +#define IXGBE_ACI_SFF_I2CBUS_7BIT_M GENMASK(6, 0) +#define IXGBE_ACI_SFF_I2CBUS_10BIT_M GENMASK(9, 0) +#define IXGBE_ACI_SFF_I2CBUS_TYPE_M BIT(10) +#define IXGBE_ACI_SFF_I2CBUS_TYPE_7BIT 0 +#define IXGBE_ACI_SFF_I2CBUS_TYPE_10BIT IXGBE_ACI_SFF_I2CBUS_TYPE_M +#define IXGBE_ACI_SFF_NO_PAGE_BANK_UPDATE 0 +#define IXGBE_ACI_SFF_UPDATE_PAGE 1 +#define IXGBE_ACI_SFF_UPDATE_BANK 2 +#define IXGBE_ACI_SFF_UPDATE_PAGE_BANK 3 +#define IXGBE_ACI_SFF_IS_WRITE BIT(15) + __le16 i2c_offset; + u8 module_bank; + u8 module_page; + __le32 addr_high; + __le32 addr_low; +}; + +/* NVM Read command (indirect 0x0701) + * NVM Erase commands (direct 0x0702) + * NVM Write commands (indirect 0x0703) + * NVM Write Activate commands (direct 0x0707) + * NVM Shadow RAM Dump commands (direct 0x0707) + */ +struct ixgbe_aci_cmd_nvm { +#define IXGBE_ACI_NVM_MAX_OFFSET 0xFFFFFF + __le16 offset_low; + u8 offset_high; /* For Write Activate offset_high is used as flags2 */ + u8 cmd_flags; +#define IXGBE_ACI_NVM_LAST_CMD BIT(0) +#define IXGBE_ACI_NVM_PCIR_REQ BIT(0) /* Used by NVM Write reply */ +#define IXGBE_ACI_NVM_PRESERVE_ALL BIT(1) +#define IXGBE_ACI_NVM_ACTIV_SEL_NVM BIT(3) /* Write Activate/SR Dump only */ +#define IXGBE_ACI_NVM_ACTIV_SEL_OROM BIT(4) +#define IXGBE_ACI_NVM_ACTIV_SEL_NETLIST BIT(5) +#define IXGBE_ACI_NVM_SPECIAL_UPDATE BIT(6) +#define IXGBE_ACI_NVM_REVERT_LAST_ACTIV BIT(6) /* Write Activate only */ +#define IXGBE_ACI_NVM_FLASH_ONLY BIT(7) +#define IXGBE_ACI_NVM_RESET_LVL_M GENMASK(1, 0) /* Write reply only */ +#define IXGBE_ACI_NVM_POR_FLAG 0 +#define IXGBE_ACI_NVM_PERST_FLAG 1 +#define IXGBE_ACI_NVM_EMPR_FLAG 2 +#define IXGBE_ACI_NVM_EMPR_ENA BIT(0) /* Write Activate reply only */ + /* For Write Activate, several flags are sent as part of a separate + * flags2 field using a separate byte. 
For simplicity of the software + * interface, we pass the flags as a 16 bit value so these flags are + * all offset by 8 bits + */ +#define IXGBE_ACI_NVM_ACTIV_REQ_EMPR BIT(8) /* NVM Write Activate only */ + __le16 module_typeid; + __le16 length; +#define IXGBE_ACI_NVM_ERASE_LEN 0xFFFF + __le32 addr_high; + __le32 addr_low; +}; + +/* NVM Module_Type ID, needed offset and read_len for + * struct ixgbe_aci_cmd_nvm. + */ +#define IXGBE_ACI_NVM_START_POINT 0 + +/* NVM Checksum Command (direct, 0x0706) */ +struct ixgbe_aci_cmd_nvm_checksum { + u8 flags; +#define IXGBE_ACI_NVM_CHECKSUM_VERIFY BIT(0) +#define IXGBE_ACI_NVM_CHECKSUM_RECALC BIT(1) + u8 rsvd; + __le16 checksum; /* Used only by response */ +#define IXGBE_ACI_NVM_CHECKSUM_CORRECT 0xBABA + u8 rsvd2[12]; +}; + +/** + * struct ixgbe_aci_desc - Admin Command (AC) descriptor + * @flags: IXGBE_ACI_FLAG_* flags + * @opcode: Admin command opcode + * @datalen: length in bytes of indirect/external data buffer + * @retval: return value from firmware + * @cookie_high: opaque data high-half + * @cookie_low: opaque data low-half + * @params: command-specific parameters + * + * Descriptor format for commands the driver posts via the + * Admin Command Interface (ACI). + * The firmware writes back onto the command descriptor and returns + * the result of the command. Asynchronous events that are not an immediate + * result of the command are written to the Admin Command Interface (ACI) using + * the same descriptor format. Descriptors are in little-endian notation with + * 32-bit words. 
+ */ +struct ixgbe_aci_desc { + __le16 flags; + __le16 opcode; + __le16 datalen; + __le16 retval; + __le32 cookie_high; + __le32 cookie_low; + union { + u8 raw[16]; + struct ixgbe_aci_cmd_get_ver get_ver; + struct ixgbe_aci_cmd_driver_ver driver_ver; + struct ixgbe_aci_cmd_get_exp_err exp_err; + struct ixgbe_aci_cmd_req_res res_owner; + struct ixgbe_aci_cmd_list_caps get_cap; + struct ixgbe_aci_cmd_disable_rxen disable_rxen; + struct ixgbe_aci_cmd_get_phy_caps get_phy; + struct ixgbe_aci_cmd_set_phy_cfg set_phy; + struct ixgbe_aci_cmd_restart_an restart_an; + struct ixgbe_aci_cmd_get_link_status get_link_status; + struct ixgbe_aci_cmd_set_event_mask set_event_mask; + struct ixgbe_aci_cmd_get_link_topo get_link_topo; + struct ixgbe_aci_cmd_get_link_topo_pin get_link_topo_pin; + struct ixgbe_aci_cmd_sff_eeprom read_write_sff_param; + struct ixgbe_aci_cmd_nvm nvm; + struct ixgbe_aci_cmd_nvm_checksum nvm_checksum; + } params; +}; + +/* E610-specific adapter context structures */ + +struct ixgbe_link_status { + /* Refer to ixgbe_aci_phy_type for bits definition */ + u64 phy_type_low; + u64 phy_type_high; + u16 max_frame_size; + u16 link_speed; + u16 req_speeds; + u8 topo_media_conflict; + u8 link_cfg_err; + u8 lse_ena; /* Link Status Event notification */ + u8 link_info; + u8 an_info; + u8 ext_info; + u8 fec_info; + u8 pacing; + /* Refer to #define from module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE] + * of ixgbe_aci_get_phy_caps structure + */ + u8 module_type[IXGBE_ACI_MODULE_TYPE_TOTAL_BYTE]; +}; + +/* Common HW capabilities for SW use */ +struct ixgbe_hw_caps { + /* Write CSR protection */ + u64 wr_csr_prot; + u32 switching_mode; + /* switching mode supported - EVB switching (including cloud) */ +#define IXGBE_NVM_IMAGE_TYPE_EVB 0x0 + + /* Manageability mode & supported protocols over MCTP */ + u32 mgmt_mode; +#define IXGBE_MGMT_MODE_PASS_THRU_MODE_M GENMASK(3, 0) +#define IXGBE_MGMT_MODE_CTL_INTERFACE_M GENMASK(7, 4) +#define IXGBE_MGMT_MODE_REDIR_SB_INTERFACE_M 
GENMASK(11, 8) + + u32 mgmt_protocols_mctp; +#define IXGBE_MGMT_MODE_PROTO_RSVD BIT(0) +#define IXGBE_MGMT_MODE_PROTO_PLDM BIT(1) +#define IXGBE_MGMT_MODE_PROTO_OEM BIT(2) +#define IXGBE_MGMT_MODE_PROTO_NC_SI BIT(3) + + u32 os2bmc; + u32 valid_functions; + /* DCB capabilities */ + u32 active_tc_bitmap; + u32 maxtc; + + /* RSS related capabilities */ + u32 rss_table_size; /* 512 for PFs and 64 for VFs */ + u32 rss_table_entry_width; /* RSS Entry width in bits */ + + /* Tx/Rx queues */ + u32 num_rxq; /* Number/Total Rx queues */ + u32 rxq_first_id; /* First queue ID for Rx queues */ + u32 num_txq; /* Number/Total Tx queues */ + u32 txq_first_id; /* First queue ID for Tx queues */ + + /* MSI-X vectors */ + u32 num_msix_vectors; + u32 msix_vector_first_id; + + /* Max MTU for function or device */ + u32 max_mtu; + + /* WOL related */ + u32 num_wol_proxy_fltr; + u32 wol_proxy_vsi_seid; + + /* LED/SDP pin count */ + u32 led_pin_num; + u32 sdp_pin_num; + + /* LED/SDP - Supports up to 12 LED pins and 8 SDP signals */ +#define IXGBE_MAX_SUPPORTED_GPIO_LED 12 +#define IXGBE_MAX_SUPPORTED_GPIO_SDP 8 + u8 led[IXGBE_MAX_SUPPORTED_GPIO_LED]; + u8 sdp[IXGBE_MAX_SUPPORTED_GPIO_SDP]; + /* SR-IOV virtualization */ + u8 sr_iov_1_1; /* SR-IOV enabled */ + /* VMDQ */ + u8 vmdq; /* VMDQ supported */ + + /* EVB capabilities */ + u8 evb_802_1_qbg; /* Edge Virtual Bridging */ + u8 evb_802_1_qbh; /* Bridge Port Extension */ + + u8 dcb; + u8 iscsi; + u8 ieee_1588; + u8 mgmt_cem; + + /* WoL and APM support */ +#define IXGBE_WOL_SUPPORT_M BIT(0) +#define IXGBE_ACPI_PROG_MTHD_M BIT(1) +#define IXGBE_PROXY_SUPPORT_M BIT(2) + u8 apm_wol_support; + u8 acpi_prog_mthd; + u8 proxy_support; + bool nvm_update_pending_nvm; + bool nvm_update_pending_orom; + bool nvm_update_pending_netlist; +#define IXGBE_NVM_PENDING_NVM_IMAGE BIT(0) +#define IXGBE_NVM_PENDING_OROM BIT(1) +#define IXGBE_NVM_PENDING_NETLIST BIT(2) + bool sec_rev_disabled; + bool update_disabled; + bool nvm_unified_update; + bool 
netlist_auth; +#define IXGBE_NVM_MGMT_SEC_REV_DISABLED BIT(0) +#define IXGBE_NVM_MGMT_UPDATE_DISABLED BIT(1) +#define IXGBE_NVM_MGMT_UNIFIED_UPD_SUPPORT BIT(3) +#define IXGBE_NVM_MGMT_NETLIST_AUTH_SUPPORT BIT(5) + bool no_drop_policy_support; + /* PCIe reset avoidance */ + bool pcie_reset_avoidance; /* false: not supported, true: supported */ + /* Post update reset restriction */ + bool reset_restrict_support; /* false: not supported, true: supported */ + + /* External topology device images within the NVM */ +#define IXGBE_EXT_TOPO_DEV_IMG_COUNT 4 + u32 ext_topo_dev_img_ver_high[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; + u32 ext_topo_dev_img_ver_low[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; + u8 ext_topo_dev_img_part_num[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_S 8 +#define IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_M GENMASK(15, 8) + bool ext_topo_dev_img_load_en[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_LOAD_EN BIT(0) + bool ext_topo_dev_img_prog_en[IXGBE_EXT_TOPO_DEV_IMG_COUNT]; +#define IXGBE_EXT_TOPO_DEV_IMG_PROG_EN BIT(1) +} __packed; + +/* Function specific capabilities */ +struct ixgbe_hw_func_caps { + u32 num_allocd_vfs; /* Number of allocated VFs */ + u32 vf_base_id; /* Logical ID of the first VF */ + u32 guar_num_vsi; + struct ixgbe_hw_caps common_cap; + bool no_drop_policy_ena; +}; + +/* Device wide capabilities */ +struct ixgbe_hw_dev_caps { + struct ixgbe_hw_caps common_cap; + u32 num_vfs_exposed; /* Total number of VFs exposed */ + u32 num_vsi_allocd_to_host; /* Excluding EMP VSI */ + u32 num_flow_director_fltr; /* Number of FD filters available */ + u32 num_funcs; +}; + +/* ACI event information */ +struct ixgbe_aci_event { + struct ixgbe_aci_desc desc; + u8 *msg_buf; + u16 msg_len; + u16 buf_len; +}; + +struct ixgbe_aci_info { + struct mutex lock; /* admin command interface lock */ + enum ixgbe_aci_err last_status; /* last status of sent admin command */ +}; + +/* Option ROM version information */ +struct ixgbe_orom_info { + u8 
major; /* Major version of OROM */ + u8 patch; /* Patch version of OROM */ + u16 build; /* Build version of OROM */ + u32 srev; /* Security revision */ +}; + +/* NVM version information */ +struct ixgbe_nvm_info { + u32 eetrack; + u32 srev; + u8 major; + u8 minor; +} __packed; + +/* netlist version information */ +struct ixgbe_netlist_info { + u32 major; /* major high/low */ + u32 minor; /* minor high/low */ + u32 type; /* type high/low */ + u32 rev; /* revision high/low */ + u32 hash; /* SHA-1 hash word */ + u16 cust_ver; /* customer version */ +} __packed; + +/* Enumeration of possible flash banks for the NVM, OROM, and Netlist modules + * of the flash image. + */ +enum ixgbe_flash_bank { + IXGBE_INVALID_FLASH_BANK, + IXGBE_1ST_FLASH_BANK, + IXGBE_2ND_FLASH_BANK, +}; + +/* information for accessing NVM, OROM, and Netlist flash banks */ +struct ixgbe_bank_info { + u32 nvm_ptr; /* Pointer to 1st NVM bank */ + u32 nvm_size; /* Size of NVM bank */ + u32 orom_ptr; /* Pointer to 1st OROM bank */ + u32 orom_size; /* Size of OROM bank */ + u32 netlist_ptr; /* Ptr to 1st Netlist bank */ + u32 netlist_size; /* Size of Netlist bank */ + enum ixgbe_flash_bank nvm_bank; /* Active NVM bank */ + enum ixgbe_flash_bank orom_bank; /* Active OROM bank */ + enum ixgbe_flash_bank netlist_bank; /* Active Netlist bank */ +}; + +/* Flash Chip Information */ +struct ixgbe_flash_info { + struct ixgbe_orom_info orom; /* Option ROM version info */ + u32 flash_size; /* Available flash size in bytes */ + struct ixgbe_nvm_info nvm; /* NVM version information */ + struct ixgbe_netlist_info netlist; /* Netlist version info */ + struct ixgbe_bank_info banks; /* Flash Bank information */ + u16 sr_words; /* Shadow RAM size in words */ + u8 blank_nvm_mode; /* is NVM empty (no FW present) */ +}; + +#endif /* _IXGBE_TYPE_E610_H_ */ From 7c3aa0fccb1944921f13a6e0084d3aa070b4ff7b Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:44 +0100 Subject: [PATCH 0559/1386] ixgbe: Add 
support for E610 device capabilities detection Add low level support for E610 device capabilities detection. The capabilities are discovered via the Admin Command Interface. Discover the following capabilities: - function caps: vmdq, dcb, rss, rx/tx qs, msix, nvm, orom, reset - device caps: vsi, fdir, 1588 - phy caps Co-developed-by: Stefan Wegrzyn Signed-off-by: Stefan Wegrzyn Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Reviewed-by: Jan Sokolowski Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 529 ++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 12 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 7 + 3 files changed, 548 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 6a26f6b4d3d5..9e15b9c11120 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -495,3 +495,532 @@ void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res) total_delay++; } } + +/** + * ixgbe_parse_e610_caps - Parse common device/function capabilities + * @hw: pointer to the HW struct + * @caps: pointer to common capabilities structure + * @elem: the capability element to parse + * @prefix: message prefix for tracing capabilities + * + * Given a capability element, extract relevant details into the common + * capability structure. + * + * Return: true if the capability matches one of the common capability ids, + * false otherwise. 
+ */ +static bool ixgbe_parse_e610_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_caps *caps, + struct ixgbe_aci_cmd_list_caps_elem *elem, + const char *prefix) +{ + u32 logical_id = le32_to_cpu(elem->logical_id); + u32 phys_id = le32_to_cpu(elem->phys_id); + u32 number = le32_to_cpu(elem->number); + u16 cap = le16_to_cpu(elem->cap); + + switch (cap) { + case IXGBE_ACI_CAPS_VALID_FUNCTIONS: + caps->valid_functions = number; + break; + case IXGBE_ACI_CAPS_SRIOV: + caps->sr_iov_1_1 = (number == 1); + break; + case IXGBE_ACI_CAPS_VMDQ: + caps->vmdq = (number == 1); + break; + case IXGBE_ACI_CAPS_DCB: + caps->dcb = (number == 1); + caps->active_tc_bitmap = logical_id; + caps->maxtc = phys_id; + break; + case IXGBE_ACI_CAPS_RSS: + caps->rss_table_size = number; + caps->rss_table_entry_width = logical_id; + break; + case IXGBE_ACI_CAPS_RXQS: + caps->num_rxq = number; + caps->rxq_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_TXQS: + caps->num_txq = number; + caps->txq_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_MSIX: + caps->num_msix_vectors = number; + caps->msix_vector_first_id = phys_id; + break; + case IXGBE_ACI_CAPS_NVM_VER: + break; + case IXGBE_ACI_CAPS_MAX_MTU: + caps->max_mtu = number; + break; + case IXGBE_ACI_CAPS_PCIE_RESET_AVOIDANCE: + caps->pcie_reset_avoidance = (number > 0); + break; + case IXGBE_ACI_CAPS_POST_UPDATE_RESET_RESTRICT: + caps->reset_restrict_support = (number == 1); + break; + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG1: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG2: + case IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG3: + { + u8 index = cap - IXGBE_ACI_CAPS_EXT_TOPO_DEV_IMG0; + + caps->ext_topo_dev_img_ver_high[index] = number; + caps->ext_topo_dev_img_ver_low[index] = logical_id; + caps->ext_topo_dev_img_part_num[index] = + FIELD_GET(IXGBE_EXT_TOPO_DEV_IMG_PART_NUM_M, phys_id); + caps->ext_topo_dev_img_load_en[index] = + (phys_id & IXGBE_EXT_TOPO_DEV_IMG_LOAD_EN) != 0; + caps->ext_topo_dev_img_prog_en[index] = + 
(phys_id & IXGBE_EXT_TOPO_DEV_IMG_PROG_EN) != 0; + break; + } + default: + /* Not one of the recognized common capabilities */ + return false; + } + + return true; +} + +/** + * ixgbe_parse_valid_functions_cap - Parse IXGBE_ACI_CAPS_VALID_FUNCTIONS caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VALID_FUNCTIONS for device capabilities. + */ +static void +ixgbe_parse_valid_functions_cap(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + dev_p->num_funcs = hweight32(le32_to_cpu(cap->number)); +} + +/** + * ixgbe_parse_vf_dev_caps - Parse IXGBE_ACI_CAPS_VF device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VF for device capabilities. + */ +static void ixgbe_parse_vf_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + dev_p->num_vfs_exposed = le32_to_cpu(cap->number); +} + +/** + * ixgbe_parse_vsi_dev_caps - Parse IXGBE_ACI_CAPS_VSI device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_VSI for device capabilities. + */ +static void ixgbe_parse_vsi_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + dev_p->num_vsi_allocd_to_host = le32_to_cpu(cap->number); +} + +/** + * ixgbe_parse_fdir_dev_caps - Parse IXGBE_ACI_CAPS_FD device caps + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @cap: capability element to parse + * + * Parse IXGBE_ACI_CAPS_FD for device capabilities. 
+ */ +static void ixgbe_parse_fdir_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + dev_p->num_flow_director_fltr = le32_to_cpu(cap->number); +} + +/** + * ixgbe_parse_dev_caps - Parse device capabilities + * @hw: pointer to the HW struct + * @dev_p: pointer to device capabilities structure + * @buf: buffer containing the device capability records + * @cap_count: the number of capabilities + * + * Helper function to parse device (0x000B) capabilities list. For + * capabilities shared between device and function, this relies on + * ixgbe_parse_e610_caps. + * + * Loop through the list of provided capabilities and extract the relevant + * data into the device capabilities structure. + */ +static void ixgbe_parse_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_p, + void *buf, u32 cap_count) +{ + struct ixgbe_aci_cmd_list_caps_elem *cap_resp; + u32 i; + + cap_resp = (struct ixgbe_aci_cmd_list_caps_elem *)buf; + + memset(dev_p, 0, sizeof(*dev_p)); + + for (i = 0; i < cap_count; i++) { + u16 cap = le16_to_cpu(cap_resp[i].cap); + + ixgbe_parse_e610_caps(hw, &dev_p->common_cap, &cap_resp[i], + "dev caps"); + + switch (cap) { + case IXGBE_ACI_CAPS_VALID_FUNCTIONS: + ixgbe_parse_valid_functions_cap(hw, dev_p, + &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VF: + ixgbe_parse_vf_dev_caps(hw, dev_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VSI: + ixgbe_parse_vsi_dev_caps(hw, dev_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_FD: + ixgbe_parse_fdir_dev_caps(hw, dev_p, &cap_resp[i]); + break; + default: + /* Don't list common capabilities as unknown */ + break; + } + } +} + +/** + * ixgbe_parse_vf_func_caps - Parse IXGBE_ACI_CAPS_VF function caps + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @cap: pointer to the capability element to parse + * + * Extract function capabilities for IXGBE_ACI_CAPS_VF.
+ */ +static void ixgbe_parse_vf_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + func_p->num_allocd_vfs = le32_to_cpu(cap->number); + func_p->vf_base_id = le32_to_cpu(cap->logical_id); +} + +/** + * ixgbe_get_num_per_func - determine number of resources per PF + * @hw: pointer to the HW structure + * @max: value to be evenly split between each PF + * + * Determine the number of valid functions by going through the bitmap returned + * from parsing capabilities and use this to calculate the number of resources + * per PF based on the max value passed in. + * + * Return: the number of resources per PF or 0, if no PFs are available. + */ +static u32 ixgbe_get_num_per_func(struct ixgbe_hw *hw, u32 max) +{ +#define IXGBE_CAPS_VALID_FUNCS_M GENMASK(7, 0) + u8 funcs = hweight8(hw->dev_caps.common_cap.valid_functions & + IXGBE_CAPS_VALID_FUNCS_M); + + return funcs ? (max / funcs) : 0; +} + +/** + * ixgbe_parse_vsi_func_caps - Parse IXGBE_ACI_CAPS_VSI function caps + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @cap: pointer to the capability element to parse + * + * Extract function capabilities for IXGBE_ACI_CAPS_VSI. + */ +static void ixgbe_parse_vsi_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + struct ixgbe_aci_cmd_list_caps_elem *cap) +{ + func_p->guar_num_vsi = ixgbe_get_num_per_func(hw, IXGBE_MAX_VSI); +} + +/** + * ixgbe_parse_func_caps - Parse function capabilities + * @hw: pointer to the HW struct + * @func_p: pointer to function capabilities structure + * @buf: buffer containing the function capability records + * @cap_count: the number of capabilities + * + * Helper function to parse function (0x000A) capabilities list. For + * capabilities shared between device and function, this relies on + * ixgbe_parse_e610_caps.
+ * + * Loop through the list of provided capabilities and extract the relevant + * data into the function capabilities structure. + */ +static void ixgbe_parse_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_p, + void *buf, u32 cap_count) +{ + struct ixgbe_aci_cmd_list_caps_elem *cap_resp; + u32 i; + + cap_resp = (struct ixgbe_aci_cmd_list_caps_elem *)buf; + + memset(func_p, 0, sizeof(*func_p)); + + for (i = 0; i < cap_count; i++) { + u16 cap = le16_to_cpu(cap_resp[i].cap); + + ixgbe_parse_e610_caps(hw, &func_p->common_cap, + &cap_resp[i], "func caps"); + + switch (cap) { + case IXGBE_ACI_CAPS_VF: + ixgbe_parse_vf_func_caps(hw, func_p, &cap_resp[i]); + break; + case IXGBE_ACI_CAPS_VSI: + ixgbe_parse_vsi_func_caps(hw, func_p, &cap_resp[i]); + break; + default: + /* Don't list common capabilities as unknown */ + break; + } + } +} + +/** + * ixgbe_aci_list_caps - query function/device capabilities + * @hw: pointer to the HW struct + * @buf: a buffer to hold the capabilities + * @buf_size: size of the buffer + * @cap_count: if not NULL, set to the number of capabilities reported + * @opc: capabilities type to discover, device or function + * + * Get the function (0x000A) or device (0x000B) capabilities description from + * firmware and store it in the buffer. + * + * If the cap_count pointer is not NULL, then it is set to the number of + * capabilities firmware will report. Note that if the buffer size is too + * small, it is possible the command will return -ENOMEM. The + * cap_count will still be updated in this case. It is recommended that the + * buffer size be set to IXGBE_ACI_MAX_BUFFER_SIZE (the largest possible + * buffer that firmware could return) to avoid this. + * + * Return: the exit code of the operation. + * Exit code of -ENOMEM means the buffer size is too small.
+ */ +int ixgbe_aci_list_caps(struct ixgbe_hw *hw, void *buf, u16 buf_size, + u32 *cap_count, enum ixgbe_aci_opc opc) +{ + struct ixgbe_aci_cmd_list_caps *cmd; + struct ixgbe_aci_desc desc; + int err; + + cmd = &desc.params.get_cap; + + if (opc != ixgbe_aci_opc_list_func_caps && + opc != ixgbe_aci_opc_list_dev_caps) + return -EINVAL; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, opc); + err = ixgbe_aci_send_cmd(hw, &desc, buf, buf_size); + + if (cap_count) + *cap_count = le32_to_cpu(cmd->count); + + return err; +} + +/** + * ixgbe_discover_dev_caps - Read and extract device capabilities + * @hw: pointer to the hardware structure + * @dev_caps: pointer to device capabilities structure + * + * Read the device capabilities and extract them into the dev_caps structure + * for later use. The structure is only written when the query succeeds. + * + * Return: 0 on success, -ENOMEM if the capability buffer cannot be + * allocated, or the error returned by ixgbe_aci_list_caps(). + */ +int ixgbe_discover_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_caps) +{ + u32 cap_count; + u8 *cbuf; + int err; + + cbuf = kzalloc(IXGBE_ACI_MAX_BUFFER_SIZE, GFP_KERNEL); + if (!cbuf) + return -ENOMEM; + + /* Although the driver doesn't know the number of capabilities the + * device will return, we can simply send a 4KB buffer, the maximum + * possible size that firmware can return. + */ + cap_count = IXGBE_ACI_MAX_BUFFER_SIZE / + sizeof(struct ixgbe_aci_cmd_list_caps_elem); + + err = ixgbe_aci_list_caps(hw, cbuf, IXGBE_ACI_MAX_BUFFER_SIZE, + &cap_count, + ixgbe_aci_opc_list_dev_caps); + if (!err) + ixgbe_parse_dev_caps(hw, dev_caps, cbuf, cap_count); + + kfree(cbuf); + + /* Propagate the query result rather than masking a failure. */ + return err; +} + +/** + * ixgbe_discover_func_caps - Read and extract function capabilities + * @hw: pointer to the hardware structure + * @func_caps: pointer to function capabilities structure + * + * Read the function capabilities and extract them into the func_caps structure + * for later use. + * + * Return: the exit code of the operation.
+ */ +int ixgbe_discover_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_caps) +{ + u32 cap_count; + u8 *cbuf; + int err; + + cbuf = kzalloc(IXGBE_ACI_MAX_BUFFER_SIZE, GFP_KERNEL); + if (!cbuf) + return -ENOMEM; + + /* Although the driver doesn't know the number of capabilities the + * device will return, we can simply send a 4KB buffer, the maximum + * possible size that firmware can return. + */ + cap_count = IXGBE_ACI_MAX_BUFFER_SIZE / + sizeof(struct ixgbe_aci_cmd_list_caps_elem); + + err = ixgbe_aci_list_caps(hw, cbuf, IXGBE_ACI_MAX_BUFFER_SIZE, + &cap_count, + ixgbe_aci_opc_list_func_caps); + if (!err) + ixgbe_parse_func_caps(hw, func_caps, cbuf, cap_count); + + kfree(cbuf); + + /* Propagate the query result rather than masking a failure; func_caps + * is only filled in when the query succeeded. + */ + return err; +} + +/** + * ixgbe_get_caps - get info about the HW + * @hw: pointer to the hardware structure + * + * Retrieve both device and function capabilities. + * + * Return: the exit code of the operation. + */ +int ixgbe_get_caps(struct ixgbe_hw *hw) +{ + int err; + + err = ixgbe_discover_dev_caps(hw, &hw->dev_caps); + if (err) + return err; + + return ixgbe_discover_func_caps(hw, &hw->func_caps); +} + +/** + * ixgbe_aci_disable_rxen - disable RX + * @hw: pointer to the HW struct + * + * Request a safe disable of Receive Enable using ACI command (0x000C). + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_disable_rxen(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_disable_rxen *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.disable_rxen; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_disable_rxen); + + cmd->lport_num = hw->bus.func; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_aci_get_phy_caps - returns PHY capabilities + * @hw: pointer to the HW struct + * @qual_mods: report qualified modules + * @report_mode: report mode capabilities + * @pcaps: structure for PHY capabilities to be filled + * + * Returns the various PHY capabilities supported on the Port + * using ACI command (0x0600).
+ * + * Return: the exit code of the operation. + */ +int ixgbe_aci_get_phy_caps(struct ixgbe_hw *hw, bool qual_mods, u8 report_mode, + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps) +{ + struct ixgbe_aci_cmd_get_phy_caps *cmd; + u16 pcaps_size = sizeof(*pcaps); + struct ixgbe_aci_desc desc; + int err; + + cmd = &desc.params.get_phy; + + if (!pcaps || (report_mode & ~IXGBE_ACI_REPORT_MODE_M)) + return -EINVAL; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_phy_caps); + + if (qual_mods) + cmd->param0 |= cpu_to_le16(IXGBE_ACI_GET_PHY_RQM); + + cmd->param0 |= cpu_to_le16(report_mode); + err = ixgbe_aci_send_cmd(hw, &desc, pcaps, pcaps_size); + if (!err && report_mode == IXGBE_ACI_REPORT_TOPO_CAP_MEDIA) { + hw->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); + hw->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); + memcpy(hw->link.link_info.module_type, &pcaps->module_type, + sizeof(hw->link.link_info.module_type)); + } + + return err; +} + +/** + * ixgbe_copy_phy_caps_to_cfg - Copy PHY ability data to configuration data + * @caps: PHY ability structure to copy data from + * @cfg: PHY configuration structure to copy data to + * + * Helper function to copy data from PHY capabilities data structure + * to PHY configuration data structure + */ +void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg) +{ + if (!caps || !cfg) + return; + + memset(cfg, 0, sizeof(*cfg)); + cfg->phy_type_low = caps->phy_type_low; + cfg->phy_type_high = caps->phy_type_high; + cfg->caps = caps->caps; + cfg->low_power_ctrl_an = caps->low_power_ctrl_an; + cfg->eee_cap = caps->eee_cap; + cfg->eeer_value = caps->eeer_value; + cfg->link_fec_opt = caps->link_fec_options; + cfg->module_compliance_enforcement = + caps->module_compliance_enforcement; +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 18b831b6797d..5c5a6769b566 100644 --- 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -15,5 +15,17 @@ void ixgbe_fill_dflt_direct_cmd_desc(struct ixgbe_aci_desc *desc, u16 opcode); int ixgbe_acquire_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res, enum ixgbe_aci_res_access_type access, u32 timeout); void ixgbe_release_res(struct ixgbe_hw *hw, enum ixgbe_aci_res_ids res); +int ixgbe_aci_list_caps(struct ixgbe_hw *hw, void *buf, u16 buf_size, + u32 *cap_count, enum ixgbe_aci_opc opc); +int ixgbe_discover_dev_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_dev_caps *dev_caps); +int ixgbe_discover_func_caps(struct ixgbe_hw *hw, + struct ixgbe_hw_func_caps *func_caps); +int ixgbe_get_caps(struct ixgbe_hw *hw); +int ixgbe_aci_disable_rxen(struct ixgbe_hw *hw); +int ixgbe_aci_get_phy_caps(struct ixgbe_hw *hw, bool qual_mods, u8 report_mode, + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps); +void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); #endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2e38e8f6fac1..13b777d702a2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -42,6 +42,7 @@ #include "ixgbe.h" #include "ixgbe_common.h" +#include "ixgbe_e610.h" #include "ixgbe_dcb_82599.h" #include "ixgbe_mbx.h" #include "ixgbe_phy.h" @@ -10933,6 +10934,12 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; + if (adapter->hw.mac.type == ixgbe_mac_e610) { + err = ixgbe_get_caps(&adapter->hw); + if (err) + dev_err(&pdev->dev, "ixgbe_get_caps failed %d\n", err); + } + if (adapter->hw.mac.type == ixgbe_mac_82599EB) adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; From 23c0e5a16bccd120caf1e7a9bfdf002fea107fa8 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:45 +0100 
Subject: [PATCH 0560/1386] ixgbe: Add link management support for E610 device Add low level link management support for E610 device. Link management operations are handled via the Admin Command Interface. Add the following link management operations: - get link capabilities - set up link - get media type - get link status, link status events - link power management Co-developed-by: Stefan Wegrzyn Signed-off-by: Stefan Wegrzyn Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Reviewed-by: Jan Glaza Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 1081 +++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 32 + .../ethernet/intel/ixgbe/ixgbe_type_e610.h | 1 + 3 files changed, 1114 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 9e15b9c11120..0b578c4006b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -1024,3 +1024,1084 @@ void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, cfg->module_compliance_enforcement = caps->module_compliance_enforcement; } + +/** + * ixgbe_aci_set_phy_cfg - set PHY configuration + * @hw: pointer to the HW struct + * @cfg: structure with PHY configuration data to be set + * + * Set the various PHY configuration parameters supported on the Port + * using ACI command (0x0601). + * One or more of the Set PHY config parameters may be ignored in an MFP + * mode as the PF may not have the privilege to set some of the PHY Config + * parameters. + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_set_phy_cfg(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg) +{ + struct ixgbe_aci_desc desc; + int err; + + if (!cfg) + return -EINVAL; + + /* Ensure that only valid bits of cfg->caps can be turned on. 
*/ + cfg->caps &= IXGBE_ACI_PHY_ENA_VALID_MASK; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_phy_cfg); + desc.params.set_phy.lport_num = hw->bus.func; + desc.flags |= cpu_to_le16(IXGBE_ACI_FLAG_RD); + + err = ixgbe_aci_send_cmd(hw, &desc, cfg, sizeof(*cfg)); + if (!err) + hw->phy.curr_user_phy_cfg = *cfg; + + return err; +} + +/** + * ixgbe_aci_set_link_restart_an - set up link and restart AN + * @hw: pointer to the HW struct + * @ena_link: if true: enable link, if false: disable link + * + * Function sets up the link and restarts the Auto-Negotiation over the link. + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link) +{ + struct ixgbe_aci_cmd_restart_an *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.restart_an; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_restart_an); + + cmd->cmd_flags = IXGBE_ACI_RESTART_AN_LINK_RESTART; + cmd->lport_num = hw->bus.func; + if (ena_link) + cmd->cmd_flags |= IXGBE_ACI_RESTART_AN_LINK_ENABLE; + else + cmd->cmd_flags &= ~IXGBE_ACI_RESTART_AN_LINK_ENABLE; + + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_is_media_cage_present - check if media cage is present + * @hw: pointer to the HW struct + * + * Identify presence of media cage using the ACI command (0x06E0). + * + * Return: true if media cage is present, else false. If no cage, then + * media type is backplane or BASE-T. + */ +static bool ixgbe_is_media_cage_present(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_link_topo *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.get_link_topo; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo); + + cmd->addr.topo_params.node_type_ctx = + FIELD_PREP(IXGBE_ACI_LINK_TOPO_NODE_CTX_M, + IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT); + + /* Set node type.
*/ + cmd->addr.topo_params.node_type_ctx |= + FIELD_PREP(IXGBE_ACI_LINK_TOPO_NODE_TYPE_M, + IXGBE_ACI_LINK_TOPO_NODE_TYPE_CAGE); + + /* Node type cage can be used to determine if cage is present. If AQC + * returns error (ENOENT), then no cage present. If no cage present then + * connection type is backplane or BASE-T. The status is therefore + * negated: only a non-error return means the cage is present. + */ + return !ixgbe_aci_get_netlist_node(hw, cmd, NULL, NULL); +} + +/** + * ixgbe_get_media_type_from_phy_type - Gets media type based on phy type + * @hw: pointer to the HW struct + * + * Try to identify the media type based on the phy type. + * If more than one media type, the ixgbe_media_type_unknown is returned. + * First, phy_type_low is checked, then phy_type_high. + * If none are identified, the ixgbe_media_type_unknown is returned + * + * Return: type of a media based on phy type in form of enum. + */ +static enum ixgbe_media_type +ixgbe_get_media_type_from_phy_type(struct ixgbe_hw *hw) +{ + struct ixgbe_link_status *hw_link_info; + + if (!hw) + return ixgbe_media_type_unknown; + + hw_link_info = &hw->link.link_info; + if (hw_link_info->phy_type_low && hw_link_info->phy_type_high) + /* If more than one media type is selected, report unknown */ + return ixgbe_media_type_unknown; + + if (hw_link_info->phy_type_low) { + /* 1G SGMII is a special case where some DA cable PHYs + * may show this as an option when it really shouldn't + * be since SGMII is meant to be between a MAC and a PHY + * in a backplane.
Try to detect this case and handle it + */ + if (hw_link_info->phy_type_low == IXGBE_PHY_TYPE_LOW_1G_SGMII && + (hw_link_info->module_type[IXGBE_ACI_MOD_TYPE_IDENT] == + IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE || + hw_link_info->module_type[IXGBE_ACI_MOD_TYPE_IDENT] == + IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE)) + return ixgbe_media_type_da; + + switch (hw_link_info->phy_type_low) { + case IXGBE_PHY_TYPE_LOW_1000BASE_SX: + case IXGBE_PHY_TYPE_LOW_1000BASE_LX: + case IXGBE_PHY_TYPE_LOW_10GBASE_SR: + case IXGBE_PHY_TYPE_LOW_10GBASE_LR: + case IXGBE_PHY_TYPE_LOW_25GBASE_SR: + case IXGBE_PHY_TYPE_LOW_25GBASE_LR: + return ixgbe_media_type_fiber; + case IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC: + case IXGBE_PHY_TYPE_LOW_25G_AUI_AOC_ACC: + return ixgbe_media_type_fiber; + case IXGBE_PHY_TYPE_LOW_100BASE_TX: + case IXGBE_PHY_TYPE_LOW_1000BASE_T: + case IXGBE_PHY_TYPE_LOW_2500BASE_T: + case IXGBE_PHY_TYPE_LOW_5GBASE_T: + case IXGBE_PHY_TYPE_LOW_10GBASE_T: + case IXGBE_PHY_TYPE_LOW_25GBASE_T: + return ixgbe_media_type_copper; + case IXGBE_PHY_TYPE_LOW_10G_SFI_DA: + case IXGBE_PHY_TYPE_LOW_25GBASE_CR: + case IXGBE_PHY_TYPE_LOW_25GBASE_CR_S: + case IXGBE_PHY_TYPE_LOW_25GBASE_CR1: + return ixgbe_media_type_da; + case IXGBE_PHY_TYPE_LOW_25G_AUI_C2C: + if (ixgbe_is_media_cage_present(hw)) + return ixgbe_media_type_aui; + fallthrough; + case IXGBE_PHY_TYPE_LOW_1000BASE_KX: + case IXGBE_PHY_TYPE_LOW_2500BASE_KX: + case IXGBE_PHY_TYPE_LOW_2500BASE_X: + case IXGBE_PHY_TYPE_LOW_5GBASE_KR: + case IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1: + case IXGBE_PHY_TYPE_LOW_10G_SFI_C2C: + case IXGBE_PHY_TYPE_LOW_25GBASE_KR: + case IXGBE_PHY_TYPE_LOW_25GBASE_KR1: + case IXGBE_PHY_TYPE_LOW_25GBASE_KR_S: + return ixgbe_media_type_backplane; + } + } else { + switch (hw_link_info->phy_type_high) { + case IXGBE_PHY_TYPE_HIGH_10BASE_T: + return ixgbe_media_type_copper; + } + } + return ixgbe_media_type_unknown; +} + +/** + * ixgbe_update_link_info - update status of the HW network link + * @hw: pointer to 
the HW struct + * + * Update the status of the HW network link. + * + * Return: the exit code of the operation. + */ +int ixgbe_update_link_info(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data *pcaps; + struct ixgbe_link_status *li; + int err; + + if (!hw) + return -EINVAL; + + li = &hw->link.link_info; + + err = ixgbe_aci_get_link_info(hw, true, NULL); + if (err) + return err; + + if (!(li->link_info & IXGBE_ACI_MEDIA_AVAILABLE)) + return 0; + + pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL); + if (!pcaps) + return -ENOMEM; + + err = ixgbe_aci_get_phy_caps(hw, false, IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + pcaps); + + if (!err) + memcpy(li->module_type, &pcaps->module_type, + sizeof(li->module_type)); + + kfree(pcaps); + + return err; +} + +/** + * ixgbe_get_link_status - get status of the HW network link + * @hw: pointer to the HW struct + * @link_up: pointer to bool (true/false = linkup/linkdown) + * + * Variable link_up is true if link is up, false if link is down. + * The variable link_up is invalid if status is non zero. As a + * result of this call, link status reporting becomes enabled + * + * Return: the exit code of the operation. + */ +int ixgbe_get_link_status(struct ixgbe_hw *hw, bool *link_up) +{ + if (!hw || !link_up) + return -EINVAL; + + if (hw->link.get_link_info) { + int err = ixgbe_update_link_info(hw); + + if (err) + return err; + } + + *link_up = hw->link.link_info.link_info & IXGBE_ACI_LINK_UP; + + return 0; +} + +/** + * ixgbe_aci_get_link_info - get the link status + * @hw: pointer to the HW struct + * @ena_lse: enable/disable LinkStatusEvent reporting + * @link: pointer to link status structure - optional + * + * Get the current Link Status using ACI command (0x607). + * The current link can be optionally provided to update + * the status. + * + * Return: the link status of the adapter. 
+ */ +int ixgbe_aci_get_link_info(struct ixgbe_hw *hw, bool ena_lse, + struct ixgbe_link_status *link) +{ + struct ixgbe_aci_cmd_get_link_status_data link_data = {}; + struct ixgbe_aci_cmd_get_link_status *resp; + struct ixgbe_link_status *li_old, *li; + struct ixgbe_fc_info *hw_fc_info; + struct ixgbe_aci_desc desc; + bool tx_pause, rx_pause; + u8 cmd_flags; + int err; + + if (!hw) + return -EINVAL; + + li_old = &hw->link.link_info_old; + li = &hw->link.link_info; + hw_fc_info = &hw->fc; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_status); + cmd_flags = (ena_lse) ? IXGBE_ACI_LSE_ENA : IXGBE_ACI_LSE_DIS; + resp = &desc.params.get_link_status; + resp->cmd_flags = cpu_to_le16(cmd_flags); + resp->lport_num = hw->bus.func; + + err = ixgbe_aci_send_cmd(hw, &desc, &link_data, sizeof(link_data)); + if (err) + return err; + + /* Save off old link status information. */ + *li_old = *li; + + /* Update current link status information. */ + li->link_speed = le16_to_cpu(link_data.link_speed); + li->phy_type_low = le64_to_cpu(link_data.phy_type_low); + li->phy_type_high = le64_to_cpu(link_data.phy_type_high); + li->link_info = link_data.link_info; + li->link_cfg_err = link_data.link_cfg_err; + li->an_info = link_data.an_info; + li->ext_info = link_data.ext_info; + li->max_frame_size = le16_to_cpu(link_data.max_frame_size); + li->fec_info = link_data.cfg & IXGBE_ACI_FEC_MASK; + li->topo_media_conflict = link_data.topo_media_conflict; + li->pacing = link_data.cfg & (IXGBE_ACI_CFG_PACING_M | + IXGBE_ACI_CFG_PACING_TYPE_M); + + /* Update fc info. 
*/ + tx_pause = !!(link_data.an_info & IXGBE_ACI_LINK_PAUSE_TX); + rx_pause = !!(link_data.an_info & IXGBE_ACI_LINK_PAUSE_RX); + if (tx_pause && rx_pause) + hw_fc_info->current_mode = ixgbe_fc_full; + else if (tx_pause) + hw_fc_info->current_mode = ixgbe_fc_tx_pause; + else if (rx_pause) + hw_fc_info->current_mode = ixgbe_fc_rx_pause; + else + hw_fc_info->current_mode = ixgbe_fc_none; + + li->lse_ena = !!(le16_to_cpu(resp->cmd_flags) & + IXGBE_ACI_LSE_IS_ENABLED); + + /* Save link status information. */ + if (link) + *link = *li; + + /* Flag cleared so calling functions don't call AQ again. */ + hw->link.get_link_info = false; + + return 0; +} + +/** + * ixgbe_aci_set_event_mask - set event mask + * @hw: pointer to the HW struct + * @port_num: port number of the physical function + * @mask: event mask to be set + * + * Set the event mask using ACI command (0x0613). + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_set_event_mask(struct ixgbe_hw *hw, u8 port_num, u16 mask) +{ + struct ixgbe_aci_cmd_set_event_mask *cmd; + struct ixgbe_aci_desc desc; + + cmd = &desc.params.set_event_mask; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_set_event_mask); + + cmd->lport_num = port_num; + + cmd->event_mask = cpu_to_le16(mask); + return ixgbe_aci_send_cmd(hw, &desc, NULL, 0); +} + +/** + * ixgbe_configure_lse - enable/disable link status events + * @hw: pointer to the HW struct + * @activate: true for enable lse, false otherwise + * @mask: event mask to be set; a set bit means deactivation of the + * corresponding event + * + * Set the event mask and then enable or disable link status events + * + * Return: the exit code of the operation. + */ +int ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask) +{ + int err; + + err = ixgbe_aci_set_event_mask(hw, (u8)hw->bus.func, mask); + if (err) + return err; + + /* Enabling link status events generation by fw. 
*/ + return ixgbe_aci_get_link_info(hw, activate, NULL); +} + +/** + * ixgbe_get_media_type_e610 - Gets media type + * @hw: pointer to the HW struct + * + * In order to get the media type, the function refreshes the link info and, + * when there is no link but media is available, gets the PHY capabilities + * and uses the highest PHY type bit reported in phy_type_high (or, if none, + * in phy_type_low) to identify the PHY type. The result is also stored in + * hw->phy.media_type. + * + * Return: the type of media in form of ixgbe_media_type enum + * or ixgbe_media_type_unknown in case of an error. + */ +enum ixgbe_media_type ixgbe_get_media_type_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + int rc; + + rc = ixgbe_update_link_info(hw); + if (rc) + return ixgbe_media_type_unknown; + + /* If there is no link but PHY (dongle) is available SW should use + * Get PHY Caps admin command instead of Get Link Status, find most + * significant bit that is set in PHY types reported by the command + * and use it to discover media type. + */ + if (!(hw->link.link_info.link_info & IXGBE_ACI_LINK_UP) && + (hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE)) { + int highest_bit; + + /* Get PHY Capabilities */ + rc = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, + &pcaps); + if (rc) + return ixgbe_media_type_unknown; + + highest_bit = fls64(le64_to_cpu(pcaps.phy_type_high)); + if (highest_bit) { + hw->link.link_info.phy_type_high = + BIT_ULL(highest_bit - 1); + hw->link.link_info.phy_type_low = 0; + } else { + highest_bit = fls64(le64_to_cpu(pcaps.phy_type_low)); + if (highest_bit) { + hw->link.link_info.phy_type_low = + BIT_ULL(highest_bit - 1); + /* Drop any stale high PHY type so that only + * one PHY type word is set for the lookup + * below; with both set the media type would + * be reported as unknown. + */ + hw->link.link_info.phy_type_high = 0; + } + } + } + + /* Based on link status or search above try to discover media type. */ + hw->phy.media_type = ixgbe_get_media_type_from_phy_type(hw); + + return hw->phy.media_type; +} + +/** + * ixgbe_setup_link_e610 - Set up link + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg_wait: true when waiting for completion is needed + * + * Set up the link with the specified speed.
+ * + * Return: the exit code of the operation. + */ +int ixgbe_setup_link_e610(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait) +{ + /* Simply request FW to perform proper PHY setup */ + return hw->phy.ops.setup_link_speed(hw, speed, autoneg_wait); +} + +/** + * ixgbe_check_link_e610 - Determine link and speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true when link is up + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Determine if the link is up and the current link speed + * using ACI command (0x0607). + * + * Return: the exit code of the operation. + */ +int ixgbe_check_link_e610(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + int err; + u32 i; + + if (!speed || !link_up) + return -EINVAL; + + /* Set get_link_info flag to ensure that fresh + * link information will be obtained from FW + * by sending Get Link Status admin command. + */ + hw->link.get_link_info = true; + + /* Update link information in adapter context. */ + err = ixgbe_get_link_status(hw, link_up); + if (err) + return err; + + /* Wait for link up if it was requested. */ + if (link_up_wait_to_complete && !(*link_up)) { + for (i = 0; i < hw->mac.max_link_up_time; i++) { + msleep(100); + hw->link.get_link_info = true; + err = ixgbe_get_link_status(hw, link_up); + if (err) + return err; + if (*link_up) + break; + } + } + + /* Use link information in adapter context updated by the call + * to ixgbe_get_link_status() to determine current link speed. + * Link speed information is valid only when link up was + * reported by FW. 
+ */ + if (*link_up) { + switch (hw->link.link_info.link_speed) { + case IXGBE_ACI_LINK_SPEED_10MB: + *speed = IXGBE_LINK_SPEED_10_FULL; + break; + case IXGBE_ACI_LINK_SPEED_100MB: + *speed = IXGBE_LINK_SPEED_100_FULL; + break; + case IXGBE_ACI_LINK_SPEED_1000MB: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_2500MB: + *speed = IXGBE_LINK_SPEED_2_5GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_5GB: + *speed = IXGBE_LINK_SPEED_5GB_FULL; + break; + case IXGBE_ACI_LINK_SPEED_10GB: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + break; + default: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + break; + } + } else { + *speed = IXGBE_LINK_SPEED_UNKNOWN; + } + + return 0; +} + +/** + * ixgbe_get_link_capabilities_e610 - Determine link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: true when autoneg or autotry is enabled + * + * Determine speed and AN parameters of a link. + * + * Return: the exit code of the operation. + */ +int ixgbe_get_link_capabilities_e610(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + if (!speed || !autoneg) + return -EINVAL; + + *autoneg = true; + *speed = hw->phy.speeds_supported; + + return 0; +} + +/** + * ixgbe_cfg_phy_fc - Configure PHY Flow Control (FC) data based on FC mode + * @hw: pointer to hardware structure + * @cfg: PHY configuration data to set FC mode + * @req_mode: FC mode to configure + * + * Configures PHY Flow Control according to the provided configuration. + * + * Return: the exit code of the operation. 
+ */ +int ixgbe_cfg_phy_fc(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg, + enum ixgbe_fc_mode req_mode) +{ + u8 pause_mask = 0x0; + + if (!cfg) + return -EINVAL; + + switch (req_mode) { + case ixgbe_fc_full: + pause_mask |= IXGBE_ACI_PHY_EN_TX_LINK_PAUSE; + pause_mask |= IXGBE_ACI_PHY_EN_RX_LINK_PAUSE; + break; + case ixgbe_fc_rx_pause: + pause_mask |= IXGBE_ACI_PHY_EN_RX_LINK_PAUSE; + break; + case ixgbe_fc_tx_pause: + pause_mask |= IXGBE_ACI_PHY_EN_TX_LINK_PAUSE; + break; + default: + break; + } + + /* Clear the old pause settings. */ + cfg->caps &= ~(IXGBE_ACI_PHY_EN_TX_LINK_PAUSE | + IXGBE_ACI_PHY_EN_RX_LINK_PAUSE); + + /* Set the new capabilities. */ + cfg->caps |= pause_mask; + + return 0; +} + +/** + * ixgbe_setup_fc_e610 - Set up flow control + * @hw: pointer to hardware structure + * + * Set up flow control. This has to be done during init time. + * + * Return: the exit code of the operation. + */ +int ixgbe_setup_fc_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps = {}; + struct ixgbe_aci_cmd_set_phy_cfg_data cfg = {}; + int err; + + /* Get the current PHY config */ + err = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, &pcaps); + if (err) + return err; + + ixgbe_copy_phy_caps_to_cfg(&pcaps, &cfg); + + /* Configure the set PHY data */ + err = ixgbe_cfg_phy_fc(hw, &cfg, hw->fc.requested_mode); + if (err) + return err; + + /* If the capabilities have changed, then set the new config */ + if (cfg.caps != pcaps.caps) { + cfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + err = ixgbe_aci_set_phy_cfg(hw, &cfg); + if (err) + return err; + } + + return err; +} + +/** + * ixgbe_fc_autoneg_e610 - Configure flow control + * @hw: pointer to hardware structure + * + * Configure Flow Control. + */ +void ixgbe_fc_autoneg_e610(struct ixgbe_hw *hw) +{ + int err; + + /* Get current link err. + * Current FC mode will be stored in the hw context. 
+ */ + err = ixgbe_aci_get_link_info(hw, false, NULL); + if (err) + goto no_autoneg; + + /* Check if the link is up */ + if (!(hw->link.link_info.link_info & IXGBE_ACI_LINK_UP)) + goto no_autoneg; + + /* Check if auto-negotiation has completed */ + if (!(hw->link.link_info.an_info & IXGBE_ACI_AN_COMPLETED)) + goto no_autoneg; + + hw->fc.fc_was_autonegged = true; + return; + +no_autoneg: + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; +} + +/** + * ixgbe_disable_rx_e610 - Disable RX unit + * @hw: pointer to hardware structure + * + * Disable RX DMA unit on E610 with use of ACI command (0x000C). Nothing is + * done when RXEN is already clear. If VT loopback is enabled in PFDTXGSWC it + * is cleared first and hw->mac.set_lben records whether that happened. + * + * This function does not return a value; if the ACI command fails, RX is + * disabled by clearing RXEN in the RXCTRL register directly. + */ +void ixgbe_disable_rx_e610(struct ixgbe_hw *hw) +{ + u32 rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + u32 pfdtxgswc; + int err; + + if (!(rxctrl & IXGBE_RXCTRL_RXEN)) + return; + + pfdtxgswc = IXGBE_READ_REG(hw, IXGBE_PFDTXGSWC); + if (pfdtxgswc & IXGBE_PFDTXGSWC_VT_LBEN) { + pfdtxgswc &= ~IXGBE_PFDTXGSWC_VT_LBEN; + IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, pfdtxgswc); + hw->mac.set_lben = true; + } else { + hw->mac.set_lben = false; + } + + err = ixgbe_aci_disable_rxen(hw); + + /* If we fail - disable RX using register write */ + if (err) { + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + if (rxctrl & IXGBE_RXCTRL_RXEN) { + rxctrl &= ~IXGBE_RXCTRL_RXEN; + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl); + } + } +} + +/** + * ixgbe_init_phy_ops_e610 - PHY specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY type was not known. + * + * Return: the exit code of the operation.
+ */ +int ixgbe_init_phy_ops_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) + phy->ops.set_phy_power = ixgbe_set_phy_power_e610; + else + phy->ops.set_phy_power = NULL; + + /* Identify the PHY */ + return phy->ops.identify(hw); +} + +/** + * ixgbe_identify_phy_e610 - Identify PHY + * @hw: pointer to hardware structure + * + * Determine PHY type, supported speeds and PHY ID. + * + * Return: the exit code of the operation. + */ +int ixgbe_identify_phy_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + u64 phy_type_low, phy_type_high; + int err; + + /* Set PHY type */ + hw->phy.type = ixgbe_phy_fw; + + err = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_MEDIA, &pcaps); + if (err) + return err; + + if (!(pcaps.module_compliance_enforcement & + IXGBE_ACI_MOD_ENFORCE_STRICT_MODE)) { + /* Handle lenient mode */ + err = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_TOPO_CAP_NO_MEDIA, + &pcaps); + if (err) + return err; + } + + /* Determine supported speeds */ + hw->phy.speeds_supported = IXGBE_LINK_SPEED_UNKNOWN; + phy_type_high = le64_to_cpu(pcaps.phy_type_high); + phy_type_low = le64_to_cpu(pcaps.phy_type_low); + + if (phy_type_high & IXGBE_PHY_TYPE_HIGH_10BASE_T || + phy_type_high & IXGBE_PHY_TYPE_HIGH_10M_SGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10_FULL; + if (phy_type_low & IXGBE_PHY_TYPE_LOW_100BASE_TX || + phy_type_low & IXGBE_PHY_TYPE_LOW_100M_SGMII || + phy_type_high & IXGBE_PHY_TYPE_HIGH_100M_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_100_FULL; + if (phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_SX || + phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_LX || + phy_type_low & IXGBE_PHY_TYPE_LOW_1000BASE_KX || + phy_type_low & IXGBE_PHY_TYPE_LOW_1G_SGMII || + phy_type_high & IXGBE_PHY_TYPE_HIGH_1G_USXGMII) + 
hw->phy.speeds_supported |= IXGBE_LINK_SPEED_1GB_FULL; + if (phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_DA || + phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_SR || + phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_LR || + phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1 || + phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC || + phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_C2C || + phy_type_high & IXGBE_PHY_TYPE_HIGH_10G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10GB_FULL; + + /* 2.5 and 5 Gbps link speeds must be excluded from the + * auto-negotiation set used during driver initialization due to + * compatibility issues with certain switches. Those issues do not + * exist in case of E610 2.5G SKU device (0x57b1). + */ + if (!hw->phy.autoneg_advertised && + hw->device_id != IXGBE_DEV_ID_E610_2_5G_T) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + if (phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_X || + phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_KX || + phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_SGMII || + phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL; + + if (!hw->phy.autoneg_advertised && + hw->device_id == IXGBE_DEV_ID_E610_2_5G_T) + hw->phy.autoneg_advertised = hw->phy.speeds_supported; + + if (phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_KR || + phy_type_high & IXGBE_PHY_TYPE_HIGH_5G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; + + /* Set PHY ID */ + memcpy(&hw->phy.id, pcaps.phy_id_oui, sizeof(u32)); + + hw->phy.eee_speeds_supported = IXGBE_LINK_SPEED_10_FULL | + IXGBE_LINK_SPEED_100_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + hw->phy.eee_speeds_advertised = hw->phy.eee_speeds_supported; + + return 0; +} + +/** + * ixgbe_identify_module_e610 - Identify SFP module type + * @hw: pointer to hardware structure + * + * Identify the SFP 
module type. + * + * Return: the exit code of the operation. + */ +int ixgbe_identify_module_e610(struct ixgbe_hw *hw) +{ + bool media_available; + u8 module_type; + int err; + + err = ixgbe_update_link_info(hw); + if (err) + return err; + + media_available = + (hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE); + + if (media_available) { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + + /* Get module type from hw context updated by + * ixgbe_update_link_info() + */ + module_type = hw->link.link_info.module_type[IXGBE_ACI_MOD_TYPE_IDENT]; + + if ((module_type & IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_PASSIVE) || + (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_SFP_PLUS_CU_ACTIVE)) { + hw->phy.sfp_type = ixgbe_sfp_type_da_cu; + } else if (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_SR) { + hw->phy.sfp_type = ixgbe_sfp_type_sr; + } else if ((module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LR) || + (module_type & IXGBE_ACI_MOD_TYPE_BYTE1_10G_BASE_LRM)) { + hw->phy.sfp_type = ixgbe_sfp_type_lr; + } + } else { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + return -ENOENT; + } + + return 0; +} + +/** + * ixgbe_setup_phy_link_e610 - Sets up firmware-controlled PHYs + * @hw: pointer to hardware structure + * + * Set the parameters for the firmware-controlled PHYs. + * + * Return: the exit code of the operation. + */ +int ixgbe_setup_phy_link_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data pcaps; + struct ixgbe_aci_cmd_set_phy_cfg_data pcfg; + u8 rmode = IXGBE_ACI_REPORT_TOPO_CAP_MEDIA; + u64 sup_phy_type_low, sup_phy_type_high; + u64 phy_type_low = 0, phy_type_high = 0; + int err; + + err = ixgbe_aci_get_link_info(hw, false, NULL); + if (err) + return err; + + /* If media is not available get default config. 
*/ + if (!(hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE)) + rmode = IXGBE_ACI_REPORT_DFLT_CFG; + + err = ixgbe_aci_get_phy_caps(hw, false, rmode, &pcaps); + if (err) + return err; + + sup_phy_type_low = le64_to_cpu(pcaps.phy_type_low); + sup_phy_type_high = le64_to_cpu(pcaps.phy_type_high); + + /* Get Active configuration to avoid unintended changes. */ + err = ixgbe_aci_get_phy_caps(hw, false, IXGBE_ACI_REPORT_ACTIVE_CFG, + &pcaps); + if (err) + return err; + + ixgbe_copy_phy_caps_to_cfg(&pcaps, &pcfg); + + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10_FULL) { + phy_type_high |= IXGBE_PHY_TYPE_HIGH_10BASE_T; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_10M_SGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) { + phy_type_low |= IXGBE_PHY_TYPE_LOW_100BASE_TX; + phy_type_low |= IXGBE_PHY_TYPE_LOW_100M_SGMII; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_100M_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) { + phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_T; + phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_SX; + phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_LX; + phy_type_low |= IXGBE_PHY_TYPE_LOW_1000BASE_KX; + phy_type_low |= IXGBE_PHY_TYPE_LOW_1G_SGMII; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_1G_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_2_5GB_FULL) { + phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_T; + phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_X; + phy_type_low |= IXGBE_PHY_TYPE_LOW_2500BASE_KX; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_2500M_SGMII; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_2500M_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) { + phy_type_low |= IXGBE_PHY_TYPE_LOW_5GBASE_T; + phy_type_low |= IXGBE_PHY_TYPE_LOW_5GBASE_KR; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_5G_USXGMII; + } + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) { + phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_T; + phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_DA; + phy_type_low |= 
IXGBE_PHY_TYPE_LOW_10GBASE_SR; + phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_LR; + phy_type_low |= IXGBE_PHY_TYPE_LOW_10GBASE_KR_CR1; + phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_AOC_ACC; + phy_type_low |= IXGBE_PHY_TYPE_LOW_10G_SFI_C2C; + phy_type_high |= IXGBE_PHY_TYPE_HIGH_10G_USXGMII; + } + + /* Mask the set values to avoid requesting unsupported link types. */ + phy_type_low &= sup_phy_type_low; + pcfg.phy_type_low = cpu_to_le64(phy_type_low); + phy_type_high &= sup_phy_type_high; + pcfg.phy_type_high = cpu_to_le64(phy_type_high); + + if (pcfg.phy_type_high != pcaps.phy_type_high || + pcfg.phy_type_low != pcaps.phy_type_low || + pcfg.caps != pcaps.caps) { + pcfg.caps |= IXGBE_ACI_PHY_ENA_LINK; + pcfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + err = ixgbe_aci_set_phy_cfg(hw, &pcfg); + if (err) + return err; + } + + return 0; +} + +/** + * ixgbe_set_phy_power_e610 - Control power for copper PHY + * @hw: pointer to hardware structure + * @on: true for on, false for off + * + * Set the power on/off of the PHY + * by getting its capabilities and setting the appropriate + * configuration parameters. + * + * Return: the exit code of the operation. + */ +int ixgbe_set_phy_power_e610(struct ixgbe_hw *hw, bool on) +{ + struct ixgbe_aci_cmd_get_phy_caps_data phy_caps = {}; + struct ixgbe_aci_cmd_set_phy_cfg_data phy_cfg = {}; + int err; + + err = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, + &phy_caps); + if (err) + return err; + + ixgbe_copy_phy_caps_to_cfg(&phy_caps, &phy_cfg); + + if (on) + phy_cfg.caps &= ~IXGBE_ACI_PHY_ENA_LOW_POWER; + else + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_LOW_POWER; + + /* PHY is already in requested power mode. 
*/ + if (phy_caps.caps == phy_cfg.caps) + return 0; + + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_LINK; + phy_cfg.caps |= IXGBE_ACI_PHY_ENA_AUTO_LINK_UPDT; + + return ixgbe_aci_set_phy_cfg(hw, &phy_cfg); +} + +/** + * ixgbe_enter_lplu_e610 - Transition to low power states + * @hw: pointer to hardware structure + * + * Configures Low Power Link Up on transition to low power states + * (from D0 to non-D0). Link is required to enter LPLU so avoid resetting the + * X557 PHY immediately prior to entering LPLU. + * + * Return: the exit code of the operation. + */ +int ixgbe_enter_lplu_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_get_phy_caps_data phy_caps = {}; + struct ixgbe_aci_cmd_set_phy_cfg_data phy_cfg = {}; + int err; + + err = ixgbe_aci_get_phy_caps(hw, false, + IXGBE_ACI_REPORT_ACTIVE_CFG, + &phy_caps); + if (err) + return err; + + ixgbe_copy_phy_caps_to_cfg(&phy_caps, &phy_cfg); + + phy_cfg.low_power_ctrl_an |= IXGBE_ACI_PHY_EN_D3COLD_LOW_POWER_AUTONEG; + + return ixgbe_aci_set_phy_cfg(hw, &phy_cfg); +} + +/** + * ixgbe_aci_get_netlist_node - get a node handle + * @hw: pointer to the hw struct + * @cmd: get_link_topo AQ structure + * @node_part_number: output node part number if node found + * @node_handle: output node handle parameter if node found + * + * Get the netlist node and assigns it to + * the provided handle using ACI command (0x06E0). + * + * Return: the exit code of the operation. 
+ */ +int ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle) +{ + struct ixgbe_aci_desc desc; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_get_link_topo); + desc.params.get_link_topo = *cmd; + + if (ixgbe_aci_send_cmd(hw, &desc, NULL, 0)) + return -EOPNOTSUPP; + + if (node_handle) + *node_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + if (node_part_number) + *node_part_number = desc.params.get_link_topo.node_part_num; + + return 0; +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 5c5a6769b566..4a4f969b2100 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -27,5 +27,37 @@ int ixgbe_aci_get_phy_caps(struct ixgbe_hw *hw, bool qual_mods, u8 report_mode, struct ixgbe_aci_cmd_get_phy_caps_data *pcaps); void ixgbe_copy_phy_caps_to_cfg(struct ixgbe_aci_cmd_get_phy_caps_data *caps, struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); +int ixgbe_aci_set_phy_cfg(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg); +int ixgbe_aci_set_link_restart_an(struct ixgbe_hw *hw, bool ena_link); +int ixgbe_update_link_info(struct ixgbe_hw *hw); +int ixgbe_get_link_status(struct ixgbe_hw *hw, bool *link_up); +int ixgbe_aci_get_link_info(struct ixgbe_hw *hw, bool ena_lse, + struct ixgbe_link_status *link); +int ixgbe_aci_set_event_mask(struct ixgbe_hw *hw, u8 port_num, u16 mask); +int ixgbe_configure_lse(struct ixgbe_hw *hw, bool activate, u16 mask); +enum ixgbe_media_type ixgbe_get_media_type_e610(struct ixgbe_hw *hw); +int ixgbe_setup_link_e610(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg_wait); +int ixgbe_check_link_e610(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); +int ixgbe_get_link_capabilities_e610(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); +int 
ixgbe_cfg_phy_fc(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_set_phy_cfg_data *cfg, + enum ixgbe_fc_mode req_mode); +int ixgbe_setup_fc_e610(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg_e610(struct ixgbe_hw *hw); +void ixgbe_disable_rx_e610(struct ixgbe_hw *hw); +int ixgbe_init_phy_ops_e610(struct ixgbe_hw *hw); +int ixgbe_identify_phy_e610(struct ixgbe_hw *hw); +int ixgbe_identify_module_e610(struct ixgbe_hw *hw); +int ixgbe_setup_phy_link_e610(struct ixgbe_hw *hw); +int ixgbe_set_phy_power_e610(struct ixgbe_hw *hw, bool on); +int ixgbe_enter_lplu_e610(struct ixgbe_hw *hw); +int ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, + struct ixgbe_aci_cmd_get_link_topo *cmd, + u8 *node_part_number, u16 *node_handle); #endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index 5978cb06f732..ecc3fc8c8d52 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -652,6 +652,7 @@ struct ixgbe_aci_cmd_link_topo_params { #define IXGBE_ACI_LINK_TOPO_NODE_TYPE_CLK_MUX 10 #define IXGBE_ACI_LINK_TOPO_NODE_TYPE_GPS 11 #define IXGBE_ACI_LINK_TOPO_NODE_CTX_S 4 +#define IXGBE_ACI_LINK_TOPO_NODE_CTX_M GENMASK(7, 4) #define IXGBE_ACI_LINK_TOPO_NODE_CTX_GLOBAL 0 #define IXGBE_ACI_LINK_TOPO_NODE_CTX_BOARD 1 #define IXGBE_ACI_LINK_TOPO_NODE_CTX_PORT 2 From d2483ebc9deb9de23fd85a2a45f9073ec9101f36 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:46 +0100 Subject: [PATCH 0561/1386] ixgbe: Add support for NVM handling in E610 device Add low level support for accessing NVM in E610 device. NVM operations are handled via the Admin Command Interface. 
Add the following NVM specific operations: - acquire, release, read - validate checksum - read shadow ram Co-developed-by: Stefan Wegrzyn Signed-off-by: Stefan Wegrzyn Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Reviewed-by: Michal Swiatkowski Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 291 ++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 12 + 2 files changed, 303 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 0b578c4006b1..a35e28d99269 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -2105,3 +2105,294 @@ int ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, return 0; } + +/** + * ixgbe_acquire_nvm - Generic request for acquiring the NVM ownership + * @hw: pointer to the HW structure + * @access: NVM access type (read or write) + * + * Request NVM ownership. + * + * Return: the exit code of the operation. + */ +int ixgbe_acquire_nvm(struct ixgbe_hw *hw, + enum ixgbe_aci_res_access_type access) +{ + u32 fla; + + /* Skip if we are in blank NVM programming mode */ + fla = IXGBE_READ_REG(hw, IXGBE_GLNVM_FLA); + if ((fla & IXGBE_GLNVM_FLA_LOCKED_M) == 0) + return 0; + + return ixgbe_acquire_res(hw, IXGBE_NVM_RES_ID, access, + IXGBE_NVM_TIMEOUT); +} + +/** + * ixgbe_release_nvm - Generic request for releasing the NVM ownership + * @hw: pointer to the HW structure + * + * Release NVM ownership. 
+ */ +void ixgbe_release_nvm(struct ixgbe_hw *hw) +{ + u32 fla; + + /* Skip if we are in blank NVM programming mode */ + fla = IXGBE_READ_REG(hw, IXGBE_GLNVM_FLA); + if ((fla & IXGBE_GLNVM_FLA_LOCKED_M) == 0) + return; + + ixgbe_release_res(hw, IXGBE_NVM_RES_ID); +} + +/** + * ixgbe_aci_read_nvm - read NVM + * @hw: pointer to the HW struct + * @module_typeid: module pointer location in words from the NVM beginning + * @offset: byte offset from the module beginning + * @length: length of the section to be read (in bytes from the offset) + * @data: command buffer (size [bytes] = length) + * @last_command: tells if this is the last command in a series + * @read_shadow_ram: tells if this is a shadow RAM read + * + * Read the NVM using ACI command (0x0701). + * + * Return: the exit code of the operation. + */ +int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram) +{ + struct ixgbe_aci_cmd_nvm *cmd; + struct ixgbe_aci_desc desc; + + if (offset > IXGBE_ACI_NVM_MAX_OFFSET) + return -EINVAL; + + cmd = &desc.params.nvm; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_read); + + if (!read_shadow_ram && module_typeid == IXGBE_ACI_NVM_START_POINT) + cmd->cmd_flags |= IXGBE_ACI_NVM_FLASH_ONLY; + + /* If this is the last command in a series, set the proper flag. */ + if (last_command) + cmd->cmd_flags |= IXGBE_ACI_NVM_LAST_CMD; + cmd->module_typeid = cpu_to_le16(module_typeid); + cmd->offset_low = cpu_to_le16(offset & 0xFFFF); + cmd->offset_high = (offset >> 16) & 0xFF; + cmd->length = cpu_to_le16(length); + + return ixgbe_aci_send_cmd(hw, &desc, data, length); +} + +/** + * ixgbe_nvm_validate_checksum - validate checksum + * @hw: pointer to the HW struct + * + * Verify NVM PFA checksum validity using ACI command (0x0706). + * If the checksum verification failed, -EIO is returned. + * The function acquires and then releases the NVM ownership.
+ * + * Return: the exit code of the operation. + */ +int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw) +{ + struct ixgbe_aci_cmd_nvm_checksum *cmd; + struct ixgbe_aci_desc desc; + int err; + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + cmd = &desc.params.nvm_checksum; + + ixgbe_fill_dflt_direct_cmd_desc(&desc, ixgbe_aci_opc_nvm_checksum); + cmd->flags = IXGBE_ACI_NVM_CHECKSUM_VERIFY; + + err = ixgbe_aci_send_cmd(hw, &desc, NULL, 0); + + ixgbe_release_nvm(hw); + + if (!err && cmd->checksum != + cpu_to_le16(IXGBE_ACI_NVM_CHECKSUM_CORRECT)) { + struct ixgbe_adapter *adapter = container_of(hw, struct ixgbe_adapter, + hw); + + err = -EIO; + netdev_err(adapter->netdev, "Invalid Shadow Ram checksum\n"); + } + + return err; +} + +/** + * ixgbe_read_sr_word_aci - Reads Shadow RAM via ACI + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF) + * @data: word read from the Shadow RAM (in CPU byte order) + * + * Reads one 16 bit word from the Shadow RAM using ixgbe_read_flat_nvm. + * + * Return: the exit code of the operation. + */ +int ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + u32 bytes = sizeof(u16); + __le16 data_local; + int err; + + err = ixgbe_read_flat_nvm(hw, offset * sizeof(u16), &bytes, + (u8 *)&data_local, true); + if (err) + return err; + + *data = le16_to_cpu(data_local); + return 0; +} + +/** + * ixgbe_read_flat_nvm - Read portion of NVM by flat offset + * @hw: pointer to the HW struct + * @offset: offset from beginning of NVM + * @length: (in) number of bytes to read; (out) number of bytes actually read + * @data: buffer to return data in (sized to fit the specified length) + * @read_shadow_ram: if true, read from shadow RAM instead of NVM + * + * Reads a portion of the NVM, as a flat memory space.
This function correctly + * breaks read requests across Shadow RAM sectors, prevents Shadow RAM size + * from being exceeded in case of Shadow RAM read requests and ensures that no + * single read request exceeds the maximum 4KB read for a single admin command. + * + * Returns an error code on failure. Note that the data pointer may be + * partially updated if some reads succeed before a failure. + * + * Return: the exit code of the operation. + */ +int ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, + u8 *data, bool read_shadow_ram) +{ + u32 inlen = *length; + u32 bytes_read = 0; + bool last_cmd; + int err; + + /* Verify the length of the read if this is for the Shadow RAM */ + if (read_shadow_ram && ((offset + inlen) > + (hw->eeprom.word_size * 2u))) + return -EINVAL; + + do { + u32 read_size, sector_offset; + + /* ixgbe_aci_read_nvm cannot read more than 4KB at a time. + * Additionally, a read from the Shadow RAM may not cross over + * a sector boundary. Conveniently, the sector size is also 4KB. + */ + sector_offset = offset % IXGBE_ACI_MAX_BUFFER_SIZE; + read_size = min_t(u32, + IXGBE_ACI_MAX_BUFFER_SIZE - sector_offset, + inlen - bytes_read); + + last_cmd = !(bytes_read + read_size < inlen); + + /* ixgbe_aci_read_nvm takes the length as a u16. Our read_size + * is calculated using a u32, but the IXGBE_ACI_MAX_BUFFER_SIZE + * maximum size guarantees that it will fit within the 2 bytes. + */ + err = ixgbe_aci_read_nvm(hw, IXGBE_ACI_NVM_START_POINT, + offset, (u16)read_size, + data + bytes_read, last_cmd, + read_shadow_ram); + if (err) + break; + + bytes_read += read_size; + offset += read_size; + } while (!last_cmd); + + *length = bytes_read; + return err; +} + +/** + * ixgbe_read_ee_aci_e610 - Read EEPROM word using the admin command. + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the ACI. 
+ * If the EEPROM params are not initialized, the function + * initializes them before proceeding with reading. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation. + */ +int ixgbe_read_ee_aci_e610(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + int err; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + err = hw->eeprom.ops.init_params(hw); + if (err) + return err; + } + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + err = ixgbe_read_sr_word_aci(hw, offset, data); + ixgbe_release_nvm(hw); + + return err; +} + +/** + * ixgbe_validate_eeprom_checksum_e610 - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + * If the EEPROM params are not initialized, the function + * initializes them before proceeding. + * The function acquires and then releases the NVM ownership. + * + * Return: the exit code of the operation.
+ */ +int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val) +{ + int err; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + err = hw->eeprom.ops.init_params(hw); + if (err) + return err; + } + + err = ixgbe_nvm_validate_checksum(hw); + if (err) + return err; + + if (checksum_val) { + u16 tmp_checksum; + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + err = ixgbe_read_sr_word_aci(hw, E610_SR_SW_CHECKSUM_WORD, + &tmp_checksum); + ixgbe_release_nvm(hw); + + if (!err) + *checksum_val = tmp_checksum; + } + + return err; +} diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 4a4f969b2100..412ddd123cd1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -59,5 +59,17 @@ int ixgbe_enter_lplu_e610(struct ixgbe_hw *hw); int ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, struct ixgbe_aci_cmd_get_link_topo *cmd, u8 *node_part_number, u16 *node_handle); +int ixgbe_acquire_nvm(struct ixgbe_hw *hw, + enum ixgbe_aci_res_access_type access); +void ixgbe_release_nvm(struct ixgbe_hw *hw); +int ixgbe_aci_read_nvm(struct ixgbe_hw *hw, u16 module_typeid, u32 offset, + u16 length, void *data, bool last_command, + bool read_shadow_ram); +int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw); +int ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data); +int ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, + u8 *data, bool read_shadow_ram); +int ixgbe_read_ee_aci_e610(struct ixgbe_hw *hw, u16 offset, u16 *data); +int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val); #endif /* _IXGBE_E610_H_ */ From e5b132b4f4d97a4737d152df0f97906e542be7ee Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:47 +0100 Subject: [PATCH 0562/1386] ixgbe: Add support for EEPROM dump in E610 device Add low level support for EEPROM dump for the specified 
network device. Co-developed-by: Stefan Wegrzyn Signed-off-by: Stefan Wegrzyn Reviewed-by: Przemek Kitszel Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 95 +++++++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 5 + .../ethernet/intel/ixgbe/ixgbe_type_e610.h | 7 ++ 3 files changed, 107 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index a35e28d99269..6bf3562b3ce2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -2073,6 +2073,38 @@ int ixgbe_enter_lplu_e610(struct ixgbe_hw *hw) return ixgbe_aci_set_phy_cfg(hw, &phy_cfg); } +/** + * ixgbe_init_eeprom_params_e610 - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initialize the EEPROM parameters ixgbe_eeprom_info within the ixgbe_hw + * struct in order to set up EEPROM access. + * + * Return: the operation exit code. + */ +int ixgbe_init_eeprom_params_e610(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 gens_stat; + u8 sr_size; + + if (eeprom->type != ixgbe_eeprom_uninitialized) + return 0; + + eeprom->type = ixgbe_flash; + + gens_stat = IXGBE_READ_REG(hw, GLNVM_GENS); + sr_size = FIELD_GET(GLNVM_GENS_SR_SIZE_M, gens_stat); + + /* Switching to words (sr_size contains power of 2). 
*/ + eeprom->word_size = BIT(sr_size) * IXGBE_SR_WORDS_IN_1KB; + + hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", eeprom->type, + eeprom->word_size); + + return 0; +} + /** * ixgbe_aci_get_netlist_node - get a node handle * @hw: pointer to the hw struct @@ -2319,6 +2351,36 @@ int ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, return err; } +/** + * ixgbe_read_sr_buf_aci - Read Shadow RAM buffer via ACI + * @hw: pointer to the HW structure + * @offset: offset of the Shadow RAM words to read (0x000000 - 0x001FFF) + * @words: (in) number of words to read; (out) number of words actually read + * @data: words read from the Shadow RAM + * + * Read 16 bit words (data buf) from the Shadow RAM. The NVM ownership is not + * acquired here (ixgbe_read_ee_aci_buffer_e610() acquires it before calling). + * + * Return: the operation exit code. + */ +int ixgbe_read_sr_buf_aci(struct ixgbe_hw *hw, u16 offset, u16 *words, + u16 *data) +{ + u32 bytes = *words * 2; + int err; + + err = ixgbe_read_flat_nvm(hw, offset * 2, &bytes, (u8 *)data, true); + if (err) + return err; + + *words = bytes / 2; + + for (int i = 0; i < *words; i++) + data[i] = le16_to_cpu(((__le16 *)data)[i]); + + return 0; +} + /** * ixgbe_read_ee_aci_e610 - Read EEPROM word using the admin command. * @hw: pointer to hardware structure @@ -2352,6 +2414,39 @@ int ixgbe_read_ee_aci_e610(struct ixgbe_hw *hw, u16 offset, u16 *data) return err; } +/** + * ixgbe_read_ee_aci_buffer_e610 - Read EEPROM words via ACI + * @hw: pointer to hardware structure + * @offset: offset of words in the EEPROM to read + * @words: number of words to read + * @data: words read from the EEPROM + * + * Read 16 bit words from the EEPROM via the ACI. Initialize the EEPROM params + * prior to the read. Acquire/release the NVM ownership. + * + * Return: the operation exit code.
+ */ +int ixgbe_read_ee_aci_buffer_e610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + int err; + + if (hw->eeprom.type == ixgbe_eeprom_uninitialized) { + err = hw->eeprom.ops.init_params(hw); + if (err) + return err; + } + + err = ixgbe_acquire_nvm(hw, IXGBE_RES_READ); + if (err) + return err; + + err = ixgbe_read_sr_buf_aci(hw, offset, &words, data); + ixgbe_release_nvm(hw); + + return err; +} + /** * ixgbe_validate_eeprom_checksum_e610 - Validate EEPROM checksum * @hw: pointer to hardware structure diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 412ddd123cd1..9cfcfeec6e0b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -56,6 +56,7 @@ int ixgbe_identify_module_e610(struct ixgbe_hw *hw); int ixgbe_setup_phy_link_e610(struct ixgbe_hw *hw); int ixgbe_set_phy_power_e610(struct ixgbe_hw *hw, bool on); int ixgbe_enter_lplu_e610(struct ixgbe_hw *hw); +int ixgbe_init_eeprom_params_e610(struct ixgbe_hw *hw); int ixgbe_aci_get_netlist_node(struct ixgbe_hw *hw, struct ixgbe_aci_cmd_get_link_topo *cmd, u8 *node_part_number, u16 *node_handle); @@ -69,7 +70,11 @@ int ixgbe_nvm_validate_checksum(struct ixgbe_hw *hw); int ixgbe_read_sr_word_aci(struct ixgbe_hw *hw, u16 offset, u16 *data); int ixgbe_read_flat_nvm(struct ixgbe_hw *hw, u32 offset, u32 *length, u8 *data, bool read_shadow_ram); +int ixgbe_read_sr_buf_aci(struct ixgbe_hw *hw, u16 offset, u16 *words, + u16 *data); int ixgbe_read_ee_aci_e610(struct ixgbe_hw *hw, u16 offset, u16 *data); +int ixgbe_read_ee_aci_buffer_e610(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val); #endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index ecc3fc8c8d52..8d06ade3c7cd 100644 --- 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -12,11 +12,18 @@ /* Checksum and Shadow RAM pointers */ #define E610_SR_SW_CHECKSUM_WORD 0x3F +/* Shadow RAM related */ +#define IXGBE_SR_WORDS_IN_1KB 512 + /* Firmware Status Register (GL_FWSTS) */ #define GL_FWSTS 0x00083048 /* Reset Source: POR */ #define GL_FWSTS_EP_PF0 BIT(24) #define GL_FWSTS_EP_PF1 BIT(25) +/* Global NVM General Status Register */ +#define GLNVM_GENS 0x000B6100 /* Reset Source: POR */ +#define GLNVM_GENS_SR_SIZE_M GENMASK(7, 5) + /* Flash Access Register */ #define IXGBE_GLNVM_FLA 0x000B6108 /* Reset Source: POR */ #define IXGBE_GLNVM_FLA_LOCKED_S 6 From a0834bd521eaf1f2014041a8ad5a0cb233ac4fda Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:48 +0100 Subject: [PATCH 0563/1386] ixgbe: Add ixgbe_x540 multiple header inclusion protection Required to adopt x540 specific functions by E610 device. Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h index b69a680d3ab5..6ed360c5b605 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.h @@ -1,5 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ + +#ifndef _IXGBE_X540_H_ +#define _IXGBE_X540_H_ #include "ixgbe_type.h" @@ -17,3 +20,5 @@ int ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask); void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u32 mask); void ixgbe_init_swfw_sync_X540(struct ixgbe_hw *hw); int ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw); + +#endif /* _IXGBE_X540_H_ */ From 34b41577077198953e156f5e4bf8cdf734485e1f Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:49 +0100 Subject: [PATCH 0564/1386] ixgbe: Clean up the E610 link management related code Required for enabling the link management in E610 device. Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 17 +++++++++++------ drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 12 ++++++------ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 13b777d702a2..2656f2617a8f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -237,6 +237,9 @@ static int ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter) * bandwidth details should be gathered from the parent bus instead of from the * device. Used to ensure that various locations all have the correct device ID * checks. + * + * Return: true if information should be collected from the parent bus, false + * otherwise */ static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw) { @@ -5533,7 +5536,9 @@ static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter) * ixgbe_non_sfp_link_config - set up non-SFP+ link * @hw: pointer to private hardware struct * - * Returns 0 on success, negative on failure + * Configure non-SFP link. 
+ * + * Return: 0 on success, negative on failure **/ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { @@ -7222,11 +7227,11 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) for (i = 0; i < 16; i++) { hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); - if ((hw->mac.type == ixgbe_mac_82599EB) || - (hw->mac.type == ixgbe_mac_X540) || - (hw->mac.type == ixgbe_mac_X550) || - (hw->mac.type == ixgbe_mac_X550EM_x) || - (hw->mac.type == ixgbe_mac_x550em_a)) { + if (hw->mac.type == ixgbe_mac_82599EB || + hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_X550EM_x || + hw->mac.type == ixgbe_mac_x550em_a) { hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */ hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index d9a8cf018d3b..1de05443d3a2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3505,13 +3505,13 @@ mac_reset_top: return status; } -/** ixgbe_set_ethertype_anti_spoofing_X550 - Enable/Disable Ethertype +/** ixgbe_set_ethertype_anti_spoofing_x550 - Enable/Disable Ethertype * anti-spoofing * @hw: pointer to hardware structure * @enable: enable or disable switch for Ethertype anti-spoofing * @vf: Virtual Function pool - VF Pool to set for Ethertype anti-spoofing **/ -static void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw, +static void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, bool enable, int vf) { int vf_target_reg = vf >> 3; @@ -3527,12 +3527,12 @@ static void ixgbe_set_ethertype_anti_spoofing_X550(struct ixgbe_hw *hw, IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); } -/** ixgbe_set_source_address_pruning_X550 - Enable/Disbale src address pruning +/** 
ixgbe_set_source_address_pruning_x550 - Enable/Disable src address pruning * @hw: pointer to hardware structure * @enable: enable or disable source address pruning * @pool: Rx pool to set source address pruning for **/ -static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw, +static void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, bool enable, unsigned int pool) { @@ -3831,9 +3831,9 @@ static int ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, .set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing, \ .set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing, \ .set_source_address_pruning = \ - &ixgbe_set_source_address_pruning_X550, \ + &ixgbe_set_source_address_pruning_x550, \ .set_ethertype_anti_spoofing = \ - &ixgbe_set_ethertype_anti_spoofing_X550, \ + &ixgbe_set_ethertype_anti_spoofing_x550, \ .disable_rx_buff = &ixgbe_disable_rx_buff_generic, \ .enable_rx_buff = &ixgbe_enable_rx_buff_generic, \ .get_thermal_sensor_data = NULL, \ From 4600cdf9f5aca3d2559d858c414e09cf64370da1 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Thu, 5 Dec 2024 09:44:50 +0100 Subject: [PATCH 0565/1386] ixgbe: Enable link management in E610 device Add high level link management support for E610 device. Enable the following features: - driver load - bring up network interface - IP address assignment - pass traffic - show statistics (e.g. 
via ethtool) - disable network interface - driver unload Co-developed-by: Carolyn Wyborny Signed-off-by: Carolyn Wyborny Co-developed-by: Jedrzej Jagielski Signed-off-by: Jedrzej Jagielski Reviewed-by: Jan Glaza Reviewed-by: Simon Horman Tested-by: Bharath R Signed-off-by: Piotr Kwapulinski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 13 +- .../net/ethernet/intel/ixgbe/ixgbe_82599.c | 3 +- .../net/ethernet/intel/ixgbe/ixgbe_common.c | 19 +- .../net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c | 3 +- drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c | 165 +++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h | 1 + .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 6 +- drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c | 3 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 414 +++++++++++++++++- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 5 +- drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c | 12 +- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c | 21 +- drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h | 20 + 14 files changed, 659 insertions(+), 30 deletions(-) create mode 100644 drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 559b443c409f..e6a380d4929b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #ifndef _IXGBE_H_ #define _IXGBE_H_ @@ -20,6 +20,7 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_dcb.h" +#include "ixgbe_e610.h" #if IS_ENABLED(CONFIG_FCOE) #define IXGBE_FCOE #include "ixgbe_fcoe.h" @@ -173,6 +174,7 @@ enum ixgbe_tx_flags { #define VMDQ_P(p) ((p) + adapter->ring_feature[RING_F_VMDQ].offset) #define IXGBE_82599_VF_DEVICE_ID 0x10ED #define IXGBE_X540_VF_DEVICE_ID 0x1515 +#define IXGBE_E610_VF_DEVICE_ID 0x57AD #define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ { \ @@ -654,6 +656,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) #define IXGBE_FLAG2_PTP_PPS_ENABLED BIT(10) #define IXGBE_FLAG2_PHY_INTERRUPT BIT(11) +#define IXGBE_FLAG2_FW_ASYNC_EVENT BIT(12) #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) #define IXGBE_FLAG2_EEE_CAPABLE BIT(14) #define IXGBE_FLAG2_EEE_ENABLED BIT(15) @@ -661,6 +664,9 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) #define IXGBE_FLAG2_VF_IPSEC_ENABLED BIT(18) #define IXGBE_FLAG2_AUTO_DISABLE_VF BIT(19) +#define IXGBE_FLAG2_PHY_FW_LOAD_FAILED BIT(20) +#define IXGBE_FLAG2_NO_MEDIA BIT(21) +#define IXGBE_FLAG2_MOD_POWER_UNSUPPORTED BIT(22) /* Tx fast path data */ int num_tx_queues; @@ -793,6 +799,7 @@ struct ixgbe_adapter { u32 vferr_refcount; struct ixgbe_mac_addr *mac_table; struct kobject *info_kobj; + u16 lse_mask; #ifdef CONFIG_IXGBE_HWMON struct hwmon_buff *ixgbe_hwmon_buff; #endif /* CONFIG_IXGBE_HWMON */ @@ -849,6 +856,7 @@ static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: return IXGBE_MAX_RSS_INDICES_X550; default: return 0; @@ -874,6 +882,7 @@ enum ixgbe_state_t { __IXGBE_PTP_RUNNING, __IXGBE_PTP_TX_IN_PROGRESS, __IXGBE_RESET_REQUESTED, + __IXGBE_PHY_INIT_COMPLETE, }; struct ixgbe_cb { @@ -896,6 +905,7 @@ enum ixgbe_boards { board_x550em_x_fw, board_x550em_a, board_x550em_a_fw, + board_e610, }; extern const struct ixgbe_info 
ixgbe_82598_info; @@ -906,6 +916,7 @@ extern const struct ixgbe_info ixgbe_X550EM_x_info; extern const struct ixgbe_info ixgbe_x550em_x_fw_info; extern const struct ixgbe_info ixgbe_x550em_a_info; extern const struct ixgbe_info ixgbe_x550em_a_fw_info; +extern const struct ixgbe_info ixgbe_e610_info; #ifdef CONFIG_IXGBE_DCB extern const struct dcbnl_rtnl_ops ixgbe_dcbnl_ops; #endif diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c index cdaf087b4e85..964988b4d58b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include #include @@ -1615,6 +1615,7 @@ int ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: IXGBE_WRITE_REG(hw, IXGBE_FDIRSCTPM, ~fdirtcpm); break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index bfab2c0ee0aa..7beaf6ea57f9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #include #include @@ -58,6 +58,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_A_SFP: case IXGBE_DEV_ID_X550EM_A_SFP_N: + case IXGBE_DEV_ID_E610_SFP: supported = false; break; default: @@ -88,6 +89,8 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: + case IXGBE_DEV_ID_E610_10G_T: + case IXGBE_DEV_ID_E610_2_5G_T: supported = true; break; default: @@ -469,9 +472,14 @@ int ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw) } } - if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X540) { + if (hw->mac.type == ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_X540 || + hw->mac.type == ixgbe_mac_e610) { if (hw->phy.id == 0) hw->phy.ops.identify(hw); + } + + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X540) { hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, MDIO_MMD_PCS, &i); hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH, MDIO_MMD_PCS, &i); hw->phy.ops.read_reg(hw, IXGBE_LDPCECL, MDIO_MMD_PCS, &i); @@ -2922,6 +2930,10 @@ u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS; max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; break; + case ixgbe_mac_e610: + pcie_offset = IXGBE_PCIE_MSIX_E610_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; + break; default: return 1; } @@ -3370,7 +3382,8 @@ int ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, *speed = IXGBE_LINK_SPEED_1GB_FULL; break; case IXGBE_LINKS_SPEED_100_82599: - if ((hw->mac.type >= ixgbe_mac_X550) && + if ((hw->mac.type >= ixgbe_mac_X550 || + hw->mac.type == ixgbe_mac_e610) && (links_reg & IXGBE_LINKS_SPEED_NON_STD)) *speed = IXGBE_LINK_SPEED_5GB_FULL; else diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c index f2709b10c2e5..19d6b6fa8fb3 100644 --- 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include "ixgbe.h" #include @@ -154,6 +154,7 @@ static void ixgbe_dcbnl_get_perm_hw_addr(struct net_device *netdev, case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: + case ixgbe_mac_e610: for (j = 0; j < netdev->addr_len; j++, i++) perm_addr[i] = adapter->hw.mac.san_addr[j]; break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index 6bf3562b3ce2..683c668672d6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -3,8 +3,10 @@ #include "ixgbe_common.h" #include "ixgbe_e610.h" +#include "ixgbe_x550.h" #include "ixgbe_type.h" #include "ixgbe_x540.h" +#include "ixgbe_mbx.h" #include "ixgbe_phy.h" /** @@ -2491,3 +2493,166 @@ int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val) return err; } + +/** + * ixgbe_reset_hw_e610 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, and performs a reset. + * + * Return: the exit code of the operation. + */ +int ixgbe_reset_hw_e610(struct ixgbe_hw *hw) +{ + u32 swfw_mask = hw->phy.phy_semaphore_mask; + u32 ctrl, i; + int err; + + /* Call adapter stop to disable tx/rx and clear interrupts */ + err = hw->mac.ops.stop_adapter(hw); + if (err) + goto reset_hw_out; + + /* Flush pending Tx transactions. 
*/ + ixgbe_clear_tx_pending(hw); + + hw->phy.ops.init(hw); +mac_reset_top: + err = hw->mac.ops.acquire_swfw_sync(hw, swfw_mask); + if (err) + return -EBUSY; + ctrl = IXGBE_CTRL_RST; + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + struct ixgbe_adapter *adapter = container_of(hw, struct ixgbe_adapter, + hw); + + err = -EIO; + netdev_err(adapter->netdev, "Reset polling failed to complete."); + } + + /* Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + msleep(100); + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* Set the Rx packet buffer size. */ + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), GENMASK(18, 17)); + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* Maximum number of Receive Address Registers. */ +#define IXGBE_MAX_NUM_RAR 128 + + /* Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to the + * maximum number of Receive Address Registers, since we modify this + * value when programming the SAN MAC address. 
+ */ + hw->mac.num_rar_entries = IXGBE_MAX_NUM_RAR; + hw->mac.ops.init_rx_addrs(hw); + + /* Initialize bus function number */ + hw->mac.ops.set_lan_id(hw); + +reset_hw_out: + return err; +} + +static const struct ixgbe_mac_operations mac_ops_e610 = { + .init_hw = ixgbe_init_hw_generic, + .start_hw = ixgbe_start_hw_X540, + .clear_hw_cntrs = ixgbe_clear_hw_cntrs_generic, + .enable_rx_dma = ixgbe_enable_rx_dma_generic, + .get_mac_addr = ixgbe_get_mac_addr_generic, + .get_device_caps = ixgbe_get_device_caps_generic, + .stop_adapter = ixgbe_stop_adapter_generic, + .set_lan_id = ixgbe_set_lan_id_multi_port_pcie, + .set_rxpba = ixgbe_set_rxpba_generic, + .check_link = ixgbe_check_link_e610, + .blink_led_start = ixgbe_blink_led_start_X540, + .blink_led_stop = ixgbe_blink_led_stop_X540, + .set_rar = ixgbe_set_rar_generic, + .clear_rar = ixgbe_clear_rar_generic, + .set_vmdq = ixgbe_set_vmdq_generic, + .set_vmdq_san_mac = ixgbe_set_vmdq_san_mac_generic, + .clear_vmdq = ixgbe_clear_vmdq_generic, + .init_rx_addrs = ixgbe_init_rx_addrs_generic, + .update_mc_addr_list = ixgbe_update_mc_addr_list_generic, + .enable_mc = ixgbe_enable_mc_generic, + .disable_mc = ixgbe_disable_mc_generic, + .clear_vfta = ixgbe_clear_vfta_generic, + .set_vfta = ixgbe_set_vfta_generic, + .fc_enable = ixgbe_fc_enable_generic, + .set_fw_drv_ver = ixgbe_set_fw_drv_ver_x550, + .init_uta_tables = ixgbe_init_uta_tables_generic, + .set_mac_anti_spoofing = ixgbe_set_mac_anti_spoofing, + .set_vlan_anti_spoofing = ixgbe_set_vlan_anti_spoofing, + .set_source_address_pruning = + ixgbe_set_source_address_pruning_x550, + .set_ethertype_anti_spoofing = + ixgbe_set_ethertype_anti_spoofing_x550, + .disable_rx_buff = ixgbe_disable_rx_buff_generic, + .enable_rx_buff = ixgbe_enable_rx_buff_generic, + .enable_rx = ixgbe_enable_rx_generic, + .disable_rx = ixgbe_disable_rx_e610, + .led_on = ixgbe_led_on_generic, + .led_off = ixgbe_led_off_generic, + .init_led_link_act = ixgbe_init_led_link_act_generic, + .reset_hw = 
ixgbe_reset_hw_e610, + .get_media_type = ixgbe_get_media_type_e610, + .setup_link = ixgbe_setup_link_e610, + .get_link_capabilities = ixgbe_get_link_capabilities_e610, + .get_bus_info = ixgbe_get_bus_info_generic, + .acquire_swfw_sync = ixgbe_acquire_swfw_sync_X540, + .release_swfw_sync = ixgbe_release_swfw_sync_X540, + .init_swfw_sync = ixgbe_init_swfw_sync_X540, + .prot_autoc_read = prot_autoc_read_generic, + .prot_autoc_write = prot_autoc_write_generic, + .setup_fc = ixgbe_setup_fc_e610, + .fc_autoneg = ixgbe_fc_autoneg_e610, +}; + +static const struct ixgbe_phy_operations phy_ops_e610 = { + .init = ixgbe_init_phy_ops_e610, + .identify = ixgbe_identify_phy_e610, + .identify_sfp = ixgbe_identify_module_e610, + .setup_link_speed = ixgbe_setup_phy_link_speed_generic, + .setup_link = ixgbe_setup_phy_link_e610, + .enter_lplu = ixgbe_enter_lplu_e610, +}; + +static const struct ixgbe_eeprom_operations eeprom_ops_e610 = { + .read = ixgbe_read_ee_aci_e610, + .read_buffer = ixgbe_read_ee_aci_buffer_e610, + .validate_checksum = ixgbe_validate_eeprom_checksum_e610, +}; + +const struct ixgbe_info ixgbe_e610_info = { + .mac = ixgbe_mac_e610, + .get_invariants = ixgbe_get_invariants_X540, + .mac_ops = &mac_ops_e610, + .eeprom_ops = &eeprom_ops_e610, + .phy_ops = &phy_ops_e610, + .mbx_ops = &mbx_ops_generic, + .mvals = ixgbe_mvals_x550em_a, +}; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 9cfcfeec6e0b..ba8c06b73810 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -76,5 +76,6 @@ int ixgbe_read_ee_aci_e610(struct ixgbe_hw *hw, u16 offset, u16 *data); int ixgbe_read_ee_aci_buffer_e610(struct ixgbe_hw *hw, u16 offset, u16 words, u16 *data); int ixgbe_validate_eeprom_checksum_e610(struct ixgbe_hw *hw, u16 *checksum_val); +int ixgbe_reset_hw_e610(struct ixgbe_hw *hw); #endif /* _IXGBE_E610_H_ */ diff --git 
a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9482e0cca8b7..da91c582d439 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ /* ethtool support for ixgbe */ @@ -690,6 +690,7 @@ static void ixgbe_get_regs(struct net_device *netdev, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL_82599(i)); regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH_82599(i)); break; @@ -1613,6 +1614,7 @@ static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: toggle = 0x7FFFF30F; test = reg_test_82599; break; @@ -1874,6 +1876,7 @@ static int ixgbe_setup_desc_rings(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_DMATXCTL); reg_data |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_DMATXCTL, reg_data); @@ -1935,6 +1938,7 @@ static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: reg_data = IXGBE_READ_REG(hw, IXGBE_MACC); reg_data |= IXGBE_MACC_FLU; IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 16fa621ce0ff..336d47ffb95a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #include "ixgbe.h" #include "ixgbe_sriov.h" @@ -107,6 +107,7 @@ static void ixgbe_get_first_reg_idx(struct ixgbe_adapter *adapter, u8 tc, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: if (num_tcs > 4) { /* * TCs : TC0/1 TC2/3 TC4-7 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2656f2617a8f..336e08d35f97 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include #include @@ -74,6 +74,7 @@ static const struct ixgbe_info *ixgbe_info_tbl[] = { [board_x550em_x_fw] = &ixgbe_x550em_x_fw_info, [board_x550em_a] = &ixgbe_x550em_a_info, [board_x550em_a_fw] = &ixgbe_x550em_a_fw_info, + [board_e610] = &ixgbe_e610_info, }; /* ixgbe_pci_tbl - PCI Device ID Table @@ -132,6 +133,11 @@ static const struct pci_device_id ixgbe_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T), board_x550em_a_fw }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L), board_x550em_a_fw }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_BACKPLANE), board_e610}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_SFP), board_e610}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_10G_T), board_e610}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_2_5G_T), board_e610}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_SGMII), board_e610}, /* required last entry */ {0, } }; @@ -174,6 +180,8 @@ static struct workqueue_struct *ixgbe_wq; static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); static void ixgbe_watchdog_link_is_down(struct ixgbe_adapter *); +static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *); +static void ixgbe_watchdog_update_link(struct ixgbe_adapter *); static const struct net_device_ops 
ixgbe_netdev_ops; @@ -241,7 +249,7 @@ static int ixgbe_get_parent_bus_info(struct ixgbe_adapter *adapter) * Return: true if information should be collected from the parent bus, false * otherwise */ -static inline bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw) +static bool ixgbe_pcie_from_parent(struct ixgbe_hw *hw) { switch (hw->device_id) { case IXGBE_DEV_ID_82599_SFP_SF_QP: @@ -880,6 +888,7 @@ static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: if (direction == -1) { /* other causes */ msix_vector |= IXGBE_IVAR_ALLOC_VAL; @@ -919,6 +928,7 @@ void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: mask = (qmask & 0xFFFFFFFF); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); mask = (qmask >> 32); @@ -1029,7 +1039,7 @@ static u64 ixgbe_get_tx_pending(struct ixgbe_ring *ring) return ((head <= tail) ? 
tail : tail + ring->count) - head; } -static inline bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring) +static bool ixgbe_check_tx_hang(struct ixgbe_ring *tx_ring) { u32 tx_done = ixgbe_get_tx_completed(tx_ring); u32 tx_done_old = tx_ring->tx_stats.tx_done_old; @@ -2519,6 +2529,7 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: ixgbe_set_ivar(adapter, -1, 1, v_idx); break; default: @@ -2532,6 +2543,9 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) IXGBE_EIMS_MAILBOX | IXGBE_EIMS_LSC); + if (adapter->hw.mac.type == ixgbe_mac_e610) + mask &= ~IXGBE_EIMS_FW_EVENT; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); } @@ -2748,6 +2762,7 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: /* * set the WDIS bit to not clear the timer bits and cause an * immediate assertion of the interrupt @@ -2970,6 +2985,218 @@ static void ixgbe_check_lsc(struct ixgbe_adapter *adapter) } } +/** + * ixgbe_check_phy_fw_load - check if PHY FW load failed + * @adapter: pointer to adapter structure + * @link_cfg_err: bitmap from the link info structure + * + * Check if external PHY FW load failed and print an error message if it did. + */ +static void ixgbe_check_phy_fw_load(struct ixgbe_adapter *adapter, + u8 link_cfg_err) +{ + if (!(link_cfg_err & IXGBE_ACI_LINK_EXTERNAL_PHY_LOAD_FAILURE)) { + adapter->flags2 &= ~IXGBE_FLAG2_PHY_FW_LOAD_FAILED; + return; + } + + if (adapter->flags2 & IXGBE_FLAG2_PHY_FW_LOAD_FAILED) + return; + + if (link_cfg_err & IXGBE_ACI_LINK_EXTERNAL_PHY_LOAD_FAILURE) { + netdev_err(adapter->netdev, "Device failed to load the FW for the external PHY. 
Please download and install the latest NVM for your device and try again\n"); + adapter->flags2 |= IXGBE_FLAG2_PHY_FW_LOAD_FAILED; + } +} + +/** + * ixgbe_check_module_power - check module power level + * @adapter: pointer to adapter structure + * @link_cfg_err: bitmap from the link info structure + * + * Check module power level returned by a previous call to aci_get_link_info + * and print error messages if module power level is not supported. + */ +static void ixgbe_check_module_power(struct ixgbe_adapter *adapter, + u8 link_cfg_err) +{ + /* If module power level is supported, clear the flag. */ + if (!(link_cfg_err & (IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT | + IXGBE_ACI_LINK_MODULE_POWER_UNSUPPORTED))) { + adapter->flags2 &= ~IXGBE_FLAG2_MOD_POWER_UNSUPPORTED; + return; + } + + /* If IXGBE_FLAG2_MOD_POWER_UNSUPPORTED was previously set and the + * above block didn't clear this bit, there's nothing to do. + */ + if (adapter->flags2 & IXGBE_FLAG2_MOD_POWER_UNSUPPORTED) + return; + + if (link_cfg_err & IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT) { + netdev_err(adapter->netdev, "The installed module is incompatible with the device's NVM image. Cannot start link.\n"); + adapter->flags2 |= IXGBE_FLAG2_MOD_POWER_UNSUPPORTED; + } else if (link_cfg_err & IXGBE_ACI_LINK_MODULE_POWER_UNSUPPORTED) { + netdev_err(adapter->netdev, "The module's power requirements exceed the device's power supply. Cannot start link.\n"); + adapter->flags2 |= IXGBE_FLAG2_MOD_POWER_UNSUPPORTED; + } +} + +/** + * ixgbe_check_link_cfg_err - check if link configuration failed + * @adapter: pointer to adapter structure + * @link_cfg_err: bitmap from the link info structure + * + * Print if any link configuration failure happens due to the value in the + * link_cfg_err parameter in the link info structure. 
+ */ +static void ixgbe_check_link_cfg_err(struct ixgbe_adapter *adapter, + u8 link_cfg_err) +{ + ixgbe_check_module_power(adapter, link_cfg_err); + ixgbe_check_phy_fw_load(adapter, link_cfg_err); +} + +/** + * ixgbe_process_link_status_event - process the link event + * @adapter: pointer to adapter structure + * @link_up: true if the physical link is up and false if it is down + * @link_speed: current link speed received from the link event + * + * Return: 0 on success or negative value on failure. + */ +static int +ixgbe_process_link_status_event(struct ixgbe_adapter *adapter, bool link_up, + u16 link_speed) +{ + struct ixgbe_hw *hw = &adapter->hw; + int status; + + /* Update the link info structures and re-enable link events, + * don't bail on failure due to other book keeping needed. + */ + status = ixgbe_update_link_info(hw); + if (status) + e_dev_err("Failed to update link status, err %d aq_err %d\n", + status, hw->aci.last_status); + + ixgbe_check_link_cfg_err(adapter, hw->link.link_info.link_cfg_err); + + /* Check if the link state is up after updating link info, and treat + * this event as an UP event since the link is actually UP now. + */ + if (hw->link.link_info.link_info & IXGBE_ACI_LINK_UP) + link_up = true; + + /* Turn off PHY if media was removed. 
*/ + if (!(adapter->flags2 & IXGBE_FLAG2_NO_MEDIA) && + !(hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE)) + adapter->flags2 |= IXGBE_FLAG2_NO_MEDIA; + + if (link_up == adapter->link_up && + link_up == netif_carrier_ok(adapter->netdev) && + link_speed == adapter->link_speed) + return 0; + + adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE; + adapter->link_check_timeout = jiffies; + ixgbe_watchdog_update_link(adapter); + + if (link_up) + ixgbe_watchdog_link_is_up(adapter); + else + ixgbe_watchdog_link_is_down(adapter); + + return 0; +} + +/** + * ixgbe_handle_link_status_event - handle link status event via ACI + * @adapter: pointer to adapter structure + * @e: event structure containing link status info + */ +static void +ixgbe_handle_link_status_event(struct ixgbe_adapter *adapter, + struct ixgbe_aci_event *e) +{ + struct ixgbe_aci_cmd_get_link_status_data *link_data; + u16 link_speed; + bool link_up; + + link_data = (struct ixgbe_aci_cmd_get_link_status_data *)e->msg_buf; + + link_up = !!(link_data->link_info & IXGBE_ACI_LINK_UP); + link_speed = le16_to_cpu(link_data->link_speed); + + if (ixgbe_process_link_status_event(adapter, link_up, link_speed)) + e_dev_warn("Could not process link status event"); +} + +/** + * ixgbe_schedule_fw_event - schedule Firmware event + * @adapter: pointer to the adapter structure + * + * If the adapter is not in down, removing or resetting state, + * an event is scheduled. + */ +static void ixgbe_schedule_fw_event(struct ixgbe_adapter *adapter) +{ + if (!test_bit(__IXGBE_DOWN, &adapter->state) && + !test_bit(__IXGBE_REMOVING, &adapter->state) && + !test_bit(__IXGBE_RESETTING, &adapter->state)) { + adapter->flags2 |= IXGBE_FLAG2_FW_ASYNC_EVENT; + ixgbe_service_event_schedule(adapter); + } +} + +/** + * ixgbe_aci_event_cleanup - release msg_buf memory + * @event: pointer to the event holding msg_buf to be released + * + * Clean memory allocated for event's msg_buf. Implements auto memory cleanup. 
+ */ +static void ixgbe_aci_event_cleanup(struct ixgbe_aci_event *event) +{ + kfree(event->msg_buf); +} + +/** + * ixgbe_handle_fw_event - handle Firmware event + * @adapter: pointer to the adapter structure + * + * Obtain an event from the ACI and then process it according to the + * type of the event and the opcode. + */ +static void ixgbe_handle_fw_event(struct ixgbe_adapter *adapter) +{ + struct ixgbe_aci_event event __cleanup(ixgbe_aci_event_cleanup); + struct ixgbe_hw *hw = &adapter->hw; + bool pending = false; + int err; + + if (adapter->flags2 & IXGBE_FLAG2_FW_ASYNC_EVENT) + adapter->flags2 &= ~IXGBE_FLAG2_FW_ASYNC_EVENT; + event.buf_len = IXGBE_ACI_MAX_BUFFER_SIZE; + event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL); + if (!event.msg_buf) + return; + + do { + err = ixgbe_aci_get_event(hw, &event, &pending); + if (err) + break; + + switch (le16_to_cpu(event.desc.opcode)) { + case ixgbe_aci_opc_get_link_status: + ixgbe_handle_link_status_event(adapter, &event); + break; + default: + e_warn(hw, "unknown FW async event captured\n"); + break; + } + } while (pending); +} + static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, u64 qmask) { @@ -2986,6 +3213,7 @@ static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter, case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: mask = (qmask & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); @@ -3039,6 +3267,9 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues, case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: + case ixgbe_mac_e610: + mask |= IXGBE_EIMS_FW_EVENT; + fallthrough; case ixgbe_mac_x550em_a: if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_SFP || adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_SFP || @@ -3095,12 +3326,16 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) if (eicr & IXGBE_EICR_MAILBOX) ixgbe_msg_task(adapter); + if (eicr & 
IXGBE_EICR_FW_EVENT) + ixgbe_schedule_fw_event(adapter); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: if (hw->phy.type == ixgbe_phy_x550em_ext_t && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { adapter->flags2 |= IXGBE_FLAG2_PHY_INTERRUPT; @@ -3338,6 +3573,9 @@ static irqreturn_t ixgbe_intr(int irq, void *data) if (eicr & IXGBE_EICR_LSC) ixgbe_check_lsc(adapter); + if (eicr & IXGBE_EICR_FW_EVENT) + ixgbe_schedule_fw_event(adapter); + switch (hw->mac.type) { case ixgbe_mac_82599EB: ixgbe_check_sfp_event(adapter, eicr); @@ -3346,6 +3584,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: if (eicr & IXGBE_EICR_ECC) { e_info(link, "Received ECC Err, initiating reset\n"); set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); @@ -3446,6 +3685,7 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); @@ -4363,6 +4603,7 @@ static void ixgbe_setup_rdrxctl(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: if (adapter->num_vfs) rdrxctl |= IXGBE_RDRXCTL_PSP; fallthrough; @@ -4530,6 +4771,7 @@ static void ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i]; @@ -4568,6 +4810,7 @@ static void ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: for (i = 0; i 
< adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i]; @@ -5152,6 +5395,7 @@ static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: dv_id = IXGBE_DV_X540(link, tc); break; default: @@ -5212,6 +5456,7 @@ static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: dv_id = IXGBE_LOW_DV_X540(tc); break; default: @@ -5513,6 +5758,48 @@ static void ixgbe_configure(struct ixgbe_adapter *adapter) ixgbe_configure_dfwd(adapter); } +/** + * ixgbe_enable_link_status_events - enable link status events + * @adapter: pointer to the adapter structure + * @mask: event mask to be set + * + * Enables link status events by invoking ixgbe_configure_lse() + * + * Return: the exit code of the operation. + */ +static int ixgbe_enable_link_status_events(struct ixgbe_adapter *adapter, + u16 mask) +{ + int err; + + err = ixgbe_configure_lse(&adapter->hw, true, mask); + if (err) + return err; + + adapter->lse_mask = mask; + return 0; +} + +/** + * ixgbe_disable_link_status_events - disable link status events + * @adapter: pointer to the adapter structure + * + * Disables link status events by invoking ixgbe_configure_lse() + * + * Return: the exit code of the operation. 
+ */ +static int ixgbe_disable_link_status_events(struct ixgbe_adapter *adapter) +{ + int err; + + err = ixgbe_configure_lse(&adapter->hw, false, adapter->lse_mask); + if (err) + return err; + + adapter->lse_mask = 0; + return 0; +} + /** * ixgbe_sfp_link_config - set up SFP+ link * @adapter: pointer to private adapter struct @@ -5542,9 +5829,15 @@ static void ixgbe_sfp_link_config(struct ixgbe_adapter *adapter) **/ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) { - u32 speed; + struct ixgbe_adapter *adapter = container_of(hw, struct ixgbe_adapter, + hw); + u16 mask = ~((u16)(IXGBE_ACI_LINK_EVENT_UPDOWN | + IXGBE_ACI_LINK_EVENT_MEDIA_NA | + IXGBE_ACI_LINK_EVENT_MODULE_QUAL_FAIL | + IXGBE_ACI_LINK_EVENT_PHY_FW_LOAD_FAIL)); bool autoneg, link_up = false; int ret = -EIO; + u32 speed; if (hw->mac.ops.check_link) ret = hw->mac.ops.check_link(hw, &speed, &link_up, false); @@ -5567,12 +5860,52 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) if (ret) return ret; - if (hw->mac.ops.setup_link) + if (hw->mac.ops.setup_link) { + if (adapter->hw.mac.type == ixgbe_mac_e610) { + ret = ixgbe_enable_link_status_events(adapter, mask); + if (ret) + return ret; + } ret = hw->mac.ops.setup_link(hw, speed, link_up); + } return ret; } +/** + * ixgbe_check_media_subtask - check for media + * @adapter: pointer to adapter structure + * + * If media is available then initialize PHY user configuration. Configure the + * PHY if the interface is up. 
+ */ +static void ixgbe_check_media_subtask(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + /* No need to check for media if it's already present */ + if (!(adapter->flags2 & IXGBE_FLAG2_NO_MEDIA)) + return; + + /* Refresh link info and check if media is present */ + if (ixgbe_update_link_info(hw)) + return; + + ixgbe_check_link_cfg_err(adapter, hw->link.link_info.link_cfg_err); + + if (hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE) { + /* PHY settings are reset on media insertion, reconfigure + * PHY to preserve settings. + */ + if (!(ixgbe_non_sfp_link_config(&adapter->hw))) + adapter->flags2 &= ~IXGBE_FLAG2_NO_MEDIA; + + /* A Link Status Event will be generated; the event handler + * will complete bringing the interface up + */ + } +} + /** * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset * @adapter: board private structure @@ -5636,6 +5969,7 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: default: IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); @@ -5986,6 +6320,7 @@ dma_engine_disable: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) & ~IXGBE_DMATXCTL_TE)); @@ -6230,6 +6565,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) ixgbe_clean_all_tx_rings(adapter); ixgbe_clean_all_rx_rings(adapter); + if (adapter->hw.mac.type == ixgbe_mac_e610) + ixgbe_disable_link_status_events(adapter); } /** @@ -6285,6 +6622,7 @@ static void ixgbe_init_dcb(struct ixgbe_adapter *adapter) break; case ixgbe_mac_X540: case ixgbe_mac_X550: + case ixgbe_mac_e610: adapter->dcb_cfg.num_tcs.pg_tcs = X540_TRAFFIC_CLASS; adapter->dcb_cfg.num_tcs.pfc_tcs = X540_TRAFFIC_CLASS; break; @@ -6348,6 +6686,8 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, 
hw->subsystem_vendor_id = pdev->subsystem_vendor; hw->subsystem_device_id = pdev->subsystem_device; + hw->mac.max_link_up_time = IXGBE_LINK_UP_TIME; + /* get_invariants needs the device IDs */ ii->get_invariants(hw); @@ -6915,6 +7255,19 @@ int ixgbe_open(struct net_device *netdev) ixgbe_up_complete(adapter); udp_tunnel_nic_reset_ntf(netdev); + if (adapter->hw.mac.type == ixgbe_mac_e610) { + int err = ixgbe_update_link_info(&adapter->hw); + + if (err) + e_dev_err("Failed to update link info, err %d.\n", err); + + ixgbe_check_link_cfg_err(adapter, + adapter->hw.link.link_info.link_cfg_err); + + err = ixgbe_non_sfp_link_config(&adapter->hw); + if (err) + e_dev_err("Link setup failed, err %d.\n", err); + } return 0; @@ -7068,6 +7421,7 @@ static int __ixgbe_shutdown(struct pci_dev *pdev, bool *enable_wake) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: pci_wake_from_d3(pdev, !!wufc); break; default: @@ -7215,6 +7569,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: hwstats->pxonrxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); break; @@ -7231,7 +7586,8 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) hw->mac.type == ixgbe_mac_X540 || hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_X550EM_x || - hw->mac.type == ixgbe_mac_x550em_a) { + hw->mac.type == ixgbe_mac_x550em_a || + hw->mac.type == ixgbe_mac_e610) { hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */ hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); @@ -7257,6 +7613,7 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: /* OS2BMC stats are X540 and later */ hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC); hwstats->o2bspc += IXGBE_READ_REG(hw, 
IXGBE_O2BSPC); @@ -7557,6 +7914,7 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: case ixgbe_mac_82599EB: { u32 mflcn = IXGBE_READ_REG(hw, IXGBE_MFLCN); u32 fccfg = IXGBE_READ_REG(hw, IXGBE_FCCFG); @@ -8058,6 +8416,11 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_service_event_complete(adapter); return; } + if (adapter->hw.mac.type == ixgbe_mac_e610) { + if (adapter->flags2 & IXGBE_FLAG2_FW_ASYNC_EVENT) + ixgbe_handle_fw_event(adapter); + ixgbe_check_media_subtask(adapter); + } ixgbe_reset_subtask(adapter); ixgbe_phy_interrupt_subtask(adapter); ixgbe_sfp_detection_subtask(adapter); @@ -10776,6 +11139,24 @@ bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id, return false; } +/** + * ixgbe_set_fw_version_e610 - Set FW version specifically on E610 adapters + * @adapter: the adapter private structure + * + * This function is used by probe and ethtool to determine the FW version to + * format to display. The FW version is taken from the EEPROM/NVM. 
+ * + */ +static void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter) +{ + struct ixgbe_orom_info *orom = &adapter->hw.flash.orom; + struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm; + + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "%x.%02x 0x%x %d.%d.%d", nvm->major, nvm->minor, + nvm->eetrack, orom->major, orom->build, orom->patch); +} + /** * ixgbe_set_fw_version - Set FW version * @adapter: the adapter private structure @@ -10788,6 +11169,11 @@ static void ixgbe_set_fw_version(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_nvm_version nvm_ver; + if (adapter->hw.mac.type == ixgbe_mac_e610) { + ixgbe_set_fw_version_e610(adapter); + return; + } + ixgbe_get_oem_prod_version(hw, &nvm_ver); if (nvm_ver.oem_valid) { snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), @@ -10874,6 +11260,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) #else indices = IXGBE_MAX_RSS_INDICES; #endif + } else if (ii->mac == ixgbe_mac_e610) { + indices = IXGBE_MAX_RSS_INDICES_X550; } netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); @@ -10951,6 +11339,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: + case ixgbe_mac_e610: netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550; break; case ixgbe_mac_x550em_a: @@ -10971,6 +11360,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); break; default: @@ -11142,6 +11532,8 @@ skip_sriov: ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); + if (hw->mac.type == ixgbe_mac_e610) + mutex_init(&hw->aci.lock); timer_setup(&adapter->service_timer, ixgbe_service_timer, 0); if (ixgbe_removed(hw->hw_addr)) { @@ -11287,6 +11679,8 @@ 
err_netdev: err_register: ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); + if (hw->mac.type == ixgbe_mac_e610) + mutex_destroy(&adapter->hw.aci.lock); err_sw_init: ixgbe_disable_sriov(adapter); adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; @@ -11333,6 +11727,11 @@ static void ixgbe_remove(struct pci_dev *pdev) set_bit(__IXGBE_REMOVING, &adapter->state); cancel_work_sync(&adapter->service_task); + if (adapter->hw.mac.type == ixgbe_mac_e610) { + ixgbe_disable_link_status_events(adapter); + mutex_destroy(&adapter->hw.aci.lock); + } + if (adapter->mii_bus) mdiobus_unregister(adapter->mii_bus); @@ -11464,6 +11863,9 @@ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev, case ixgbe_mac_x550em_a: device_id = IXGBE_DEV_ID_X550EM_A_VF; break; + case ixgbe_mac_e610: + device_id = IXGBE_DEV_ID_E610_VF; + break; default: device_id = 0; break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c index d67d77e5dacc..788b5af07c70 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #include #include @@ -283,6 +283,7 @@ static int ixgbe_check_for_rst_pf(struct ixgbe_hw *hw, u16 vf_number) case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + case ixgbe_mac_e610: vflre = IXGBE_READ_REG(hw, IXGBE_VFLREC(reg_offset)); break; default: @@ -407,6 +408,7 @@ void ixgbe_init_mbx_params_pf(struct ixgbe_hw *hw) hw->mac.type != ixgbe_mac_X550 && hw->mac.type != ixgbe_mac_X550EM_x && hw->mac.type != ixgbe_mac_x550em_a && + hw->mac.type != ixgbe_mac_e610 && hw->mac.type != ixgbe_mac_X540) return; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index 07eaa3c3f4d3..0a03a8bb5f88 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include #include @@ -1117,7 +1117,7 @@ int ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, MDIO_MMD_AN, &autoneg_reg); - if (hw->mac.type == ixgbe_mac_X550) { + if (hw->mac.type == ixgbe_mac_X550 || hw->mac.type == ixgbe_mac_e610) { /* Set or unset auto-negotiation 5G advertisement */ autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE; if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) && @@ -1233,6 +1233,7 @@ static int ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw) switch (hw->mac.type) { case ixgbe_mac_X550: + case ixgbe_mac_e610: hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL; hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; break; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index 81e1df83f136..1fc821fb351a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 
- 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include #include @@ -66,7 +66,9 @@ int ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed, * Resets the hardware by resetting the transmit and receive units, masks * and clears all interrupts, perform a PHY reset, and perform a link (MAC) * reset. - **/ + * + * Return: 0 on success or negative value on failure + */ int ixgbe_reset_hw_X540(struct ixgbe_hw *hw) { u32 swfw_mask = hw->phy.phy_semaphore_mask; @@ -133,10 +135,14 @@ mac_reset_top: hw->mac.num_rar_entries = IXGBE_X540_MAX_TX_QUEUES; hw->mac.ops.init_rx_addrs(hw); + /* The following is not supported by E610. */ + if (hw->mac.type == ixgbe_mac_e610) + return status; + /* Store the permanent SAN mac address */ hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); - /* Add the SAN MAC address to the RAR only if it's a valid address */ + /* Add the SAN MAC address to RAR if it's a valid address */ if (is_valid_ether_addr(hw->mac.san_addr)) { /* Save the SAN MAC RAR index */ hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 1de05443d3a2..277ceaf8a793 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include "ixgbe_x540.h" +#include "ixgbe_x550.h" #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_mbx.h" @@ -2770,9 +2771,9 @@ static int ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) * semaphore, -EIO when command fails or -ENIVAL when incorrect * params passed. 
**/ -static int ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, - u8 build, u8 sub, u16 len, - const char *driver_ver) +int ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub, u16 len, + const char *driver_ver) { struct ixgbe_hic_drv_info2 fw_cmd; int ret_val; @@ -3511,8 +3512,8 @@ mac_reset_top: * @enable: enable or disable switch for Ethertype anti-spoofing * @vf: Virtual Function pool - VF Pool to set for Ethertype anti-spoofing **/ -static void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, - bool enable, int vf) +void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, + bool enable, int vf) { int vf_target_reg = vf >> 3; int vf_target_shift = vf % 8 + IXGBE_SPOOF_ETHERTYPEAS_SHIFT; @@ -3532,9 +3533,9 @@ static void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, * @enable: enable or disable source address pruning * @pool: Rx pool to set source address pruning for **/ -static void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, - bool enable, - unsigned int pool) +void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, + bool enable, + unsigned int pool) { u64 pfflp; @@ -4047,7 +4048,7 @@ static const u32 ixgbe_mvals_X550EM_x[IXGBE_MVALS_IDX_LIMIT] = { IXGBE_MVALS_INIT(X550EM_x) }; -static const u32 ixgbe_mvals_x550em_a[IXGBE_MVALS_IDX_LIMIT] = { +const u32 ixgbe_mvals_x550em_a[IXGBE_MVALS_IDX_LIMIT] = { IXGBE_MVALS_INIT(X550EM_a) }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h new file mode 100644 index 000000000000..3e4092f8da3e --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2024 Intel Corporation. 
*/ + +#ifndef _IXGBE_X550_H_ +#define _IXGBE_X550_H_ + +#include "ixgbe_type.h" + +extern const u32 ixgbe_mvals_x550em_a[IXGBE_MVALS_IDX_LIMIT]; + +int ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub, u16 len, + const char *driver_ver); +void ixgbe_set_source_address_pruning_x550(struct ixgbe_hw *hw, + bool enable, + unsigned int pool); +void ixgbe_set_ethertype_anti_spoofing_x550(struct ixgbe_hw *hw, + bool enable, int vf); + +#endif /* _IXGBE_X550_H_ */ From 208fff3f567e2a3c3e7e4788845e90245c3891b4 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Wed, 18 Dec 2024 14:12:37 +0100 Subject: [PATCH 0566/1386] PCI: Add PCI_VDEVICE_SUB helper macro PCI_VDEVICE_SUB generates the pci_device_id struct layout for the specific PCI device/subdevice. Private data may follow the output. Reviewed-by: Przemek Kitszel Signed-off-by: Piotr Kwapulinski Acked-by: Bjorn Helgaas Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- include/linux/pci.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/pci.h b/include/linux/pci.h index db9b47ce3eef..414ee5fff66b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1046,6 +1046,20 @@ struct pci_driver { .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0 +/** + * PCI_VDEVICE_SUB - describe a specific PCI device/subdevice in a short form + * @vend: the vendor name + * @dev: the 16 bit PCI Device ID + * @subvend: the 16 bit PCI Subvendor ID + * @subdev: the 16 bit PCI Subdevice ID + * + * Generate the pci_device_id struct layout for the specific PCI + * device/subdevice. Private data may follow the output. 
+ */ +#define PCI_VDEVICE_SUB(vend, dev, subvend, subdev) \ + .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ + .subvendor = (subvend), .subdevice = (subdev), 0, 0 + /** * PCI_DEVICE_DATA - macro used to describe a specific PCI device in very short form * @vend: the vendor name (without PCI_VENDOR_ID_ prefix) From 4c44b450c69b676955c2790dcf467c1f969d80f1 Mon Sep 17 00:00:00 2001 From: Piotr Kwapulinski Date: Wed, 18 Dec 2024 14:12:38 +0100 Subject: [PATCH 0567/1386] ixgbevf: Add support for Intel(R) E610 device Add support for Intel(R) E610 Series of network devices. The E610 is based on X550 but adds firmware-managed link, enhanced security capabilities and support for updated server manageability. Reviewed-by: Przemek Kitszel Signed-off-by: Piotr Kwapulinski Reviewed-by: Simon Horman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ixgbevf/defines.h | 5 ++++- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 6 +++++- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 12 ++++++++++-- drivers/net/ethernet/intel/ixgbevf/vf.c | 12 +++++++++++- drivers/net/ethernet/intel/ixgbevf/vf.h | 4 +++- 5 files changed, 33 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h index 5f08779c0e4e..a9bc96f6399d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/defines.h +++ b/drivers/net/ethernet/intel/ixgbevf/defines.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. 
*/ #ifndef _IXGBEVF_DEFINES_H_ #define _IXGBEVF_DEFINES_H_ @@ -16,6 +16,9 @@ #define IXGBE_DEV_ID_X550_VF_HV 0x1564 #define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 +#define IXGBE_DEV_ID_E610_VF 0x57AD +#define IXGBE_SUBDEV_ID_E610_VF_HV 0x00FF + #define IXGBE_VF_IRQ_CLEAR_MASK 7 #define IXGBE_VF_MAX_TX_QUEUES 8 #define IXGBE_VF_MAX_RX_QUEUES 8 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 130cb868774c..9b37f354d78c 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #ifndef _IXGBEVF_H_ #define _IXGBEVF_H_ @@ -418,6 +418,8 @@ enum ixgbevf_boards { board_X550EM_x_vf, board_X550EM_x_vf_hv, board_x550em_a_vf, + board_e610_vf, + board_e610_vf_hv, }; enum ixgbevf_xcast_modes { @@ -434,11 +436,13 @@ extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_info; extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops; extern const struct ixgbe_mbx_operations ixgbevf_mbx_ops_legacy; extern const struct ixgbevf_info ixgbevf_x550em_a_vf_info; +extern const struct ixgbevf_info ixgbevf_e610_vf_info; extern const struct ixgbevf_info ixgbevf_82599_vf_hv_info; extern const struct ixgbevf_info ixgbevf_X540_vf_hv_info; extern const struct ixgbevf_info ixgbevf_X550_vf_hv_info; extern const struct ixgbevf_info ixgbevf_X550EM_x_vf_hv_info; +extern const struct ixgbevf_info ixgbevf_e610_vf_hv_info; extern const struct ixgbe_mbx_operations ixgbevf_hv_mbx_ops; /* needed by ethtool.c */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 149911e3002a..2829bac9af94 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 
2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ /****************************************************************************** Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code @@ -39,7 +39,7 @@ static const char ixgbevf_driver_string[] = "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; static char ixgbevf_copyright[] = - "Copyright (c) 2009 - 2018 Intel Corporation."; + "Copyright (c) 2009 - 2024 Intel Corporation."; static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_82599_vf] = &ixgbevf_82599_vf_info, @@ -51,6 +51,8 @@ static const struct ixgbevf_info *ixgbevf_info_tbl[] = { [board_X550EM_x_vf] = &ixgbevf_X550EM_x_vf_info, [board_X550EM_x_vf_hv] = &ixgbevf_X550EM_x_vf_hv_info, [board_x550em_a_vf] = &ixgbevf_x550em_a_vf_info, + [board_e610_vf] = &ixgbevf_e610_vf_info, + [board_e610_vf_hv] = &ixgbevf_e610_vf_hv_info, }; /* ixgbevf_pci_tbl - PCI Device ID Table @@ -71,6 +73,9 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF), board_X550EM_x_vf }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV), board_X550EM_x_vf_hv}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_VF), board_x550em_a_vf }, + {PCI_VDEVICE_SUB(INTEL, IXGBE_DEV_ID_E610_VF, PCI_ANY_ID, + IXGBE_SUBDEV_ID_E610_VF_HV), board_e610_vf_hv}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_E610_VF), board_e610_vf}, /* required last entry */ {0, } }; @@ -4693,6 +4698,9 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) case ixgbe_mac_X540_vf: dev_info(&pdev->dev, "Intel(R) X540 Virtual Function\n"); break; + case ixgbe_mac_e610_vf: + dev_info(&pdev->dev, "Intel(R) E610 Virtual Function\n"); + break; case ixgbe_mac_82599_vf: default: dev_info(&pdev->dev, "Intel(R) 82599 Virtual Function\n"); diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 1641d00d8ed3..da7a72ecce7a 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c 
+++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #include "vf.h" #include "ixgbevf.h" @@ -1076,3 +1076,13 @@ const struct ixgbevf_info ixgbevf_x550em_a_vf_info = { .mac = ixgbe_mac_x550em_a_vf, .mac_ops = &ixgbevf_mac_ops, }; + +const struct ixgbevf_info ixgbevf_e610_vf_info = { + .mac = ixgbe_mac_e610_vf, + .mac_ops = &ixgbevf_mac_ops, +}; + +const struct ixgbevf_info ixgbevf_e610_vf_hv_info = { + .mac = ixgbe_mac_e610_vf, + .mac_ops = &ixgbevf_hv_mac_ops, +}; diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index b4eef5b6c172..2d791bc26ae4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright(c) 1999 - 2018 Intel Corporation. */ +/* Copyright(c) 1999 - 2024 Intel Corporation. */ #ifndef __IXGBE_VF_H__ #define __IXGBE_VF_H__ @@ -54,6 +54,8 @@ enum ixgbe_mac_type { ixgbe_mac_X550_vf, ixgbe_mac_X550EM_x_vf, ixgbe_mac_x550em_a_vf, + ixgbe_mac_e610, + ixgbe_mac_e610_vf, ixgbe_num_macs }; From a574fe14ed1e496deb8ee6821029ed96591021e8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 18 Dec 2024 16:33:38 +0000 Subject: [PATCH 0568/1386] net: hisilicon: hns: Remove unused hns_dsaf_roce_reset hns_dsaf_roce_reset() has been unused since 2021's commit 38d220882426 ("RDMA/hns: Remove support for HIP06") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20241218163341.40297-2-linux@treblig.org Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns/hns_dsaf_main.c | 109 ------------------ .../ethernet/hisilicon/hns/hns_dsaf_main.h | 2 - 2 files changed, 111 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index 851490346261..6b6ced37e490 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -3019,115 +3019,6 @@ static struct platform_driver g_dsaf_driver = { module_platform_driver(g_dsaf_driver); -/** - * hns_dsaf_roce_reset - reset dsaf and roce - * @dsaf_fwnode: Pointer to framework node for the dasf - * @dereset: false - request reset , true - drop reset - * return 0 - success , negative -fail - */ -int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset) -{ - struct dsaf_device *dsaf_dev; - struct platform_device *pdev; - u32 mp; - u32 sl; - u32 credit; - int i; - static const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { - {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, - {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, - {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, - {DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, - {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, - {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, - {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, - {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, - }; - static const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, - {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, 
DSAF_ROCE_SL_1}, - {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, - {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, - }; - - /* find the platform device corresponding to fwnode */ - if (is_of_node(dsaf_fwnode)) { - pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); - } else if (is_acpi_device_node(dsaf_fwnode)) { - pdev = hns_dsaf_find_platform_device(dsaf_fwnode); - } else { - pr_err("fwnode is neither OF or ACPI type\n"); - return -EINVAL; - } - - /* check if we were a success in fetching pdev */ - if (!pdev) { - pr_err("couldn't find platform device for node\n"); - return -ENODEV; - } - - /* retrieve the dsaf_device from the driver data */ - dsaf_dev = dev_get_drvdata(&pdev->dev); - if (!dsaf_dev) { - dev_err(&pdev->dev, "dsaf_dev is NULL\n"); - put_device(&pdev->dev); - return -ENODEV; - } - - /* now, make sure we are running on compatible SoC */ - if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { - dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", - dsaf_dev->ae_dev.name); - put_device(&pdev->dev); - return -ENODEV; - } - - /* do reset or de-reset according to the flag */ - if (!dereset) { - /* reset rocee-channels in dsaf and rocee */ - dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, - false); - dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, false); - } else { - /* configure dsaf tx roce correspond to port map and sl map */ - mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG); - for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) - dsaf_set_field(mp, 7 << i * 3, i * 3, - port_map[i][DSAF_ROCE_6PORT_MODE]); - dsaf_set_field(mp, 3 << i * 3, i * 3, 0); - dsaf_write_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG, mp); - - sl = dsaf_read_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG); - for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) - dsaf_set_field(sl, 3 << i * 2, i * 2, - sl_map[i][DSAF_ROCE_6PORT_MODE]); - dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl); - - /* de-reset rocee-channels in dsaf and rocee */ - dsaf_dev->misc_op->hns_dsaf_srst_chns(dsaf_dev, 
DSAF_CHNS_MASK, - true); - msleep(SRST_TIME_INTERVAL); - dsaf_dev->misc_op->hns_dsaf_roce_srst(dsaf_dev, true); - - /* enable dsaf channel rocee credit */ - credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG); - dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0); - dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); - - dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1); - dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); - } - - put_device(&pdev->dev); - - return 0; -} -EXPORT_SYMBOL(hns_dsaf_roce_reset); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Huawei Tech. Co., Ltd."); MODULE_DESCRIPTION("HNS DSAF driver"); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 0eb03dff1a8b..c90f41c75500 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -463,6 +463,4 @@ int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id, u8 port_num); int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port); -int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); - #endif /* __HNS_DSAF_MAIN_H__ */ From 0265e9edf2100735304907e9979a9264c4dc7b5e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 18 Dec 2024 16:33:39 +0000 Subject: [PATCH 0569/1386] net: hisilicon: hns: Remove unused hns_rcb_start hns_rcb_start() has been unused since 2016's commit 454784d85de3 ("net: hns: delete redundancy ring enable operations") Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20241218163341.40297-3-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 5 ----- drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h | 1 - 2 files changed, 6 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c index 46af467aa596..635b3a95dd82 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c @@ -195,11 +195,6 @@ void hns_rcb_ring_enable_hw(struct hnae_queue *q, u32 val) dsaf_write_dev(q, RCB_RING_PREFETCH_EN_REG, !!val); } -void hns_rcb_start(struct hnae_queue *q, u32 val) -{ - hns_rcb_ring_enable_hw(q, val); -} - /** *hns_rcb_common_init_commit_hw - make rcb common init completed *@rcb_common: rcb common device diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h index 0f4cc184ef39..68f81547dfb4 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h @@ -116,7 +116,6 @@ int hns_rcb_buf_size2type(u32 buf_size); int hns_rcb_common_get_cfg(struct dsaf_device *dsaf_dev, int comm_index); void hns_rcb_common_free_cfg(struct dsaf_device *dsaf_dev, u32 comm_index); int hns_rcb_common_init_hw(struct rcb_common_cb *rcb_common); -void hns_rcb_start(struct hnae_queue *q, u32 val); int hns_rcb_get_cfg(struct rcb_common_cb *rcb_common); void hns_rcb_get_queue_mode(enum dsaf_mode dsaf_mode, u16 *max_vfn, u16 *max_q_per_vf); From 0198b459f54e5813da4f6c36c867822401abc0c8 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 18 Dec 2024 16:33:40 +0000 Subject: [PATCH 0570/1386] net: hisilicon: hns: Remove reset helpers With hns_dsaf_roce_reset() removed in a previous patch, the two helper member pointers, 'hns_dsaf_roce_srst', and 'hns_dsaf_srst_chns' are now unread. 
Remove them, and the helper functions that they were initialised to, that is hns_dsaf_srst_chns(), hns_dsaf_srst_chns_acpi(), hns_dsaf_roce_srst() and hns_dsaf_roce_srst_acpi(). Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20241218163341.40297-4-linux@treblig.org Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns/hns_dsaf_main.h | 3 - .../ethernet/hisilicon/hns/hns_dsaf_misc.c | 67 ------------------- 2 files changed, 70 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index c90f41c75500..bb8267aafc43 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -307,9 +307,6 @@ struct dsaf_misc_op { void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset); void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset); - void (*hns_dsaf_srst_chns)(struct dsaf_device *dsaf_dev, u32 msk, - bool dereset); - void (*hns_dsaf_roce_srst)(struct dsaf_device *dsaf_dev, bool dereset); phy_interface_t (*get_phy_if)(struct hns_mac_cb *mac_cb); int (*get_sfp_prsnt)(struct hns_mac_cb *mac_cb, int *sfp_prsnt); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 5df19c604d09..91391a49fcea 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -326,69 +326,6 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev, HNS_XGE_RESET_FUNC, port, dereset); } -/** - * hns_dsaf_srst_chns - reset dsaf channels - * @dsaf_dev: dsaf device struct pointer - * @msk: xbar channels mask value: - * @dereset: false - request reset , true - drop reset - * - * bit0-5 for xge0-5 - * bit6-11 for ppe0-5 - * bit12-17 for roce0-5 - * bit18-19 for com/dfx - */ -static 
void -hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) -{ - u32 reg_addr; - - if (!dereset) - reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG; - else - reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG; - - dsaf_write_sub(dsaf_dev, reg_addr, msk); -} - -/** - * hns_dsaf_srst_chns_acpi - reset dsaf channels - * @dsaf_dev: dsaf device struct pointer - * @msk: xbar channels mask value: - * @dereset: false - request reset , true - drop reset - * - * bit0-5 for xge0-5 - * bit6-11 for ppe0-5 - * bit12-17 for roce0-5 - * bit18-19 for com/dfx - */ -static void -hns_dsaf_srst_chns_acpi(struct dsaf_device *dsaf_dev, u32 msk, bool dereset) -{ - hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, - HNS_DSAF_CHN_RESET_FUNC, - msk, dereset); -} - -static void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool dereset) -{ - if (!dereset) { - dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1); - } else { - dsaf_write_sub(dsaf_dev, - DSAF_SUB_SC_ROCEE_CLK_DIS_REG, 1); - dsaf_write_sub(dsaf_dev, - DSAF_SUB_SC_ROCEE_RESET_DREQ_REG, 1); - msleep(20); - dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_CLK_EN_REG, 1); - } -} - -static void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset) -{ - hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC, - HNS_ROCE_RESET_FUNC, 0, dereset); -} - static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, bool dereset) { @@ -729,8 +666,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->ge_srst = hns_dsaf_ge_srst_by_port; misc_op->ppe_srst = hns_ppe_srst_by_port; misc_op->ppe_comm_srst = hns_ppe_com_srst; - misc_op->hns_dsaf_srst_chns = hns_dsaf_srst_chns; - misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst; misc_op->get_phy_if = hns_mac_get_phy_if; misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt; @@ -746,8 +681,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev) misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi; misc_op->ppe_srst = 
hns_ppe_srst_by_port_acpi; misc_op->ppe_comm_srst = hns_ppe_com_srst; - misc_op->hns_dsaf_srst_chns = hns_dsaf_srst_chns_acpi; - misc_op->hns_dsaf_roce_srst = hns_dsaf_roce_srst_acpi; misc_op->get_phy_if = hns_mac_get_phy_if_acpi; misc_op->get_sfp_prsnt = hns_mac_get_sfp_prsnt_acpi; From 8973ce189376e41ddd398fab0ba8dc8b14e50cd0 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 18 Dec 2024 16:33:41 +0000 Subject: [PATCH 0571/1386] net: hisilicon: hns: Remove unused enums The enums dsaf_roce_port_mode, dsaf_roce_port_num and dsaf_roce_qos_sl are unused after the removal of the reset code. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jijie Shao Link: https://patch.msgid.link/20241218163341.40297-5-linux@treblig.org Signed-off-by: Jakub Kicinski --- .../ethernet/hisilicon/hns/hns_dsaf_main.h | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index bb8267aafc43..653dfbb25d1b 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -42,29 +42,6 @@ struct hns_mac_cb; #define HNS_MAX_WAIT_CNT 10000 -enum dsaf_roce_port_mode { - DSAF_ROCE_6PORT_MODE, - DSAF_ROCE_4PORT_MODE, - DSAF_ROCE_2PORT_MODE, - DSAF_ROCE_CHAN_MODE_NUM, -}; - -enum dsaf_roce_port_num { - DSAF_ROCE_PORT_0, - DSAF_ROCE_PORT_1, - DSAF_ROCE_PORT_2, - DSAF_ROCE_PORT_3, - DSAF_ROCE_PORT_4, - DSAF_ROCE_PORT_5, -}; - -enum dsaf_roce_qos_sl { - DSAF_ROCE_SL_0, - DSAF_ROCE_SL_1, - DSAF_ROCE_SL_2, - DSAF_ROCE_SL_3, -}; - #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) #define HNS_DSAF_IS_DEBUG(dev) ((dev)->dsaf_mode == DSAF_MODE_DISABLE_SP) From 05dd04b218f42c57a14e330fd8583995f141ed6b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Dec 2024 15:03:30 +0000 Subject: [PATCH 0572/1386] inetpeer: avoid false sharing in inet_peer_xrlim_allow() Under DOS, 
inet_peer_xrlim_allow() might be called millions of times per second from different cpus. Make sure to write over peer->rate_tokens and peer->rate_last only when really needed. Note the inherent races of this function are still there; we do not care about precise ICMP rate limiting. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241219150330.3159027-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/inetpeer.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e02484f4d22b..b8b23a77ceb4 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -246,23 +246,27 @@ void inet_putpeer(struct inet_peer *p) #define XRLIM_BURST_FACTOR 6 bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) { - unsigned long now, token; + unsigned long now, token, otoken, delta; bool rc = false; if (!peer) return true; - token = peer->rate_tokens; + token = otoken = READ_ONCE(peer->rate_tokens); now = jiffies; - token += now - peer->rate_last; - peer->rate_last = now; - if (token > XRLIM_BURST_FACTOR * timeout) - token = XRLIM_BURST_FACTOR * timeout; + delta = now - READ_ONCE(peer->rate_last); + if (delta) { + WRITE_ONCE(peer->rate_last, now); + token += delta; + if (token > XRLIM_BURST_FACTOR * timeout) + token = XRLIM_BURST_FACTOR * timeout; + } if (token >= timeout) { token -= timeout; rc = true; } - peer->rate_tokens = token; + if (token != otoken) + WRITE_ONCE(peer->rate_tokens, token); return rc; } EXPORT_SYMBOL(inet_peer_xrlim_allow); From f284424dc17b57d779a03dfc9a66489a67150b30 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:56 +0100 Subject: [PATCH 0573/1386] net: bridge: Extract a helper to handle bridge_binding toggles Currently, the BROPT_VLAN_BRIDGE_BINDING bridge option is only toggled when VLAN devices are added on top of a bridge or removed from it.
Extract the toggling of the option to a function so that it could be invoked by a subsequent patch when the state of an upper VLAN device changes. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/a7455f6fe1dfa7b13126ed8a7fb33d3b611eecb8.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br_vlan.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 89f51ea4cabe..b728b71e693f 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1664,6 +1664,18 @@ static void br_vlan_set_all_vlan_dev_state(struct net_bridge_port *p) } } +static void br_vlan_toggle_bridge_binding(struct net_device *br_dev, + bool enable) +{ + struct net_bridge *br = netdev_priv(br_dev); + + if (enable) + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true); + else + br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, + br_vlan_has_upper_bind_vlan_dev(br_dev)); +} + static void br_vlan_upper_change(struct net_device *dev, struct net_device *upper_dev, bool linking) @@ -1673,13 +1685,9 @@ static void br_vlan_upper_change(struct net_device *dev, if (!br_vlan_is_bind_vlan_dev(upper_dev)) return; - if (linking) { + br_vlan_toggle_bridge_binding(dev, linking); + if (linking) br_vlan_set_vlan_dev_state(br, upper_dev); - br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, true); - } else { - br_opt_toggle(br, BROPT_VLAN_BRIDGE_BINDING, - br_vlan_has_upper_bind_vlan_dev(dev)); - } } struct br_vlan_link_state_walk_data { From 3abd45122c72d6a66a52d41a65586fdf7ab40ef7 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:57 +0100 Subject: [PATCH 0574/1386] net: bridge: Handle changes in VLAN_FLAG_BRIDGE_BINDING When bridge binding is enabled on a VLAN netdevice, its link state should track bridge ports that are members of the corresponding VLAN. This works for newly-added netdevices. 
However toggling the option does not have the effect of enabling or disabling the behavior as appropriate. In this patch, react to bridge_binding toggles on VLAN uppers. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/90a8ca8aea4d81378b29d75d9e562433e0d5c7ff.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 7 +++++++ net/bridge/br_private.h | 9 +++++++++ net/bridge/br_vlan.c | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/net/bridge/br.c b/net/bridge/br.c index 2cab878e0a39..183fcb362f9e 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -51,6 +51,13 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v } } + if (is_vlan_dev(dev)) { + struct net_device *real_dev = vlan_dev_real_dev(dev); + + if (netif_is_bridge_master(real_dev)) + br_vlan_vlan_upper_event(real_dev, dev, event); + } + /* not a port of a bridge */ p = br_port_get_rtnl(dev); if (!p) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9853cfbb9d14..29d6ec45cf41 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -1571,6 +1571,9 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, void br_vlan_port_event(struct net_bridge_port *p, unsigned long event); int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr); +void br_vlan_vlan_upper_event(struct net_device *br_dev, + struct net_device *vlan_dev, + unsigned long event); int br_vlan_rtnl_init(void); void br_vlan_rtnl_uninit(void); void br_vlan_notify(const struct net_bridge *br, @@ -1802,6 +1805,12 @@ static inline int br_vlan_bridge_event(struct net_device *dev, return 0; } +static inline void br_vlan_vlan_upper_event(struct net_device *br_dev, + struct net_device *vlan_dev, + unsigned long event) +{ +} + static inline int br_vlan_rtnl_init(void) { return 0; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 
b728b71e693f..d9a69ec9affe 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1772,6 +1772,30 @@ int br_vlan_bridge_event(struct net_device *dev, unsigned long event, void *ptr) return ret; } +void br_vlan_vlan_upper_event(struct net_device *br_dev, + struct net_device *vlan_dev, + unsigned long event) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(vlan_dev); + struct net_bridge *br = netdev_priv(br_dev); + bool bridge_binding; + + switch (event) { + case NETDEV_CHANGE: + case NETDEV_UP: + break; + default: + return; + } + + bridge_binding = vlan->flags & VLAN_FLAG_BRIDGE_BINDING; + br_vlan_toggle_bridge_binding(br_dev, bridge_binding); + if (bridge_binding) + br_vlan_set_vlan_dev_state(br, vlan_dev); + else if (!bridge_binding && netif_carrier_ok(br_dev)) + netif_carrier_on(vlan_dev); +} + /* Must be protected by RTNL. */ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event) { From 976d248bd33356eecb958cdc1b0c37622fd5d595 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:58 +0100 Subject: [PATCH 0575/1386] selftests: net: lib: Add a couple autodefer helpers Alongside the helper ip_link_set_up(), one to set the link down will be useful as well. Add a helper to determine the link state as well, ip_link_is_up(), and use it to short-circuit any changes if the state is already the desired one. Furthermore, add a helper bridge_vlan_add(). 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/856d9e01725fdba21b7f6716358f645b19131af2.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib.sh | 31 ++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 2cd5c743b2d9..0bd9a038a1f0 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -477,12 +477,33 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } +ip_link_is_up() +{ + local name=$1; shift + + local state=$(ip -j link show "$name" | + jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') + [[ $state == "UP" ]] +} + ip_link_set_up() { local name=$1; shift - ip link set dev "$name" up - defer ip link set dev "$name" down + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up + defer ip link set dev "$name" down + fi +} + +ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down + defer ip link set dev "$name" up + fi } ip_addr_add() @@ -498,3 +519,9 @@ ip_route_add() ip route add "$@" defer ip route del "$@" } + +bridge_vlan_add() +{ + bridge vlan add "$@" + defer bridge vlan del "$@" +} From dca12e9ab7603d94e47ded65080f750d6527c852 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Wed, 18 Dec 2024 18:15:59 +0100 Subject: [PATCH 0576/1386] selftests: net: Add a VLAN bridge binding selftest Add a test that exercises bridge binding. 
Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/baf7244fd1fe223a6d93e027584fa9f99dee982c.1734540770.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/Makefile | 1 + .../selftests/net/vlan_bridge_binding.sh | 256 ++++++++++++++++++ 2 files changed, 257 insertions(+) create mode 100755 tools/testing/selftests/net/vlan_bridge_binding.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f09bd96cc978..73ee88d6b043 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -96,6 +96,7 @@ TEST_PROGS += test_bridge_backup_port.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh +TEST_PROGS += vlan_bridge_binding.sh TEST_PROGS += bpf_offload.py TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e7cb8c678bde --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,256 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + ip_link_add $port type veth peer name r$port + ip_link_set_up $port + ip_link_set_up r$port + ip_link_set_master $port br + done + + bridge_vlan_add vid 11 dev br self + bridge_vlan_add vid 11 dev d1 master + + bridge_vlan_add vid 12 dev br self + bridge_vlan_add vid 12 dev d2 master + + bridge_vlan_add vid 13 dev br 
self + bridge_vlan_add vid 13 dev d1 master + bridge_vlan_add vid 13 dev d2 master + + bridge_vlan_add vid 14 dev br self + bridge_vlan_add vid 14 dev d1 master + bridge_vlan_add vid 14 dev d2 master + bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? "Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 
+ defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 + defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +dfr_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +dfr_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on dfr_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off dfr_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +trap defer_scopes_cleanup EXIT 
+setup_prepare +tests_run + +exit $EXIT_STATUS From 3272040790eb4b6cafe6c30ec05049e9599ec456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Thu, 19 Dec 2024 11:00:19 +0100 Subject: [PATCH 0577/1386] qlcnic: use const 'struct bin_attribute' callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now provides callback variants that explicitly take a const pointer. Use them so the non-const variants can be removed. Signed-off-by: Thomas Weißschuh Link: https://patch.msgid.link/20241219-sysfs-const-bin_attr-net-v2-1-93bdaece3c90@weissschuh.net Signed-off-by: Jakub Kicinski --- .../net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c | 69 +++++++++---------- 1 file changed, 34 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c index 74125188beb8..c0f20464fd1e 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c @@ -264,7 +264,7 @@ static int qlcnic_sysfs_validate_crb(struct qlcnic_adapter *adapter, } static ssize_t qlcnic_sysfs_read_crb(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -281,7 +281,7 @@ static ssize_t qlcnic_sysfs_read_crb(struct file *filp, struct kobject *kobj, } static ssize_t qlcnic_sysfs_write_crb(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -310,7 +310,7 @@ static int qlcnic_sysfs_validate_mem(struct qlcnic_adapter *adapter, } static ssize_t qlcnic_sysfs_read_mem(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t 
size) { struct device *dev = kobj_to_dev(kobj); @@ -332,7 +332,7 @@ static ssize_t qlcnic_sysfs_read_mem(struct file *filp, struct kobject *kobj, } static ssize_t qlcnic_sysfs_write_mem(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { struct device *dev = kobj_to_dev(kobj); @@ -396,7 +396,7 @@ static int validate_pm_config(struct qlcnic_adapter *adapter, static ssize_t qlcnic_sysfs_write_pm_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -446,7 +446,7 @@ static ssize_t qlcnic_sysfs_write_pm_config(struct file *filp, static ssize_t qlcnic_sysfs_read_pm_config(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -539,7 +539,7 @@ static int validate_esw_config(struct qlcnic_adapter *adapter, static ssize_t qlcnic_sysfs_write_esw_config(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -623,7 +623,7 @@ out: static ssize_t qlcnic_sysfs_read_esw_config(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -675,7 +675,7 @@ static int validate_npar_config(struct qlcnic_adapter *adapter, static ssize_t qlcnic_sysfs_write_npar_config(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -722,7 +722,7 @@ static ssize_t qlcnic_sysfs_write_npar_config(struct file *file, static ssize_t qlcnic_sysfs_read_npar_config(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -769,7 +769,7 
@@ static ssize_t qlcnic_sysfs_read_npar_config(struct file *file, static ssize_t qlcnic_sysfs_get_port_stats(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -804,7 +804,7 @@ static ssize_t qlcnic_sysfs_get_port_stats(struct file *file, static ssize_t qlcnic_sysfs_get_esw_stats(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -839,7 +839,7 @@ static ssize_t qlcnic_sysfs_get_esw_stats(struct file *file, static ssize_t qlcnic_sysfs_clear_esw_stats(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -868,7 +868,7 @@ static ssize_t qlcnic_sysfs_clear_esw_stats(struct file *file, static ssize_t qlcnic_sysfs_clear_port_stats(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -898,7 +898,7 @@ static ssize_t qlcnic_sysfs_clear_port_stats(struct file *file, static ssize_t qlcnic_sysfs_read_pci_config(struct file *file, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -938,7 +938,7 @@ static ssize_t qlcnic_sysfs_read_pci_config(struct file *file, static ssize_t qlcnic_83xx_sysfs_flash_read_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -1115,7 +1115,7 @@ static int qlcnic_83xx_sysfs_flash_write(struct qlcnic_adapter *adapter, static ssize_t qlcnic_83xx_sysfs_flash_write_handler(struct file *filp, struct kobject *kobj, - struct bin_attribute *attr, + const struct bin_attribute *attr, char *buf, loff_t offset, size_t size) { @@ -1195,64 +1195,63 @@ static const struct device_attribute 
dev_attr_beacon = { static const struct bin_attribute bin_attr_crb = { .attr = { .name = "crb", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_read_crb, - .write = qlcnic_sysfs_write_crb, + .read_new = qlcnic_sysfs_read_crb, + .write_new = qlcnic_sysfs_write_crb, }; static const struct bin_attribute bin_attr_mem = { .attr = { .name = "mem", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_read_mem, - .write = qlcnic_sysfs_write_mem, + .read_new = qlcnic_sysfs_read_mem, + .write_new = qlcnic_sysfs_write_mem, }; static const struct bin_attribute bin_attr_npar_config = { .attr = { .name = "npar_config", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_read_npar_config, - .write = qlcnic_sysfs_write_npar_config, + .read_new = qlcnic_sysfs_read_npar_config, + .write_new = qlcnic_sysfs_write_npar_config, }; static const struct bin_attribute bin_attr_pci_config = { .attr = { .name = "pci_config", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_read_pci_config, - .write = NULL, + .read_new = qlcnic_sysfs_read_pci_config, }; static const struct bin_attribute bin_attr_port_stats = { .attr = { .name = "port_stats", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_get_port_stats, - .write = qlcnic_sysfs_clear_port_stats, + .read_new = qlcnic_sysfs_get_port_stats, + .write_new = qlcnic_sysfs_clear_port_stats, }; static const struct bin_attribute bin_attr_esw_stats = { .attr = { .name = "esw_stats", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_get_esw_stats, - .write = qlcnic_sysfs_clear_esw_stats, + .read_new = qlcnic_sysfs_get_esw_stats, + .write_new = qlcnic_sysfs_clear_esw_stats, }; static const struct bin_attribute bin_attr_esw_config = { .attr = { .name = "esw_config", .mode = 0644 }, .size = 0, - .read = qlcnic_sysfs_read_esw_config, - .write = qlcnic_sysfs_write_esw_config, + .read_new = qlcnic_sysfs_read_esw_config, + .write_new = qlcnic_sysfs_write_esw_config, }; static const struct bin_attribute bin_attr_pm_config = { .attr = { .name = "pm_config", .mode 
= 0644 }, .size = 0, - .read = qlcnic_sysfs_read_pm_config, - .write = qlcnic_sysfs_write_pm_config, + .read_new = qlcnic_sysfs_read_pm_config, + .write_new = qlcnic_sysfs_write_pm_config, }; static const struct bin_attribute bin_attr_flash = { .attr = { .name = "flash", .mode = 0644 }, .size = 0, - .read = qlcnic_83xx_sysfs_flash_read_handler, - .write = qlcnic_83xx_sysfs_flash_write_handler, + .read_new = qlcnic_83xx_sysfs_flash_read_handler, + .write_new = qlcnic_83xx_sysfs_flash_write_handler, }; #ifdef CONFIG_QLCNIC_HWMON From 6ed3472173c575cd8aaed6c62eb74f7728404ee6 Mon Sep 17 00:00:00 2001 From: Tristram Ha Date: Tue, 17 Dec 2024 18:02:40 -0800 Subject: [PATCH 0578/1386] net: dsa: microchip: Do not execute PTP driver code for unsupported switches The PTP driver code only works for certain KSZ switches like KSZ9477, KSZ9567, LAN937X and their variants. This code is enabled by kernel configuration CONFIG_NET_DSA_MICROCHIP_KSZ_PTP. As the DSA driver is common to all KSZ switches, this PTP code is not appropriate for unsupported switches. The ptp_capable indication is added to the chip data structure to signal whether to execute that code.
Signed-off-by: Tristram Ha Link: https://patch.msgid.link/20241218020240.70601-1-Tristram.Ha@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/microchip/ksz_common.c | 40 +++++++++++++++++++------- drivers/net/dsa/microchip/ksz_common.h | 1 + 2 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index a8dac7ff6b81..e3512e324572 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -1339,6 +1339,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .supports_rgmii = {false, false, true}, .internal_phy = {true, true, false}, .gbit_capable = {false, false, true}, + .ptp_capable = true, .wr_table = &ksz8563_register_set, .rd_table = &ksz8563_register_set, }, @@ -1550,6 +1551,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .internal_phy = {true, true, true, true, true, false, false}, .gbit_capable = {true, true, true, true, true, true, true}, + .ptp_capable = true, .wr_table = &ksz9477_register_set, .rd_table = &ksz9477_register_set, }, @@ -1677,6 +1679,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .supports_rgmii = {false, false, true}, .internal_phy = {true, true, false}, .gbit_capable = {true, true, true}, + .ptp_capable = true, }, [KSZ8567] = { @@ -1712,6 +1715,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { true, false, false}, .gbit_capable = {false, false, false, false, false, true, true}, + .ptp_capable = true, }, [KSZ9567] = { @@ -1744,6 +1748,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .internal_phy = {true, true, true, true, true, false, false}, .gbit_capable = {true, true, true, true, true, true, true}, + .ptp_capable = true, }, [LAN9370] = { @@ -1773,6 +1778,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .supports_rmii = {false, false, false, false, true}, .supports_rgmii = {false, false, false, false, true}, .internal_phy = {true, true, true, true, false}, + 
.ptp_capable = true, }, [LAN9371] = { @@ -1802,6 +1808,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { .supports_rmii = {false, false, false, false, true, true}, .supports_rgmii = {false, false, false, false, true, true}, .internal_phy = {true, true, true, true, false, false}, + .ptp_capable = true, }, [LAN9372] = { @@ -1835,6 +1842,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { true, true, false, false}, .internal_phy = {true, true, true, true, false, false, true, true}, + .ptp_capable = true, }, [LAN9373] = { @@ -1868,6 +1876,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { true, true, false, false}, .internal_phy = {true, true, true, false, false, false, true, true}, + .ptp_capable = true, }, [LAN9374] = { @@ -1901,6 +1910,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { true, true, false, false}, .internal_phy = {true, true, true, true, false, false, true, true}, + .ptp_capable = true, }, [LAN9646] = { @@ -2809,16 +2819,21 @@ static int ksz_setup(struct dsa_switch *ds) if (ret) goto out_girq; - ret = ksz_ptp_irq_setup(ds, dp->index); - if (ret) - goto out_pirq; + if (dev->info->ptp_capable) { + ret = ksz_ptp_irq_setup(ds, dp->index); + if (ret) + goto out_pirq; + } } } - ret = ksz_ptp_clock_register(ds); - if (ret) { - dev_err(dev->dev, "Failed to register PTP clock: %d\n", ret); - goto out_ptpirq; + if (dev->info->ptp_capable) { + ret = ksz_ptp_clock_register(ds); + if (ret) { + dev_err(dev->dev, "Failed to register PTP clock: %d\n", + ret); + goto out_ptpirq; + } } ret = ksz_mdio_register(dev); @@ -2838,9 +2853,10 @@ static int ksz_setup(struct dsa_switch *ds) return 0; out_ptp_clock_unregister: - ksz_ptp_clock_unregister(ds); + if (dev->info->ptp_capable) + ksz_ptp_clock_unregister(ds); out_ptpirq: - if (dev->irq > 0) + if (dev->irq > 0 && dev->info->ptp_capable) dsa_switch_for_each_user_port(dp, dev->ds) ksz_ptp_irq_free(ds, dp->index); out_pirq: @@ -2859,11 +2875,13 @@ static void ksz_teardown(struct dsa_switch *ds) struct 
ksz_device *dev = ds->priv; struct dsa_port *dp; - ksz_ptp_clock_unregister(ds); + if (dev->info->ptp_capable) + ksz_ptp_clock_unregister(ds); if (dev->irq > 0) { dsa_switch_for_each_user_port(dp, dev->ds) { - ksz_ptp_irq_free(ds, dp->index); + if (dev->info->ptp_capable) + ksz_ptp_irq_free(ds, dp->index); ksz_irq_free(&dev->ports[dp->index].pirq); } diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index 2bc96127a447..af17a9c030d4 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -92,6 +92,7 @@ struct ksz_chip_data { bool supports_rgmii[KSZ_MAX_NUM_PORTS]; bool internal_phy[KSZ_MAX_NUM_PORTS]; bool gbit_capable[KSZ_MAX_NUM_PORTS]; + bool ptp_capable; const struct regmap_access_table *wr_table; const struct regmap_access_table *rd_table; }; From 1dbdce30f040a87f5aa6a9dbe43be398737f090f Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:44 +0100 Subject: [PATCH 0579/1386] ipv4: Define inet_sk_init_flowi4() and use it in inet_sk_rebuild_header(). IPv4 code commonly has to initialise a flowi4 structure from an IPv4 socket. This requires looking at potential IPv4 options to set the proper destination address, call flowi4_init_output() with the correct set of parameters and run the sk_classify_flow security hook. Instead of reimplementing these operations in different parts of the stack, let's define inet_sk_init_flowi4() which does all these operations. The first user is inet_sk_rebuild_header(), where inet_sk_init_flowi4() replaces ip_route_output_ports(). Unlike ip_route_output_ports(), which sets the flowi4 structure and performs the route lookup in one go, inet_sk_init_flowi4() only initialises the flow. The route lookup is then done by ip_route_output_flow(). 
Decoupling flow initialisation from route lookup makes this new interface applicable more broadly as it will allow some users to overwrite specific struct flowi4 members before the route lookup. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/fd416275262b1f518d5abfcef740ce4f4a1a6522.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/route.h | 28 ++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 14 ++------------ 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 6947a155d501..f86775be3e29 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -129,6 +130,33 @@ struct in_device; int ip_rt_init(void); void rt_cache_flush(struct net *net); void rt_flush_dev(struct net_device *dev); + +static inline void inet_sk_init_flowi4(const struct inet_sock *inet, + struct flowi4 *fl4) +{ + const struct ip_options_rcu *ip4_opt; + const struct sock *sk; + __be32 daddr; + + rcu_read_lock(); + ip4_opt = rcu_dereference(inet->inet_opt); + + /* Source routing option overrides the socket destination address */ + if (ip4_opt && ip4_opt->opt.srr) + daddr = ip4_opt->opt.faddr; + else + daddr = inet->inet_daddr; + rcu_read_unlock(); + + sk = &inet->sk; + flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), + ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), + sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, + inet->inet_saddr, inet->inet_dport, + inet->inet_sport, sk->sk_uid); + security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4)); +} + struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *flp, const struct sk_buff *skb); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *flp, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8095e82de808..21f46ee7b6e9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1309,8 +1309,6 @@ int 
inet_sk_rebuild_header(struct sock *sk) { struct rtable *rt = dst_rtable(__sk_dst_check(sk, 0)); struct inet_sock *inet = inet_sk(sk); - __be32 daddr; - struct ip_options_rcu *inet_opt; struct flowi4 *fl4; int err; @@ -1319,17 +1317,9 @@ int inet_sk_rebuild_header(struct sock *sk) return 0; /* Reroute. */ - rcu_read_lock(); - inet_opt = rcu_dereference(inet->inet_opt); - daddr = inet->inet_daddr; - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; - rcu_read_unlock(); fl4 = &inet->cork.fl.u.ip4; - rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, inet->inet_saddr, - inet->inet_dport, inet->inet_sport, - sk->sk_protocol, ip_sock_rt_tos(sk), - sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, fl4); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (!IS_ERR(rt)) { err = 0; sk_setup_caps(sk, &rt->dst); From 5be1323b5041d806716c80be4f8b11cfb64fa24c Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:46 +0100 Subject: [PATCH 0580/1386] ipv4: Use inet_sk_init_flowi4() in ip4_datagram_release_cb(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in ip4_datagram_release_cb() instead of passing parameters manually to ip_route_output_ports(). 
Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/9c326b8d9e919478f7952b21473d31da07eba2dd.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/datagram.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 4aca1f05edd3..4b5bc6eb52e7 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -102,8 +102,6 @@ EXPORT_SYMBOL(ip4_datagram_connect); void ip4_datagram_release_cb(struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const struct ip_options_rcu *inet_opt; - __be32 daddr = inet->inet_daddr; struct dst_entry *dst; struct flowi4 fl4; struct rtable *rt; @@ -115,14 +113,9 @@ void ip4_datagram_release_cb(struct sock *sk) rcu_read_unlock(); return; } - inet_opt = rcu_dereference(inet->inet_opt); - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; - rt = ip_route_output_ports(sock_net(sk), &fl4, sk, daddr, - inet->inet_saddr, inet->inet_dport, - inet->inet_sport, sk->sk_protocol, - ip_sock_rt_tos(sk), sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, &fl4); + rt = ip_route_output_flow(sock_net(sk), &fl4, sk); dst = !IS_ERR(rt) ? &rt->dst : NULL; sk_dst_set(sk, dst); From 42e5ffc385f3b0790c6cd5b54d3396a6c772d3b6 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:48 +0100 Subject: [PATCH 0581/1386] ipv4: Use inet_sk_init_flowi4() in inet_csk_rebuild_route(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in inet_csk_rebuild_route() instead of passing parameters manually to ip_route_output_ports(). 
Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/b270931636effa1095508e0f0a3e8c3a0e6d357f.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/inet_connection_sock.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6872b5aff73e..e4decfb270fa 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1561,20 +1561,13 @@ EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); static struct dst_entry *inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { const struct inet_sock *inet = inet_sk(sk); - const struct ip_options_rcu *inet_opt; - __be32 daddr = inet->inet_daddr; struct flowi4 *fl4; struct rtable *rt; rcu_read_lock(); - inet_opt = rcu_dereference(inet->inet_opt); - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; fl4 = &fl->u.ip4; - rt = ip_route_output_ports(sock_net(sk), fl4, sk, daddr, - inet->inet_saddr, inet->inet_dport, - inet->inet_sport, sk->sk_protocol, - ip_sock_rt_tos(sk), sk->sk_bound_dev_if); + inet_sk_init_flowi4(inet, fl4); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (IS_ERR(rt)) rt = NULL; if (rt) From 148721f8e04a10a3b9c51f030c9be0d15b0a4d17 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:51 +0100 Subject: [PATCH 0582/1386] ipv4: Use inet_sk_init_flowi4() in __ip_queue_xmit(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in __ip_queue_xmit() instead of passing parameters manually to ip_route_output_ports(). Override ->flowi4_tos with the value passed as parameter since that's required by SCTP. 
Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/37e64ffbd9adac187b14aa9097b095f5c86e85be.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_output.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f45a083f2c13..ea7a260bec8a 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -478,24 +478,16 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, /* Make sure we can route this packet. */ rt = dst_rtable(__sk_dst_check(sk, 0)); if (!rt) { - __be32 daddr; + inet_sk_init_flowi4(inet, fl4); - /* Use correct destination address if we have options. */ - daddr = inet->inet_daddr; - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; + /* sctp_v4_xmit() uses its own DSCP value */ + fl4->flowi4_tos = tos & INET_DSCP_MASK; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. */ - rt = ip_route_output_ports(net, fl4, sk, - daddr, inet->inet_saddr, - inet->inet_dport, - inet->inet_sport, - sk->sk_protocol, - tos & INET_DSCP_MASK, - sk->sk_bound_dev_if); + rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; sk_setup_caps(sk, &rt->dst); From c63e9f3b89d3f96220a1c99466fed4563c14a259 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Mon, 16 Dec 2024 18:21:54 +0100 Subject: [PATCH 0583/1386] l2tp: Use inet_sk_init_flowi4() in l2tp_ip_sendmsg(). Use inet_sk_init_flowi4() to automatically initialise the flowi4 structure in l2tp_ip_sendmsg() instead of passing parameters manually to ip_route_output_ports(). Override ->daddr with the value passed in the msghdr structure if provided. 
Signed-off-by: Guillaume Nault Reviewed-by: James Chapman Link: https://patch.msgid.link/2ff22a3560c5050228928456662b80b9c84a8fe4.1734357769.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- net/l2tp/l2tp_ip.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4bc24fddfd52..29795d2839e8 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -425,7 +425,6 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int rc; struct inet_sock *inet = inet_sk(sk); struct rtable *rt = NULL; - struct flowi4 *fl4; int connected = 0; __be32 daddr; @@ -455,7 +454,6 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (sk->sk_state != TCP_ESTABLISHED) goto out; - daddr = inet->inet_daddr; connected = 1; } @@ -482,29 +480,24 @@ static int l2tp_ip_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto error; } - fl4 = &inet->cork.fl.u.ip4; if (connected) rt = dst_rtable(__sk_dst_check(sk, 0)); rcu_read_lock(); if (!rt) { - const struct ip_options_rcu *inet_opt; + struct flowi4 *fl4 = &inet->cork.fl.u.ip4; - inet_opt = rcu_dereference(inet->inet_opt); + inet_sk_init_flowi4(inet, fl4); - /* Use correct destination address if we have options. */ - if (inet_opt && inet_opt->opt.srr) - daddr = inet_opt->opt.faddr; + /* Overwrite ->daddr if msg->msg_name was provided */ + if (!connected) + fl4->daddr = daddr; /* If this fails, retransmit mechanism of transport layer will * keep trying until route appears or the connection times * itself out. 
*/ - rt = ip_route_output_ports(sock_net(sk), fl4, sk, - daddr, inet->inet_saddr, - inet->inet_dport, inet->inet_sport, - sk->sk_protocol, ip_sock_rt_tos(sk), - sk->sk_bound_dev_if); + rt = ip_route_output_flow(sock_net(sk), fl4, sk); if (IS_ERR(rt)) goto no_route; if (connected) { From 0daa521a1c8c29ffbefe6530f0d276e74e2749d0 Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Thu, 12 Dec 2024 13:42:03 +0800 Subject: [PATCH 0584/1386] wifi: rtw88: add __packed attribute to efuse layout struct The efuse layout structs should not be address-aligned by the compiler. Otherwise it leads to unexpected layout and size on certain architectures such as arm. In x86-64, the results are identical before and after this patch. Also adjust the bit-fields so that none of them crosses into an adjacent byte, to avoid the warning: rtw88/rtw8822b.h:66:1: note: offset of packed bit-field `res2` has changed in GCC 4.4 66 | } __packed; | ^ Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202412120131.qk0x6OhE-lkp@intel.com/ Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241212054203.135046-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw88/main.h | 4 ++-- drivers/net/wireless/realtek/rtw88/rtw8723x.h | 8 ++++---- drivers/net/wireless/realtek/rtw88/rtw8821c.h | 9 +++++---- drivers/net/wireless/realtek/rtw88/rtw8822b.h | 9 +++++---- drivers/net/wireless/realtek/rtw88/rtw8822c.h | 9 +++++---- 5 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h index cd09fb6f7b8b..65c7acea41af 100644 --- a/drivers/net/wireless/realtek/rtw88/main.h +++ b/drivers/net/wireless/realtek/rtw88/main.h @@ -510,12 +510,12 @@ struct rtw_5g_txpwr_idx { struct rtw_5g_vht_ns_pwr_idx_diff vht_2s_diff; struct rtw_5g_vht_ns_pwr_idx_diff vht_3s_diff; struct rtw_5g_vht_ns_pwr_idx_diff vht_4s_diff; -}; +} __packed; struct rtw_txpwr_idx { struct rtw_2g_txpwr_idx pwr_idx_2g; struct rtw_5g_txpwr_idx pwr_idx_5g; -}; +}
__packed; struct rtw_channel_params { u8 center_chan; diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723x.h b/drivers/net/wireless/realtek/rtw88/rtw8723x.h index e93bfce994bf..a99af527c92c 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8723x.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8723x.h @@ -47,7 +47,7 @@ struct rtw8723xe_efuse { u8 device_id[2]; u8 sub_vendor_id[2]; u8 sub_device_id[2]; -}; +} __packed; struct rtw8723xu_efuse { u8 res4[48]; /* 0xd0 */ @@ -56,12 +56,12 @@ struct rtw8723xu_efuse { u8 usb_option; /* 0x104 */ u8 res5[2]; /* 0x105 */ u8 mac_addr[ETH_ALEN]; /* 0x107 */ -}; +} __packed; struct rtw8723xs_efuse { u8 res4[0x4a]; /* 0xd0 */ u8 mac_addr[ETH_ALEN]; /* 0x11a */ -}; +} __packed; struct rtw8723x_efuse { __le16 rtl_id; @@ -96,7 +96,7 @@ struct rtw8723x_efuse { struct rtw8723xu_efuse u; struct rtw8723xs_efuse s; }; -}; +} __packed; #define RTW8723X_IQK_ADDA_REG_NUM 16 #define RTW8723X_IQK_MAC8_REG_NUM 3 diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index 7a33ebd612ed..954e93c8020d 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -27,7 +27,7 @@ struct rtw8821cu_efuse { u8 res11[0xcf]; u8 package_type; /* 0x1fb */ u8 res12[0x4]; -}; +} __packed; struct rtw8821ce_efuse { u8 mac_addr[ETH_ALEN]; /* 0xd0 */ @@ -47,7 +47,8 @@ struct rtw8821ce_efuse { u8 ltr_en:1; u8 res1:2; u8 obff:2; - u8 res2:3; + u8 res2_1:1; + u8 res2_2:2; u8 obff_cap:2; u8 res3:4; u8 res4[3]; @@ -63,7 +64,7 @@ struct rtw8821ce_efuse { u8 res6:1; u8 port_t_power_on_value:5; u8 res7; -}; +} __packed; struct rtw8821cs_efuse { u8 res4[0x4a]; /* 0xd0 */ @@ -101,7 +102,7 @@ struct rtw8821c_efuse { struct rtw8821cu_efuse u; struct rtw8821cs_efuse s; }; -}; +} __packed; static inline void _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.h 
b/drivers/net/wireless/realtek/rtw88/rtw8822b.h index 0514958fb57c..9fca9ba67c90 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.h @@ -27,7 +27,7 @@ struct rtw8822bu_efuse { u8 res11[0xcf]; u8 package_type; /* 0x1fb */ u8 res12[0x4]; -}; +} __packed; struct rtw8822be_efuse { u8 mac_addr[ETH_ALEN]; /* 0xd0 */ @@ -47,7 +47,8 @@ struct rtw8822be_efuse { u8 ltr_en:1; u8 res1:2; u8 obff:2; - u8 res2:3; + u8 res2_1:1; + u8 res2_2:2; u8 obff_cap:2; u8 res3:4; u8 res4[3]; @@ -63,7 +64,7 @@ struct rtw8822be_efuse { u8 res6:1; u8 port_t_power_on_value:5; u8 res7; -}; +} __packed; struct rtw8822bs_efuse { u8 res4[0x4a]; /* 0xd0 */ @@ -103,7 +104,7 @@ struct rtw8822b_efuse { struct rtw8822bu_efuse u; struct rtw8822bs_efuse s; }; -}; +} __packed; static inline void _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h index e2b383d633cd..fc62b67a15f2 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h @@ -14,7 +14,7 @@ struct rtw8822cu_efuse { u8 res1[3]; u8 mac_addr[ETH_ALEN]; /* 0x157 */ u8 res2[0x3d]; -}; +} __packed; struct rtw8822cs_efuse { u8 res0[0x4a]; /* 0x120 */ @@ -39,7 +39,8 @@ struct rtw8822ce_efuse { u8 ltr_en:1; u8 res1:2; u8 obff:2; - u8 res2:3; + u8 res2_1:1; + u8 res2_2:2; u8 obff_cap:2; u8 res3:4; u8 class_code[3]; @@ -55,7 +56,7 @@ struct rtw8822ce_efuse { u8 res6:1; u8 port_t_power_on_value:5; u8 res7; -}; +} __packed; struct rtw8822c_efuse { __le16 rtl_id; @@ -102,7 +103,7 @@ struct rtw8822c_efuse { struct rtw8822cu_efuse u; struct rtw8822cs_efuse s; }; -}; +} __packed; enum rtw8822c_dpk_agc_phase { RTW_DPK_GAIN_CHECK, From 66ef0289ac99e155d206ddaa0fdfad09ae3cd007 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 18 Dec 2024 00:53:11 +0200 Subject: [PATCH 0585/1386] wifi: rtlwifi: rtl8821ae: Fix media 
status report RTL8821AE is stuck transmitting at the lowest rate allowed by the rate mask. This is because the firmware doesn't know the device is connected to a network. Fix the macros SET_H2CCMD_MSRRPT_PARM_OPMODE and SET_H2CCMD_MSRRPT_PARM_MACID_IND to work on the first byte of __cmd, not the second. Now the firmware is correctly notified when the device is connected to a network and it activates the rate control. Before (MCS3): [ 5] 0.00-1.00 sec 12.5 MBytes 105 Mbits/sec 0 339 KBytes [ 5] 1.00-2.00 sec 10.6 MBytes 89.1 Mbits/sec 0 339 KBytes [ 5] 2.00-3.00 sec 10.6 MBytes 89.1 Mbits/sec 0 386 KBytes [ 5] 3.00-4.00 sec 10.6 MBytes 89.1 Mbits/sec 0 386 KBytes [ 5] 4.00-5.00 sec 10.2 MBytes 86.0 Mbits/sec 0 427 KBytes After (MCS9): [ 5] 0.00-1.00 sec 33.9 MBytes 284 Mbits/sec 0 771 KBytes [ 5] 1.00-2.00 sec 31.6 MBytes 265 Mbits/sec 0 865 KBytes [ 5] 2.00-3.00 sec 29.9 MBytes 251 Mbits/sec 0 963 KBytes [ 5] 3.00-4.00 sec 28.2 MBytes 237 Mbits/sec 0 963 KBytes [ 5] 4.00-5.00 sec 26.8 MBytes 224 Mbits/sec 0 963 KBytes Fixes: 39f40710d0b5 ("rtlwifi: rtl88821ae: Remove usage of private bit manipulation macros") Cc: stable@vger.kernel.org Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/754785b3-8a78-4554-b80d-de5f603b410b@gmail.com --- drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.h b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.h index c269942b3f4a..af8d17b9e012 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.h +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.h @@ -197,9 +197,9 @@ enum rtl8821a_h2c_cmd { /* _MEDIA_STATUS_RPT_PARM_CMD1 */ #define SET_H2CCMD_MSRRPT_PARM_OPMODE(__cmd, __value) \ - u8p_replace_bits(__cmd + 1, __value, BIT(0)) + u8p_replace_bits(__cmd, __value, BIT(0)) #define SET_H2CCMD_MSRRPT_PARM_MACID_IND(__cmd, __value) \ - 
u8p_replace_bits(__cmd + 1, __value, BIT(1)) + u8p_replace_bits(__cmd, __value, BIT(1)) /* AP_OFFLOAD */ #define SET_H2CCMD_AP_OFFLOAD_ON(__cmd, __value) \ From 59ab27a9f20f8de6f7989e8a8c3d97c04ed8199c Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 18 Dec 2024 02:13:22 +0200 Subject: [PATCH 0586/1386] wifi: rtw88: 8812a: Support RFE type 2 RF front end type 2 exists in the wild and can be treated like types 0 and 1. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/2917c7fc-6d88-4007-b6a6-9130bd1991e5@gmail.com --- drivers/net/wireless/realtek/rtw88/rtw8812a.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8812a.c b/drivers/net/wireless/realtek/rtw88/rtw8812a.c index 482edd31823d..d8f0ed70777f 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8812a.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8812a.c @@ -985,6 +985,9 @@ static const struct rtw_rfe_def rtw8812a_rfe_defs[] = { [1] = { .phy_pg_tbl = &rtw8812a_bb_pg_tbl, .txpwr_lmt_tbl = &rtw8812a_txpwr_lmt_tbl, .pwr_track_tbl = &rtw8812a_rtw_pwr_track_tbl, }, + [2] = { .phy_pg_tbl = &rtw8812a_bb_pg_tbl, + .txpwr_lmt_tbl = &rtw8812a_txpwr_lmt_tbl, + .pwr_track_tbl = &rtw8812a_rtw_pwr_track_tbl, }, [3] = { .phy_pg_tbl = &rtw8812a_bb_pg_rfe3_tbl, .txpwr_lmt_tbl = &rtw8812a_txpwr_lmt_tbl, .pwr_track_tbl = &rtw8812a_rtw_pwr_track_rfe3_tbl, }, From 74a72c367573ad521becf6cc4d649e14387b3c64 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Wed, 18 Dec 2024 02:16:11 +0200 Subject: [PATCH 0587/1386] wifi: rtw88: 8821a/8812a: Set ptct_efuse_size to 0 Some RTL8812AU devices fail to probe: [ 12.478774] rtw_8812au 1-1.3:1.0: failed to dump efuse logical map [ 12.487712] rtw_8812au 1-1.3:1.0: failed to setup chip efuse info [ 12.487742] rtw_8812au 1-1.3:1.0: failed to setup chip information [ 12.491077] rtw_8812au: probe of 1-1.3:1.0 failed with error -22 It turns out these chips don't need to "protect" any 
bytes at the end of the efuse. The original value of 96 was copied from rtw8821c.c. No one reported any failures with RTL8821AU yet, but the vendor driver uses the same efuse reading code for both chips. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/1a477adb-60c3-463c-b158-3f86c94cb821@gmail.com --- drivers/net/wireless/realtek/rtw88/rtw8812a.c | 2 +- drivers/net/wireless/realtek/rtw88/rtw8821a.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8812a.c b/drivers/net/wireless/realtek/rtw88/rtw8812a.c index d8f0ed70777f..21795286a1a0 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8812a.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8812a.c @@ -1027,7 +1027,7 @@ const struct rtw_chip_info rtw8812a_hw_spec = { .rx_buf_desc_sz = 8, .phy_efuse_size = 512, .log_efuse_size = 512, - .ptct_efuse_size = 96 + 1, /* TODO or just 18? */ + .ptct_efuse_size = 0, .txff_size = 131072, .rxff_size = 16128, .rsvd_drv_pg_num = 9, diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821a.c b/drivers/net/wireless/realtek/rtw88/rtw8821a.c index db242c9ad68f..dafab2af33bc 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821a.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8821a.c @@ -1118,7 +1118,7 @@ const struct rtw_chip_info rtw8821a_hw_spec = { .rx_buf_desc_sz = 8, .phy_efuse_size = 512, .log_efuse_size = 512, - .ptct_efuse_size = 96 + 1, /* TODO or just 18? */ + .ptct_efuse_size = 0, .txff_size = 65536, .rxff_size = 16128, .rsvd_drv_pg_num = 8, From e9048e2935f7d797c2ba047c15b705b57c2fa99a Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 19 Dec 2024 00:33:20 +0200 Subject: [PATCH 0588/1386] wifi: rtw88: usb: Copy instead of cloning the RX skb "iperf3 -c 192.168.0.1 -R --udp -b 0" shows about 40% of datagrams are lost. Many torrents don't download faster than 3 MiB/s, probably because the Bittorrent protocol uses UDP. 
This is somehow related to the use of skb_clone() in the RX path. Don't use skb_clone(). Instead allocate a new skb for each 802.11 frame received and copy the data from the big (32768 byte) skb. With this patch, "iperf3 -c 192.168.0.1 -R --udp -b 0" shows only 1-2% of datagrams are lost, and torrents can reach download speeds of 36 MiB/s. Tested with RTL8812AU and RTL8822CU. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/8c9d4f9d-ebd8-4dc0-a0c4-9ebe430521dd@gmail.com --- drivers/net/wireless/realtek/rtw88/usb.c | 52 ++++++++++++++---------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index be193c7add77..28679d9cdffc 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -7,6 +7,7 @@ #include #include "main.h" #include "debug.h" +#include "mac.h" #include "reg.h" #include "tx.h" #include "rx.h" @@ -547,49 +548,58 @@ static void rtw_usb_rx_handler(struct work_struct *work) { struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_work); struct rtw_dev *rtwdev = rtwusb->rtwdev; - const struct rtw_chip_info *chip = rtwdev->chip; - u32 pkt_desc_sz = chip->rx_pkt_desc_sz; struct ieee80211_rx_status rx_status; - u32 pkt_offset, next_pkt, urb_len; struct rtw_rx_pkt_stat pkt_stat; - struct sk_buff *next_skb; + struct sk_buff *rx_skb; struct sk_buff *skb; + u32 pkt_desc_sz = rtwdev->chip->rx_pkt_desc_sz; + u32 max_skb_len = pkt_desc_sz + PHY_STATUS_SIZE * 8 + + IEEE80211_MAX_MPDU_LEN_VHT_11454; + u32 pkt_offset, next_pkt, skb_len; u8 *rx_desc; int limit; for (limit = 0; limit < 200; limit++) { - skb = skb_dequeue(&rtwusb->rx_queue); - if (!skb) + rx_skb = skb_dequeue(&rtwusb->rx_queue); + if (!rx_skb) break; if (skb_queue_len(&rtwusb->rx_queue) >= RTW_USB_MAX_RXQ_LEN) { dev_dbg_ratelimited(rtwdev->dev, "failed to get rx_queue, overflow\n"); - 
dev_kfree_skb_any(skb); + dev_kfree_skb_any(rx_skb); continue; } - urb_len = skb->len; + rx_desc = rx_skb->data; do { - rx_desc = skb->data; rtw_rx_query_rx_desc(rtwdev, rx_desc, &pkt_stat, &rx_status); pkt_offset = pkt_desc_sz + pkt_stat.drv_info_sz + pkt_stat.shift; - next_pkt = round_up(pkt_stat.pkt_len + pkt_offset, 8); + skb_len = pkt_stat.pkt_len + pkt_offset; + if (skb_len > max_skb_len) { + rtw_dbg(rtwdev, RTW_DBG_USB, + "skipping too big packet: %u\n", + skb_len); + goto skip_packet; + } - if (urb_len >= next_pkt + pkt_desc_sz) - next_skb = skb_clone(skb, GFP_KERNEL); - else - next_skb = NULL; + skb = alloc_skb(skb_len, GFP_KERNEL); + if (!skb) { + rtw_dbg(rtwdev, RTW_DBG_USB, + "failed to allocate RX skb of size %u\n", + skb_len); + goto skip_packet; + } + + skb_put_data(skb, rx_desc, skb_len); if (pkt_stat.is_c2h) { - skb_trim(skb, pkt_stat.pkt_len + pkt_offset); rtw_fw_c2h_cmd_rx_irqsafe(rtwdev, pkt_offset, skb); } else { skb_pull(skb, pkt_offset); - skb_trim(skb, pkt_stat.pkt_len); rtw_update_rx_freq_for_invalid(rtwdev, skb, &rx_status, &pkt_stat); @@ -598,12 +608,12 @@ static void rtw_usb_rx_handler(struct work_struct *work) ieee80211_rx_irqsafe(rtwdev->hw, skb); } - skb = next_skb; - if (skb) - skb_pull(skb, next_pkt); +skip_packet: + next_pkt = round_up(skb_len, 8); + rx_desc += next_pkt; + } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len); - urb_len -= next_pkt; - } while (skb); + dev_kfree_skb_any(rx_skb); } } From 13221be72034d1c34630ab124c43438aefe7e656 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 19 Dec 2024 00:34:42 +0200 Subject: [PATCH 0589/1386] wifi: rtw88: Handle C2H_ADAPTIVITY in rtw_fw_c2h_cmd_handle() The firmware message C2H_ADAPTIVITY is currently handled in rtw_fw_c2h_cmd_rx_irqsafe(), which runs in the RX workqueue, but it's not "irqsafe" with USB because it sleeps (reads hardware registers). This becomes a problem after the next patch, which will create the RX workqueue with the flag WQ_BH. 
To avoid sleeping when it's not allowed, handle C2H_ADAPTIVITY in rtw_fw_c2h_cmd_handle(), which runs in the c2h workqueue. Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/96e52b03-be8d-4050-ae71-bfdb478ff42f@gmail.com --- drivers/net/wireless/realtek/rtw88/fw.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index e6e9946fbf44..02389b7c6876 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -332,6 +332,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb) case C2H_RA_RPT: rtw_fw_ra_report_handle(rtwdev, c2h->payload, len); break; + case C2H_ADAPTIVITY: + rtw_fw_adaptivity_result(rtwdev, c2h->payload, len); + break; default: rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id); break; @@ -367,10 +370,6 @@ void rtw_fw_c2h_cmd_rx_irqsafe(struct rtw_dev *rtwdev, u32 pkt_offset, rtw_fw_scan_result(rtwdev, c2h->payload, len); dev_kfree_skb_any(skb); break; - case C2H_ADAPTIVITY: - rtw_fw_adaptivity_result(rtwdev, c2h->payload, len); - dev_kfree_skb_any(skb); - break; default: /* pass offset for further operation */ *((u32 *)skb->cb) = pkt_offset; From 3e3aa566dd1803f1697530de6c8489a8350765b3 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Thu, 19 Dec 2024 00:35:49 +0200 Subject: [PATCH 0590/1386] wifi: rtw88: usb: Preallocate and reuse the RX skbs The USB driver uses four USB Request Blocks for RX. Before submitting one, it allocates a 32768 byte skb for the RX data. This allocation can fail, maybe due to temporary memory fragmentation. When the allocation fails, the corresponding URB is never submitted again. After four such allocation failures, all RX stops because the driver is not requesting data from the device anymore. 
Don't allocate a 32768 byte skb when submitting a USB Request Block (which happens very often). Instead preallocate 8 such skbs, and reuse them over and over. If all 8 are busy, allocate a new one. This is pretty rare. If the allocation fails, use a work to try again later. When there are enough free skbs again, free the excess skbs. Also, use WQ_BH for the RX workqueue. With a normal or high priority workqueue the skbs are processed too slowly when the system is even a little busy, like when opening a new page in a browser, and the driver runs out of free skbs and allocates a lot of new ones. This is more or less what the out-of-tree Realtek drivers do, except they use a tasklet instead of a BH workqueue. Tested with RTL8723DU, RTL8821AU, RTL8812AU, RTL8812BU, RTL8822CU, RTL8811CU. Closes: https://lore.kernel.org/linux-wireless/6e7ecb47-7ea0-433a-a19f-05f88a2edf6b@gmail.com/ Signed-off-by: Bitterblue Smith Acked-by: Ping-Ke Shih Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/9cee7a34-c38d-4128-824d-0ec139ca5a4e@gmail.com --- drivers/net/wireless/realtek/rtw88/usb.c | 79 +++++++++++++++++++----- drivers/net/wireless/realtek/rtw88/usb.h | 3 + 2 files changed, 67 insertions(+), 15 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw88/usb.c b/drivers/net/wireless/realtek/rtw88/usb.c index 28679d9cdffc..1572b61cf877 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.c +++ b/drivers/net/wireless/realtek/rtw88/usb.c @@ -586,7 +586,7 @@ static void rtw_usb_rx_handler(struct work_struct *work) goto skip_packet; } - skb = alloc_skb(skb_len, GFP_KERNEL); + skb = alloc_skb(skb_len, GFP_ATOMIC); if (!skb) { rtw_dbg(rtwdev, RTW_DBG_USB, "failed to allocate RX skb of size %u\n", @@ -613,32 +613,70 @@ skip_packet: rx_desc += next_pkt; } while (rx_desc + pkt_desc_sz < rx_skb->data + rx_skb->len); - dev_kfree_skb_any(rx_skb); + if (skb_queue_len(&rtwusb->rx_free_queue) >= RTW_USB_RX_SKB_NUM) + dev_kfree_skb_any(rx_skb); + else + 
skb_queue_tail(&rtwusb->rx_free_queue, rx_skb); } } static void rtw_usb_read_port_complete(struct urb *urb); -static void rtw_usb_rx_resubmit(struct rtw_usb *rtwusb, struct rx_usb_ctrl_block *rxcb) +static void rtw_usb_rx_resubmit(struct rtw_usb *rtwusb, + struct rx_usb_ctrl_block *rxcb, + gfp_t gfp) { struct rtw_dev *rtwdev = rtwusb->rtwdev; + struct sk_buff *rx_skb; int error; - rxcb->rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, GFP_ATOMIC); - if (!rxcb->rx_skb) - return; + rx_skb = skb_dequeue(&rtwusb->rx_free_queue); + if (!rx_skb) + rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, gfp); + + if (!rx_skb) + goto try_later; + + skb_reset_tail_pointer(rx_skb); + rx_skb->len = 0; + + rxcb->rx_skb = rx_skb; usb_fill_bulk_urb(rxcb->rx_urb, rtwusb->udev, usb_rcvbulkpipe(rtwusb->udev, rtwusb->pipe_in), rxcb->rx_skb->data, RTW_USB_MAX_RECVBUF_SZ, rtw_usb_read_port_complete, rxcb); - error = usb_submit_urb(rxcb->rx_urb, GFP_ATOMIC); + error = usb_submit_urb(rxcb->rx_urb, gfp); if (error) { - kfree_skb(rxcb->rx_skb); + skb_queue_tail(&rtwusb->rx_free_queue, rxcb->rx_skb); + if (error != -ENODEV) rtw_err(rtwdev, "Err sending rx data urb %d\n", error); + + if (error == -ENOMEM) + goto try_later; + } + + return; + +try_later: + rxcb->rx_skb = NULL; + queue_work(rtwusb->rxwq, &rtwusb->rx_urb_work); +} + +static void rtw_usb_rx_resubmit_work(struct work_struct *work) +{ + struct rtw_usb *rtwusb = container_of(work, struct rtw_usb, rx_urb_work); + struct rx_usb_ctrl_block *rxcb; + int i; + + for (i = 0; i < RTW_USB_RXCB_NUM; i++) { + rxcb = &rtwusb->rx_cb[i]; + + if (!rxcb->rx_skb) + rtw_usb_rx_resubmit(rtwusb, rxcb, GFP_ATOMIC); } } @@ -654,15 +692,16 @@ static void rtw_usb_read_port_complete(struct urb *urb) urb->actual_length < 24) { rtw_err(rtwdev, "failed to get urb length:%d\n", urb->actual_length); - if (skb) - dev_kfree_skb_any(skb); + skb_queue_tail(&rtwusb->rx_free_queue, skb); } else { skb_put(skb, urb->actual_length); skb_queue_tail(&rtwusb->rx_queue, skb); 
queue_work(rtwusb->rxwq, &rtwusb->rx_work); } - rtw_usb_rx_resubmit(rtwusb, rxcb); + rtw_usb_rx_resubmit(rtwusb, rxcb, GFP_ATOMIC); } else { + skb_queue_tail(&rtwusb->rx_free_queue, skb); + switch (urb->status) { case -EINVAL: case -EPIPE: @@ -680,8 +719,6 @@ static void rtw_usb_read_port_complete(struct urb *urb) rtw_err(rtwdev, "status %d\n", urb->status); break; } - if (skb) - dev_kfree_skb_any(skb); } } @@ -869,16 +906,26 @@ static struct rtw_hci_ops rtw_usb_ops = { static int rtw_usb_init_rx(struct rtw_dev *rtwdev) { struct rtw_usb *rtwusb = rtw_get_usb_priv(rtwdev); + struct sk_buff *rx_skb; + int i; - rtwusb->rxwq = create_singlethread_workqueue("rtw88_usb: rx wq"); + rtwusb->rxwq = alloc_workqueue("rtw88_usb: rx wq", WQ_BH, 0); if (!rtwusb->rxwq) { rtw_err(rtwdev, "failed to create RX work queue\n"); return -ENOMEM; } skb_queue_head_init(&rtwusb->rx_queue); + skb_queue_head_init(&rtwusb->rx_free_queue); INIT_WORK(&rtwusb->rx_work, rtw_usb_rx_handler); + INIT_WORK(&rtwusb->rx_urb_work, rtw_usb_rx_resubmit_work); + + for (i = 0; i < RTW_USB_RX_SKB_NUM; i++) { + rx_skb = alloc_skb(RTW_USB_MAX_RECVBUF_SZ, GFP_KERNEL); + if (rx_skb) + skb_queue_tail(&rtwusb->rx_free_queue, rx_skb); + } return 0; } @@ -891,7 +938,7 @@ static void rtw_usb_setup_rx(struct rtw_dev *rtwdev) for (i = 0; i < RTW_USB_RXCB_NUM; i++) { struct rx_usb_ctrl_block *rxcb = &rtwusb->rx_cb[i]; - rtw_usb_rx_resubmit(rtwusb, rxcb); + rtw_usb_rx_resubmit(rtwusb, rxcb, GFP_KERNEL); } } @@ -903,6 +950,8 @@ static void rtw_usb_deinit_rx(struct rtw_dev *rtwdev) flush_workqueue(rtwusb->rxwq); destroy_workqueue(rtwusb->rxwq); + + skb_queue_purge(&rtwusb->rx_free_queue); } static int rtw_usb_init_tx(struct rtw_dev *rtwdev) diff --git a/drivers/net/wireless/realtek/rtw88/usb.h b/drivers/net/wireless/realtek/rtw88/usb.h index 86697a5c0103..9b695b688b24 100644 --- a/drivers/net/wireless/realtek/rtw88/usb.h +++ b/drivers/net/wireless/realtek/rtw88/usb.h @@ -38,6 +38,7 @@ #define RTW_USB_RXAGG_TIMEOUT 10 
#define RTW_USB_RXCB_NUM 4 +#define RTW_USB_RX_SKB_NUM 8 #define RTW_USB_EP_MAX 4 @@ -81,7 +82,9 @@ struct rtw_usb { struct rx_usb_ctrl_block rx_cb[RTW_USB_RXCB_NUM]; struct sk_buff_head rx_queue; + struct sk_buff_head rx_free_queue; struct work_struct rx_work; + struct work_struct rx_urb_work; }; static inline struct rtw_usb_tx_data *rtw_usb_get_tx_data(struct sk_buff *skb) From 36131b72fb1c62bc61e86068618de304763b8ac7 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Thu, 14 Nov 2024 10:14:50 +0100 Subject: [PATCH 0591/1386] can: tcan4x5x: add option for selecting nWKRQ voltage The nWKRQ pin supports an output voltage of either the internal reference voltage (3.6V) or the reference voltage of the digital interface 0-6V (VIO). Add the devicetree option ti,nwkrq-voltage-vio to set it to VIO. If this property is omitted the reset default, the internal reference voltage, is used. Signed-off-by: Sean Nyekjaer Reviewed-by: Marc Kleine-Budde Reviewed-by: Vincent Mailhol Link: https://patch.msgid.link/20241114-tcan-wkrqv-v5-2-a2d50833ed71@geanix.com [mkl: remove unused variable in tcan4x5x_get_dt_data()] Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x-core.c | 19 +++++++++++++++++++ drivers/net/can/m_can/tcan4x5x.h | 2 ++ 2 files changed, 21 insertions(+) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 2f73bf3abad8..84b914056b7d 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -92,6 +92,8 @@ #define TCAN4X5X_MODE_STANDBY BIT(6) #define TCAN4X5X_MODE_NORMAL BIT(7) +#define TCAN4X5X_NWKRQ_VOLTAGE_VIO BIT(19) + #define TCAN4X5X_DISABLE_WAKE_MSK (BIT(31) | BIT(30)) #define TCAN4X5X_DISABLE_INH_MSK BIT(9) @@ -267,6 +269,13 @@ static int tcan4x5x_init(struct m_can_classdev *cdev) if (ret) return ret; + if (tcan4x5x->nwkrq_voltage_vio) { + ret = regmap_set_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG, + TCAN4X5X_NWKRQ_VOLTAGE_VIO); + if (ret) + return ret; + } + 
return ret; } @@ -318,6 +327,14 @@ static const struct tcan4x5x_version_info return &tcan4x5x_versions[TCAN4X5X]; } +static void tcan4x5x_get_dt_data(struct m_can_classdev *cdev) +{ + struct tcan4x5x_priv *tcan4x5x = cdev_to_priv(cdev); + + tcan4x5x->nwkrq_voltage_vio = + of_property_read_bool(cdev->dev->of_node, "ti,nwkrq-voltage-vio"); +} + static int tcan4x5x_get_gpios(struct m_can_classdev *cdev, const struct tcan4x5x_version_info *version_info) { @@ -453,6 +470,8 @@ static int tcan4x5x_can_probe(struct spi_device *spi) goto out_power; } + tcan4x5x_get_dt_data(mcan_class); + tcan4x5x_check_wake(priv); ret = tcan4x5x_write_tcan_reg(mcan_class, TCAN4X5X_INT_EN, 0); diff --git a/drivers/net/can/m_can/tcan4x5x.h b/drivers/net/can/m_can/tcan4x5x.h index e62c030d3e1e..203399d5e8cc 100644 --- a/drivers/net/can/m_can/tcan4x5x.h +++ b/drivers/net/can/m_can/tcan4x5x.h @@ -42,6 +42,8 @@ struct tcan4x5x_priv { struct tcan4x5x_map_buf map_buf_rx; struct tcan4x5x_map_buf map_buf_tx; + + bool nwkrq_voltage_vio; }; static inline void From bddad4fac9f73c14f57b111058dd0fa6d9ede228 Mon Sep 17 00:00:00 2001 From: Dario Binacchi Date: Fri, 22 Nov 2024 23:15:44 +0100 Subject: [PATCH 0592/1386] can: sun4i_can: continue to use likely() to check skb Throughout the sun4i_can_err() function, the likely() macro is used to check the skb buffer, except in one instance. This patch makes the code consistent by using the macro in that case as well. 
Signed-off-by: Dario Binacchi Link: https://patch.msgid.link/20241122221650.633981-4-dario.binacchi@amarulasolutions.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/sun4i_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 360158c295d3..48d31197adec 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -570,7 +570,7 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) else state = CAN_STATE_ERROR_ACTIVE; } - if (skb && state != CAN_STATE_BUS_OFF) { + if (likely(skb) && state != CAN_STATE_BUS_OFF) { cf->can_id |= CAN_ERR_CNT; cf->data[6] = txerr; cf->data[7] = rxerr; From 68d426da13fac6b1d3f5949a38d31ce2e3d88e49 Mon Sep 17 00:00:00 2001 From: Sean Nyekjaer Date: Thu, 28 Nov 2024 09:32:31 +0100 Subject: [PATCH 0593/1386] can: tcan4x5x: get rid of false clock errors tcan4x5x devices only require the clock "cclk", so call devm_clk_get() directly. This is done to avoid m_can_class_get_clocks() that checks for both hclk and cclk and results in this warning message: | tcan4x5x spi0.0: no clock found Signed-off-by: Sean Nyekjaer Link: https://patch.msgid.link/20241128-mcancclk-v1-1-a93aac64dbae@geanix.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/tcan4x5x-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/tcan4x5x-core.c b/drivers/net/can/m_can/tcan4x5x-core.c index 84b914056b7d..4c9454176607 100644 --- a/drivers/net/can/m_can/tcan4x5x-core.c +++ b/drivers/net/can/m_can/tcan4x5x-core.c @@ -409,7 +409,7 @@ static int tcan4x5x_can_probe(struct spi_device *spi) priv->power = NULL; } - m_can_class_get_clocks(mcan_class); + mcan_class->cclk = devm_clk_get(mcan_class->dev, "cclk"); if (IS_ERR(mcan_class->cclk)) { dev_err(&spi->dev, "no CAN clock source defined\n"); freq = TCAN4X5X_EXT_CLK_DEF; From 2351998fd833eb40358adf0b889637311b5bc6b4 Mon Sep 17 00:00:00 2001 From: Charan Pedumuru
Date: Wed, 20 Nov 2024 13:58:08 +0530 Subject: [PATCH 0594/1386] dt-bindings: net: can: atmel: Convert to json schema Convert old text based binding to json schema. Changes during conversion: - Add a fallback for `microchip,sam9x60-can` as it is compatible with the CAN IP core on `atmel,at91sam9x5-can`. - Add the required properties `clock` and `clock-names`, which were missing in the original binding. - Update examples and include appropriate file directives to resolve errors identified by `dt_binding_check` and `dtbs_check`. Signed-off-by: Charan Pedumuru Reviewed-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241120-can-v3-1-da5bb4f6128d@microchip.com [mkl: fixed indention in example] Signed-off-by: Marc Kleine-Budde --- .../net/can/atmel,at91sam9263-can.yaml | 58 +++++++++++++++++++ .../devicetree/bindings/net/can/atmel-can.txt | 15 ----- 2 files changed, 58 insertions(+), 15 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml delete mode 100644 Documentation/devicetree/bindings/net/can/atmel-can.txt diff --git a/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml b/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml new file mode 100644 index 000000000000..c818c01a718b --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/atmel,at91sam9263-can.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/atmel,at91sam9263-can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip AT91 CAN Controller + +maintainers: + - Nicolas Ferre + +allOf: + - $ref: can-controller.yaml# + +properties: + compatible: + oneOf: + - enum: + - atmel,at91sam9263-can + - atmel,at91sam9x5-can + - items: + - enum: + - microchip,sam9x60-can + - const: atmel,at91sam9x5-can + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-names: + items: + - 
const: can_clk + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + +unevaluatedProperties: false + +examples: + - | + #include <dt-bindings/clock/at91.h> + #include <dt-bindings/interrupt-controller/irq.h> + can@f000c000 { + compatible = "atmel,at91sam9263-can"; + reg = <0xf000c000 0x300>; + interrupts = <30 IRQ_TYPE_LEVEL_HIGH 3>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 12>; + clock-names = "can_clk"; + }; diff --git a/Documentation/devicetree/bindings/net/can/atmel-can.txt b/Documentation/devicetree/bindings/net/can/atmel-can.txt deleted file mode 100644 index 218a3b3eb27e..000000000000 --- a/Documentation/devicetree/bindings/net/can/atmel-can.txt +++ /dev/null @@ -1,15 +0,0 @@ -* AT91 CAN * - -Required properties: - - compatible: Should be "atmel,at91sam9263-can", "atmel,at91sam9x5-can" or - "microchip,sam9x60-can" - - reg: Should contain CAN controller registers location and length - - interrupts: Should contain IRQ line for the CAN controller - -Example: - - can0: can@f000c000 { - compatible = "atmel,at91sam9x5-can"; - reg = <0xf000c000 0x300>; - interrupts = <40 4 5> - }; From 57769cb9ccbad92c5126264584346ebc8501b353 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Sat, 30 Nov 2024 18:08:34 +0100 Subject: [PATCH 0595/1386] mailmap: add an entry for Oliver Hartkopp Map my retired company address and an accidentally used personal mail address within mailmap.
Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20241130170911.2828-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 5ff0e5d681e7..37ecf2632be3 100644 --- a/.mailmap +++ b/.mailmap @@ -529,6 +529,8 @@ Oleksij Rempel Oleksij Rempel Oleksij Rempel Oleksij Rempel +Oliver Hartkopp +Oliver Hartkopp Oliver Upton Ondřej Jirman Oza Pawandeep From 1263e69a7c47a68537476298f13f943fb954581e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 19 Dec 2024 20:08:37 +0100 Subject: [PATCH 0596/1386] MAINTAINERS: assign em_canid.c additionally to CAN maintainers The extended match rule em_canid is used to classify CAN frames based on their CAN Identifier. To keep the CAN maintainers in the loop for relevant changes which might affect the CAN specific functionality add em_canid.c to the CAN NETWORK LAYER files. Signed-off-by: Oliver Hartkopp Link: https://patch.msgid.link/20241219190837.3087-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0456a33ef657..a7716f48f50c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5065,6 +5065,7 @@ F: include/uapi/linux/can/gw.h F: include/uapi/linux/can/isotp.h F: include/uapi/linux/can/raw.h F: net/can/ +F: net/sched/em_canid.c CAN-J1939 NETWORK LAYER M: Robin van der Gracht From d50c837675a95f733e53a5e21eb168f8c9f5a73d Mon Sep 17 00:00:00 2001 From: Ariel Otilibili Date: Sat, 21 Dec 2024 12:06:49 +0100 Subject: [PATCH 0597/1386] can: dev: can_get_state_str(): Remove dead code The default switch case ends with a return; meaning this return is never reached.
Coverity-ID: 1497123 Signed-off-by: Ariel Otilibili Link: https://patch.msgid.link/20241221111454.1074285-4-ariel.otilibili-anieli@eurecom.fr Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev/dev.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/can/dev/dev.c b/drivers/net/can/dev/dev.c index 6792c14fd7eb..93035a7d50f3 100644 --- a/drivers/net/can/dev/dev.c +++ b/drivers/net/can/dev/dev.c @@ -85,8 +85,6 @@ const char *can_get_state_str(const enum can_state state) default: return ""; } - - return ""; } EXPORT_SYMBOL_GPL(can_get_state_str); From a502ea6fa94b1f7be72a24bcf9e3f5f6b7e6e90c Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Wed, 18 Dec 2024 17:21:16 +0100 Subject: [PATCH 0598/1386] udp: Deal with race between UDP socket address change and rehash If a UDP socket changes its local address while it's receiving datagrams, as a result of connect(), there is a period during which a lookup operation might fail to find it, after the address is changed but before the secondary hash (port and address) and the four-tuple hash (local and remote ports and addresses) are updated. Secondary hash chains were introduced by commit 30fff9231fad ("udp: bind() optimisation") and, as a result, a rehash operation became needed to make a bound socket reachable again after a connect(). This operation was introduced by commit 719f835853a9 ("udp: add rehash on connect()") which isn't however a complete fix: the socket will be found once the rehashing completes, but not while it's pending. This is noticeable with a socat(1) server in UDP4-LISTEN mode, and a client sending datagrams to it. After the server receives the first datagram (cf. _xioopen_ipdgram_listen()), it issues a connect() to the address of the sender, in order to set up a directed flow. 
Now, if the client, running on a different CPU thread, happens to send a (subsequent) datagram while the server's socket changes its address, but is not rehashed yet, this will result in a failed lookup and a port unreachable error delivered to the client, as apparent from the following reproducer: LEN=$(($(cat /proc/sys/net/core/wmem_default) / 4)) dd if=/dev/urandom bs=1 count=${LEN} of=tmp.in while :; do taskset -c 1 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,trunc & sleep 0.1 || sleep 1 taskset -c 2 socat OPEN:tmp.in UDP4:localhost:1337,shut-null wait done where the client will eventually get ECONNREFUSED on a write() (typically the second or third one of a given iteration): 2024/11/13 21:28:23 socat[46901] E write(6, 0x556db2e3c000, 8192): Connection refused This issue was first observed as a seldom failure in Podman's tests checking UDP functionality while using pasta(1) to connect the container's network namespace, which leads us to a reproducer with the lookup error resulting in an ICMP packet on a tap device: LOCAL_ADDR="$(ip -j -4 addr show|jq -rM '.[] | .addr_info[0] | select(.scope == "global").local')" while :; do ./pasta --config-net -p pasta.pcap -u 1337 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,trunc & sleep 0.2 || sleep 1 socat OPEN:tmp.in UDP4:${LOCAL_ADDR}:1337,shut-null wait cmp tmp.in tmp.out done Once this fails: tmp.in tmp.out differ: char 8193, line 29 we can finally have a look at what's going on: $ tshark -r pasta.pcap 1 0.000000 :: → ff02::16 ICMPv6 110 Multicast Listener Report Message v2 2 0.168690 88.198.0.161 → 88.198.0.164 UDP 8234 60260 → 1337 Len=8192 3 0.168767 88.198.0.161 → 88.198.0.164 UDP 8234 60260 → 1337 Len=8192 4 0.168806 88.198.0.161 → 88.198.0.164 UDP 8234 60260 → 1337 Len=8192 5 0.168827 c6:47:05:8d:dc:04 → Broadcast ARP 42 Who has 88.198.0.161? Tell 88.198.0.164 6 0.168851 9a:55:9a:55:9a:55 → c6:47:05:8d:dc:04 ARP 42 88.198.0.161 is at 9a:55:9a:55:9a:55 7 0.168875 88.198.0.161 →
88.198.0.164 UDP 8234 60260 → 1337 Len=8192 8 0.168896 88.198.0.164 → 88.198.0.161 ICMP 590 Destination unreachable (Port unreachable) 9 0.168926 88.198.0.161 → 88.198.0.164 UDP 8234 60260 → 1337 Len=8192 10 0.168959 88.198.0.161 → 88.198.0.164 UDP 8234 60260 → 1337 Len=8192 11 0.168989 88.198.0.161 → 88.198.0.164 UDP 4138 60260 → 1337 Len=4096 12 0.169010 88.198.0.161 → 88.198.0.164 UDP 42 60260 → 1337 Len=0 On the third datagram received, the network namespace of the container initiates an ARP lookup to deliver the ICMP message. In another variant of this reproducer, starting the client with: strace -f pasta --config-net -u 1337 socat UDP4-LISTEN:1337,null-eof OPEN:tmp.out,create,trunc 2>strace.log & and connecting to the socat server using a loopback address: socat OPEN:tmp.in UDP4:localhost:1337,shut-null we can more clearly observe a sendmmsg() call failing after the first datagram is delivered: [pid 278012] connect(173, 0x7fff96c95fc0, 16) = 0 [...] [pid 278012] recvmmsg(173, 0x7fff96c96020, 1024, MSG_DONTWAIT, NULL) = -1 EAGAIN (Resource temporarily unavailable) [pid 278012] sendmmsg(173, 0x561c5ad0a720, 1, MSG_NOSIGNAL) = 1 [...] [pid 278012] sendmmsg(173, 0x561c5ad0a720, 1, MSG_NOSIGNAL) = -1 ECONNREFUSED (Connection refused) and, somewhat confusingly, after a connect() on the same socket succeeded. Until commit 4cdeeee9252a ("net: udp: prefer listeners bound to an address"), the race between receive address change and lookup didn't actually cause visible issues, because, once the lookup based on the secondary hash chain failed, we would still attempt a lookup based on the primary hash (destination port only), and find the socket with the outdated secondary hash. That change, however, dropped port-only lookups altogether, as side effect, making the race visible.
To fix this, while avoiding the need to make address changes and rehash atomic against lookups, reintroduce primary hash lookups as fallback, if lookups based on four-tuple and secondary hashes fail. To this end, introduce a simplified lookup implementation, which doesn't take care of SO_REUSEPORT groups: if we have one, there are multiple sockets that would match the four-tuple or secondary hash, meaning that we can't run into this race at all. v2: - instead of synchronising lookup operations against address change plus rehash, reintroduce a simplified version of the original primary hash lookup as fallback v1: - fix build with CONFIG_IPV6=n: add ifdef around sk_v6_rcv_saddr usage (Kuniyuki Iwashima) - directly use sk_rcv_saddr for IPv4 receive addresses instead of fetching inet_rcv_saddr (Kuniyuki Iwashima) - move inet_update_saddr() to inet_hashtables.h and use that to set IPv4/IPv6 addresses as suitable (Kuniyuki Iwashima) - rebase onto net-next, update commit message accordingly Reported-by: Ed Santiago Link: https://github.com/containers/podman/issues/24147 Analysed-by: David Gibson Fixes: 30fff9231fad ("udp: bind() optimisation") Signed-off-by: Stefano Brivio Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/udp.c | 50 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e8953e88efef..4bc0a0686fcd 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,6 +420,49 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, } EXPORT_SYMBOL(udp_ehashfn); +/** + * udp4_lib_lookup1() - Simplified lookup using primary hash (destination port) + * @net: Network namespace + * @saddr: Source address, network order + * @sport: Source port, network order + * @daddr: Destination address, network order + * @hnum: Destination port, host order + * @dif: Destination interface index + * @sdif: Destination bridge port index, if relevant + * @udptable: Set of UDP hash tables + * + * Simplified lookup to be used as fallback if no sockets are found due to a + * potential race between (receive) address change, and lookup happening before + * the rehash operation. This function ignores SO_REUSEPORT groups while scoring + * result sockets, because if we have one, we don't need the fallback at all. + * + * Called under rcu_read_lock(). 
+ * + * Return: socket with highest matching score if any, NULL if none + */ +static struct sock *udp4_lib_lookup1(const struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, + int dif, int sdif, + const struct udp_table *udptable) +{ + unsigned int slot = udp_hashfn(net, hnum, udptable->mask); + struct udp_hslot *hslot = &udptable->hash[slot]; + struct sock *sk, *result = NULL; + int score, badness = 0; + + sk_for_each_rcu(sk, &hslot->head) { + score = compute_score(sk, net, + saddr, sport, daddr, hnum, dif, sdif); + if (score > badness) { + result = sk; + badness = score; + } + } + + return result; +} + /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(const struct net *net, __be32 saddr, __be16 sport, @@ -683,6 +726,19 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, htonl(INADDR_ANY), hnum, dif, sdif, hslot2, skb); + if (!IS_ERR_OR_NULL(result)) + goto done; + + /* Primary hash (destination port) lookup as fallback for this race: + * 1. __ip4_datagram_connect() sets sk_rcv_saddr + * 2. lookup (this function): new sk_rcv_saddr, hashes not updated yet + * 3. rehash operation updating _secondary and four-tuple_ hashes + * The primary hash doesn't need an update after 1., so, thanks to this + * further step, 1. and 3. don't need to be atomic against the lookup. 
+ */ + result = udp4_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif, + udptable); + done: if (IS_ERR(result)) return NULL; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7c14c449804c..6671daa67f4f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -170,6 +170,49 @@ static int compute_score(struct sock *sk, const struct net *net, return score; } +/** + * udp6_lib_lookup1() - Simplified lookup using primary hash (destination port) + * @net: Network namespace + * @saddr: Source address, network order + * @sport: Source port, network order + * @daddr: Destination address, network order + * @hnum: Destination port, host order + * @dif: Destination interface index + * @sdif: Destination bridge port index, if relevant + * @udptable: Set of UDP hash tables + * + * Simplified lookup to be used as fallback if no sockets are found due to a + * potential race between (receive) address change, and lookup happening before + * the rehash operation. This function ignores SO_REUSEPORT groups while scoring + * result sockets, because if we have one, we don't need the fallback at all. + * + * Called under rcu_read_lock(). 
+ * + * Return: socket with highest matching score if any, NULL if none + */ +static struct sock *udp6_lib_lookup1(const struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, + unsigned int hnum, int dif, int sdif, + const struct udp_table *udptable) +{ + unsigned int slot = udp_hashfn(net, hnum, udptable->mask); + struct udp_hslot *hslot = &udptable->hash[slot]; + struct sock *sk, *result = NULL; + int score, badness = 0; + + sk_for_each_rcu(sk, &hslot->head) { + score = compute_score(sk, net, + saddr, sport, daddr, hnum, dif, sdif); + if (score > badness) { + result = sk; + badness = score; + } + } + + return result; +} + /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(const struct net *net, const struct in6_addr *saddr, __be16 sport, @@ -347,6 +390,13 @@ struct sock *__udp6_lib_lookup(const struct net *net, result = udp6_lib_lookup2(net, saddr, sport, &in6addr_any, hnum, dif, sdif, hslot2, skb); + if (!IS_ERR_OR_NULL(result)) + goto done; + + /* Cover address change/lookup/rehash race: see __udp4_lib_lookup() */ + result = udp6_lib_lookup1(net, saddr, sport, daddr, hnum, dif, sdif, + udptable); + done: if (IS_ERR(result)) return NULL; From d9a093d2d12aec87ed0a2ac660b3a62261bef966 Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 19 Dec 2024 13:47:52 +0800 Subject: [PATCH 0599/1386] net: enetc: add Tx checksum offload for i.MX95 ENETC In addition to supporting Rx checksum offload, i.MX95 ENETC also supports Tx checksum offload. The transmit checksum offload is implemented through the Tx BD. To support Tx checksum offload, software needs to fill some auxiliary information in Tx BD, such as IP version, IP header offset and size, whether L4 is UDP or TCP, etc. Same as Rx checksum offload, Tx checksum offload capability isn't defined in register, so tx_csum bit is added to struct enetc_drvdata to indicate whether the device supports Tx checksum offload. 
Signed-off-by: Wei Fang Reviewed-by: Frank Li Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20241219054755.1615626-2-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 53 ++++++++++++++++--- drivers/net/ethernet/freescale/enetc/enetc.h | 2 + .../net/ethernet/freescale/enetc/enetc_hw.h | 15 ++++-- .../freescale/enetc/enetc_pf_common.c | 3 ++ 4 files changed, 63 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 535969fa0fdb..88f12c88110f 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -146,6 +146,27 @@ static int enetc_ptp_parse(struct sk_buff *skb, u8 *udp, return 0; } +static bool enetc_tx_csum_offload_check(struct sk_buff *skb) +{ + switch (skb->csum_offset) { + case offsetof(struct tcphdr, check): + case offsetof(struct udphdr, check): + return true; + default: + return false; + } +} + +static bool enetc_skb_is_ipv6(struct sk_buff *skb) +{ + return vlan_get_protocol(skb) == htons(ETH_P_IPV6); +} + +static bool enetc_skb_is_tcp(struct sk_buff *skb) +{ + return skb->csum_offset == offsetof(struct tcphdr, check); +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; @@ -163,6 +184,29 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) dma_addr_t dma; u8 flags = 0; + enetc_clear_tx_bd(&temp_bd); + if (skb->ip_summed == CHECKSUM_PARTIAL) { + /* Can not support TSD and checksum offload at the same time */ + if (priv->active_offloads & ENETC_F_TXCSUM && + enetc_tx_csum_offload_check(skb) && !tx_ring->tsd_enable) { + temp_bd.l3_aux0 = FIELD_PREP(ENETC_TX_BD_L3_START, + skb_network_offset(skb)); + temp_bd.l3_aux1 = FIELD_PREP(ENETC_TX_BD_L3_HDR_LEN, + skb_network_header_len(skb) / 4); + temp_bd.l3_aux1 |= FIELD_PREP(ENETC_TX_BD_L3T, + 
enetc_skb_is_ipv6(skb)); + if (enetc_skb_is_tcp(skb)) + temp_bd.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, + ENETC_TXBD_L4T_TCP); + else + temp_bd.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, + ENETC_TXBD_L4T_UDP); + flags |= ENETC_TXBD_FLAGS_CSUM_LSO | ENETC_TXBD_FLAGS_L4CS; + } else if (skb_checksum_help(skb)) { + return 0; + } + } + i = tx_ring->next_to_use; txbd = ENETC_TXBD(*tx_ring, i); prefetchw(txbd); @@ -173,7 +217,6 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) temp_bd.addr = cpu_to_le64(dma); temp_bd.buf_len = cpu_to_le16(len); - temp_bd.lstatus = 0; tx_swbd = &tx_ring->tx_swbd[i]; tx_swbd->dma = dma; @@ -594,7 +637,7 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_bdr *tx_ring; - int count, err; + int count; /* Queue one-step Sync packet if already locked */ if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { @@ -627,11 +670,6 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); - if (err) - goto drop_packet_err; - } enetc_lock_mdio(); count = enetc_map_tx_buffs(tx_ring, skb); enetc_unlock_mdio(); @@ -3274,6 +3312,7 @@ static const struct enetc_drvdata enetc_pf_data = { static const struct enetc_drvdata enetc4_pf_data = { .sysclk_freq = ENETC_CLK_333M, + .tx_csum = true, .pmac_offset = ENETC4_PMAC_OFFSET, .eth_ops = &enetc4_pf_ethtool_ops, }; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 72fa03dbc2dd..e82eb9a9137c 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -234,6 +234,7 @@ enum enetc_errata { struct enetc_drvdata { u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */ + u8 tx_csum:1; u64 sysclk_freq; const struct ethtool_ops *eth_ops; }; @@ -341,6 +342,7 @@ enum enetc_active_offloads { ENETC_F_QBV = BIT(9), 
ENETC_F_QCI = BIT(10), ENETC_F_QBU = BIT(11), + ENETC_F_TXCSUM = BIT(12), }; enum enetc_flags_bit { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 55ba949230ff..0e259baf36ee 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -558,7 +558,16 @@ union enetc_tx_bd { __le16 frm_len; union { struct { - u8 reserved[3]; + u8 l3_aux0; +#define ENETC_TX_BD_L3_START GENMASK(6, 0) +#define ENETC_TX_BD_IPCS BIT(7) + u8 l3_aux1; +#define ENETC_TX_BD_L3_HDR_LEN GENMASK(6, 0) +#define ENETC_TX_BD_L3T BIT(7) + u8 l4_aux; +#define ENETC_TX_BD_L4T GENMASK(7, 5) +#define ENETC_TXBD_L4T_UDP 1 +#define ENETC_TXBD_L4T_TCP 2 u8 flags; }; /* default layout */ __le32 txstart; @@ -582,10 +591,10 @@ union enetc_tx_bd { }; enum enetc_txbd_flags { - ENETC_TXBD_FLAGS_RES0 = BIT(0), /* reserved */ + ENETC_TXBD_FLAGS_L4CS = BIT(0), /* For ENETC 4.1 and later */ ENETC_TXBD_FLAGS_TSE = BIT(1), ENETC_TXBD_FLAGS_W = BIT(2), - ENETC_TXBD_FLAGS_RES3 = BIT(3), /* reserved */ + ENETC_TXBD_FLAGS_CSUM_LSO = BIT(3), /* For ENETC 4.1 and later */ ENETC_TXBD_FLAGS_TXSTART = BIT(4), ENETC_TXBD_FLAGS_EX = BIT(6), ENETC_TXBD_FLAGS_F = BIT(7) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index 0eecfc833164..09f2d7ec44eb 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -119,6 +119,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->priv_flags |= IFF_UNICAST_FLT; + if (si->drvdata->tx_csum) + priv->active_offloads |= ENETC_F_TXCSUM; + /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ if (!is_enetc_rev1(si)) { ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK); From 93c5d5a0ddf8ad39661a087edb45286a0a55f7e6 Mon Sep 17 00:00:00 2001 From: Wei Fang 
Date: Thu, 19 Dec 2024 13:47:53 +0800 Subject: [PATCH 0600/1386] net: enetc: update max chained Tx BD number for i.MX95 ENETC The max chained Tx BDs of latest ENETC (i.MX95 ENETC, rev 4.1) has been increased to 63, but since the range of MAX_SKB_FRAGS is 17~45, so for i.MX95 ENETC and later revision, it is better to set ENETC4_MAX_SKB_FRAGS to MAX_SKB_FRAGS. In addition, add max_frags in struct enetc_drvdata to indicate the max chained BDs supported by device. Because the max number of chained BDs supported by LS1028A and i.MX95 ENETC is different. Signed-off-by: Wei Fang Reviewed-by: Frank Li Reviewed-by: Claudiu Manoil Reviewed-by: Simon Horman Link: https://patch.msgid.link/20241219054755.1615626-3-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 13 +++++++++---- drivers/net/ethernet/freescale/enetc/enetc.h | 13 +++++++++++-- .../net/ethernet/freescale/enetc/enetc_pf_common.c | 1 + drivers/net/ethernet/freescale/enetc/enetc_vf.c | 1 + 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 88f12c88110f..76c33506991b 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -534,6 +534,7 @@ static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { + struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); int hdr_len, total_len, data_len; struct enetc_tx_swbd *tx_swbd; union enetc_tx_bd *txbd; @@ -599,7 +600,7 @@ static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb bd_data_num++; tso_build_data(skb, &tso, size); - if (unlikely(bd_data_num >= ENETC_MAX_SKB_FRAGS && data_len)) + if (unlikely(bd_data_num >= priv->max_frags && data_len)) goto err_chained_bd; } @@ -660,7 +661,7 @@ static netdev_tx_t enetc_start_xmit(struct 
sk_buff *skb, count = enetc_map_tx_tso_buffs(tx_ring, skb); enetc_unlock_mdio(); } else { - if (unlikely(skb_shinfo(skb)->nr_frags > ENETC_MAX_SKB_FRAGS)) + if (unlikely(skb_shinfo(skb)->nr_frags > priv->max_frags)) if (unlikely(skb_linearize(skb))) goto drop_packet_err; @@ -678,7 +679,7 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, if (unlikely(!count)) goto drop_packet_err; - if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED) + if (enetc_bd_unused(tx_ring) < ENETC_TXBDS_MAX_NEEDED(priv->max_frags)) netif_stop_subqueue(ndev, tx_ring->index); return NETDEV_TX_OK; @@ -946,7 +947,8 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (unlikely(tx_frm_cnt && netif_carrier_ok(ndev) && __netif_subqueue_stopped(ndev, tx_ring->index) && !test_bit(ENETC_TX_DOWN, &priv->flags) && - (enetc_bd_unused(tx_ring) >= ENETC_TXBDS_MAX_NEEDED))) { + (enetc_bd_unused(tx_ring) >= + ENETC_TXBDS_MAX_NEEDED(priv->max_frags)))) { netif_wake_subqueue(ndev, tx_ring->index); } @@ -3307,18 +3309,21 @@ EXPORT_SYMBOL_GPL(enetc_pci_remove); static const struct enetc_drvdata enetc_pf_data = { .sysclk_freq = ENETC_CLK_400M, .pmac_offset = ENETC_PMAC_OFFSET, + .max_frags = ENETC_MAX_SKB_FRAGS, .eth_ops = &enetc_pf_ethtool_ops, }; static const struct enetc_drvdata enetc4_pf_data = { .sysclk_freq = ENETC_CLK_333M, .tx_csum = true, + .max_frags = ENETC4_MAX_SKB_FRAGS, .pmac_offset = ENETC4_PMAC_OFFSET, .eth_ops = &enetc4_pf_ethtool_ops, }; static const struct enetc_drvdata enetc_vf_data = { .sysclk_freq = ENETC_CLK_400M, + .max_frags = ENETC_MAX_SKB_FRAGS, .eth_ops = &enetc_vf_ethtool_ops, }; diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index e82eb9a9137c..1e680f0f5123 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -59,9 +59,16 @@ struct enetc_rx_swbd { /* ENETC overhead: optional extension BD + 1 BD gap */ #define 
ENETC_TXBDS_NEEDED(val) ((val) + 2) -/* max # of chained Tx BDs is 15, including head and extension BD */ +/* For LS1028A, max # of chained Tx BDs is 15, including head and + * extension BD. + */ #define ENETC_MAX_SKB_FRAGS 13 -#define ENETC_TXBDS_MAX_NEEDED ENETC_TXBDS_NEEDED(ENETC_MAX_SKB_FRAGS + 1) +/* For ENETC v4 and later versions, max # of chained Tx BDs is 63, + * including head and extension BD, but the range of MAX_SKB_FRAGS + * is 17 ~ 45, so set ENETC4_MAX_SKB_FRAGS to MAX_SKB_FRAGS. + */ +#define ENETC4_MAX_SKB_FRAGS MAX_SKB_FRAGS +#define ENETC_TXBDS_MAX_NEEDED(x) ENETC_TXBDS_NEEDED((x) + 1) struct enetc_ring_stats { unsigned int packets; @@ -235,6 +242,7 @@ enum enetc_errata { struct enetc_drvdata { u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */ u8 tx_csum:1; + u8 max_frags; u64 sysclk_freq; const struct ethtool_ops *eth_ops; }; @@ -377,6 +385,7 @@ struct enetc_ndev_priv { u16 msg_enable; u8 preemptible_tcs; + u8 max_frags; /* The maximum number of BDs for fragments */ enum enetc_active_offloads active_offloads; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index 09f2d7ec44eb..00b73a948746 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -101,6 +101,7 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, priv->msg_enable = (NETIF_MSG_WOL << 1) - 1; priv->sysclk_freq = si->drvdata->sysclk_freq; + priv->max_frags = si->drvdata->max_frags; ndev->netdev_ops = ndev_ops; enetc_set_ethtool_ops(ndev); ndev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index a5f8ce576b6e..63d78b2b8670 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -136,6 +136,7 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, 
struct net_device *ndev, priv->msg_enable = (NETIF_MSG_IFUP << 1) - 1; priv->sysclk_freq = si->drvdata->sysclk_freq; + priv->max_frags = si->drvdata->max_frags; ndev->netdev_ops = ndev_ops; enetc_set_ethtool_ops(ndev); ndev->watchdog_timeo = 5 * HZ; From 69797ff888d3dbab035a0d2516b554285b094e3c Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 19 Dec 2024 13:47:54 +0800 Subject: [PATCH 0601/1386] net: enetc: add LSO support for i.MX95 ENETC PF ENETC rev 4.1 supports large send offload (LSO), segmenting large TCP and UDP transmit units into multiple Ethernet frames. To support LSO, software needs to fill some auxiliary information in Tx BD, such as LSO header length, frame length, LSO maximum segment size, etc. At 1Gbps link rate, TCP segmentation was tested using iperf3, and the CPU performance before and after applying the patch was compared through the top command. It can be seen that LSO saves a significant amount of CPU cycles compared to software TSO. Before applying the patch: %Cpu(s): 0.1 us, 4.1 sy, 0.0 ni, 85.7 id, 0.0 wa, 0.5 hi, 9.7 si After applying the patch: %Cpu(s): 0.1 us, 2.3 sy, 0.0 ni, 94.5 id, 0.0 wa, 0.4 hi, 2.6 si Signed-off-by: Wei Fang Reviewed-by: Frank Li Reviewed-by: Claudiu Manoil Link: https://patch.msgid.link/20241219054755.1615626-4-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc.c | 264 +++++++++++++++++- drivers/net/ethernet/freescale/enetc/enetc.h | 14 + .../net/ethernet/freescale/enetc/enetc4_hw.h | 23 ++ .../net/ethernet/freescale/enetc/enetc_hw.h | 16 +- .../freescale/enetc/enetc_pf_common.c | 3 + 5 files changed, 310 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index 76c33506991b..6a6fc819dfde 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -532,6 +532,230 @@ static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct 
tso_t *tso } } +static int enetc_lso_count_descs(const struct sk_buff *skb) +{ + /* 4 BDs: 1 BD for LSO header + 1 BD for extended BD + 1 BD + * for linear area data but not include LSO header, namely + * skb_headlen(skb) - lso_hdr_len (it may be 0, but that's + * okay, we only need to consider the worst case). And 1 BD + * for gap. + */ + return skb_shinfo(skb)->nr_frags + 4; +} + +static int enetc_lso_get_hdr_len(const struct sk_buff *skb) +{ + int hdr_len, tlen; + + tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr); + hdr_len = skb_transport_offset(skb) + tlen; + + return hdr_len; +} + +static void enetc_lso_start(struct sk_buff *skb, struct enetc_lso_t *lso) +{ + lso->lso_seg_size = skb_shinfo(skb)->gso_size; + lso->ipv6 = enetc_skb_is_ipv6(skb); + lso->tcp = skb_is_gso_tcp(skb); + lso->l3_hdr_len = skb_network_header_len(skb); + lso->l3_start = skb_network_offset(skb); + lso->hdr_len = enetc_lso_get_hdr_len(skb); + lso->total_len = skb->len - lso->hdr_len; +} + +static void enetc_lso_map_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int *i, struct enetc_lso_t *lso) +{ + union enetc_tx_bd txbd_tmp, *txbd; + struct enetc_tx_swbd *tx_swbd; + u16 frm_len, frm_len_ext; + u8 flags, e_flags = 0; + dma_addr_t addr; + char *hdr; + + /* Get the first BD of the LSO BDs chain */ + txbd = ENETC_TXBD(*tx_ring, *i); + tx_swbd = &tx_ring->tx_swbd[*i]; + prefetchw(txbd); + + /* Prepare LSO header: MAC + IP + TCP/UDP */ + hdr = tx_ring->tso_headers + *i * TSO_HEADER_SIZE; + memcpy(hdr, skb->data, lso->hdr_len); + addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE; + + /* {frm_len_ext, frm_len} indicates the total length of + * large transmit data unit. frm_len contains the 16 least + * significant bits and frm_len_ext contains the 4 most + * significant bits. 
+ */ + frm_len = lso->total_len & 0xffff; + frm_len_ext = (lso->total_len >> 16) & 0xf; + + /* Set the flags of the first BD */ + flags = ENETC_TXBD_FLAGS_EX | ENETC_TXBD_FLAGS_CSUM_LSO | + ENETC_TXBD_FLAGS_LSO | ENETC_TXBD_FLAGS_L4CS; + + enetc_clear_tx_bd(&txbd_tmp); + txbd_tmp.addr = cpu_to_le64(addr); + txbd_tmp.hdr_len = cpu_to_le16(lso->hdr_len); + + /* first BD needs frm_len and offload flags set */ + txbd_tmp.frm_len = cpu_to_le16(frm_len); + txbd_tmp.flags = flags; + + txbd_tmp.l3_aux0 = FIELD_PREP(ENETC_TX_BD_L3_START, lso->l3_start); + /* l3_hdr_size in 32-bits (4 bytes) */ + txbd_tmp.l3_aux1 = FIELD_PREP(ENETC_TX_BD_L3_HDR_LEN, + lso->l3_hdr_len / 4); + if (lso->ipv6) + txbd_tmp.l3_aux1 |= ENETC_TX_BD_L3T; + else + txbd_tmp.l3_aux0 |= ENETC_TX_BD_IPCS; + + txbd_tmp.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, lso->tcp ? + ENETC_TXBD_L4T_TCP : ENETC_TXBD_L4T_UDP); + + /* For the LSO header we do not set the dma address since + * we do not want it unmapped when we do cleanup. We still + * set len so that we count the bytes sent. 
+ */ + tx_swbd->len = lso->hdr_len; + tx_swbd->do_twostep_tstamp = false; + tx_swbd->check_wb = false; + + /* Actually write the header in the BD */ + *txbd = txbd_tmp; + + /* Get the next BD, and the next BD is extended BD */ + enetc_bdr_idx_inc(tx_ring, i); + txbd = ENETC_TXBD(*tx_ring, *i); + tx_swbd = &tx_ring->tx_swbd[*i]; + prefetchw(txbd); + + enetc_clear_tx_bd(&txbd_tmp); + if (skb_vlan_tag_present(skb)) { + /* Setup the VLAN fields */ + txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb)); + txbd_tmp.ext.tpid = ENETC_TPID_8021Q; + e_flags = ENETC_TXBD_E_FLAGS_VLAN_INS; + } + + /* Write the BD */ + txbd_tmp.ext.e_flags = e_flags; + txbd_tmp.ext.lso_sg_size = cpu_to_le16(lso->lso_seg_size); + txbd_tmp.ext.frm_len_ext = cpu_to_le16(frm_len_ext); + *txbd = txbd_tmp; +} + +static int enetc_lso_map_data(struct enetc_bdr *tx_ring, struct sk_buff *skb, + int *i, struct enetc_lso_t *lso, int *count) +{ + union enetc_tx_bd txbd_tmp, *txbd = NULL; + struct enetc_tx_swbd *tx_swbd; + skb_frag_t *frag; + dma_addr_t dma; + u8 flags = 0; + int len, f; + + len = skb_headlen(skb) - lso->hdr_len; + if (len > 0) { + dma = dma_map_single(tx_ring->dev, skb->data + lso->hdr_len, + len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + return -ENOMEM; + + enetc_bdr_idx_inc(tx_ring, i); + txbd = ENETC_TXBD(*tx_ring, *i); + tx_swbd = &tx_ring->tx_swbd[*i]; + prefetchw(txbd); + *count += 1; + + enetc_clear_tx_bd(&txbd_tmp); + txbd_tmp.addr = cpu_to_le64(dma); + txbd_tmp.buf_len = cpu_to_le16(len); + + tx_swbd->dma = dma; + tx_swbd->len = len; + tx_swbd->is_dma_page = 0; + tx_swbd->dir = DMA_TO_DEVICE; + } + + frag = &skb_shinfo(skb)->frags[0]; + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) { + if (txbd) + *txbd = txbd_tmp; + + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag); + if (dma_mapping_error(tx_ring->dev, dma)) + return -ENOMEM; + + /* Get the next BD */ + enetc_bdr_idx_inc(tx_ring, i); + txbd = ENETC_TXBD(*tx_ring, *i); + 
tx_swbd = &tx_ring->tx_swbd[*i]; + prefetchw(txbd); + *count += 1; + + enetc_clear_tx_bd(&txbd_tmp); + txbd_tmp.addr = cpu_to_le64(dma); + txbd_tmp.buf_len = cpu_to_le16(len); + + tx_swbd->dma = dma; + tx_swbd->len = len; + tx_swbd->is_dma_page = 1; + tx_swbd->dir = DMA_TO_DEVICE; + } + + /* Last BD needs 'F' bit set */ + flags |= ENETC_TXBD_FLAGS_F; + txbd_tmp.flags = flags; + *txbd = txbd_tmp; + + tx_swbd->is_eof = 1; + tx_swbd->skb = skb; + + return 0; +} + +static int enetc_lso_hw_offload(struct enetc_bdr *tx_ring, struct sk_buff *skb) +{ + struct enetc_tx_swbd *tx_swbd; + struct enetc_lso_t lso = {0}; + int err, i, count = 0; + + /* Initialize the LSO handler */ + enetc_lso_start(skb, &lso); + i = tx_ring->next_to_use; + + enetc_lso_map_hdr(tx_ring, skb, &i, &lso); + /* First BD and an extend BD */ + count += 2; + + err = enetc_lso_map_data(tx_ring, skb, &i, &lso, &count); + if (err) + goto dma_err; + + /* Go to the next BD */ + enetc_bdr_idx_inc(tx_ring, &i); + tx_ring->next_to_use = i; + enetc_update_tx_ring_tail(tx_ring); + + return count; + +dma_err: + do { + tx_swbd = &tx_ring->tx_swbd[i]; + enetc_free_tx_frame(tx_ring, tx_swbd); + if (i == 0) + i = tx_ring->bd_count; + i--; + } while (--count); + + return 0; +} + static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); @@ -652,14 +876,26 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, tx_ring = priv->tx_ring[skb->queue_mapping]; if (skb_is_gso(skb)) { - if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) { - netif_stop_subqueue(ndev, tx_ring->index); - return NETDEV_TX_BUSY; - } + /* LSO data unit lengths of up to 256KB are supported */ + if (priv->active_offloads & ENETC_F_LSO && + (skb->len - enetc_lso_get_hdr_len(skb)) <= + ENETC_LSO_MAX_DATA_LEN) { + if (enetc_bd_unused(tx_ring) < enetc_lso_count_descs(skb)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } - enetc_lock_mdio(); 
- count = enetc_map_tx_tso_buffs(tx_ring, skb); - enetc_unlock_mdio(); + count = enetc_lso_hw_offload(tx_ring, skb); + } else { + if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) { + netif_stop_subqueue(ndev, tx_ring->index); + return NETDEV_TX_BUSY; + } + + enetc_lock_mdio(); + count = enetc_map_tx_tso_buffs(tx_ring, skb); + enetc_unlock_mdio(); + } } else { if (unlikely(skb_shinfo(skb)->nr_frags > priv->max_frags)) if (unlikely(skb_linearize(skb))) @@ -1799,6 +2035,9 @@ void enetc_get_si_caps(struct enetc_si *si) rss = enetc_rd(hw, ENETC_SIRSSCAPR); si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(rss); } + + if (val & ENETC_SIPCAPR0_LSO) + si->hw_features |= ENETC_SI_F_LSO; } EXPORT_SYMBOL_GPL(enetc_get_si_caps); @@ -2095,6 +2334,14 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) return 0; } +static void enetc_set_lso_flags_mask(struct enetc_hw *hw) +{ + enetc_wr(hw, ENETC4_SILSOSFMR0, + SILSOSFMR0_VAL_SET(ENETC4_TCP_NL_SEG_FLAGS_DMASK, + ENETC4_TCP_NL_SEG_FLAGS_DMASK)); + enetc_wr(hw, ENETC4_SILSOSFMR1, 0); +} + int enetc_configure_si(struct enetc_ndev_priv *priv) { struct enetc_si *si = priv->si; @@ -2108,6 +2355,9 @@ int enetc_configure_si(struct enetc_ndev_priv *priv) /* enable SI */ enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN); + if (si->hw_features & ENETC_SI_F_LSO) + enetc_set_lso_flags_mask(hw); + /* TODO: RSS support for i.MX95 will be supported later, and the * is_enetc_rev1() condition will be removed */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 1e680f0f5123..4ad4eb5c5a74 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -41,6 +41,18 @@ struct enetc_tx_swbd { u8 qbv_en:1; }; +struct enetc_lso_t { + bool ipv6; + bool tcp; + u8 l3_hdr_len; + u8 hdr_len; /* LSO header length */ + u8 l3_start; + u16 lso_seg_size; + int total_len; /* total data length, not include LSO header */ +}; + +#define 
ENETC_LSO_MAX_DATA_LEN SZ_256K + #define ENETC_RX_MAXFRM_SIZE ENETC_MAC_MAXFRM_SIZE #define ENETC_RXB_TRUESIZE 2048 /* PAGE_SIZE >> 1 */ #define ENETC_RXB_PAD NET_SKB_PAD /* add extra space if needed */ @@ -238,6 +250,7 @@ enum enetc_errata { #define ENETC_SI_F_PSFP BIT(0) #define ENETC_SI_F_QBV BIT(1) #define ENETC_SI_F_QBU BIT(2) +#define ENETC_SI_F_LSO BIT(3) struct enetc_drvdata { u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */ @@ -351,6 +364,7 @@ enum enetc_active_offloads { ENETC_F_QCI = BIT(10), ENETC_F_QBU = BIT(11), ENETC_F_TXCSUM = BIT(12), + ENETC_F_LSO = BIT(13), }; enum enetc_flags_bit { diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index 26b220677448..695cb07c74bc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -12,6 +12,29 @@ #define NXP_ENETC_VENDOR_ID 0x1131 #define NXP_ENETC_PF_DEV_ID 0xe101 +/**********************Station interface registers************************/ +/* Station interface LSO segmentation flag mask register 0/1 */ +#define ENETC4_SILSOSFMR0 0x1300 +#define SILSOSFMR0_TCP_MID_SEG GENMASK(27, 16) +#define SILSOSFMR0_TCP_1ST_SEG GENMASK(11, 0) +#define SILSOSFMR0_VAL_SET(first, mid) (FIELD_PREP(SILSOSFMR0_TCP_MID_SEG, mid) | \ + FIELD_PREP(SILSOSFMR0_TCP_1ST_SEG, first)) + +#define ENETC4_SILSOSFMR1 0x1304 +#define SILSOSFMR1_TCP_LAST_SEG GENMASK(11, 0) +#define ENETC4_TCP_FLAGS_FIN BIT(0) +#define ENETC4_TCP_FLAGS_SYN BIT(1) +#define ENETC4_TCP_FLAGS_RST BIT(2) +#define ENETC4_TCP_FLAGS_PSH BIT(3) +#define ENETC4_TCP_FLAGS_ACK BIT(4) +#define ENETC4_TCP_FLAGS_URG BIT(5) +#define ENETC4_TCP_FLAGS_ECE BIT(6) +#define ENETC4_TCP_FLAGS_CWR BIT(7) +#define ENETC4_TCP_FLAGS_NS BIT(8) +/* According to tso_build_hdr(), clear all special flags for not last packet. 
*/ +#define ENETC4_TCP_NL_SEG_FLAGS_DMASK (ENETC4_TCP_FLAGS_FIN | \ + ENETC4_TCP_FLAGS_RST | ENETC4_TCP_FLAGS_PSH) + /***************************ENETC port registers**************************/ #define ENETC4_ECAPR0 0x0 #define ECAPR0_RFS BIT(2) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 0e259baf36ee..4098f01479bc 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -25,6 +25,7 @@ #define ENETC_SIPCAPR0 0x20 #define ENETC_SIPCAPR0_RSS BIT(8) #define ENETC_SIPCAPR0_RFS BIT(2) +#define ENETC_SIPCAPR0_LSO BIT(1) #define ENETC_SIPCAPR1 0x24 #define ENETC_SITGTGR 0x30 #define ENETC_SIRBGCR 0x38 @@ -554,7 +555,10 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg) union enetc_tx_bd { struct { __le64 addr; - __le16 buf_len; + union { + __le16 buf_len; + __le16 hdr_len; /* For LSO, ENETC 4.1 and later */ + }; __le16 frm_len; union { struct { @@ -578,13 +582,16 @@ union enetc_tx_bd { __le32 tstamp; __le16 tpid; __le16 vid; - u8 reserved[6]; + __le16 lso_sg_size; /* For ENETC 4.1 and later */ + __le16 frm_len_ext; /* For ENETC 4.1 and later */ + u8 reserved[2]; u8 e_flags; u8 flags; } ext; /* Tx BD extension */ struct { __le32 tstamp; - u8 reserved[10]; + u8 reserved[8]; + __le16 lso_err_count; /* For ENETC 4.1 and later */ u8 status; u8 flags; } wb; /* writeback descriptor */ @@ -593,6 +600,7 @@ union enetc_tx_bd { enum enetc_txbd_flags { ENETC_TXBD_FLAGS_L4CS = BIT(0), /* For ENETC 4.1 and later */ ENETC_TXBD_FLAGS_TSE = BIT(1), + ENETC_TXBD_FLAGS_LSO = BIT(1), /* For ENETC 4.1 and later */ ENETC_TXBD_FLAGS_W = BIT(2), ENETC_TXBD_FLAGS_CSUM_LSO = BIT(3), /* For ENETC 4.1 and later */ ENETC_TXBD_FLAGS_TXSTART = BIT(4), @@ -663,6 +671,8 @@ union enetc_rx_bd { #define ENETC_CBD_FLAGS_SF BIT(7) /* short format */ #define ENETC_CBD_STATUS_MASK 0xf +#define ENETC_TPID_8021Q 0 + struct enetc_cmd_rfse { u8 smac_h[6]; u8 smac_m[6]; diff 
--git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index 00b73a948746..31dedc665a16 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -123,6 +123,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, if (si->drvdata->tx_csum) priv->active_offloads |= ENETC_F_TXCSUM; + if (si->hw_features & ENETC_SI_F_LSO) + priv->active_offloads |= ENETC_F_LSO; + /* TODO: currently, i.MX95 ENETC driver does not support advanced features */ if (!is_enetc_rev1(si)) { ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK); From c12e82c053f6f444a6644ae937b037a3272d6c5a Mon Sep 17 00:00:00 2001 From: Wei Fang Date: Thu, 19 Dec 2024 13:47:55 +0800 Subject: [PATCH 0602/1386] net: enetc: add UDP segmentation offload support Set the NETIF_F_GSO_UDP_L4 bit of hw_features and features because the i.MX95 ENETC and LS1028A drivers implement UDP segmentation. - i.MX95 ENETC supports UDP segmentation via LSO. - LS1028A ENETC supports UDP segmentation since the commit 3d5b459ba0e3 ("net: tso: add UDP segmentation support"). 
Signed-off-by: Wei Fang Reviewed-by: Frank Li Link: https://patch.msgid.link/20241219054755.1615626-5-wei.fang@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_pf_common.c | 6 ++++-- drivers/net/ethernet/freescale/enetc/enetc_vf.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c index 31dedc665a16..3fd9b0727875 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf_common.c @@ -110,11 +110,13 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK | - NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4; ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 63d78b2b8670..3768752b6008 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -145,11 +145,13 @@ static void enetc_vf_netdev_setup(struct enetc_si *si, struct net_device *ndev, ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4; ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - 
NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; + NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_L4; ndev->vlan_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO | NETIF_F_TSO6; From 46e0ccfb88f02ab2eb20a41d519d6e4c028652f2 Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Thu, 19 Dec 2024 11:36:05 -0500 Subject: [PATCH 0603/1386] net: vxlan: rename SKB_DROP_REASON_VXLAN_NO_REMOTE The SKB_DROP_REASON_VXLAN_NO_REMOTE skb drop reason was introduced in the specific context of vxlan. As it turns out, there are similar cases when a packet needs to be dropped in other parts of the network stack, such as the bridge module. Rename SKB_DROP_REASON_VXLAN_NO_REMOTE and give it a more generic name, so that it can be used in other parts of the network stack. This is not a functional change, and the numeric value of the drop reason even remains unchanged. Signed-off-by: Radu Rendec Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20241219163606.717758-2-rrendec@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 4 ++-- drivers/net/vxlan/vxlan_mdb.c | 2 +- include/net/dropreason-core.h | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 0c356e0a61ef..05c10acb2a57 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2798,7 +2798,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) dev_dstats_tx_dropped(dev); vxlan_vnifilter_count(vxlan, vni, NULL, VXLAN_VNI_STATS_TX_DROPS, 0); - kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); + kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); return NETDEV_TX_OK; } } @@ -2821,7 +2821,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) if (fdst) vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); else - kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); + kfree_skb_reason(skb, 
SKB_DROP_REASON_NO_TX_TARGET); } return NETDEV_TX_OK; diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c index 8735891ee128..816ab1aa0526 100644 --- a/drivers/net/vxlan/vxlan_mdb.c +++ b/drivers/net/vxlan/vxlan_mdb.c @@ -1712,7 +1712,7 @@ netdev_tx_t vxlan_mdb_xmit(struct vxlan_dev *vxlan, vxlan_xmit_one(skb, vxlan->dev, src_vni, rcu_dereference(fremote->rd), false); else - kfree_skb_reason(skb, SKB_DROP_REASON_VXLAN_NO_REMOTE); + kfree_skb_reason(skb, SKB_DROP_REASON_NO_TX_TARGET); return NETDEV_TX_OK; } diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index ead4170a1d0a..6e32106d7229 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -106,7 +106,7 @@ FN(VXLAN_VNI_NOT_FOUND) \ FN(MAC_INVALID_SOURCE) \ FN(VXLAN_ENTRY_EXISTS) \ - FN(VXLAN_NO_REMOTE) \ + FN(NO_TX_TARGET) \ FN(IP_TUNNEL_ECN) \ FN(TUNNEL_TXINFO) \ FN(LOCAL_MAC) \ @@ -497,8 +497,8 @@ enum skb_drop_reason { * entry or an entry pointing to a nexthop. */ SKB_DROP_REASON_VXLAN_ENTRY_EXISTS, - /** @SKB_DROP_REASON_VXLAN_NO_REMOTE: no remote found for xmit */ - SKB_DROP_REASON_VXLAN_NO_REMOTE, + /** @SKB_DROP_REASON_NO_TX_TARGET: no target found for xmit */ + SKB_DROP_REASON_NO_TX_TARGET, /** * @SKB_DROP_REASON_IP_TUNNEL_ECN: skb is dropped according to * RFC 6040 4.2, see __INET_ECN_decapsulate() for detail. From 623e43c2f5023853cbf71d6a60898d448a06416a Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Thu, 19 Dec 2024 11:36:06 -0500 Subject: [PATCH 0604/1386] net: bridge: add skb drop reasons to the most common drop points The bridge input code may drop frames for various reasons and at various points in the ingress handling logic. Currently kfree_skb() is used everywhere, and therefore no drop reason is specified. Add drop reasons to the most common drop points. Drop reasons are not added exhaustively to the entire bridge code. 
The intention is to incrementally add drop reasons to the rest of the bridge code in follow-up patches. Signed-off-by: Radu Rendec Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20241219163606.717758-3-rrendec@redhat.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 12 ++++++++++++ net/bridge/br_forward.c | 16 ++++++++++++---- net/bridge/br_input.c | 20 +++++++++++++++----- 3 files changed, 39 insertions(+), 9 deletions(-) diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 6e32106d7229..3a6602f37978 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -111,6 +111,8 @@ FN(TUNNEL_TXINFO) \ FN(LOCAL_MAC) \ FN(ARP_PVLAN_DISABLE) \ + FN(MAC_IEEE_MAC_CONTROL) \ + FN(BRIDGE_INGRESS_STP_STATE) \ FNe(MAX) /** @@ -520,6 +522,16 @@ enum skb_drop_reason { * enabled. */ SKB_DROP_REASON_ARP_PVLAN_DISABLE, + /** + * @SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL: the destination MAC address + * is an IEEE MAC Control address. + */ + SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL, + /** + * @SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE: the STP state of the + * ingress bridge port does not allow frames to be forwarded. 
+ */ + SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE, /** * @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which * shouldn't be used as a real 'reason' - only for tracing code gen diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index e19b583ff2c6..29097e984b4f 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -201,6 +201,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, enum br_pkt_type pkt_type, bool local_rcv, bool local_orig, u16 vid) { + enum skb_drop_reason reason = SKB_DROP_REASON_NO_TX_TARGET; struct net_bridge_port *prev = NULL; struct net_bridge_port *p; @@ -234,8 +235,11 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, continue; prev = maybe_deliver(prev, p, skb, local_orig); - if (IS_ERR(prev)) + if (IS_ERR(prev)) { + reason = PTR_ERR(prev) == -ENOMEM ? SKB_DROP_REASON_NOMEM : + SKB_DROP_REASON_NOT_SPECIFIED; goto out; + } } if (!prev) @@ -249,7 +253,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, out: if (!local_rcv) - kfree_skb(skb); + kfree_skb_reason(skb, reason); } #ifdef CONFIG_BRIDGE_IGMP_SNOOPING @@ -289,6 +293,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, struct net_bridge_mcast *brmctx, bool local_rcv, bool local_orig) { + enum skb_drop_reason reason = SKB_DROP_REASON_NO_TX_TARGET; struct net_bridge_port *prev = NULL; struct net_bridge_port_group *p; bool allow_mode_include = true; @@ -329,8 +334,11 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst, } prev = maybe_deliver(prev, port, skb, local_orig); - if (IS_ERR(prev)) + if (IS_ERR(prev)) { + reason = PTR_ERR(prev) == -ENOMEM ? 
SKB_DROP_REASON_NOMEM : + SKB_DROP_REASON_NOT_SPECIFIED; goto out; + } delivered: if ((unsigned long)lport >= (unsigned long)port) p = rcu_dereference(p->next); @@ -349,6 +357,6 @@ delivered: out: if (!local_rcv) - kfree_skb(skb); + kfree_skb_reason(skb, reason); } #endif diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index ceaa5a89b947..232133a0fd21 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -75,6 +75,7 @@ static int br_pass_frame_up(struct sk_buff *skb, bool promisc) /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net_bridge_port *p = br_port_get_rcu(skb->dev); enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; @@ -96,8 +97,10 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (br_mst_is_enabled(br)) { state = BR_STATE_FORWARDING; } else { - if (p->state == BR_STATE_DISABLED) + if (p->state == BR_STATE_DISABLED) { + reason = SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE; goto drop; + } state = p->state; } @@ -155,8 +158,10 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb } } - if (state == BR_STATE_LEARNING) + if (state == BR_STATE_LEARNING) { + reason = SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE; goto drop; + } BR_INPUT_SKB_CB(skb)->brdev = br->dev; BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED); @@ -223,7 +228,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb out: return 0; drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); goto out; } EXPORT_SYMBOL_GPL(br_handle_frame_finish); @@ -324,6 +329,7 @@ static int br_process_frame_type(struct net_bridge_port *p, */ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) { + enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct net_bridge_port *p; 
struct sk_buff *skb = *pskb; const unsigned char *dest = eth_hdr(skb)->h_dest; @@ -331,8 +337,10 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (unlikely(skb->pkt_type == PACKET_LOOPBACK)) return RX_HANDLER_PASS; - if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) + if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) { + reason = SKB_DROP_REASON_MAC_INVALID_SOURCE; goto drop; + } skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) @@ -374,6 +382,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ + reason = SKB_DROP_REASON_MAC_IEEE_MAC_CONTROL; goto drop; case 0x0E: /* 802.1AB LLDP */ @@ -423,8 +432,9 @@ defer_stp_filtering: return nf_hook_bridge_pre(skb, pskb); default: + reason = SKB_DROP_REASON_BRIDGE_INGRESS_STP_STATE; drop: - kfree_skb(skb); + kfree_skb_reason(skb, reason); } return RX_HANDLER_CONSUMED; } From 85101bda1387e85eabf77cd416bfc38e14f1bce6 Mon Sep 17 00:00:00 2001 From: YiFei Zhu Date: Thu, 19 Dec 2024 17:30:04 +0000 Subject: [PATCH 0605/1386] sfc: Use netdev refcount tracking in struct efx_async_filter_insertion I was debugging some netdev refcount issues in OpenOnload, and one of the places I was looking at was in the sfc driver. Only struct efx_async_filter_insertion was not using netdev refcount tracker, so add it here. GFP_ATOMIC because this code path is called by ndo_rx_flow_steer which holds RCU. 
This patch should be a no-op if !CONFIG_NET_DEV_REFCNT_TRACKER Signed-off-by: YiFei Zhu Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241219173004.2615655-1-zhuyifei@google.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/rx_common.c | 5 +++-- drivers/net/ethernet/sfc/siena/net_driver.h | 2 ++ drivers/net/ethernet/sfc/siena/rx_common.c | 5 +++-- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 620ba6ef3514..f70a7b7d6345 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -831,6 +831,7 @@ struct efx_arfs_rule { /** * struct efx_async_filter_insertion - Request to asynchronously insert a filter * @net_dev: Reference to the netdevice + * @net_dev_tracker: reference tracker entry for @net_dev * @spec: The filter to insert * @work: Workitem for this request * @rxq_index: Identifies the channel for which this request was made @@ -838,6 +839,7 @@ struct efx_arfs_rule { */ struct efx_async_filter_insertion { struct net_device *net_dev; + netdevice_tracker net_dev_tracker; struct efx_filter_spec spec; struct work_struct work; u16 rxq_index; diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index ab358fe13e1d..4cc83203e188 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -897,7 +897,7 @@ static void efx_filter_rfs_work(struct work_struct *data) /* Release references */ clear_bit(slot_idx, &efx->rps_slot_map); - dev_put(req->net_dev); + netdev_put(req->net_dev, &req->net_dev_tracker); } int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, @@ -989,7 +989,8 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, } /* Queue the request */ - dev_hold(req->net_dev = net_dev); + req->net_dev = net_dev; + netdev_hold(req->net_dev, 
&req->net_dev_tracker, GFP_ATOMIC); INIT_WORK(&req->work, efx_filter_rfs_work); req->rxq_index = rxq_index; req->flow_id = flow_id; diff --git a/drivers/net/ethernet/sfc/siena/net_driver.h b/drivers/net/ethernet/sfc/siena/net_driver.h index 9785eff10607..2be3bad3c993 100644 --- a/drivers/net/ethernet/sfc/siena/net_driver.h +++ b/drivers/net/ethernet/sfc/siena/net_driver.h @@ -753,6 +753,7 @@ struct efx_arfs_rule { /** * struct efx_async_filter_insertion - Request to asynchronously insert a filter * @net_dev: Reference to the netdevice + * @net_dev_tracker: reference tracker entry for @net_dev * @spec: The filter to insert * @work: Workitem for this request * @rxq_index: Identifies the channel for which this request was made @@ -760,6 +761,7 @@ struct efx_arfs_rule { */ struct efx_async_filter_insertion { struct net_device *net_dev; + netdevice_tracker net_dev_tracker; struct efx_filter_spec spec; struct work_struct work; u16 rxq_index; diff --git a/drivers/net/ethernet/sfc/siena/rx_common.c b/drivers/net/ethernet/sfc/siena/rx_common.c index 082e35c6caaa..2839d0e0a9c1 100644 --- a/drivers/net/ethernet/sfc/siena/rx_common.c +++ b/drivers/net/ethernet/sfc/siena/rx_common.c @@ -888,7 +888,7 @@ static void efx_filter_rfs_work(struct work_struct *data) /* Release references */ clear_bit(slot_idx, &efx->rps_slot_map); - dev_put(req->net_dev); + netdev_put(req->net_dev, &req->net_dev_tracker); } int efx_siena_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, @@ -980,7 +980,8 @@ int efx_siena_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, } /* Queue the request */ - dev_hold(req->net_dev = net_dev); + req->net_dev = net_dev; + netdev_hold(req->net_dev, &req->net_dev_tracker, GFP_ATOMIC); INIT_WORK(&req->work, efx_filter_rfs_work); req->rxq_index = rxq_index; req->flow_id = flow_id; From f288c7a1ba268a9ed58a7971142a98a1e41a3c73 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 16:31:16 -0800 Subject: [PATCH 0606/1386] 
selftests: drv-net: assume stats refresh is 0 if no ethtool -c support Tests using HW stats wait for them to stabilize, using data from ethtool -c as the delay. Not all drivers implement ethtool -c so handle the errors gracefully. Reviewed-by: Andrew Lunn Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20241220003116.1458863-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/lib/py/env.py | 9 +++++++-- tools/testing/selftests/net/lib/py/utils.py | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1ea9bb695e94..fea343f209ea 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -5,7 +5,7 @@ import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx from lib.py import ksft_setup -from lib.py import cmd, ethtool, ip +from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -234,7 +234,12 @@ class NetDrvEpEnv: Good drivers will tell us via ethtool what their sync period is. 
""" if self._stats_settle_time is None: - data = ethtool("-c " + self.ifname, json=True)[0] + data = {} + try: + data = ethtool("-c " + self.ifname, json=True)[0] + except CmdExitFailure as e: + if "Operation not supported" not in e.cmd.stderr: + raise self._stats_settle_time = 0.025 + \ data.get('stats-block-usecs', 0) / 1000 / 1000 diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 72590c3f90f1..9e3bcddcf3e8 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -10,7 +10,9 @@ import time class CmdExitFailure(Exception): - pass + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj class cmd: @@ -48,7 +50,7 @@ class cmd: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + (self.proc.args, stdout, stderr), self) class bkg(cmd): From aa4ad7c3f283fa94b80cf84605661700aa39d708 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Sat, 21 Dec 2024 19:00:07 +0900 Subject: [PATCH 0607/1386] netlink: correct nlmsg size for multicast notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corrected the netlink message size calculation for multicast group join/leave notifications. The previous calculation did not account for the inclusion of both IPv4/IPv6 addresses and ifa_cacheinfo in the payload. This fix ensures that the allocated message size is sufficient to hold all necessary information. This patch also includes the following improvements: * Uses GFP_KERNEL instead of GFP_ATOMIC when holding the RTNL mutex. * Uses nla_total_size(sizeof(struct in6_addr)) instead of nla_total_size(16). * Removes unnecessary EXPORT_SYMBOL(). 
Fixes: 2c2b61d2138f ("netlink: add IGMP/MLD join/leave notifications") Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Signed-off-by: Yuyang Huang Link: https://patch.msgid.link/20241221100007.1910089-1-yuyanghuang@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/igmp.c | 6 ++++-- net/ipv6/addrconf.c | 1 - net/ipv6/mcast.c | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8a370ef37d3f..3da126cea884 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1473,7 +1473,9 @@ static void inet_ifmcaddr_notify(struct net_device *dev, int err = -ENOMEM; skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + - nla_total_size(sizeof(__be32)), GFP_ATOMIC); + nla_total_size(sizeof(__be32)) + + nla_total_size(sizeof(struct ifa_cacheinfo)), + GFP_KERNEL); if (!skb) goto error; @@ -1484,7 +1486,7 @@ static void inet_ifmcaddr_notify(struct net_device *dev, goto error; } - rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_KERNEL); return; error: rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2e2684886953..4da409bc4577 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5239,7 +5239,6 @@ int inet6_fill_ifmcaddr(struct sk_buff *skb, nlmsg_end(skb, nlh); return 0; } -EXPORT_SYMBOL(inet6_fill_ifmcaddr); static int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 587831c148de..9dfdb40988b0 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -920,7 +920,9 @@ static void inet6_ifmcaddr_notify(struct net_device *dev, int err = -ENOMEM; skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + - nla_total_size(16), GFP_ATOMIC); + nla_total_size(sizeof(struct in6_addr)) + + nla_total_size(sizeof(struct ifa_cacheinfo)), + GFP_KERNEL); if (!skb) goto error; @@ -931,7 +933,7 @@ static void
inet6_ifmcaddr_notify(struct net_device *dev, goto error; } - rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_KERNEL); return; error: rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err); From f3af3ba1083836d174ada619366783fa17272f66 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:28 +0100 Subject: [PATCH 0608/1386] vsock/test: Use NSEC_PER_SEC Replace 1000000000ULL with NSEC_PER_SEC. No functional change intended. Reviewed-by: Luigi Leonardi Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-1-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 48f17641ca50..38fd8d96eb83 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -559,7 +560,7 @@ static time_t current_nsec(void) exit(EXIT_FAILURE); } - return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec; + return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; } #define RCVTIMEO_TIMEOUT_SEC 1 @@ -599,7 +600,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) } read_overhead_ns = current_nsec() - read_enter_ns - - 1000000000ULL * RCVTIMEO_TIMEOUT_SEC; + NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC; if (read_overhead_ns > READ_OVERHEAD_NSEC) { fprintf(stderr, From ef8bd18f475e969753b1b72588a4932195d420f3 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:29 +0100 Subject: [PATCH 0609/1386] vsock/test: Introduce option to select tests Allow for selecting specific test IDs to be executed. 
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-2-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 29 +++++++++++++++++++++++++++-- tools/testing/vsock/util.h | 2 ++ tools/testing/vsock/vsock_test.c | 11 +++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 34e9dac0a105..81b9a31059d8 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -486,8 +486,7 @@ void list_tests(const struct test_case *test_cases) exit(EXIT_FAILURE); } -void skip_test(struct test_case *test_cases, size_t test_cases_len, - const char *test_id_str) +static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len) { unsigned long test_id; char *endptr = NULL; @@ -505,9 +504,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len, exit(EXIT_FAILURE); } + return test_id; +} + +void skip_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + unsigned long test_id = parse_test_id(test_id_str, test_cases_len); test_cases[test_id].skip = true; } +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str) +{ + static bool skip_all = true; + unsigned long test_id; + + if (skip_all) { + unsigned long i; + + for (i = 0; i < test_cases_len; ++i) + test_cases[i].skip = true; + + skip_all = false; + } + + test_id = parse_test_id(test_id_str, test_cases_len); + test_cases[test_id].skip = false; +} + unsigned long hash_djb2(const void *data, size_t len) { unsigned long hash = 5381; diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index ba84d296d8b7..e62f46b2b92a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -62,6 +62,8 @@ void run_tests(const struct test_case *test_cases, void list_tests(const struct test_case *test_cases); void skip_test(struct 
test_case *test_cases, size_t test_cases_len, const char *test_id_str); +void pick_test(struct test_case *test_cases, size_t test_cases_len, + const char *test_id_str); unsigned long hash_djb2(const void *data, size_t len); size_t iovec_bytes(const struct iovec *iov, size_t iovnum); unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 38fd8d96eb83..8bb2ab41c55f 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1644,6 +1644,11 @@ static const struct option longopts[] = { .has_arg = required_argument, .val = 's', }, + { + .name = "pick", + .has_arg = required_argument, + .val = 't', + }, { .name = "help", .has_arg = no_argument, @@ -1681,6 +1686,8 @@ static void usage(void) " --peer-cid CID of the other side\n" " --peer-port AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" + " --pick Test ID to execute selectively;\n" + " use multiple --pick options to select more tests\n" " --skip Test ID to skip;\n" " use multiple --skip options to skip more tests\n", DEFAULT_PEER_PORT @@ -1737,6 +1744,10 @@ int main(int argc, char **argv) skip_test(test_cases, ARRAY_SIZE(test_cases) - 1, optarg); break; + case 't': + pick_test(test_cases, ARRAY_SIZE(test_cases) - 1, + optarg); + break; case '?': default: usage(); From 50f9434463a0be5b972ee442ba6a9704c9afb02a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:30 +0100 Subject: [PATCH 0610/1386] vsock/test: Add README blurb about kmemleak usage Document the suggested use of kmemleak for memory leak detection. 
Suggested-by: Stefano Garzarella Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-3-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/README | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README index 84ee217ba8ee..680ce666ceb5 100644 --- a/tools/testing/vsock/README +++ b/tools/testing/vsock/README @@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows: --control-port=1234 \ --peer-cid=3 +Some tests are designed to produce kernel memory leaks. Leaks detection, +however, is deferred to Kernel Memory Leak Detector. It is recommended to enable +kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test +suite run, e.g. + + # echo clear > /sys/kernel/debug/kmemleak + # $TEST_BINARY ... + # echo "wait for any grace periods" && sleep 2 + # echo scan > /sys/kernel/debug/kmemleak + # echo "wait for kmemleak" && sleep 5 + # echo scan > /sys/kernel/debug/kmemleak + # cat /sys/kernel/debug/kmemleak + +For more information see Documentation/dev-tools/kmemleak.rst. + vsock_perf utility ------------------- 'vsock_perf' is a simple tool to measure vsock performance. It works in From f52e7f593b49344b9497c289cbb2ada213f60a7a Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:31 +0100 Subject: [PATCH 0611/1386] vsock/test: Adapt send_byte()/recv_byte() to handle MSG_ZEROCOPY For a zerocopy send(), buffer (always byte 'A') needs to be preserved (thus it can not be on the stack) or the data recv()ed check in recv_byte() might fail. While there, change the printf format to 0x%02x so the '\0' bytes can be seen. 
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-4-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 81b9a31059d8..7058dc614c25 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -401,7 +401,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret) */ void send_byte(int fd, int expected_ret, int flags) { - const uint8_t byte = 'A'; + static const uint8_t byte = 'A'; send_buf(fd, &byte, sizeof(byte), flags, expected_ret); } @@ -420,7 +420,7 @@ void recv_byte(int fd, int expected_ret, int flags) recv_buf(fd, &byte, sizeof(byte), flags, expected_ret); if (byte != 'A') { - fprintf(stderr, "unexpected byte read %c\n", byte); + fprintf(stderr, "unexpected byte read 0x%02x\n", byte); exit(EXIT_FAILURE); } } From f66ef469a72d19764f943067307a570f83b00dca Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:32 +0100 Subject: [PATCH 0612/1386] vsock/test: Add test for accept_queue memory leak Attempt to enqueue a child after the queue was flushed, but before SOCK_DONE flag has been set. Test tries to produce a memory leak, kmemleak should be employed. Dealing with a race condition, test by its very nature may lead to a false negative. Fixed by commit d7b0ff5a8667 ("virtio/vsock: Fix accept_queue memory leak"). 
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-5-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 52 ++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 8bb2ab41c55f..2a8fcb062d9d 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -29,6 +29,10 @@ #include "control.h" #include "util.h" +/* Basic messages for control_writeulong(), control_readulong() */ +#define CONTROL_CONTINUE 1 +#define CONTROL_DONE 0 + static void test_stream_connection_reset(const struct test_opts *opts) { union { @@ -1474,6 +1478,49 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts) test_stream_credit_update_test(opts, false); } +/* The goal of test leak_acceptq is to stress the race between connect() and + * close(listener). Implementation of client/server loops boils down to: + * + * client server + * ------ ------ + * write(CONTINUE) + * expect(CONTINUE) + * listen() + * write(LISTENING) + * expect(LISTENING) + * connect() close() + */ +#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */ + +static void test_stream_leak_acceptq_client(const struct test_opts *opts) +{ + time_t tout; + int fd; + + tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC; + do { + control_writeulong(CONTROL_CONTINUE); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd >= 0) + close(fd); + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); +} + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. 
*/ +static void test_stream_leak_acceptq_server(const struct test_opts *opts) +{ + int fd; + + while (control_readulong() == CONTROL_CONTINUE) { + fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + control_writeln("LISTENING"); + close(fd); + } +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1604,6 +1651,11 @@ static struct test_case test_cases[] = { .run_client = test_seqpacket_unsent_bytes_client, .run_server = test_seqpacket_unsent_bytes_server, }, + { + .name = "SOCK_STREAM leak accept queue", + .run_client = test_stream_leak_acceptq_client, + .run_server = test_stream_leak_acceptq_server, + }, {}, }; From ec50efee8cf814035d82f3b42dad916144d98b38 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:33 +0100 Subject: [PATCH 0613/1386] vsock/test: Add test for sk_error_queue memory leak Ask for MSG_ZEROCOPY completion notification, but do not recv() it. Test attempts to create a memory leak, kmemleak should be employed. Fixed by commit fbf7085b3ad1 ("vsock: Fix sk_error_queue memory leak"). Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-6-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 2a8fcb062d9d..2dec6290b075 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1521,6 +1521,46 @@ static void test_stream_leak_acceptq_server(const struct test_opts *opts) } } +/* Test for a memory leak. User is expected to run kmemleak scan, see README. 
*/ +static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts) +{ + struct pollfd fds = { 0 }; + int fd; + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + send_byte(fd, 1, MSG_ZEROCOPY); + + fds.fd = fd; + fds.events = 0; + if (poll(&fds, 1, -1) < 0) { + perror("poll"); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_byte(fd, 1, 0); + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1656,6 +1696,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_leak_acceptq_client, .run_server = test_stream_leak_acceptq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE", + .run_client = test_stream_msgzcopy_leak_errq_client, + .run_server = test_stream_msgzcopy_leak_errq_server, + }, {}, }; From d127ac8b1d4d3524d292b597100fef96dd909c9b Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Thu, 19 Dec 2024 10:49:34 +0100 Subject: [PATCH 0614/1386] vsock/test: Add test for MSG_ZEROCOPY completion memory leak Exercise the ENOMEM error path by attempting to hit net.core.optmem_max limit on send(). Test aims to create a memory leak, kmemleak should be employed. Fixed by commit 60cf6206a1f5 ("virtio/vsock: Improve MSG_ZEROCOPY error handling"). 
Reviewed-by: Stefano Garzarella Signed-off-by: Michal Luczaj Link: https://patch.msgid.link/20241219-test-vsock-leaks-v4-7-a416e554d9d7@rbox.co Signed-off-by: Jakub Kicinski --- tools/testing/vsock/vsock_test.c | 152 +++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 2dec6290b075..1eebbc0d5f61 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1561,6 +1561,153 @@ static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts) close(fd); } +/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path, + * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb() + * by hitting net.core.optmem_max limit in sock_omalloc(), specifically + * + * vsock_connectible_sendmsg + * virtio_transport_stream_enqueue + * virtio_transport_send_pkt_info + * virtio_transport_init_zcopy_skb + * . msg_zerocopy_realloc + * . msg_zerocopy_alloc + * . sock_omalloc + * . sk_omem_alloc + size > sysctl_optmem_max + * return -ENOMEM + * + * We abuse the implementation detail of net/socket.c:____sys_sendmsg(). + * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to + * fetch user-provided control data. + * + * While this approach works for now, it relies on assumptions regarding the + * implementation and configuration (for example, order of net.core.optmem_max + * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more + * resilient testing could be implemented by leveraging the Fault injection + * framework (CONFIG_FAULT_INJECTION), e.g. 
+ * + * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait + * client# echo 0 > /sys/kernel/debug/failslab/verbose + * + * void client(const struct test_opts *opts) + * { + * char buf[16]; + * int f, s, i; + * + * f = open("/proc/self/fail-nth", O_WRONLY); + * + * for (i = 1; i < 32; i++) { + * control_writeulong(CONTROL_CONTINUE); + * + * s = vsock_stream_connect(opts->peer_cid, opts->peer_port); + * enable_so_zerocopy_check(s); + * + * sprintf(buf, "%d", i); + * write(f, buf, strlen(buf)); + * + * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY); + * + * write(f, "0", 1); + * close(s); + * } + * + * control_writeulong(CONTROL_DONE); + * close(f); + * } + * + * void server(const struct test_opts *opts) + * { + * int fd; + * + * while (control_readulong() == CONTROL_CONTINUE) { + * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + * vsock_wait_remote_close(fd); + * close(fd); + * } + * } + * + * Refer to Documentation/fault-injection/fault-injection.rst. + */ +#define MAX_PAGE_ORDER 10 /* usually */ +#define PAGE_SIZE 4096 + +/* Test for a memory leak. User is expected to run kmemleak scan, see README. 
*/ +static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts) +{ + size_t optmem_max, ctl_len, chunk_size; + struct msghdr msg = { 0 }; + struct iovec iov; + char *chunk; + int fd, res; + FILE *f; + + f = fopen("/proc/sys/net/core/optmem_max", "r"); + if (!f) { + perror("fopen(optmem_max)"); + exit(EXIT_FAILURE); + } + + if (fscanf(f, "%zu", &optmem_max) != 1) { + fprintf(stderr, "fscanf(optmem_max) failed\n"); + exit(EXIT_FAILURE); + } + + fclose(f); + + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + enable_so_zerocopy_check(fd); + + ctl_len = optmem_max - 1; + if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) { + fprintf(stderr, "Try with net.core.optmem_max = 100000\n"); + exit(EXIT_FAILURE); + } + + chunk_size = CMSG_SPACE(ctl_len); + chunk = malloc(chunk_size); + if (!chunk) { + perror("malloc"); + exit(EXIT_FAILURE); + } + memset(chunk, 0, chunk_size); + + iov.iov_base = &(char){ 0 }; + iov.iov_len = 1; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = chunk; + msg.msg_controllen = ctl_len; + + errno = 0; + res = sendmsg(fd, &msg, MSG_ZEROCOPY); + if (res >= 0 || errno != ENOMEM) { + fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n", + errno, res); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) +{ + int fd; + + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); + if (fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + vsock_wait_remote_close(fd); + close(fd); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1701,6 +1848,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_msgzcopy_leak_errq_client, .run_server = test_stream_msgzcopy_leak_errq_server, }, + { + .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb", + .run_client = test_stream_msgzcopy_leak_zcskb_client, + 
.run_server = test_stream_msgzcopy_leak_zcskb_server, + }, {}, }; From d46ef4ee381f0f73b13714f319662f48f0c8b471 Mon Sep 17 00:00:00 2001 From: Divya Koppera Date: Thu, 19 Dec 2024 18:03:07 +0530 Subject: [PATCH 0615/1386] net: phy: microchip_rds_ptp: Add header file for Microchip rds ptp library This rds ptp header file will cover ptp macros for future phys in Microchip where the addresses will be the same but the base offset and mmd address may change. Reviewed-by: Andrew Lunn Reviewed-by: Vadim Fedorenko Signed-off-by: Divya Koppera Link: https://patch.msgid.link/20241219123311.30213-2-divya.koppera@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_rds_ptp.h | 223 ++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) create mode 100644 drivers/net/phy/microchip_rds_ptp.h diff --git a/drivers/net/phy/microchip_rds_ptp.h b/drivers/net/phy/microchip_rds_ptp.h new file mode 100644 index 000000000000..e95c065728b5 --- /dev/null +++ b/drivers/net/phy/microchip_rds_ptp.h @@ -0,0 +1,223 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (C) 2024 Microchip Technology + */ + +#ifndef _MICROCHIP_RDS_PTP_H +#define _MICROCHIP_RDS_PTP_H + +#include +#include +#include +#include +#include +#include + +#define MCHP_RDS_PTP_CMD_CTL 0x0 +#define MCHP_RDS_PTP_CMD_CTL_LTC_STEP_NSEC BIT(6) +#define MCHP_RDS_PTP_CMD_CTL_LTC_STEP_SEC BIT(5) +#define MCHP_RDS_PTP_CMD_CTL_CLOCK_LOAD BIT(4) +#define MCHP_RDS_PTP_CMD_CTL_CLOCK_READ BIT(3) +#define MCHP_RDS_PTP_CMD_CTL_EN BIT(1) +#define MCHP_RDS_PTP_CMD_CTL_DIS BIT(0) + +#define MCHP_RDS_PTP_REF_CLK_CFG 0x2 +#define MCHP_RDS_PTP_REF_CLK_SRC_250MHZ 0x0 +#define MCHP_RDS_PTP_REF_CLK_PERIOD_OVERRIDE BIT(9) +#define MCHP_RDS_PTP_REF_CLK_PERIOD 4 +#define MCHP_RDS_PTP_REF_CLK_CFG_SET (MCHP_RDS_PTP_REF_CLK_SRC_250MHZ |\ + MCHP_RDS_PTP_REF_CLK_PERIOD_OVERRIDE |\ + MCHP_RDS_PTP_REF_CLK_PERIOD) + +#define MCHP_RDS_PTP_LTC_SEC_HI 0x5 +#define MCHP_RDS_PTP_LTC_SEC_MID 0x6 +#define MCHP_RDS_PTP_LTC_SEC_LO 0x7 +#define
MCHP_RDS_PTP_LTC_NS_HI 0x8 +#define MCHP_RDS_PTP_LTC_NS_LO 0x9 +#define MCHP_RDS_PTP_LTC_RATE_ADJ_HI 0xc +#define MCHP_RDS_PTP_LTC_RATE_ADJ_HI_DIR BIT(15) +#define MCHP_RDS_PTP_LTC_RATE_ADJ_LO 0xd +#define MCHP_RDS_PTP_STEP_ADJ_HI 0x12 +#define MCHP_RDS_PTP_STEP_ADJ_HI_DIR BIT(15) +#define MCHP_RDS_PTP_STEP_ADJ_LO 0x13 +#define MCHP_RDS_PTP_LTC_READ_SEC_HI 0x29 +#define MCHP_RDS_PTP_LTC_READ_SEC_MID 0x2a +#define MCHP_RDS_PTP_LTC_READ_SEC_LO 0x2b +#define MCHP_RDS_PTP_LTC_READ_NS_HI 0x2c +#define MCHP_RDS_PTP_LTC_READ_NS_LO 0x2d +#define MCHP_RDS_PTP_OP_MODE 0x41 +#define MCHP_RDS_PTP_OP_MODE_DIS 0 +#define MCHP_RDS_PTP_OP_MODE_STANDALONE 1 +#define MCHP_RDS_PTP_LATENCY_CORRECTION_CTL 0x44 +#define MCHP_RDS_PTP_PREDICTOR_EN BIT(6) +#define MCHP_RDS_PTP_TX_PRED_DIS BIT(1) +#define MCHP_RDS_PTP_RX_PRED_DIS BIT(0) +#define MCHP_RDS_PTP_LATENCY_SETTING (MCHP_RDS_PTP_PREDICTOR_EN | \ + MCHP_RDS_PTP_TX_PRED_DIS | \ + MCHP_RDS_PTP_RX_PRED_DIS) + +#define MCHP_RDS_PTP_INT_EN 0x0 +#define MCHP_RDS_PTP_INT_STS 0x01 +#define MCHP_RDS_PTP_INT_TX_TS_OVRFL_EN BIT(3) +#define MCHP_RDS_PTP_INT_TX_TS_EN BIT(2) +#define MCHP_RDS_PTP_INT_RX_TS_OVRFL_EN BIT(1) +#define MCHP_RDS_PTP_INT_RX_TS_EN BIT(0) +#define MCHP_RDS_PTP_INT_ALL_MSK (MCHP_RDS_PTP_INT_TX_TS_OVRFL_EN | \ + MCHP_RDS_PTP_INT_TX_TS_EN | \ + MCHP_RDS_PTP_INT_RX_TS_OVRFL_EN |\ + MCHP_RDS_PTP_INT_RX_TS_EN) + +#define MCHP_RDS_PTP_CAP_INFO 0x2e +#define MCHP_RDS_PTP_TX_TS_CNT(v) (((v) & GENMASK(11, 8)) >> 8) +#define MCHP_RDS_PTP_RX_TS_CNT(v) ((v) & GENMASK(3, 0)) + +#define MCHP_RDS_PTP_RX_PARSE_CONFIG 0x42 +#define MCHP_RDS_PTP_RX_PARSE_L2_ADDR_EN 0x44 +#define MCHP_RDS_PTP_RX_PARSE_IPV4_ADDR_EN 0x45 + +#define MCHP_RDS_PTP_RX_TIMESTAMP_CONFIG 0x4e +#define MCHP_RDS_PTP_RX_TIMESTAMP_CONFIG_PTP_FCS_DIS BIT(0) + +#define MCHP_RDS_PTP_RX_VERSION 0x48 +#define MCHP_RDS_PTP_RX_TIMESTAMP_EN 0x4d + +#define MCHP_RDS_PTP_RX_INGRESS_NS_HI 0x54 +#define MCHP_RDS_PTP_RX_INGRESS_NS_HI_TS_VALID BIT(15) + +#define 
MCHP_RDS_PTP_RX_INGRESS_NS_LO 0x55 +#define MCHP_RDS_PTP_RX_INGRESS_SEC_HI 0x56 +#define MCHP_RDS_PTP_RX_INGRESS_SEC_LO 0x57 +#define MCHP_RDS_PTP_RX_MSG_HDR2 0x59 + +#define MCHP_RDS_PTP_TX_PARSE_CONFIG 0x82 +#define MCHP_RDS_PTP_PARSE_CONFIG_LAYER2_EN BIT(0) +#define MCHP_RDS_PTP_PARSE_CONFIG_IPV4_EN BIT(1) +#define MCHP_RDS_PTP_PARSE_CONFIG_IPV6_EN BIT(2) + +#define MCHP_RDS_PTP_TX_PARSE_L2_ADDR_EN 0x84 +#define MCHP_RDS_PTP_TX_PARSE_IPV4_ADDR_EN 0x85 + +#define MCHP_RDS_PTP_TX_VERSION 0x88 +#define MCHP_RDS_PTP_MAX_VERSION(x) (((x) & GENMASK(7, 0)) << 8) +#define MCHP_RDS_PTP_MIN_VERSION(x) ((x) & GENMASK(7, 0)) + +#define MCHP_RDS_PTP_TX_TIMESTAMP_EN 0x8d +#define MCHP_RDS_PTP_TIMESTAMP_EN_SYNC BIT(0) +#define MCHP_RDS_PTP_TIMESTAMP_EN_DREQ BIT(1) +#define MCHP_RDS_PTP_TIMESTAMP_EN_PDREQ BIT(2) +#define MCHP_RDS_PTP_TIMESTAMP_EN_PDRES BIT(3) +#define MCHP_RDS_PTP_TIMESTAMP_EN_ALL (MCHP_RDS_PTP_TIMESTAMP_EN_SYNC |\ + MCHP_RDS_PTP_TIMESTAMP_EN_DREQ |\ + MCHP_RDS_PTP_TIMESTAMP_EN_PDREQ |\ + MCHP_RDS_PTP_TIMESTAMP_EN_PDRES) + +#define MCHP_RDS_PTP_TX_TIMESTAMP_CONFIG 0x8e +#define MCHP_RDS_PTP_TX_TIMESTAMP_CONFIG_PTP_FCS_DIS BIT(0) + +#define MCHP_RDS_PTP_TX_MOD 0x8f +#define MCHP_RDS_TX_MOD_PTP_SYNC_TS_INSERT BIT(12) + +#define MCHP_RDS_PTP_TX_EGRESS_NS_HI 0x94 +#define MCHP_RDS_PTP_TX_EGRESS_NS_HI_TS_VALID BIT(15) + +#define MCHP_RDS_PTP_TX_EGRESS_NS_LO 0x95 +#define MCHP_RDS_PTP_TX_EGRESS_SEC_HI 0x96 +#define MCHP_RDS_PTP_TX_EGRESS_SEC_LO 0x97 +#define MCHP_RDS_PTP_TX_MSG_HDR2 0x99 + +#define MCHP_RDS_PTP_TSU_GEN_CONFIG 0xc0 +#define MCHP_RDS_PTP_TSU_GEN_CFG_TSU_EN BIT(0) + +#define MCHP_RDS_PTP_TSU_HARD_RESET 0xc1 +#define MCHP_RDS_PTP_TSU_HARDRESET BIT(0) + +/* Represents 1ppm adjustment in 2^32 format with + * each nsec contains 4 clock cycles in 250MHz. 
+ * The value is calculated as following: (1/1000000)/((2^-32)/4) + */ +#define MCHP_RDS_PTP_1PPM_FORMAT 17179 +#define MCHP_RDS_PTP_FIFO_SIZE 8 +#define MCHP_RDS_PTP_MAX_ADJ 31249999 + +#define BASE_CLK(p) ((p)->clk_base_addr) +#define BASE_PORT(p) ((p)->port_base_addr) +#define PTP_MMD(p) ((p)->mmd) + +enum mchp_rds_ptp_base { + MCHP_RDS_PTP_PORT, + MCHP_RDS_PTP_CLOCK +}; + +enum mchp_rds_ptp_fifo_dir { + MCHP_RDS_PTP_INGRESS_FIFO, + MCHP_RDS_PTP_EGRESS_FIFO +}; + +struct mchp_rds_ptp_clock { + struct mii_timestamper mii_ts; + struct phy_device *phydev; + struct ptp_clock *ptp_clock; + + struct sk_buff_head tx_queue; + struct sk_buff_head rx_queue; + struct list_head rx_ts_list; + + struct ptp_clock_info caps; + + /* Lock for Rx ts fifo */ + spinlock_t rx_ts_lock; + int hwts_tx_type; + + enum hwtstamp_rx_filters rx_filter; + int layer; + int version; + u16 port_base_addr; + u16 clk_base_addr; + + /* Lock for phc */ + struct mutex ptp_lock; + u8 mmd; +}; + +struct mchp_rds_ptp_rx_ts { + struct list_head list; + u32 seconds; + u32 nsec; + u16 seq_id; +}; + +#if IS_ENABLED(CONFIG_MICROCHIP_PHY_RDS_PTP) + +struct mchp_rds_ptp_clock *mchp_rds_ptp_probe(struct phy_device *phydev, u8 mmd, + u16 clk_base, u16 port_base); + +int mchp_rds_ptp_top_config_intr(struct mchp_rds_ptp_clock *clock, + u16 reg, u16 val, bool enable); + +irqreturn_t mchp_rds_ptp_handle_interrupt(struct mchp_rds_ptp_clock *clock); + +#else + +static inline struct mchp_rds_ptp_clock *mchp_rds_ptp_probe(struct phy_device + *phydev, u8 mmd, + u16 clk_base, + u16 port_base) +{ + return NULL; +} + +static inline int mchp_rds_ptp_top_config_intr(struct mchp_rds_ptp_clock *clock, + u16 reg, u16 val, bool enable) +{ + return 0; +} + +static inline irqreturn_t mchp_rds_ptp_handle_interrupt(struct + mchp_rds_ptp_clock + * clock) +{ + return IRQ_NONE; +} + +#endif //CONFIG_MICROCHIP_PHY_RDS_PTP + +#endif //_MICROCHIP_RDS_PTP_H From fa51199c5f34172fc7fd248ca9105e4e0ca6d80a Mon Sep 17 00:00:00 2001 From: Divya 
Koppera Date: Thu, 19 Dec 2024 18:03:08 +0530 Subject: [PATCH 0616/1386] net: phy: microchip_rds_ptp : Add rds ptp library for Microchip phys Add rds ptp library for Microchip phys 1-step and 2-step modes are supported, over Ethernet and UDP(ipv4, ipv6) Reviewed-by: Vadim Fedorenko Signed-off-by: Divya Koppera Link: https://patch.msgid.link/20241219123311.30213-3-divya.koppera@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_rds_ptp.c | 1039 +++++++++++++++++++++++++++ 1 file changed, 1039 insertions(+) create mode 100644 drivers/net/phy/microchip_rds_ptp.c diff --git a/drivers/net/phy/microchip_rds_ptp.c b/drivers/net/phy/microchip_rds_ptp.c new file mode 100644 index 000000000000..2936e46531cf --- /dev/null +++ b/drivers/net/phy/microchip_rds_ptp.c @@ -0,0 +1,1039 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Microchip Technology + +#include "microchip_rds_ptp.h" + +static int mchp_rds_phy_read_mmd(struct mchp_rds_ptp_clock *clock, + u32 offset, enum mchp_rds_ptp_base base) +{ + struct phy_device *phydev = clock->phydev; + u32 addr; + + addr = (offset + ((base == MCHP_RDS_PTP_PORT) ? BASE_PORT(clock) : + BASE_CLK(clock))); + + return phy_read_mmd(phydev, PTP_MMD(clock), addr); +} + +static int mchp_rds_phy_write_mmd(struct mchp_rds_ptp_clock *clock, + u32 offset, enum mchp_rds_ptp_base base, + u16 val) +{ + struct phy_device *phydev = clock->phydev; + u32 addr; + + addr = (offset + ((base == MCHP_RDS_PTP_PORT) ? BASE_PORT(clock) : + BASE_CLK(clock))); + + return phy_write_mmd(phydev, PTP_MMD(clock), addr, val); +} + +static int mchp_rds_phy_modify_mmd(struct mchp_rds_ptp_clock *clock, + u32 offset, enum mchp_rds_ptp_base base, + u16 mask, u16 val) +{ + struct phy_device *phydev = clock->phydev; + u32 addr; + + addr = (offset + ((base == MCHP_RDS_PTP_PORT) ? 
BASE_PORT(clock) : + BASE_CLK(clock))); + + return phy_modify_mmd(phydev, PTP_MMD(clock), addr, mask, val); +} + +static int mchp_rds_phy_set_bits_mmd(struct mchp_rds_ptp_clock *clock, + u32 offset, enum mchp_rds_ptp_base base, + u16 val) +{ + struct phy_device *phydev = clock->phydev; + u32 addr; + + addr = (offset + ((base == MCHP_RDS_PTP_PORT) ? BASE_PORT(clock) : + BASE_CLK(clock))); + + return phy_set_bits_mmd(phydev, PTP_MMD(clock), addr, val); +} + +static int mchp_rds_ptp_flush_fifo(struct mchp_rds_ptp_clock *clock, + enum mchp_rds_ptp_fifo_dir dir) +{ + int rc; + + if (dir == MCHP_RDS_PTP_EGRESS_FIFO) + skb_queue_purge(&clock->tx_queue); + else + skb_queue_purge(&clock->rx_queue); + + for (int i = 0; i < MCHP_RDS_PTP_FIFO_SIZE; ++i) { + rc = mchp_rds_phy_read_mmd(clock, + dir == MCHP_RDS_PTP_EGRESS_FIFO ? + MCHP_RDS_PTP_TX_MSG_HDR2 : + MCHP_RDS_PTP_RX_MSG_HDR2, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return rc; + } + return mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_INT_STS, + MCHP_RDS_PTP_PORT); +} + +static int mchp_rds_ptp_config_intr(struct mchp_rds_ptp_clock *clock, + bool enable) +{ + /* Enable or disable ptp interrupts */ + return mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_INT_EN, + MCHP_RDS_PTP_PORT, + enable ? 
MCHP_RDS_PTP_INT_ALL_MSK : 0);
+}
+
+static void mchp_rds_ptp_txtstamp(struct mii_timestamper *mii_ts,
+				  struct sk_buff *skb, int type)
+{
+	struct mchp_rds_ptp_clock *clock = container_of(mii_ts,
+						      struct mchp_rds_ptp_clock,
+						      mii_ts);
+
+	switch (clock->hwts_tx_type) {
+	case HWTSTAMP_TX_ONESTEP_SYNC:
+		if (ptp_msg_is_sync(skb, type)) {
+			kfree_skb(skb);
+			return;
+		}
+		fallthrough;
+	case HWTSTAMP_TX_ON:
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		skb_queue_tail(&clock->tx_queue, skb);
+		break;
+	case HWTSTAMP_TX_OFF:
+	default:
+		kfree_skb(skb);
+		break;
+	}
+}
+
+/* Read the PTP sequence id of a received frame into @sig.
+ *
+ * ETH_HLEN bytes are pushed in front of skb->data before the frame is
+ * classified and parsed, and pulled again before returning. The pull is
+ * done on every path: callers keep using the skb when this returns false
+ * (mchp_rds_ptp_match_skb() leaves it on the rx queue and will call this
+ * again), so a push that is not undone would accumulate on each call.
+ *
+ * Returns true and writes *sig when a PTP header was found, false (with
+ * *sig untouched) otherwise.
+ */
+static bool mchp_rds_ptp_get_sig_rx(struct sk_buff *skb, u16 *sig)
+{
+	struct ptp_header *ptp_header;
+	bool found = false;
+	int type;
+
+	skb_push(skb, ETH_HLEN);
+	type = ptp_classify_raw(skb);
+	if (type == PTP_CLASS_NONE)
+		goto out_pull;
+
+	ptp_header = ptp_parse_header(skb, type);
+	if (!ptp_header)
+		goto out_pull;
+
+	*sig = (__force u16)(ntohs(ptp_header->sequence_id));
+	found = true;
+
+out_pull:
+	/* Undo the skb_push() above on success and failure alike */
+	skb_pull_inline(skb, ETH_HLEN);
+
+	return found;
+}
+
+static bool mchp_rds_ptp_match_skb(struct mchp_rds_ptp_clock *clock,
+				   struct mchp_rds_ptp_rx_ts *rx_ts)
+{
+	struct skb_shared_hwtstamps *shhwtstamps;
+	struct sk_buff *skb, *skb_tmp;
+	unsigned long flags;
+	bool rc = false;
+	u16 skb_sig;
+
+	spin_lock_irqsave(&clock->rx_queue.lock, flags);
+	skb_queue_walk_safe(&clock->rx_queue, skb, skb_tmp) {
+		if (!mchp_rds_ptp_get_sig_rx(skb, &skb_sig))
+			continue;
+
+		if (skb_sig != rx_ts->seq_id)
+			continue;
+
+		__skb_unlink(skb, &clock->rx_queue);
+
+		rc = true;
+		break;
+	}
+	spin_unlock_irqrestore(&clock->rx_queue.lock, flags);
+
+	if (rc) {
+		shhwtstamps = skb_hwtstamps(skb);
+		shhwtstamps->hwtstamp = ktime_set(rx_ts->seconds, rx_ts->nsec);
+		netif_rx(skb);
+	}
+
+	return rc;
+}
+
+static void mchp_rds_ptp_match_rx_ts(struct mchp_rds_ptp_clock *clock,
+				     struct mchp_rds_ptp_rx_ts *rx_ts)
+{
+	unsigned long flags;
+
+	/* If we failed to match the skb add it to the queue for when
+	 * the frame will come
+	 */
+	if 
(!mchp_rds_ptp_match_skb(clock, rx_ts)) { + spin_lock_irqsave(&clock->rx_ts_lock, flags); + list_add(&rx_ts->list, &clock->rx_ts_list); + spin_unlock_irqrestore(&clock->rx_ts_lock, flags); + } else { + kfree(rx_ts); + } +} + +static void mchp_rds_ptp_match_rx_skb(struct mchp_rds_ptp_clock *clock, + struct sk_buff *skb) +{ + struct mchp_rds_ptp_rx_ts *rx_ts, *tmp, *rx_ts_var = NULL; + struct skb_shared_hwtstamps *shhwtstamps; + unsigned long flags; + u16 skb_sig; + + if (!mchp_rds_ptp_get_sig_rx(skb, &skb_sig)) + return; + + /* Iterate over all RX timestamps and match it with the received skbs */ + spin_lock_irqsave(&clock->rx_ts_lock, flags); + list_for_each_entry_safe(rx_ts, tmp, &clock->rx_ts_list, list) { + /* Check if we found the signature we were looking for. */ + if (skb_sig != rx_ts->seq_id) + continue; + + shhwtstamps = skb_hwtstamps(skb); + shhwtstamps->hwtstamp = ktime_set(rx_ts->seconds, rx_ts->nsec); + netif_rx(skb); + + rx_ts_var = rx_ts; + + break; + } + spin_unlock_irqrestore(&clock->rx_ts_lock, flags); + + if (rx_ts_var) { + list_del(&rx_ts_var->list); + kfree(rx_ts_var); + } else { + skb_queue_tail(&clock->rx_queue, skb); + } +} + +static bool mchp_rds_ptp_rxtstamp(struct mii_timestamper *mii_ts, + struct sk_buff *skb, int type) +{ + struct mchp_rds_ptp_clock *clock = container_of(mii_ts, + struct mchp_rds_ptp_clock, + mii_ts); + + if (clock->rx_filter == HWTSTAMP_FILTER_NONE || + type == PTP_CLASS_NONE) + return false; + + if ((type & clock->version) == 0 || (type & clock->layer) == 0) + return false; + + /* Here if match occurs skb is sent to application, If not skb is added + * to queue and sending skb to application will get handled when + * interrupt occurs i.e., it get handles in interrupt handler. By + * any means skb will reach the application so we should not return + * false here if skb doesn't matches. 
+ */ + mchp_rds_ptp_match_rx_skb(clock, skb); + + return true; +} + +static int mchp_rds_ptp_hwtstamp(struct mii_timestamper *mii_ts, + struct kernel_hwtstamp_config *config, + struct netlink_ext_ack *extack) +{ + struct mchp_rds_ptp_clock *clock = + container_of(mii_ts, struct mchp_rds_ptp_clock, + mii_ts); + struct mchp_rds_ptp_rx_ts *rx_ts, *tmp; + int txcfg = 0, rxcfg = 0; + unsigned long flags; + int rc; + + clock->hwts_tx_type = config->tx_type; + clock->rx_filter = config->rx_filter; + + switch (config->rx_filter) { + case HWTSTAMP_FILTER_NONE: + clock->layer = 0; + clock->version = 0; + break; + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + clock->layer = PTP_CLASS_L4; + clock->version = PTP_CLASS_V2; + break; + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + clock->layer = PTP_CLASS_L2; + clock->version = PTP_CLASS_V2; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + clock->layer = PTP_CLASS_L4 | PTP_CLASS_L2; + clock->version = PTP_CLASS_V2; + break; + default: + return -ERANGE; + } + + /* Setup parsing of the frames and enable the timestamping for ptp + * frames + */ + if (clock->layer & PTP_CLASS_L2) { + rxcfg = MCHP_RDS_PTP_PARSE_CONFIG_LAYER2_EN; + txcfg = MCHP_RDS_PTP_PARSE_CONFIG_LAYER2_EN; + } + if (clock->layer & PTP_CLASS_L4) { + rxcfg |= MCHP_RDS_PTP_PARSE_CONFIG_IPV4_EN | + MCHP_RDS_PTP_PARSE_CONFIG_IPV6_EN; + txcfg |= MCHP_RDS_PTP_PARSE_CONFIG_IPV4_EN | + MCHP_RDS_PTP_PARSE_CONFIG_IPV6_EN; + } + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_PARSE_CONFIG, + MCHP_RDS_PTP_PORT, rxcfg); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_PARSE_CONFIG, + MCHP_RDS_PTP_PORT, txcfg); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_TIMESTAMP_EN, + 
MCHP_RDS_PTP_PORT,
+				    MCHP_RDS_PTP_TIMESTAMP_EN_ALL);
+	if (rc < 0)
+		return rc;
+
+	rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_TIMESTAMP_EN,
+				    MCHP_RDS_PTP_PORT,
+				    MCHP_RDS_PTP_TIMESTAMP_EN_ALL);
+	if (rc < 0)
+		return rc;
+
+	/* Enable / disable of the TX timestamp in the SYNC frames.
+	 *
+	 * phy_modify_mmd() computes (old & ~mask) | set, so clearing the
+	 * bit means passing 0 as the set value. Passing the inverted mask
+	 * would set every other bit of the register.
+	 */
+	if (clock->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+		rc = mchp_rds_phy_modify_mmd(clock, MCHP_RDS_PTP_TX_MOD,
+					     MCHP_RDS_PTP_PORT,
+					     MCHP_RDS_TX_MOD_PTP_SYNC_TS_INSERT,
+					     MCHP_RDS_TX_MOD_PTP_SYNC_TS_INSERT);
+	else
+		rc = mchp_rds_phy_modify_mmd(clock, MCHP_RDS_PTP_TX_MOD,
+					     MCHP_RDS_PTP_PORT,
+					     MCHP_RDS_TX_MOD_PTP_SYNC_TS_INSERT,
+					     0);
+
+	if (rc < 0)
+		return rc;
+
+	/* In case of multiple starts and stops, these needs to be cleared */
+	spin_lock_irqsave(&clock->rx_ts_lock, flags);
+	list_for_each_entry_safe(rx_ts, tmp, &clock->rx_ts_list, list) {
+		list_del(&rx_ts->list);
+		kfree(rx_ts);
+	}
+	spin_unlock_irqrestore(&clock->rx_ts_lock, flags);
+
+	rc = mchp_rds_ptp_flush_fifo(clock, MCHP_RDS_PTP_INGRESS_FIFO);
+	if (rc < 0)
+		return rc;
+
+	rc = mchp_rds_ptp_flush_fifo(clock, MCHP_RDS_PTP_EGRESS_FIFO);
+	if (rc < 0)
+		return rc;
+
+	/* Now enable the timestamping interrupts */
+	rc = mchp_rds_ptp_config_intr(clock,
+				      config->rx_filter != HWTSTAMP_FILTER_NONE);
+
+	return rc < 0 ? 
rc : 0; +} + +static int mchp_rds_ptp_ts_info(struct mii_timestamper *mii_ts, + struct kernel_ethtool_ts_info *info) +{ + struct mchp_rds_ptp_clock *clock = container_of(mii_ts, + struct mchp_rds_ptp_clock, + mii_ts); + + info->phc_index = ptp_clock_index(clock->ptp_clock); + + info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | + BIT(HWTSTAMP_TX_ONESTEP_SYNC); + + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +} + +static int mchp_rds_ptp_ltc_adjtime(struct ptp_clock_info *info, s64 delta) +{ + struct mchp_rds_ptp_clock *clock = container_of(info, + struct mchp_rds_ptp_clock, + caps); + struct timespec64 ts; + bool add = true; + int rc = 0; + u32 nsec; + s32 sec; + + /* The HW allows up to 15 sec to adjust the time, but here we limit to + * 10 sec the adjustment. The reason is, in case the adjustment is 14 + * sec and 999999999 nsec, then we add 8ns to compensate the actual + * increment so the value can be bigger than 15 sec. 
Therefore limit the + * possible adjustments so we will not have these corner cases + */ + if (delta > 10000000000LL || delta < -10000000000LL) { + /* The timeadjustment is too big, so fall back using set time */ + u64 now; + + info->gettime64(info, &ts); + + now = ktime_to_ns(timespec64_to_ktime(ts)); + ts = ns_to_timespec64(now + delta); + + info->settime64(info, &ts); + return 0; + } + sec = div_u64_rem(abs(delta), NSEC_PER_SEC, &nsec); + if (delta < 0 && nsec != 0) { + /* It is not allowed to adjust low the nsec part, therefore + * subtract more from second part and add to nanosecond such + * that would roll over, so the second part will increase + */ + sec--; + nsec = NSEC_PER_SEC - nsec; + } + + /* Calculate the adjustments and the direction */ + if (delta < 0) + add = false; + + if (nsec > 0) { + /* add 8 ns to cover the likely normal increment */ + nsec += 8; + + if (nsec >= NSEC_PER_SEC) { + /* carry into seconds */ + sec++; + nsec -= NSEC_PER_SEC; + } + } + + mutex_lock(&clock->ptp_lock); + if (sec) { + sec = abs(sec); + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_STEP_ADJ_LO, + MCHP_RDS_PTP_CLOCK, sec); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_set_bits_mmd(clock, MCHP_RDS_PTP_STEP_ADJ_HI, + MCHP_RDS_PTP_CLOCK, + ((add ? 
+ MCHP_RDS_PTP_STEP_ADJ_HI_DIR : + 0) | ((sec >> 16) & + GENMASK(13, 0)))); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_set_bits_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_CMD_CTL_LTC_STEP_SEC); + if (rc < 0) + goto out_unlock; + } + + if (nsec) { + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_STEP_ADJ_LO, + MCHP_RDS_PTP_CLOCK, + nsec & GENMASK(15, 0)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_STEP_ADJ_HI, + MCHP_RDS_PTP_CLOCK, + (nsec >> 16) & GENMASK(13, 0)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_set_bits_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_CMD_CTL_LTC_STEP_NSEC); + } + +out_unlock: + mutex_unlock(&clock->ptp_lock); + + return rc; +} + +static int mchp_rds_ptp_ltc_adjfine(struct ptp_clock_info *info, + long scaled_ppm) +{ + struct mchp_rds_ptp_clock *clock = container_of(info, + struct mchp_rds_ptp_clock, + caps); + u16 rate_lo, rate_hi; + bool faster = true; + u32 rate; + int rc; + + if (!scaled_ppm) + return 0; + + if (scaled_ppm < 0) { + scaled_ppm = -scaled_ppm; + faster = false; + } + + rate = MCHP_RDS_PTP_1PPM_FORMAT * (upper_16_bits(scaled_ppm)); + rate += (MCHP_RDS_PTP_1PPM_FORMAT * (lower_16_bits(scaled_ppm))) >> 16; + + rate_lo = rate & GENMASK(15, 0); + rate_hi = (rate >> 16) & GENMASK(13, 0); + + if (faster) + rate_hi |= MCHP_RDS_PTP_LTC_RATE_ADJ_HI_DIR; + + mutex_lock(&clock->ptp_lock); + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_RATE_ADJ_HI, + MCHP_RDS_PTP_CLOCK, rate_hi); + if (rc < 0) + goto error; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_RATE_ADJ_LO, + MCHP_RDS_PTP_CLOCK, rate_lo); + if (rc > 0) + rc = 0; +error: + mutex_unlock(&clock->ptp_lock); + + return rc; +} + +static int mchp_rds_ptp_ltc_gettime64(struct ptp_clock_info *info, + struct timespec64 *ts) +{ + struct mchp_rds_ptp_clock *clock = container_of(info, + struct mchp_rds_ptp_clock, + caps); + time64_t secs; + int rc = 0; + s64 
nsecs; + + mutex_lock(&clock->ptp_lock); + /* Set read bit to 1 to save current values of 1588 local time counter + * into PTP LTC seconds and nanoseconds registers. + */ + rc = mchp_rds_phy_set_bits_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_CMD_CTL_CLOCK_READ); + if (rc < 0) + goto out_unlock; + + /* Get LTC clock values */ + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_LTC_READ_SEC_HI, + MCHP_RDS_PTP_CLOCK); + if (rc < 0) + goto out_unlock; + secs = rc << 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_LTC_READ_SEC_MID, + MCHP_RDS_PTP_CLOCK); + if (rc < 0) + goto out_unlock; + secs |= rc; + secs <<= 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_LTC_READ_SEC_LO, + MCHP_RDS_PTP_CLOCK); + if (rc < 0) + goto out_unlock; + secs |= rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_LTC_READ_NS_HI, + MCHP_RDS_PTP_CLOCK); + if (rc < 0) + goto out_unlock; + nsecs = (rc & GENMASK(13, 0)); + nsecs <<= 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_LTC_READ_NS_LO, + MCHP_RDS_PTP_CLOCK); + if (rc < 0) + goto out_unlock; + nsecs |= rc; + + set_normalized_timespec64(ts, secs, nsecs); + + if (rc > 0) + rc = 0; +out_unlock: + mutex_unlock(&clock->ptp_lock); + + return rc; +} + +static int mchp_rds_ptp_ltc_settime64(struct ptp_clock_info *info, + const struct timespec64 *ts) +{ + struct mchp_rds_ptp_clock *clock = container_of(info, + struct mchp_rds_ptp_clock, + caps); + int rc; + + mutex_lock(&clock->ptp_lock); + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_SEC_LO, + MCHP_RDS_PTP_CLOCK, + lower_16_bits(ts->tv_sec)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_SEC_MID, + MCHP_RDS_PTP_CLOCK, + upper_16_bits(ts->tv_sec)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_SEC_HI, + MCHP_RDS_PTP_CLOCK, + upper_32_bits(ts->tv_sec) & GENMASK(15, 0)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_write_mmd(clock, 
MCHP_RDS_PTP_LTC_NS_LO, + MCHP_RDS_PTP_CLOCK, + lower_16_bits(ts->tv_nsec)); + if (rc < 0) + goto out_unlock; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LTC_NS_HI, + MCHP_RDS_PTP_CLOCK, + upper_16_bits(ts->tv_nsec) & GENMASK(13, 0)); + if (rc < 0) + goto out_unlock; + + /* Set load bit to 1 to write PTP LTC seconds and nanoseconds + * registers to 1588 local time counter. + */ + rc = mchp_rds_phy_set_bits_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_CMD_CTL_CLOCK_LOAD); + if (rc > 0) + rc = 0; +out_unlock: + mutex_unlock(&clock->ptp_lock); + + return rc; +} + +static bool mchp_rds_ptp_get_sig_tx(struct sk_buff *skb, u16 *sig) +{ + struct ptp_header *ptp_header; + int type; + + type = ptp_classify_raw(skb); + if (type == PTP_CLASS_NONE) + return false; + + ptp_header = ptp_parse_header(skb, type); + if (!ptp_header) + return false; + + *sig = (__force u16)(ntohs(ptp_header->sequence_id)); + + return true; +} + +static void mchp_rds_ptp_match_tx_skb(struct mchp_rds_ptp_clock *clock, + u32 seconds, u32 nsec, u16 seq_id) +{ + struct skb_shared_hwtstamps shhwtstamps; + struct sk_buff *skb, *skb_tmp; + unsigned long flags; + bool rc = false; + u16 skb_sig; + + spin_lock_irqsave(&clock->tx_queue.lock, flags); + skb_queue_walk_safe(&clock->tx_queue, skb, skb_tmp) { + if (!mchp_rds_ptp_get_sig_tx(skb, &skb_sig)) + continue; + + if (skb_sig != seq_id) + continue; + + __skb_unlink(skb, &clock->tx_queue); + rc = true; + break; + } + spin_unlock_irqrestore(&clock->tx_queue.lock, flags); + + if (rc) { + shhwtstamps.hwtstamp = ktime_set(seconds, nsec); + skb_complete_tx_timestamp(skb, &shhwtstamps); + } +} + +static struct mchp_rds_ptp_rx_ts + *mchp_rds_ptp_get_rx_ts(struct mchp_rds_ptp_clock *clock) +{ + struct phy_device *phydev = clock->phydev; + struct mchp_rds_ptp_rx_ts *rx_ts = NULL; + u32 sec, nsec; + int rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_RX_INGRESS_NS_HI, + MCHP_RDS_PTP_PORT); + if (rc < 0) + goto error; + if (!(rc & 
MCHP_RDS_PTP_RX_INGRESS_NS_HI_TS_VALID)) { + phydev_err(phydev, "RX Timestamp is not valid!\n"); + goto error; + } + nsec = (rc & GENMASK(13, 0)) << 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_RX_INGRESS_NS_LO, + MCHP_RDS_PTP_PORT); + if (rc < 0) + goto error; + nsec |= rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_RX_INGRESS_SEC_HI, + MCHP_RDS_PTP_PORT); + if (rc < 0) + goto error; + sec = rc << 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_RX_INGRESS_SEC_LO, + MCHP_RDS_PTP_PORT); + if (rc < 0) + goto error; + sec |= rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_RX_MSG_HDR2, + MCHP_RDS_PTP_PORT); + if (rc < 0) + goto error; + + rx_ts = kmalloc(sizeof(*rx_ts), GFP_KERNEL); + if (!rx_ts) + return NULL; + + rx_ts->seconds = sec; + rx_ts->nsec = nsec; + rx_ts->seq_id = rc; + +error: + return rx_ts; +} + +static void mchp_rds_ptp_process_rx_ts(struct mchp_rds_ptp_clock *clock) +{ + int caps; + + do { + struct mchp_rds_ptp_rx_ts *rx_ts; + + rx_ts = mchp_rds_ptp_get_rx_ts(clock); + if (rx_ts) + mchp_rds_ptp_match_rx_ts(clock, rx_ts); + + caps = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_CAP_INFO, + MCHP_RDS_PTP_PORT); + if (caps < 0) + return; + } while (MCHP_RDS_PTP_RX_TS_CNT(caps) > 0); +} + +static bool mchp_rds_ptp_get_tx_ts(struct mchp_rds_ptp_clock *clock, + u32 *sec, u32 *nsec, u16 *seq) +{ + int rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_TX_EGRESS_NS_HI, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return false; + if (!(rc & MCHP_RDS_PTP_TX_EGRESS_NS_HI_TS_VALID)) + return false; + *nsec = (rc & GENMASK(13, 0)) << 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_TX_EGRESS_NS_LO, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return false; + *nsec = *nsec | rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_TX_EGRESS_SEC_HI, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return false; + *sec = rc << 16; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_TX_EGRESS_SEC_LO, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return false; + *sec = 
*sec | rc; + + rc = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_TX_MSG_HDR2, + MCHP_RDS_PTP_PORT); + if (rc < 0) + return false; + + *seq = rc; + + return true; +} + +static void mchp_rds_ptp_process_tx_ts(struct mchp_rds_ptp_clock *clock) +{ + int caps; + + do { + u32 sec, nsec; + u16 seq; + + if (mchp_rds_ptp_get_tx_ts(clock, &sec, &nsec, &seq)) + mchp_rds_ptp_match_tx_skb(clock, sec, nsec, seq); + + caps = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_CAP_INFO, + MCHP_RDS_PTP_PORT); + if (caps < 0) + return; + } while (MCHP_RDS_PTP_TX_TS_CNT(caps) > 0); +} + +int mchp_rds_ptp_top_config_intr(struct mchp_rds_ptp_clock *clock, + u16 reg, u16 val, bool clear) +{ + if (clear) + return phy_clear_bits_mmd(clock->phydev, PTP_MMD(clock), reg, + val); + else + return phy_set_bits_mmd(clock->phydev, PTP_MMD(clock), reg, + val); +} +EXPORT_SYMBOL_GPL(mchp_rds_ptp_top_config_intr); + +irqreturn_t mchp_rds_ptp_handle_interrupt(struct mchp_rds_ptp_clock *clock) +{ + int irq_sts; + + /* To handle rogue interrupt scenarios */ + if (!clock) + return IRQ_NONE; + + do { + irq_sts = mchp_rds_phy_read_mmd(clock, MCHP_RDS_PTP_INT_STS, + MCHP_RDS_PTP_PORT); + if (irq_sts < 0) + return IRQ_NONE; + + if (irq_sts & MCHP_RDS_PTP_INT_RX_TS_EN) + mchp_rds_ptp_process_rx_ts(clock); + + if (irq_sts & MCHP_RDS_PTP_INT_TX_TS_EN) + mchp_rds_ptp_process_tx_ts(clock); + + if (irq_sts & MCHP_RDS_PTP_INT_TX_TS_OVRFL_EN) + mchp_rds_ptp_flush_fifo(clock, + MCHP_RDS_PTP_EGRESS_FIFO); + + if (irq_sts & MCHP_RDS_PTP_INT_RX_TS_OVRFL_EN) + mchp_rds_ptp_flush_fifo(clock, + MCHP_RDS_PTP_INGRESS_FIFO); + } while (irq_sts & (MCHP_RDS_PTP_INT_RX_TS_EN | + MCHP_RDS_PTP_INT_TX_TS_EN | + MCHP_RDS_PTP_INT_TX_TS_OVRFL_EN | + MCHP_RDS_PTP_INT_RX_TS_OVRFL_EN)); + + return IRQ_HANDLED; +} +EXPORT_SYMBOL_GPL(mchp_rds_ptp_handle_interrupt); + +static int mchp_rds_ptp_init(struct mchp_rds_ptp_clock *clock) +{ + int rc; + + /* Disable PTP */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + 
MCHP_RDS_PTP_CMD_CTL_DIS); + if (rc < 0) + return rc; + + /* Disable TSU */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TSU_GEN_CONFIG, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + /* Clear PTP interrupt status registers */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TSU_HARD_RESET, + MCHP_RDS_PTP_PORT, + MCHP_RDS_PTP_TSU_HARDRESET); + if (rc < 0) + return rc; + + /* Predictor enable */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_LATENCY_CORRECTION_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_LATENCY_SETTING); + if (rc < 0) + return rc; + + /* Configure PTP operational mode */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_OP_MODE, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_OP_MODE_STANDALONE); + if (rc < 0) + return rc; + + /* Reference clock configuration */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_REF_CLK_CFG, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_REF_CLK_CFG_SET); + if (rc < 0) + return rc; + + /* Classifier configurations */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_PARSE_CONFIG, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_PARSE_CONFIG, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_PARSE_L2_ADDR_EN, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_PARSE_L2_ADDR_EN, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_PARSE_IPV4_ADDR_EN, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_PARSE_IPV4_ADDR_EN, + MCHP_RDS_PTP_PORT, 0); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_RX_VERSION, + MCHP_RDS_PTP_PORT, + MCHP_RDS_PTP_MAX_VERSION(0xff) | + MCHP_RDS_PTP_MIN_VERSION(0x0)); + if (rc < 0) + return rc; + + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TX_VERSION, + MCHP_RDS_PTP_PORT, + 
MCHP_RDS_PTP_MAX_VERSION(0xff) | + MCHP_RDS_PTP_MIN_VERSION(0x0)); + if (rc < 0) + return rc; + + /* Enable TSU */ + rc = mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_TSU_GEN_CONFIG, + MCHP_RDS_PTP_PORT, + MCHP_RDS_PTP_TSU_GEN_CFG_TSU_EN); + if (rc < 0) + return rc; + + /* Enable PTP */ + return mchp_rds_phy_write_mmd(clock, MCHP_RDS_PTP_CMD_CTL, + MCHP_RDS_PTP_CLOCK, + MCHP_RDS_PTP_CMD_CTL_EN); +} + +struct mchp_rds_ptp_clock *mchp_rds_ptp_probe(struct phy_device *phydev, u8 mmd, + u16 clk_base_addr, + u16 port_base_addr) +{ + struct mchp_rds_ptp_clock *clock; + int rc; + + clock = devm_kzalloc(&phydev->mdio.dev, sizeof(*clock), GFP_KERNEL); + if (!clock) + return ERR_PTR(-ENOMEM); + + clock->port_base_addr = port_base_addr; + clock->clk_base_addr = clk_base_addr; + clock->mmd = mmd; + + mutex_init(&clock->ptp_lock); + /* Register PTP clock */ + clock->caps.owner = THIS_MODULE; + snprintf(clock->caps.name, 30, "%s", phydev->drv->name); + clock->caps.max_adj = MCHP_RDS_PTP_MAX_ADJ; + clock->caps.n_ext_ts = 0; + clock->caps.pps = 0; + clock->caps.adjfine = mchp_rds_ptp_ltc_adjfine; + clock->caps.adjtime = mchp_rds_ptp_ltc_adjtime; + clock->caps.gettime64 = mchp_rds_ptp_ltc_gettime64; + clock->caps.settime64 = mchp_rds_ptp_ltc_settime64; + clock->ptp_clock = ptp_clock_register(&clock->caps, + &phydev->mdio.dev); + if (IS_ERR(clock->ptp_clock)) + return ERR_PTR(-EINVAL); + + /* Check if PHC support is missing at the configuration level */ + if (!clock->ptp_clock) + return NULL; + + /* Initialize the SW */ + skb_queue_head_init(&clock->tx_queue); + skb_queue_head_init(&clock->rx_queue); + INIT_LIST_HEAD(&clock->rx_ts_list); + spin_lock_init(&clock->rx_ts_lock); + + clock->mii_ts.rxtstamp = mchp_rds_ptp_rxtstamp; + clock->mii_ts.txtstamp = mchp_rds_ptp_txtstamp; + clock->mii_ts.hwtstamp = mchp_rds_ptp_hwtstamp; + clock->mii_ts.ts_info = mchp_rds_ptp_ts_info; + + phydev->mii_ts = &clock->mii_ts; + + /* Timestamp selected by default to keep legacy API */ + 
phydev->default_timestamp = true; + + clock->phydev = phydev; + + rc = mchp_rds_ptp_init(clock); + if (rc < 0) + return ERR_PTR(rc); + + return clock; +} +EXPORT_SYMBOL_GPL(mchp_rds_ptp_probe); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("MICROCHIP PHY RDS PTP driver"); +MODULE_AUTHOR("Divya Koppera"); From 2550afc61ef54274ac6f9355df9d33dad2910d3f Mon Sep 17 00:00:00 2001 From: Divya Koppera Date: Thu, 19 Dec 2024 18:03:09 +0530 Subject: [PATCH 0617/1386] net: phy: Kconfig: Add rds ptp library support and 1588 optional flag in Microchip phys Add ptp library support in Kconfig As some of Microchip T1 phys support ptp, add dependency of 1588 optional flag in Kconfig Reviewed-by: Andrew Lunn Reviewed-by: Vadim Fedorenko Signed-off-by: Divya Koppera Link: https://patch.msgid.link/20241219123311.30213-4-divya.koppera@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/Kconfig | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 15828f4710a9..dc625f2b3ae4 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -287,8 +287,15 @@ config MICROCHIP_PHY config MICROCHIP_T1_PHY tristate "Microchip T1 PHYs" + select MICROCHIP_PHY_RDS_PTP if NETWORK_PHY_TIMESTAMPING && \ + PTP_1588_CLOCK_OPTIONAL help - Supports the LAN87XX PHYs. + Supports the LAN8XXX PHYs. + +config MICROCHIP_PHY_RDS_PTP + tristate + help + Currently supports LAN887X T1 PHY config MICROSEMI_PHY tristate "Microsemi PHYs" From 85b39f7593e1383b235f1e9b3d943cc2e91b6b10 Mon Sep 17 00:00:00 2001 From: Divya Koppera Date: Thu, 19 Dec 2024 18:03:10 +0530 Subject: [PATCH 0618/1386] net: phy: Makefile: Add makefile support for rds ptp in Microchip phys Add makefile support for rds ptp library. 
Reviewed-by: Andrew Lunn Reviewed-by: Vadim Fedorenko Signed-off-by: Divya Koppera Link: https://patch.msgid.link/20241219123311.30213-5-divya.koppera@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index e6145153e837..39b72b464287 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -79,6 +79,7 @@ obj-$(CONFIG_MESON_GXL_PHY) += meson-gxl.o obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o obj-$(CONFIG_MICREL_PHY) += micrel.o obj-$(CONFIG_MICROCHIP_PHY) += microchip.o +obj-$(CONFIG_MICROCHIP_PHY_RDS_PTP) += microchip_rds_ptp.o obj-$(CONFIG_MICROCHIP_T1_PHY) += microchip_t1.o obj-$(CONFIG_MICROCHIP_T1S_PHY) += microchip_t1s.o obj-$(CONFIG_MICROSEMI_PHY) += mscc/ From 9fc3d6fe802923b026ecac16e59c0acdd6744d5d Mon Sep 17 00:00:00 2001 From: Divya Koppera Date: Thu, 19 Dec 2024 18:03:11 +0530 Subject: [PATCH 0619/1386] net: phy: microchip_t1 : Add initialization of ptp for lan887x Add initialization of ptp for lan887x. 
Reviewed-by: Andrew Lunn Signed-off-by: Divya Koppera Link: https://patch.msgid.link/20241219123311.30213-6-divya.koppera@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/microchip_t1.c | 41 +++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/microchip_t1.c b/drivers/net/phy/microchip_t1.c index b17bf6708003..73f28463bc35 100644 --- a/drivers/net/phy/microchip_t1.c +++ b/drivers/net/phy/microchip_t1.c @@ -10,11 +10,15 @@ #include #include #include +#include "microchip_rds_ptp.h" #define PHY_ID_LAN87XX 0x0007c150 #define PHY_ID_LAN937X 0x0007c180 #define PHY_ID_LAN887X 0x0007c1f0 +#define MCHP_RDS_PTP_LTC_BASE_ADDR 0xe000 +#define MCHP_RDS_PTP_PORT_BASE_ADDR (MCHP_RDS_PTP_LTC_BASE_ADDR + 0x800) + /* External Register Control Register */ #define LAN87XX_EXT_REG_CTL (0x14) #define LAN87XX_EXT_REG_CTL_RD_CTL (0x1000) @@ -229,6 +233,7 @@ #define LAN887X_INT_STS 0xf000 #define LAN887X_INT_MSK 0xf001 +#define LAN887X_INT_MSK_P1588_MOD_INT_MSK BIT(3) #define LAN887X_INT_MSK_T1_PHY_INT_MSK BIT(2) #define LAN887X_INT_MSK_LINK_UP_MSK BIT(1) #define LAN887X_INT_MSK_LINK_DOWN_MSK BIT(0) @@ -319,6 +324,8 @@ struct lan887x_regwr_map { struct lan887x_priv { u64 stats[ARRAY_SIZE(lan887x_hw_stats)]; + struct mchp_rds_ptp_clock *clock; + bool init_done; }; static int lan937x_dsp_workaround(struct phy_device *phydev, u16 ereg, u8 bank) @@ -1269,8 +1276,19 @@ static int lan887x_get_features(struct phy_device *phydev) static int lan887x_phy_init(struct phy_device *phydev) { + struct lan887x_priv *priv = phydev->priv; int ret; + if (!priv->init_done && phy_interrupt_is_valid(phydev)) { + priv->clock = mchp_rds_ptp_probe(phydev, MDIO_MMD_VEND1, + MCHP_RDS_PTP_LTC_BASE_ADDR, + MCHP_RDS_PTP_PORT_BASE_ADDR); + if (IS_ERR(priv->clock)) + return PTR_ERR(priv->clock); + + priv->init_done = true; + } + /* Clear loopback */ ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND1, LAN887X_MIS_CFG_REG2, @@ -1470,6 +1488,7 @@ static 
int lan887x_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; + priv->init_done = false; phydev->priv = priv; return lan887x_phy_setup(phydev); @@ -1518,6 +1537,7 @@ static void lan887x_get_strings(struct phy_device *phydev, u8 *data) static int lan887x_config_intr(struct phy_device *phydev) { + struct lan887x_priv *priv = phydev->priv; int rc; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { @@ -1537,12 +1557,24 @@ static int lan887x_config_intr(struct phy_device *phydev) rc = phy_read_mmd(phydev, MDIO_MMD_VEND1, LAN887X_INT_STS); } + if (rc < 0) + return rc; - return rc < 0 ? rc : 0; + if (phy_is_default_hwtstamp(phydev)) { + return mchp_rds_ptp_top_config_intr(priv->clock, + LAN887X_INT_MSK, + LAN887X_INT_MSK_P1588_MOD_INT_MSK, + (phydev->interrupts == + PHY_INTERRUPT_ENABLED)); + } + + return 0; } static irqreturn_t lan887x_handle_interrupt(struct phy_device *phydev) { + struct lan887x_priv *priv = phydev->priv; + int rc = IRQ_NONE; int irq_status; irq_status = phy_read_mmd(phydev, MDIO_MMD_VEND1, LAN887X_INT_STS); @@ -1553,10 +1585,13 @@ static irqreturn_t lan887x_handle_interrupt(struct phy_device *phydev) if (irq_status & LAN887X_MX_CHIP_TOP_LINK_MSK) { phy_trigger_machine(phydev); - return IRQ_HANDLED; + rc = IRQ_HANDLED; } - return IRQ_NONE; + if (irq_status & LAN887X_INT_MSK_P1588_MOD_INT_MSK) + rc = mchp_rds_ptp_handle_interrupt(priv->clock); + + return rc; } static int lan887x_cd_reset(struct phy_device *phydev, From ddbb5ddc43ad000a984149db5af1133433938404 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Thu, 19 Dec 2024 19:58:31 +0200 Subject: [PATCH 0620/1386] net/mlx5: LAG, Refactor lag logic Wrap the lag pf access into two new macros: 1. ldev_for_each() 2. ldev_for_each_reverse() The maximum number of lag ports and the index to `native_port_num` mapping will be handled by the two new macros. Users shouldn't use the for loop anymore.
Signed-off-by: Rongwei Liu Reviewed-by: Saeed Mahameed Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/lag/debugfs.c | 13 +- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 181 +++++++++--------- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 14 +- .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 24 ++- .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 10 +- .../mellanox/mlx5/core/lag/port_sel.c | 16 +- 6 files changed, 137 insertions(+), 121 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index f4b777d4e108..62b6faa4276a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -105,20 +105,20 @@ static int mapping_show(struct seq_file *file, void *priv) struct mlx5_lag *ldev; bool hash = false; bool lag_active; + int i, idx = 0; int num_ports; - int i; ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); if (lag_active) { if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { - mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, ports, + mlx5_infer_tx_enabled(&ldev->tracker, ldev, ports, &num_ports); hash = true; } else { - for (i = 0; i < ldev->ports; i++) - ports[i] = ldev->v2p_map[i]; + mlx5_ldev_for_each(i, 0, ldev) + ports[idx++] = ldev->v2p_map[i]; num_ports = ldev->ports; } } @@ -144,11 +144,8 @@ static int members_show(struct seq_file *file, void *priv) ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; + mlx5_ldev_for_each(i, 0, ldev) seq_printf(file, "%s\n", dev_name(ldev->pf[i].dev->device)); - } mutex_unlock(&ldev->lock); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index 7f68468c2e75..ed539ac4fef1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -43,10 +43,6 @@ #include "mp.h" #include "mpesw.h" -enum { - MLX5_LAG_EGRESS_PORT_1 = 1, - MLX5_LAG_EGRESS_PORT_2, -}; /* General purpose, use for short periods of time. * Beware of lock dependencies (preferably, no locks should be acquired @@ -72,7 +68,7 @@ static u8 lag_active_port_bits(struct mlx5_lag *ldev) int num_enabled; int idx; - mlx5_infer_tx_enabled(&ldev->tracker, ldev->ports, enabled_ports, + mlx5_infer_tx_enabled(&ldev->tracker, ldev, enabled_ports, &num_enabled); for (idx = 0; idx < num_enabled; idx++) active_port |= BIT_MASK(enabled_ports[idx]); @@ -113,7 +109,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, return mlx5_cmd_exec_in(dev, create_lag, in); } -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 num_ports, +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, u8 *ports) { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; @@ -148,33 +144,31 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); -static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, u8 num_ports, +static void mlx5_infer_tx_disabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_disabled) { int i; *num_disabled = 0; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) if (!tracker->netdev_state[i].tx_enabled || !tracker->netdev_state[i].link_up) ports[(*num_disabled)++] = i; - } } -void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_enabled) { int i; *num_enabled = 0; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) if (tracker->netdev_state[i].tx_enabled && 
tracker->netdev_state[i].link_up) ports[(*num_enabled)++] = i; - } if (*num_enabled == 0) - mlx5_infer_tx_disabled(tracker, num_ports, ports, num_enabled); + mlx5_infer_tx_disabled(tracker, ldev, ports, num_enabled); } static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, @@ -192,7 +186,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, int j; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { - mlx5_infer_tx_enabled(tracker, ldev->ports, enabled_ports, + mlx5_infer_tx_enabled(tracker, ldev, enabled_ports, &num_enabled); for (i = 0; i < num_enabled; i++) { err = scnprintf(buf + written, 4, "%d, ", enabled_ports[i] + 1); @@ -203,7 +197,7 @@ static void mlx5_lag_print_mapping(struct mlx5_core_dev *dev, buf[written - 2] = 0; mlx5_core_info(dev, "lag map active ports: %s\n", buf); } else { - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; err = scnprintf(buf + written, 10, @@ -286,7 +280,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, { int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->pf[i].netdev == ndev) return i; @@ -310,7 +304,7 @@ static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) * with mapping that points to active ports. */ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 num_ports, + struct mlx5_lag *ldev, u8 buckets, u8 *ports) { @@ -323,7 +317,7 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, int i; int j; - for (i = 0; i < num_ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (tracker->netdev_state[i].tx_enabled && tracker->netdev_state[i].link_up) enabled[enabled_ports_num++] = i; @@ -334,15 +328,16 @@ static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, /* Use native mapping by default where each port's buckets * point the native port: 1 1 1 .. 1 2 2 2 ... 2 3 3 3 ... 
3 etc */ - for (i = 0; i < num_ports; i++) + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < buckets; j++) { idx = i * buckets + j; - ports[idx] = MLX5_LAG_EGRESS_PORT_1 + i; + ports[idx] = i + 1; } + } /* If all ports are disabled/enabled keep native mapping */ - if (enabled_ports_num == num_ports || - disabled_ports_num == num_ports) + if (enabled_ports_num == ldev->ports || + disabled_ports_num == ldev->ports) return; /* Go over the disabled ports and for each assign a random active port */ @@ -358,7 +353,7 @@ static bool mlx5_lag_has_drop_rule(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->pf[i].has_drop) return true; return false; @@ -368,7 +363,7 @@ static void mlx5_lag_drop_rule_cleanup(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (!ldev->pf[i].has_drop) continue; @@ -396,7 +391,7 @@ static void mlx5_lag_drop_rule_setup(struct mlx5_lag *ldev, if (!ldev->tracker.has_inactive) return; - mlx5_infer_tx_disabled(tracker, ldev->ports, disabled_ports, &num_disabled); + mlx5_infer_tx_disabled(tracker, ldev, disabled_ports, &num_disabled); for (i = 0; i < num_disabled; i++) { disabled_index = disabled_ports[i]; @@ -442,7 +437,7 @@ static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) return mlx5_cmd_modify_active_port(dev0, active_ports); } - return mlx5_cmd_modify_lag(dev0, ldev->ports, ports); + return mlx5_cmd_modify_lag(dev0, ldev, ports); } static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev *dev) @@ -458,7 +453,7 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev if (!ldev) goto unlock; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (ldev->tracker.netdev_state[i].tx_enabled) ndev = ldev->pf[i].netdev; if (!ndev) @@ -483,9 +478,9 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, int i; int j; - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, 
ldev->buckets, ports); + mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; if (ports[idx] == ldev->v2p_map[idx]) @@ -596,9 +591,9 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_eswitch *master_esw = dev0->priv.eswitch; int err; - int i; + int i, j; - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 1, ldev) { struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, @@ -608,9 +603,9 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) } return 0; err: - for (; i > MLX5_LAG_P1; i--) + mlx5_ldev_for_each_reverse(j, i, 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, - ldev->pf[i].dev->priv.eswitch); + ldev->pf[j].dev->priv.eswitch); return err; } @@ -671,7 +666,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return err; if (mode != MLX5_LAG_MODE_MPESW) { - mlx5_infer_tx_affinity_mapping(tracker, ldev->ports, ldev->buckets, ldev->v2p_map); + mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ldev->v2p_map); if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) { err = mlx5_lag_port_sel_create(ldev, tracker->hash_type, ldev->v2p_map); @@ -722,7 +717,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, ldev->pf[i].dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); @@ -766,7 +761,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; #ifdef CONFIG_MLX5_ESWITCH - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; if 
(mlx5_eswitch_num_vfs(dev->priv.eswitch) && !is_mdev_switchdev_mode(dev)) return false; @@ -774,17 +769,17 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) dev = ldev->pf[MLX5_LAG_P1].dev; mode = mlx5_eswitch_mode(dev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) return false; #else - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) return false; #endif roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev); - for (i = 1; i < ldev->ports; i++) + mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) return false; @@ -795,10 +790,7 @@ void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; - + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; @@ -812,10 +804,7 @@ void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; - for (i = 0; i < ldev->ports; i++) { - if (!ldev->pf[i].dev) - continue; - + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) continue; @@ -842,7 +831,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - for (i = 1; i < ldev->ports; i++) + mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) mlx5_nic_vport_disable_roce(ldev->pf[i].dev); } @@ -854,7 +843,7 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) mlx5_lag_add_devices(ldev); if (shared_fdb) - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) if (!(ldev->pf[i].dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); } @@ -864,7 +853,7 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) struct mlx5_core_dev *dev; int i; - for (i = MLX5_LAG_P1 + 1; i < ldev->ports; 
i++) { + mlx5_ldev_for_each(i, MLX5_LAG_P1 + 1, ldev) { dev = ldev->pf[i].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && @@ -892,11 +881,11 @@ static bool mlx5_lag_is_roce_lag(struct mlx5_lag *ldev) bool roce_lag = true; int i; - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) roce_lag = roce_lag && !mlx5_sriov_is_enabled(ldev->pf[i].dev); #ifdef CONFIG_MLX5_ESWITCH - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) roce_lag = roce_lag && is_mdev_legacy_mode(ldev->pf[i].dev); #endif @@ -956,7 +945,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 1; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) { if (mlx5_get_roce_state(ldev->pf[i].dev)) mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } @@ -966,7 +955,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) break; @@ -977,7 +966,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) mlx5_rescan_drivers_locked(dev0); mlx5_deactivate_lag(ldev); mlx5_lag_add_devices(ldev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_core_err(dev0, "Failed to enable lag\n"); return; @@ -1010,12 +999,9 @@ struct mlx5_devcom_comp_dev *mlx5_lag_get_devcom_comp(struct mlx5_lag *ldev) int i; mutex_lock(&ldev->lock); - for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[i].dev) { - devcom = ldev->pf[i].dev->priv.hca_devcom_comp; - break; - } - } + i = mlx5_get_next_ldev_func(ldev, 0); + if (i < MLX5_MAX_PORTS) + devcom = ldev->pf[i].dev->priv.hca_devcom_comp; mutex_unlock(&ldev->lock); return 
devcom; } @@ -1068,7 +1054,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, u8 bond_status = 0; int num_slaves = 0; int changed = 0; - int idx; + int i, idx = -1; if (!netif_is_lag_master(upper)) return 0; @@ -1083,8 +1069,13 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ rcu_read_lock(); for_each_netdev_in_bond_rcu(upper, ndev_tmp) { - idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) { + mlx5_ldev_for_each(i, 0, ldev) { + if (ldev->pf[i].netdev == ndev_tmp) { + idx++; + break; + } + } + if (i < MLX5_MAX_PORTS) { slave = bond_slave_get_rcu(ndev_tmp); if (slave) has_inactive |= bond_is_slave_inactive(slave); @@ -1234,15 +1225,12 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, } static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) + struct mlx5_core_dev *dev, + struct net_device *netdev) { unsigned int fn = mlx5_get_dev_index(dev); unsigned long flags; - if (fn >= ldev->ports) - return; - spin_lock_irqsave(&lag_lock, flags); ldev->pf[fn].netdev = netdev; ldev->tracker.netdev_state[fn].link_up = 0; @@ -1257,7 +1245,7 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, int i; spin_lock_irqsave(&lag_lock, flags); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].netdev == netdev) { ldev->pf[i].netdev = NULL; break; @@ -1267,13 +1255,10 @@ static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, } static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev) + struct mlx5_core_dev *dev) { unsigned int fn = mlx5_get_dev_index(dev); - if (fn >= ldev->ports) - return; - ldev->pf[fn].dev = dev; dev->priv.lag = ldev; } @@ -1281,16 +1266,13 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, struct mlx5_core_dev *dev) { - int i; + int fn; - for (i = 0; i < ldev->ports; i++) - if (ldev->pf[i].dev == dev) - 
break; - - if (i == ldev->ports) + fn = mlx5_get_dev_index(dev); + if (ldev->pf[fn].dev != dev) return; - ldev->pf[i].dev = NULL; + ldev->pf[fn].dev = NULL; dev->priv.lag = NULL; } @@ -1406,7 +1388,6 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, mutex_lock(&ldev->lock); mlx5_ldev_add_netdev(ldev, dev, netdev); - for (i = 0; i < ldev->ports; i++) if (!ldev->pf[i].netdev) break; @@ -1417,6 +1398,26 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, mlx5_queue_bond_work(ldev, 0); } +int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx) +{ + int i; + + for (i = start_idx; i >= end_idx; i--) + if (ldev->pf[i].dev) + return i; + return -1; +} + +int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx) +{ + int i; + + for (i = start_idx; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + return i; + return MLX5_MAX_PORTS; +} + bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -1467,7 +1468,7 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; unsigned long flags; - bool res; + bool res = false; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); @@ -1555,7 +1556,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, if (!(ldev && __mlx5_lag_is_roce(ldev))) goto unlock; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { if (ldev->pf[i].netdev == slave) { port = i; break; @@ -1594,13 +1595,13 @@ struct mlx5_core_dev *mlx5_lag_get_next_peer_mdev(struct mlx5_core_dev *dev, int if (!ldev) goto unlock; - if (*i == ldev->ports) + if (*i == MLX5_MAX_PORTS) goto unlock; - for (idx = *i; idx < ldev->ports; idx++) + mlx5_ldev_for_each(idx, *i, ldev) if (ldev->pf[idx].dev != dev) break; - if (idx == ldev->ports) { + if (idx == MLX5_MAX_PORTS) { *i = idx; goto unlock; } @@ -1621,10 +1622,10 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, { int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); struct mlx5_core_dev **mdev; + int ret = 
0, i, j, idx = 0; struct mlx5_lag *ldev; unsigned long flags; int num_ports; - int ret, i, j; void *out; out = kvzalloc(outlen, GFP_KERNEL); @@ -1643,8 +1644,8 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, ldev = mlx5_lag_dev(dev); if (ldev && __mlx5_lag_is_active(ldev)) { num_ports = ldev->ports; - for (i = 0; i < ldev->ports; i++) - mdev[i] = ldev->pf[i].dev; + mlx5_ldev_for_each(i, 0, ldev) + mdev[idx++] = ldev->pf[i].dev; } else { num_ports = 1; mdev[MLX5_LAG_P1] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 50fcb1eee574..1dada791815e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -103,7 +103,7 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); -void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, +void mlx5_infer_tx_enabled(struct lag_tracker *tracker, struct mlx5_lag *ldev, u8 *ports, int *num_enabled); void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); @@ -119,9 +119,21 @@ static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || MLX5_CAP_GEN(dev, num_lag_ports) < 2 || + mlx5_get_dev_index(dev) >= MLX5_MAX_PORTS || MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS) return false; return true; } +#define mlx5_ldev_for_each(i, start_index, ldev) \ + for (int tmp = start_index; tmp = mlx5_get_next_ldev_func(ldev, tmp), \ + i = tmp, tmp < MLX5_MAX_PORTS; tmp++) + +#define mlx5_ldev_for_each_reverse(i, start_index, end_index, ldev) \ + for (int tmp = start_index, tmp1 = end_index; \ + tmp = mlx5_get_pre_ldev_func(ldev, tmp, tmp1), \ + i = tmp, tmp >= tmp1; tmp--) + +int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx); +int 
mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index b1aa494c76ba..40406d04adc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -153,6 +153,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, struct net_device *nh_dev0, *nh_dev1; struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; + int i, dev_idx = 0; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -186,10 +187,12 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, if (!nh_dev1) { if (__mlx5_lag_is_active(ldev)) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0); - - i++; - mlx5_lag_set_port_affinity(ldev, i); + mlx5_ldev_for_each(i, 0, ldev) { + dev_idx++; + if (ldev->pf[i].netdev == nh_dev0) + break; + } + mlx5_lag_set_port_affinity(ldev, dev_idx); mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } @@ -214,6 +217,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct fib_info *fi) { struct lag_mp *mp = &ldev->lag_mp; + int i, dev_idx = 0; /* Check the nh event is related to the route */ if (!mp->fib.mfi || mp->fib.mfi != fi) @@ -221,11 +225,15 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); + mlx5_ldev_for_each(i, 0, ldev) { + if (ldev->pf[i].netdev == fib_nh->fib_nh_dev) + break; + dev_idx++; + } - if (i >= 0) { - i = (i + 1) % 2 + 1; /* peer port */ - mlx5_lag_set_port_affinity(ldev, i); + if (dev_idx >= 0) { + dev_idx = (dev_idx + 1) % 2 + 1; /* peer port */ + mlx5_lag_set_port_affinity(ldev, dev_idx); } } else if (event == FIB_EVENT_NH_ADD && fib_info_num_path(fi) == 2) { diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 571ea26edd0c..1123c8afcf9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -15,7 +15,7 @@ static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) u32 pf_metadata; int i; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; esw = dev->priv.eswitch; pf_metadata = ldev->lag_mpesw.pf_metadata[i]; @@ -36,7 +36,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) u32 pf_metadata; int i, err; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; esw = dev->priv.eswitch; pf_metadata = mlx5_esw_match_metadata_alloc(esw); @@ -52,7 +52,7 @@ static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) goto err_metadata; } - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { dev = ldev->pf[i].dev; mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, (void *)0); @@ -98,7 +98,7 @@ static int enable_mpesw(struct mlx5_lag *ldev) dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { err = mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); if (err) goto err_rescan_drivers; @@ -112,7 +112,7 @@ err_rescan_drivers: mlx5_deactivate_lag(ldev); err_add_devices: mlx5_lag_add_devices(ldev); - for (i = 0; i < ldev->ports; i++) + mlx5_ldev_for_each(i, 0, ldev) mlx5_eswitch_reload_ib_reps(ldev->pf[i].dev->priv.eswitch); mlx5_mpesw_metadata_cleanup(ldev); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index ab2717012b79..f98f0735fce0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -44,9 
+44,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; - int err, i; - int idx; - int j; + int err, i, j, k, idx; ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; @@ -74,7 +72,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.flags |= FLOW_ACT_NO_APPEND; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { u8 affinity; @@ -88,13 +86,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, &dest, 1); if (IS_ERR(lag_definer->rules[idx])) { err = PTR_ERR(lag_definer->rules[idx]); - do { + mlx5_ldev_for_each_reverse(k, i, 0, ldev) { while (j--) { - idx = i * ldev->buckets + j; + idx = k * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); } j = ldev->buckets; - } while (i--); + }; goto destroy_fg; } } @@ -346,7 +344,7 @@ static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, int i; int j; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); @@ -565,7 +563,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev, dest.type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; - for (i = 0; i < ldev->ports; i++) { + mlx5_ldev_for_each(i, 0, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; if (ldev->v2p_map[idx] == ports[idx]) From 60d01cc468fdb0fbd6b878d66ef86f7e946b0669 Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Thu, 19 Dec 2024 19:58:32 +0200 Subject: [PATCH 0621/1386] net/mlx5: LAG, Support LAG over Multi-Host NICs New multi-host NICs provide each host with partial ports, 
allowing each host to maintain its unique LAG configuration. On these multi-host NICs, the 'native_port_num' capability is no longer continuous on each host and can exceed the 'num_lag_ports' capability. Therefore, it is necessary to skip the PFs with ldev->pf[i].dev == NULL when querying/modifying the lag devices' information. There is no need to check dev.native_port_num against ldev->ports. Signed-off-by: Rongwei Liu Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/lag/lag.c | 200 ++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/lag/lag.h | 3 + .../net/ethernet/mellanox/mlx5/core/lag/mp.c | 53 +++-- .../ethernet/mellanox/mlx5/core/lag/mpesw.c | 6 +- .../mellanox/mlx5/core/lag/port_sel.c | 41 +++- .../net/ethernet/mellanox/mlx5/core/main.c | 4 + 6 files changed, 229 insertions(+), 78 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index ed539ac4fef1..cea5aa314f6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -76,23 +76,30 @@ static u8 lag_active_port_bits(struct mlx5_lag *ldev) return active_port; } -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode, - unsigned long flags) +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, + int mode, unsigned long flags) { bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, &flags); int port_sel_mode = get_port_sel_mode(mode, flags); u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; + u8 *ports = ldev->v2p_map; + int idx0, idx1; void *lag_ctx; lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode); + idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 
0); + idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); + + if (idx0 < 0 || idx1 < 0) + return -EINVAL; switch (port_sel_mode) { case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); break; case MLX5_LAG_PORT_SELECT_MODE_PORT_SELECT_FT: if (!MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table_bypass)) @@ -114,12 +121,18 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, struct mlx5_lag *ldev, { u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + int idx0, idx1; + + idx0 = mlx5_lag_get_dev_index_by_seq(ldev, 0); + idx1 = mlx5_lag_get_dev_index_by_seq(ldev, 1); + if (idx0 < 0 || idx1 < 0) + return -EINVAL; MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); MLX5_SET(modify_lag_in, in, field_select, 0x1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[idx0]); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[idx1]); return mlx5_cmd_exec_in(dev, modify_lag, in); } @@ -287,6 +300,48 @@ int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, return -ENOENT; } +int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq) +{ + int i, num = 0; + + if (!ldev) + return -ENOENT; + + mlx5_ldev_for_each(i, 0, ldev) { + if (num == seq) + return i; + num++; + } + return -ENOENT; +} + +int mlx5_lag_num_devs(struct mlx5_lag *ldev) +{ + int i, num = 0; + + if (!ldev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) { + (void)i; + num++; + } + return num; +} + +int mlx5_lag_num_netdevs(struct mlx5_lag *ldev) +{ + int i, num = 0; + + if (!ldev) + return 0; + + mlx5_ldev_for_each(i, 0, ldev) + if (ldev->pf[i].netdev) + num++; + return num; +} + static bool 
__mlx5_lag_is_roce(struct mlx5_lag *ldev) { return ldev->mode == MLX5_LAG_MODE_ROCE; @@ -423,10 +478,15 @@ static int mlx5_cmd_modify_active_port(struct mlx5_core_dev *dev, u8 ports) static int _mlx5_modify_lag(struct mlx5_lag *ldev, u8 *ports) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; u8 active_ports; int ret; + if (idx < 0) + return -EINVAL; + + dev0 = ldev->pf[idx].dev; if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &ldev->mode_flags)) { ret = mlx5_lag_port_sel_modify(ldev, ports); if (ret || @@ -445,7 +505,7 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev struct net_device *ndev = NULL; struct mlx5_lag *ldev; unsigned long flags; - int i; + int i, last_idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); @@ -456,8 +516,12 @@ static struct net_device *mlx5_lag_active_backup_get_netdev(struct mlx5_core_dev mlx5_ldev_for_each(i, 0, ldev) if (ldev->tracker.netdev_state[i].tx_enabled) ndev = ldev->pf[i].netdev; - if (!ndev) - ndev = ldev->pf[ldev->ports - 1].netdev; + if (!ndev) { + last_idx = mlx5_lag_get_dev_index_by_seq(ldev, ldev->ports - 1); + if (last_idx < 0) + goto unlock; + ndev = ldev->pf[last_idx].netdev; + } if (ndev) dev_hold(ndev); @@ -471,13 +535,18 @@ unlock: void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u8 ports[MLX5_MAX_PORTS * MLX5_LAG_MAX_HASH_BUCKETS] = {}; - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev0; int idx; int err; int i; int j; + if (first_idx < 0) + return; + + dev0 = ldev->pf[first_idx].dev; mlx5_infer_tx_affinity_mapping(tracker, ldev, ldev->buckets, ports); mlx5_ldev_for_each(i, 0, ldev) { @@ -518,8 +587,13 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, static int mlx5_lag_set_port_sel_mode_roce(struct mlx5_lag *ldev, unsigned long *flags) { - 
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; if (!MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table)) { if (ldev->ports > 2) return -EINVAL; @@ -539,11 +613,13 @@ static void mlx5_lag_set_port_sel_mode_offloads(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, unsigned long *flags) { - struct lag_func *dev0 = &ldev->pf[MLX5_LAG_P1]; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct lag_func *dev0; - if (mode == MLX5_LAG_MODE_MPESW) + if (first_idx < 0 || mode == MLX5_LAG_MODE_MPESW) return; + dev0 = &ldev->pf[first_idx]; if (MLX5_CAP_PORT_SELECTION(dev0->dev, port_select_flow_table) && tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH) { if (ldev->ports > 2) @@ -588,12 +664,18 @@ char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags) static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_eswitch *master_esw = dev0->priv.eswitch; - int err; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_eswitch *master_esw; + struct mlx5_core_dev *dev0; int i, j; + int err; - mlx5_ldev_for_each(i, 1, ldev) { + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; + master_esw = dev0->priv.eswitch; + mlx5_ldev_for_each(i, first_idx + 1, ldev) { struct mlx5_eswitch *slave_esw = ldev->pf[i].dev->priv.eswitch; err = mlx5_eswitch_offloads_single_fdb_add_one(master_esw, @@ -603,7 +685,7 @@ static int mlx5_lag_create_single_fdb(struct mlx5_lag *ldev) } return 0; err: - mlx5_ldev_for_each_reverse(j, i, 1, ldev) + mlx5_ldev_for_each_reverse(j, i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, ldev->pf[j].dev->priv.eswitch); return err; @@ -615,16 +697,21 @@ static int mlx5_create_lag(struct mlx5_lag *ldev, 
unsigned long flags) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + struct mlx5_core_dev *dev0; int err; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; if (tracker) mlx5_lag_print_mapping(dev0, ldev, tracker, flags); mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n", shared_fdb, mlx5_get_str_port_sel_mode(mode, flags)); - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags); + err = mlx5_cmd_create_lag(dev0, ldev, mode, flags); if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", @@ -656,11 +743,16 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, enum mlx5_lag_mode mode, bool shared_fdb) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); bool roce_lag = mode == MLX5_LAG_MODE_ROCE; - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev0; unsigned long flags = 0; int err; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; err = mlx5_lag_set_flags(ldev, mode, tracker, shared_fdb, &flags); if (err) return err; @@ -704,20 +796,26 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_deactivate_lag(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_eswitch *master_esw = dev0->priv.eswitch; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; bool roce_lag = __mlx5_lag_is_roce(ldev); unsigned long flags = ldev->mode_flags; + struct mlx5_eswitch *master_esw; + struct mlx5_core_dev *dev0; int err; int i; + if (first_idx < 0) + return -EINVAL; + + dev0 = ldev->pf[first_idx].dev; + master_esw = dev0->priv.eswitch; ldev->mode = MLX5_LAG_MODE_NONE; ldev->mode_flags = 0; mlx5_lag_mp_reset(ldev); if (test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags)) { - 
mlx5_ldev_for_each(i, 1, ldev) + mlx5_ldev_for_each(i, first_idx + 1, ldev) mlx5_eswitch_offloads_single_fdb_del_one(master_esw, ldev->pf[i].dev->priv.eswitch); clear_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags); @@ -749,6 +847,7 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev) bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; u8 mode; @@ -756,9 +855,8 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) bool roce_support; int i; - for (i = 0; i < ldev->ports; i++) - if (!ldev->pf[i].dev) - return false; + if (first_idx < 0 || mlx5_lag_num_devs(ldev) != ldev->ports) + return false; #ifdef CONFIG_MLX5_ESWITCH mlx5_ldev_for_each(i, 0, ldev) { @@ -767,7 +865,7 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return false; } - dev = ldev->pf[MLX5_LAG_P1].dev; + dev = ldev->pf[first_idx].dev; mode = mlx5_eswitch_mode(dev); mlx5_ldev_for_each(i, 0, ldev) if (mlx5_eswitch_mode(ldev->pf[i].dev) != mode) @@ -778,8 +876,8 @@ bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) if (mlx5_sriov_is_enabled(ldev->pf[i].dev)) return false; #endif - roce_support = mlx5_get_roce_state(ldev->pf[MLX5_LAG_P1].dev); - mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) + roce_support = mlx5_get_roce_state(ldev->pf[first_idx].dev); + mlx5_ldev_for_each(i, first_idx + 1, ldev) if (mlx5_get_roce_state(ldev->pf[i].dev) != roce_support) return false; @@ -817,11 +915,16 @@ void mlx5_lag_remove_devices(struct mlx5_lag *ldev) void mlx5_disable_lag(struct mlx5_lag *ldev) { bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; bool roce_lag; int err; int i; + if (idx < 0) + return; + + dev0 = ldev->pf[idx].dev; roce_lag = __mlx5_lag_is_roce(ldev); if (shared_fdb) { @@ -831,7 +934,7 @@ void mlx5_disable_lag(struct 
mlx5_lag *ldev) dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); } - mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) + mlx5_ldev_for_each(i, idx + 1, ldev) mlx5_nic_vport_disable_roce(ldev->pf[i].dev); } @@ -850,10 +953,14 @@ void mlx5_disable_lag(struct mlx5_lag *ldev) static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_core_dev *dev; int i; - mlx5_ldev_for_each(i, MLX5_LAG_P1 + 1, ldev) { + if (idx < 0) + return false; + + mlx5_ldev_for_each(i, idx + 1, ldev) { dev = ldev->pf[i].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && @@ -865,7 +972,7 @@ static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) return false; } - dev = ldev->pf[MLX5_LAG_P1].dev; + dev = ldev->pf[idx].dev; if (is_mdev_switchdev_mode(dev) && mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch) && mlx5_esw_offloads_devcom_is_ready(dev->priv.eswitch) && @@ -906,13 +1013,18 @@ static bool mlx5_lag_should_disable_lag(struct mlx5_lag *ldev, bool do_bond) static void mlx5_do_bond(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct lag_tracker tracker = { }; + struct mlx5_core_dev *dev0; struct net_device *ndev; bool do_bond, roce_lag; int err; int i; + if (idx < 0) + return; + + dev0 = ldev->pf[idx].dev; if (!mlx5_lag_is_ready(ldev)) { do_bond = false; } else { @@ -945,7 +1057,7 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); - mlx5_ldev_for_each(i, MLX5_LAG_P2, ldev) { + mlx5_ldev_for_each(i, idx + 1, ldev) { if (mlx5_get_roce_state(ldev->pf[i].dev)) mlx5_nic_vport_enable_roce(ldev->pf[i].dev); } @@ -1380,7 +1492,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev) { struct 
mlx5_lag *ldev; - int i; + int num = 0; ldev = mlx5_lag_dev(dev); if (!ldev) @@ -1388,11 +1500,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, mutex_lock(&ldev->lock); mlx5_ldev_add_netdev(ldev, dev, netdev); - for (i = 0; i < ldev->ports; i++) - if (!ldev->pf[i].netdev) - break; - - if (i >= ldev->ports) + num = mlx5_lag_num_netdevs(ldev); + if (num >= ldev->ports) set_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); mutex_unlock(&ldev->lock); mlx5_queue_bond_work(ldev, 0); @@ -1469,11 +1578,12 @@ bool mlx5_lag_is_master(struct mlx5_core_dev *dev) struct mlx5_lag *ldev; unsigned long flags; bool res = false; + int idx; spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_active(ldev) && - dev == ldev->pf[MLX5_LAG_P1].dev; + idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + res = ldev && __mlx5_lag_is_active(ldev) && idx >= 0 && dev == ldev->pf[idx].dev; spin_unlock_irqrestore(&lag_lock, flags); return res; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index 1dada791815e..01cf72366947 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -136,4 +136,7 @@ static inline bool mlx5_lag_is_supported(struct mlx5_core_dev *dev) int mlx5_get_pre_ldev_func(struct mlx5_lag *ldev, int start_idx, int end_idx); int mlx5_get_next_ldev_func(struct mlx5_lag *ldev, int start_idx); +int mlx5_lag_get_dev_index_by_seq(struct mlx5_lag *ldev, int seq); +int mlx5_lag_num_devs(struct mlx5_lag *ldev); +int mlx5_lag_num_netdevs(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 40406d04adc9..aee17fcf3b36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -17,7 +17,10 @@ static bool __mlx5_lag_is_multipath(struct 
mlx5_lag *ldev) #define MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS 2 static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) { - if (!mlx5_lag_is_ready(ldev)) + int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2); + + if (idx0 < 0 || idx1 < 0 || !mlx5_lag_is_ready(ldev)) return false; if (__mlx5_lag_is_active(ldev) && !__mlx5_lag_is_multipath(ldev)) @@ -26,8 +29,8 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) if (ldev->ports > MLX5_LAG_MULTIPATH_OFFLOADS_SUPPORTED_PORTS) return false; - return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev, - ldev->pf[MLX5_LAG_P2].dev); + return mlx5_esw_multipath_prereq(ldev->pf[idx0].dev, + ldev->pf[idx1].dev); } bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) @@ -50,43 +53,45 @@ bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev, enum mlx5_lag_port_affinity port) { + int idx0 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + int idx1 = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P2); struct lag_tracker tracker = {}; - if (!__mlx5_lag_is_multipath(ldev)) + if (idx0 < 0 || idx1 < 0 || !__mlx5_lag_is_multipath(ldev)) return; switch (port) { case MLX5_LAG_NORMAL_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P1].link_up = true; - tracker.netdev_state[MLX5_LAG_P2].link_up = true; + tracker.netdev_state[idx0].tx_enabled = true; + tracker.netdev_state[idx1].tx_enabled = true; + tracker.netdev_state[idx0].link_up = true; + tracker.netdev_state[idx1].link_up = true; break; case MLX5_LAG_P1_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P1].link_up = true; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false; - tracker.netdev_state[MLX5_LAG_P2].link_up = false; + tracker.netdev_state[idx0].tx_enabled 
= true; + tracker.netdev_state[idx0].link_up = true; + tracker.netdev_state[idx1].tx_enabled = false; + tracker.netdev_state[idx1].link_up = false; break; case MLX5_LAG_P2_AFFINITY: - tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false; - tracker.netdev_state[MLX5_LAG_P1].link_up = false; - tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true; - tracker.netdev_state[MLX5_LAG_P2].link_up = true; + tracker.netdev_state[idx0].tx_enabled = false; + tracker.netdev_state[idx0].link_up = false; + tracker.netdev_state[idx1].tx_enabled = true; + tracker.netdev_state[idx1].link_up = true; break; default: - mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + mlx5_core_warn(ldev->pf[idx0].dev, "Invalid affinity port %d", port); return; } - if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events, + if (tracker.netdev_state[idx0].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[idx0].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); - if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled) - mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events, + if (tracker.netdev_state[idx1].tx_enabled) + mlx5_notifier_call_chain(ldev->pf[idx1].dev->priv.events, MLX5_DEV_EVENT_PORT_AFFINITY, (void *)0); @@ -150,11 +155,15 @@ mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, struct fib_entry_notifier_info *fen_info) { + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct net_device *nh_dev0, *nh_dev1; struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; int i, dev_idx = 0; + if (idx < 0) + return; + /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { /* stop track */ @@ -180,7 +189,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, } if (nh_dev0 == nh_dev1) { - mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + mlx5_core_warn(ldev->pf[idx].dev, "Multipath offload doesn't support 
routes with multiple nexthops of the same device"); return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index 1123c8afcf9e..ffac0bd6c895 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -68,13 +68,15 @@ err_metadata: #define MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS 4 static int enable_mpesw(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev0; int err; int i; - if (ldev->mode != MLX5_LAG_MODE_NONE) + if (idx < 0 || ldev->mode != MLX5_LAG_MODE_NONE) return -EINVAL; + dev0 = ldev->pf[idx].dev; if (ldev->ports > MLX5_LAG_MPESW_OFFLOADS_SUPPORTED_PORTS) return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index f98f0735fce0..22241f52716c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -39,13 +39,18 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer, u8 *ports) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_namespace *ns; + struct mlx5_core_dev *dev; int err, i, j, k, idx; + if (first_idx < 0) + return -EINVAL; + + dev = ldev->pf[first_idx].dev; ft_attr.max_fte = ldev->ports * ldev->buckets; ft_attr.level = MLX5_LAG_FT_LEVEL_DEFINER; @@ -293,11 +298,16 @@ static struct mlx5_lag_definer * mlx5_lag_create_definer(struct mlx5_lag *ldev, enum netdev_lag_hash hash, enum mlx5_traffic_types tt, bool tunnel, u8 *ports) { - struct mlx5_core_dev *dev = 
ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_definer *lag_definer; + struct mlx5_core_dev *dev; u32 *match_definer_mask; int format_id, err; + if (first_idx < 0) + return ERR_PTR(-EINVAL); + + dev = ldev->pf[first_idx].dev; lag_definer = kzalloc(sizeof(*lag_definer), GFP_KERNEL); if (!lag_definer) return ERR_PTR(-ENOMEM); @@ -339,12 +349,15 @@ free_lag_definer: static void mlx5_lag_destroy_definer(struct mlx5_lag *ldev, struct mlx5_lag_definer *lag_definer) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; - int idx; - int i; - int j; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); + struct mlx5_core_dev *dev; + int idx, i, j; - mlx5_ldev_for_each(i, 0, ldev) { + if (first_idx < 0) + return; + + dev = ldev->pf[first_idx].dev; + mlx5_ldev_for_each(i, first_idx, ldev) { for (j = 0; j < ldev->buckets; j++) { idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); @@ -499,10 +512,15 @@ static void mlx5_lag_set_outer_ttc_params(struct mlx5_lag *ldev, static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct ttc_params ttc_params = {}; + struct mlx5_core_dev *dev; + if (first_idx < 0) + return -EINVAL; + + dev = ldev->pf[first_idx].dev; mlx5_lag_set_outer_ttc_params(ldev, &ttc_params); port_sel->outer.ttc = mlx5_create_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->outer.ttc); @@ -510,10 +528,15 @@ static int mlx5_lag_create_ttc_table(struct mlx5_lag *ldev) static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; + int first_idx = mlx5_lag_get_dev_index_by_seq(ldev, MLX5_LAG_P1); struct mlx5_lag_port_sel *port_sel = &ldev->port_sel; struct ttc_params ttc_params = {}; + struct mlx5_core_dev 
*dev; + if (first_idx < 0) + return -EINVAL; + + dev = ldev->pf[first_idx].dev; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); return PTR_ERR_OR_ZERO(port_sel->inner.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 220a9ac75c8b..869bfecdd8ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -664,6 +664,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_current_uc_list, ilog2(max_uc_list)); + /* enable absolute native port num */ + if (MLX5_CAP_GEN_MAX(dev, abs_native_port_num)) + MLX5_SET(cmd_hca_cap, set_hca_cap, abs_native_port_num, 1); + return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); } From 95f68e06b41b9e88291796efa3969409d13fdd4c Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 19 Dec 2024 19:58:33 +0200 Subject: [PATCH 0622/1386] net/mlx5: fs, add counter object to flow destination Currently mlx5_flow_destination includes counter_id which is assigned in case we use flow counter on the flow steering rule. However, counter_id is not enough data in case of using HW Steering. Thus, have mlx5_fc object as part of mlx5_flow_destination instead of counter_id and assign it where needed. In case counter_id is received from user space, create a local counter object to represent it. 
Signed-off-by: Moshe Shemesh Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/infiniband/hw/mlx5/fs.c | 37 +++++++++---- .../mellanox/mlx5/core/diag/fs_tracepoint.h | 2 +- .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 20 +++---- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- .../mellanox/mlx5/core/esw/acl/egress_lgcy.c | 2 +- .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c | 2 +- .../ethernet/mellanox/mlx5/core/esw/bridge.c | 20 +++---- .../mellanox/mlx5/core/eswitch_offloads.c | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +- .../net/ethernet/mellanox/mlx5/core/fs_core.c | 1 + .../ethernet/mellanox/mlx5/core/fs_counters.c | 53 +++++++++++++++++++ .../mellanox/mlx5/core/lib/macsec_fs.c | 8 +-- .../mellanox/mlx5/core/steering/sws/fs_dr.c | 2 +- drivers/vdpa/mlx5/net/mlx5_vnet.c | 4 +- include/linux/mlx5/fs.h | 4 +- 15 files changed, 117 insertions(+), 44 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 520034acf73a..162814ae8cb4 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -943,7 +943,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num, } dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst.counter_id = mlx5_fc_id(opfc->fc); + dst.counter = opfc->fc; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW; @@ -1113,8 +1113,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev, handler->ibcounters = flow_act.counters; dest_arr[dest_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest_arr[dest_num].counter_id = - mlx5_fc_id(mcounters->hw_cntrs_hndl); + dest_arr[dest_num].counter = + mcounters->hw_cntrs_hndl; dest_num++; } @@ -1603,7 +1603,7 @@ static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, static struct mlx5_ib_flow_handler 
*raw_fs_rule_add( struct mlx5_ib_dev *dev, struct mlx5_ib_flow_matcher *fs_matcher, struct mlx5_flow_context *flow_context, struct mlx5_flow_act *flow_act, - u32 counter_id, void *cmd_in, int inlen, int dest_id, int dest_type) + struct mlx5_fc *counter, void *cmd_in, int inlen, int dest_id, int dest_type) { struct mlx5_flow_destination *dst; struct mlx5_ib_flow_prio *ft_prio; @@ -1652,8 +1652,12 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add( } if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + if (WARN_ON(!counter)) { + err = -EINVAL; + goto unlock; + } dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dst[dst_num].counter_id = counter_id; + dst[dst_num].counter = counter; dst_num++; } @@ -1878,7 +1882,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs, return 0; } -static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) +static bool +is_flow_counter(void *obj, u32 offset, u32 *counter_id, u32 *fc_bulk_size) { struct devx_obj *devx_obj = obj; u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, devx_obj->dinbox, opcode); @@ -1888,6 +1893,7 @@ static bool is_flow_counter(void *obj, u32 offset, u32 *counter_id) if (offset && offset >= devx_obj->flow_counter_bulk_size) return false; + *fc_bulk_size = devx_obj->flow_counter_bulk_size; *counter_id = MLX5_GET(dealloc_flow_counter_in, devx_obj->dinbox, flow_counter_id); @@ -1904,13 +1910,13 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( { struct mlx5_flow_context flow_context = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG}; - u32 *offset_attr, offset = 0, counter_id = 0; int dest_id, dest_type = -1, inlen, len, ret, i; struct mlx5_ib_flow_handler *flow_handler; struct mlx5_ib_flow_matcher *fs_matcher; struct ib_uobject **arr_flow_actions; struct ib_uflow_resources *uflow_res; struct mlx5_flow_act flow_act = {}; + struct mlx5_fc *counter = NULL; struct ib_qp *qp = NULL; void *devx_obj, *cmd_in; struct ib_uobject *uobj; @@ -1937,6 +1943,7 @@ static int 
UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_COUNTERS_DEVX, &arr_flow_actions); if (len) { + u32 *offset_attr, fc_bulk_size, offset = 0, counter_id = 0; devx_obj = arr_flow_actions[0]->object; if (uverbs_attr_is_valid(attrs, @@ -1956,8 +1963,11 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( offset = *offset_attr; } - if (!is_flow_counter(devx_obj, offset, &counter_id)) + if (!is_flow_counter(devx_obj, offset, &counter_id, &fc_bulk_size)) return -EINVAL; + counter = mlx5_fc_local_create(counter_id, offset, fc_bulk_size); + if (IS_ERR(counter)) + return PTR_ERR(counter); flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; } @@ -1968,8 +1978,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( MLX5_IB_ATTR_CREATE_FLOW_MATCH_VALUE); uflow_res = flow_resources_alloc(MLX5_IB_CREATE_FLOW_MAX_FLOW_ACTIONS); - if (!uflow_res) - return -ENOMEM; + if (!uflow_res) { + ret = -ENOMEM; + goto destroy_counter; + } len = uverbs_attr_get_uobjs_arr(attrs, MLX5_IB_ATTR_CREATE_FLOW_ARR_FLOW_ACTIONS, &arr_flow_actions); @@ -1996,7 +2008,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( flow_handler = raw_fs_rule_add(dev, fs_matcher, &flow_context, &flow_act, - counter_id, cmd_in, inlen, dest_id, dest_type); + counter, cmd_in, inlen, dest_id, dest_type); if (IS_ERR(flow_handler)) { ret = PTR_ERR(flow_handler); goto err_out; @@ -2007,6 +2019,9 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_CREATE_FLOW)( return 0; err_out: ib_uverbs_flow_resources_free(uflow_res); +destroy_counter: + if (counter) + mlx5_fc_local_destroy(counter); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 9aed29fa4900..d6e736c1fb24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -292,7 +292,7 @@ TRACE_EVENT(mlx5_fs_add_rule, 
if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter_id; + mlx5_fc_id(rule->dest_attr.counter); ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index e51b03d4c717..687bd95d2c3e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -194,7 +194,7 @@ static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; flow_act.flags = FLOW_ACT_NO_APPEND; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; @@ -223,7 +223,7 @@ static int rx_add_rule_drop_auth_trailer(struct mlx5e_ipsec_sa_entry *sa_entry, } sa_entry->ipsec_rule.trailer.fc = flow_counter; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.ipsec_syndrome, 2); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { @@ -275,7 +275,7 @@ static int rx_add_rule_drop_replay(struct mlx5e_ipsec_sa_entry *sa_entry, struct flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; flow_act.flags = FLOW_ACT_NO_APPEND; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; @@ -348,7 +348,7 @@ static int ipsec_rx_status_drop_all_create(struct mlx5e_ipsec *ipsec, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | 
MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(flow_counter); + dest.counter = flow_counter; if (rx == ipsec->rx_esw) spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK; rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -686,7 +686,7 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, rx->ft.status = ft; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); + dest[1].counter = rx->fc->cnt; err = mlx5_ipsec_rx_status_create(ipsec, rx, dest); if (err) goto err_add; @@ -873,7 +873,7 @@ static int ipsec_counter_rule_tx(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_ flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx->fc->cnt); + dest.counter = tx->fc->cnt; fte = mlx5_add_flow_rules(tx->ft.status, spec, &flow_act, &dest, 1); if (IS_ERR(fte)) { err = PTR_ERR(fte); @@ -1649,7 +1649,7 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[0].ft = rx->ft.status; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(counter); + dest[1].counter = counter; rule = mlx5_add_flow_rules(rx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1762,7 +1762,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) dest[0].ft = tx->ft.status; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(counter); + dest[1].counter = counter; rule = mlx5_add_flow_rules(tx->ft.sa, spec, &flow_act, dest, 2); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1835,7 +1835,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | 
MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dstn].counter_id = mlx5_fc_id(tx->fc->drop); + dest[dstn].counter = tx->fc->drop; dstn++; break; default: @@ -1913,7 +1913,7 @@ static int rx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) case XFRM_POLICY_BLOCK: flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dstn].counter_id = mlx5_fc_id(rx->fc->drop); + dest[dstn].counter = rx->fc->drop; dstn++; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 6b3b1afe8312..9ba99609999f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1282,7 +1282,7 @@ mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter_id = mlx5_fc_id(attr->counter); + dest[dest_ix].counter = attr->counter; dest_ix++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 6b4c9ffad95b..7dd1dc3f77c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -135,7 +135,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, if (drop_counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); + drop_ctr_dst.counter = drop_counter; dst = &drop_ctr_dst; dest_num++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index 093ed86a0acd..1c37098e09ea 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -260,7 +260,7 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter_id = mlx5_fc_id(counter); + drop_ctr_dst.counter = counter; dst = &drop_ctr_dst; dest_num++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index c5ea1d1d2b03..5f647358a05c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -570,7 +570,8 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) { @@ -628,7 +629,7 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dests[0].ft = bridge->egress_ft; dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dests[1].counter_id = counter_id; + dests[1].counter = counter; handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, dests, ARRAY_SIZE(dests)); @@ -639,17 +640,19 @@ mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge) { - return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + return 
mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter, bridge, bridge->br_offloads->esw); } static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_fc *counter, struct mlx5_esw_bridge *bridge) { struct mlx5_devcom_comp_dev *devcom = bridge->br_offloads->esw->devcom, *pos; @@ -671,7 +674,7 @@ mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, u16 esw_owner_vhca_id, goto out; } - handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter, bridge, peer_esw); out: @@ -1385,10 +1388,9 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_ow handle = peer ? mlx5_esw_bridge_ingress_flow_peer_create(vport_num, esw_owner_vhca_id, - addr, vlan, mlx5_fc_id(counter), - bridge) : + addr, vlan, counter, bridge) : mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, - mlx5_fc_id(counter), bridge); + counter, bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d,peer=%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d5b42b3a19fd..8636f0485800 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -724,7 +724,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter_id = mlx5_fc_id(attr->counter); + dest[i].counter = attr->counter; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
index 676005854dad..6bf0aade69d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -718,7 +718,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter_id); + mlx5_fc_id(dst->dest_attr.counter)); in_dests += dst_cnt_size; list_size++; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2eabfcc247c6..f781f8f169b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -658,6 +658,7 @@ static void del_sw_hw_rule(struct fs_node *node) BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + mlx5_fc_local_destroy(rule->dest_attr.counter); goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 62d0c689796b..7d56deaa4609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -43,6 +43,11 @@ #define MLX5_FC_POOL_MAX_THRESHOLD BIT(18) #define MLX5_FC_POOL_USED_BUFF_RATIO 10 +enum mlx5_fc_type { + MLX5_FC_TYPE_ACQUIRED = 0, + MLX5_FC_TYPE_LOCAL, +}; + struct mlx5_fc_cache { u64 packets; u64 bytes; @@ -52,6 +57,7 @@ struct mlx5_fc_cache { struct mlx5_fc { u32 id; bool aging; + enum mlx5_fc_type type; struct mlx5_fc_bulk *bulk; struct mlx5_fc_cache cache; /* last{packets,bytes} are used for calculating deltas since last reading. 
*/ @@ -186,6 +192,9 @@ static void mlx5_fc_release(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = dev->priv.fc_stats; + if (WARN_ON(counter->type == MLX5_FC_TYPE_LOCAL)) + return; + if (counter->bulk) mlx5_fc_pool_release_counter(&fc_stats->fc_pool, counter); else @@ -536,6 +545,50 @@ static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc return 0; } +/** + * mlx5_fc_local_create - Allocate mlx5_fc struct for a counter which + * was already acquired using its counter id and bulk data. + * + * @counter_id: counter acquired counter id + * @offset: counter offset from bulk base + * @bulk_size: counter's bulk size as was allocated + * + * Return: Pointer to mlx5_fc on success, ERR_PTR otherwise. + */ +struct mlx5_fc * +mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size) +{ + struct mlx5_fc_bulk *fc_bulk; + struct mlx5_fc *counter; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + fc_bulk = kzalloc(sizeof(*fc_bulk), GFP_KERNEL); + if (!fc_bulk) { + kfree(counter); + return ERR_PTR(-ENOMEM); + } + + counter->type = MLX5_FC_TYPE_LOCAL; + counter->id = counter_id; + fc_bulk->base_id = counter_id - offset; + fc_bulk->bulk_len = bulk_size; + counter->bulk = fc_bulk; + return counter; +} +EXPORT_SYMBOL(mlx5_fc_local_create); + +void mlx5_fc_local_destroy(struct mlx5_fc *counter) +{ + if (!counter || counter->type != MLX5_FC_TYPE_LOCAL) + return; + + kfree(counter->bulk); + kfree(counter); +} +EXPORT_SYMBOL(mlx5_fc_local_destroy); + /* Flow counters pool API */ static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c index 4a078113e292..762d55ba9e51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c @@ -497,7 +497,7 @@ 
static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) memset(&dest, 0, sizeof(struct mlx5_flow_destination)); memset(&flow_act, 0, sizeof(flow_act)); dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_miss_rule_counter); + dest.counter = tx_tables->check_miss_rule_counter; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(tx_tables->ft_check, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { @@ -519,7 +519,7 @@ static int macsec_fs_tx_create(struct mlx5_macsec_fs *macsec_fs) flow_act.flags = FLOW_ACT_NO_APPEND; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | MLX5_FLOW_CONTEXT_ACTION_COUNT; dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(tx_tables->check_rule_counter); + dest.counter = tx_tables->check_rule_counter; rule = mlx5_add_flow_rules(tx_tables->ft_check, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -1200,7 +1200,7 @@ static int macsec_fs_rx_create_check_decap_rule(struct mlx5_macsec_fs *macsec_fs flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_COUNT; roce_dest[dstn].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - roce_dest[dstn].counter_id = mlx5_fc_id(rx_tables->check_rule_counter); + roce_dest[dstn].counter = rx_tables->check_rule_counter; rule = mlx5_add_flow_rules(rx_tables->ft_check, spec, flow_act, roce_dest, dstn + 1); if (IS_ERR(rule)) { @@ -1592,7 +1592,7 @@ static int macsec_fs_rx_create(struct mlx5_macsec_fs *macsec_fs) memset(&flow_act, 0, sizeof(flow_act)); dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest.counter_id = mlx5_fc_id(rx_tables->check_miss_rule_counter); + dest.counter = rx_tables->check_miss_rule_counter; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; rule = mlx5_add_flow_rules(rx_tables->ft_check, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c index 4b349d4005e4..8007d3f523c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/fs_dr.c @@ -521,7 +521,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, goto free_actions; } - id = dst->dest_attr.counter_id; + id = mlx5_fc_id(dst->dest_attr.counter); tmp_action = mlx5dr_action_create_flow_counter(id); if (!tmp_action) { diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 5f581e71e201..36099047560d 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1952,7 +1952,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, goto out_free; #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) - dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter); + dests[1].counter = node->ucast_counter.counter; #endif node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS); if (IS_ERR(node->ucast_rule)) { @@ -1961,7 +1961,7 @@ static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac, } #if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG) - dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter); + dests[1].counter = node->mcast_counter.counter; #endif memset(dmac_c, 0, ETH_ALEN); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 438db888bde0..2a69d9d71276 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -163,7 +163,7 @@ struct mlx5_flow_destination { u32 tir_num; u32 ft_num; struct mlx5_flow_table *ft; - u32 counter_id; + struct mlx5_fc *counter; struct { u16 num; u16 vhca_id; @@ -299,6 +299,8 @@ int mlx5_modify_rule_destination(struct mlx5_flow_handle *handler, struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct 
mlx5_core_dev *dev, struct mlx5_fc *counter); +struct mlx5_fc *mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size); +void mlx5_fc_local_destroy(struct mlx5_fc *counter); u64 mlx5_fc_query_lastuse(struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); From 31d1356b8fdcdb7fe845874b598cce552a151c64 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Thu, 19 Dec 2024 19:58:34 +0200 Subject: [PATCH 0623/1386] net/mlx5: fs, add mlx5_fs_pool API Refactor fc_pool API to create generic fs_pool API, as HW steering has more flow steering elements which can take advantage of the same pool of bulks API. Change fs_counters code to use the fs_pool API. Note, removed __counted_by from struct mlx5_fc_bulk as bulk_len is now inner struct member. It will be added back once __counted_by can support inner struct members. Signed-off-by: Moshe Shemesh Reviewed-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../ethernet/mellanox/mlx5/core/fs_counters.c | 290 +++++------------- .../net/ethernet/mellanox/mlx5/core/fs_pool.c | 194 ++++++++++++ .../net/ethernet/mellanox/mlx5/core/fs_pool.h | 54 ++++ 4 files changed, 329 insertions(+), 211 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index be3d0876c521..79fe09de0a9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o 
lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o wc.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7d56deaa4609..d8e1c4ebd364 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -34,6 +34,7 @@ #include #include "mlx5_core.h" #include "fs_core.h" +#include "fs_pool.h" #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) @@ -65,17 +66,6 @@ struct mlx5_fc { u64 lastbytes; }; -struct mlx5_fc_pool { - struct mlx5_core_dev *dev; - struct mutex pool_lock; /* protects pool lists */ - struct list_head fully_used; - struct list_head partially_used; - struct list_head unused; - int available_fcs; - int used_fcs; - int threshold; -}; - struct mlx5_fc_stats { struct xarray counters; @@ -86,13 +76,13 @@ struct mlx5_fc_stats { int bulk_query_len; bool bulk_query_alloc_failed; unsigned long next_bulk_query_alloc; - struct mlx5_fc_pool fc_pool; + struct mlx5_fs_pool fc_pool; }; -static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev); -static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool); -static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool); -static void mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc); +static void mlx5_fc_pool_init(struct mlx5_fs_pool *fc_pool, struct mlx5_core_dev *dev); +static void mlx5_fc_pool_cleanup(struct mlx5_fs_pool *fc_pool); +static struct mlx5_fc *mlx5_fc_pool_acquire_counter(struct mlx5_fs_pool *fc_pool); +static void mlx5_fc_pool_release_counter(struct mlx5_fs_pool *fc_pool, struct mlx5_fc *fc); static int get_init_bulk_query_len(struct mlx5_core_dev *dev) { @@ -447,11 
+437,9 @@ void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, /* Flow counter bluks */ struct mlx5_fc_bulk { - struct list_head pool_list; + struct mlx5_fs_bulk fs_bulk; u32 base_id; - int bulk_len; - unsigned long *bitmask; - struct mlx5_fc fcs[] __counted_by(bulk_len); + struct mlx5_fc fcs[]; }; static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, @@ -461,16 +449,10 @@ static void mlx5_fc_init(struct mlx5_fc *counter, struct mlx5_fc_bulk *bulk, counter->id = id; } -static int mlx5_fc_bulk_get_free_fcs_amount(struct mlx5_fc_bulk *bulk) -{ - return bitmap_weight(bulk->bitmask, bulk->bulk_len); -} - -static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) +static struct mlx5_fs_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) { enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask; - struct mlx5_fc_bulk *bulk; - int err = -ENOMEM; + struct mlx5_fc_bulk *fc_bulk; int bulk_len; u32 base_id; int i; @@ -478,71 +460,97 @@ static struct mlx5_fc_bulk *mlx5_fc_bulk_create(struct mlx5_core_dev *dev) alloc_bitmask = MLX5_CAP_GEN(dev, flow_counter_bulk_alloc); bulk_len = alloc_bitmask > 0 ? 
MLX5_FC_BULK_NUM_FCS(alloc_bitmask) : 1; - bulk = kvzalloc(struct_size(bulk, fcs, bulk_len), GFP_KERNEL); - if (!bulk) - goto err_alloc_bulk; + fc_bulk = kvzalloc(struct_size(fc_bulk, fcs, bulk_len), GFP_KERNEL); + if (!fc_bulk) + return NULL; - bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), - GFP_KERNEL); - if (!bulk->bitmask) - goto err_alloc_bitmask; + if (mlx5_fs_bulk_init(dev, &fc_bulk->fs_bulk, bulk_len)) + goto fc_bulk_free; - err = mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id); - if (err) - goto err_mlx5_cmd_bulk_alloc; + if (mlx5_cmd_fc_bulk_alloc(dev, alloc_bitmask, &base_id)) + goto fs_bulk_cleanup; + fc_bulk->base_id = base_id; + for (i = 0; i < bulk_len; i++) + mlx5_fc_init(&fc_bulk->fcs[i], fc_bulk, base_id + i); - bulk->base_id = base_id; - bulk->bulk_len = bulk_len; - for (i = 0; i < bulk_len; i++) { - mlx5_fc_init(&bulk->fcs[i], bulk, base_id + i); - set_bit(i, bulk->bitmask); - } + return &fc_bulk->fs_bulk; - return bulk; - -err_mlx5_cmd_bulk_alloc: - kvfree(bulk->bitmask); -err_alloc_bitmask: - kvfree(bulk); -err_alloc_bulk: - return ERR_PTR(err); +fs_bulk_cleanup: + mlx5_fs_bulk_cleanup(&fc_bulk->fs_bulk); +fc_bulk_free: + kvfree(fc_bulk); + return NULL; } static int -mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fc_bulk *bulk) +mlx5_fc_bulk_destroy(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk) { - if (mlx5_fc_bulk_get_free_fcs_amount(bulk) < bulk->bulk_len) { + struct mlx5_fc_bulk *fc_bulk = container_of(fs_bulk, + struct mlx5_fc_bulk, + fs_bulk); + + if (mlx5_fs_bulk_get_free_amount(fs_bulk) < fs_bulk->bulk_len) { mlx5_core_err(dev, "Freeing bulk before all counters were released\n"); return -EBUSY; } - mlx5_cmd_fc_free(dev, bulk->base_id); - kvfree(bulk->bitmask); - kvfree(bulk); + mlx5_cmd_fc_free(dev, fc_bulk->base_id); + mlx5_fs_bulk_cleanup(fs_bulk); + kvfree(fc_bulk); return 0; } -static struct mlx5_fc *mlx5_fc_bulk_acquire_fc(struct mlx5_fc_bulk *bulk) +static void 
mlx5_fc_pool_update_threshold(struct mlx5_fs_pool *fc_pool) { - int free_fc_index = find_first_bit(bulk->bitmask, bulk->bulk_len); - - if (free_fc_index >= bulk->bulk_len) - return ERR_PTR(-ENOSPC); - - clear_bit(free_fc_index, bulk->bitmask); - return &bulk->fcs[free_fc_index]; + fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, + fc_pool->used_units / MLX5_FC_POOL_USED_BUFF_RATIO); } -static int mlx5_fc_bulk_release_fc(struct mlx5_fc_bulk *bulk, struct mlx5_fc *fc) +/* Flow counters pool API */ + +static const struct mlx5_fs_pool_ops mlx5_fc_pool_ops = { + .bulk_destroy = mlx5_fc_bulk_destroy, + .bulk_create = mlx5_fc_bulk_create, + .update_threshold = mlx5_fc_pool_update_threshold, +}; + +static void +mlx5_fc_pool_init(struct mlx5_fs_pool *fc_pool, struct mlx5_core_dev *dev) { - int fc_index = fc->id - bulk->base_id; + mlx5_fs_pool_init(fc_pool, dev, &mlx5_fc_pool_ops); +} - if (test_bit(fc_index, bulk->bitmask)) - return -EINVAL; +static void mlx5_fc_pool_cleanup(struct mlx5_fs_pool *fc_pool) +{ + mlx5_fs_pool_cleanup(fc_pool); +} - set_bit(fc_index, bulk->bitmask); - return 0; +static struct mlx5_fc * +mlx5_fc_pool_acquire_counter(struct mlx5_fs_pool *fc_pool) +{ + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_fc_bulk *fc_bulk; + int err; + + err = mlx5_fs_pool_acquire_index(fc_pool, &pool_index); + if (err) + return ERR_PTR(err); + fc_bulk = container_of(pool_index.fs_bulk, struct mlx5_fc_bulk, fs_bulk); + return &fc_bulk->fcs[pool_index.index]; +} + +static void +mlx5_fc_pool_release_counter(struct mlx5_fs_pool *fc_pool, struct mlx5_fc *fc) +{ + struct mlx5_fs_bulk *fs_bulk = &fc->bulk->fs_bulk; + struct mlx5_fs_pool_index pool_index = {}; + struct mlx5_core_dev *dev = fc_pool->dev; + + pool_index.fs_bulk = fs_bulk; + pool_index.index = fc->id - fc->bulk->base_id; + if (mlx5_fs_pool_release_index(fc_pool, &pool_index)) + mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); } /** @@ -573,7 +581,7 @@ 
mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size) counter->type = MLX5_FC_TYPE_LOCAL; counter->id = counter_id; fc_bulk->base_id = counter_id - offset; - fc_bulk->bulk_len = bulk_size; + fc_bulk->fs_bulk.bulk_len = bulk_size; counter->bulk = fc_bulk; return counter; } @@ -588,141 +596,3 @@ void mlx5_fc_local_destroy(struct mlx5_fc *counter) kfree(counter); } EXPORT_SYMBOL(mlx5_fc_local_destroy); - -/* Flow counters pool API */ - -static void mlx5_fc_pool_init(struct mlx5_fc_pool *fc_pool, struct mlx5_core_dev *dev) -{ - fc_pool->dev = dev; - mutex_init(&fc_pool->pool_lock); - INIT_LIST_HEAD(&fc_pool->fully_used); - INIT_LIST_HEAD(&fc_pool->partially_used); - INIT_LIST_HEAD(&fc_pool->unused); - fc_pool->available_fcs = 0; - fc_pool->used_fcs = 0; - fc_pool->threshold = 0; -} - -static void mlx5_fc_pool_cleanup(struct mlx5_fc_pool *fc_pool) -{ - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *bulk; - struct mlx5_fc_bulk *tmp; - - list_for_each_entry_safe(bulk, tmp, &fc_pool->fully_used, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); - list_for_each_entry_safe(bulk, tmp, &fc_pool->partially_used, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); - list_for_each_entry_safe(bulk, tmp, &fc_pool->unused, pool_list) - mlx5_fc_bulk_destroy(dev, bulk); -} - -static void mlx5_fc_pool_update_threshold(struct mlx5_fc_pool *fc_pool) -{ - fc_pool->threshold = min_t(int, MLX5_FC_POOL_MAX_THRESHOLD, - fc_pool->used_fcs / MLX5_FC_POOL_USED_BUFF_RATIO); -} - -static struct mlx5_fc_bulk * -mlx5_fc_pool_alloc_new_bulk(struct mlx5_fc_pool *fc_pool) -{ - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *new_bulk; - - new_bulk = mlx5_fc_bulk_create(dev); - if (!IS_ERR(new_bulk)) - fc_pool->available_fcs += new_bulk->bulk_len; - mlx5_fc_pool_update_threshold(fc_pool); - return new_bulk; -} - -static void -mlx5_fc_pool_free_bulk(struct mlx5_fc_pool *fc_pool, struct mlx5_fc_bulk *bulk) -{ - struct mlx5_core_dev *dev = fc_pool->dev; - - 
fc_pool->available_fcs -= bulk->bulk_len; - mlx5_fc_bulk_destroy(dev, bulk); - mlx5_fc_pool_update_threshold(fc_pool); -} - -static struct mlx5_fc * -mlx5_fc_pool_acquire_from_list(struct list_head *src_list, - struct list_head *next_list, - bool move_non_full_bulk) -{ - struct mlx5_fc_bulk *bulk; - struct mlx5_fc *fc; - - if (list_empty(src_list)) - return ERR_PTR(-ENODATA); - - bulk = list_first_entry(src_list, struct mlx5_fc_bulk, pool_list); - fc = mlx5_fc_bulk_acquire_fc(bulk); - if (move_non_full_bulk || mlx5_fc_bulk_get_free_fcs_amount(bulk) == 0) - list_move(&bulk->pool_list, next_list); - return fc; -} - -static struct mlx5_fc * -mlx5_fc_pool_acquire_counter(struct mlx5_fc_pool *fc_pool) -{ - struct mlx5_fc_bulk *new_bulk; - struct mlx5_fc *fc; - - mutex_lock(&fc_pool->pool_lock); - - fc = mlx5_fc_pool_acquire_from_list(&fc_pool->partially_used, - &fc_pool->fully_used, false); - if (IS_ERR(fc)) - fc = mlx5_fc_pool_acquire_from_list(&fc_pool->unused, - &fc_pool->partially_used, - true); - if (IS_ERR(fc)) { - new_bulk = mlx5_fc_pool_alloc_new_bulk(fc_pool); - if (IS_ERR(new_bulk)) { - fc = ERR_CAST(new_bulk); - goto out; - } - fc = mlx5_fc_bulk_acquire_fc(new_bulk); - list_add(&new_bulk->pool_list, &fc_pool->partially_used); - } - fc_pool->available_fcs--; - fc_pool->used_fcs++; - -out: - mutex_unlock(&fc_pool->pool_lock); - return fc; -} - -static void -mlx5_fc_pool_release_counter(struct mlx5_fc_pool *fc_pool, struct mlx5_fc *fc) -{ - struct mlx5_core_dev *dev = fc_pool->dev; - struct mlx5_fc_bulk *bulk = fc->bulk; - int bulk_free_fcs_amount; - - mutex_lock(&fc_pool->pool_lock); - - if (mlx5_fc_bulk_release_fc(bulk, fc)) { - mlx5_core_warn(dev, "Attempted to release a counter which is not acquired\n"); - goto unlock; - } - - fc_pool->available_fcs++; - fc_pool->used_fcs--; - - bulk_free_fcs_amount = mlx5_fc_bulk_get_free_fcs_amount(bulk); - if (bulk_free_fcs_amount == 1) - list_move_tail(&bulk->pool_list, &fc_pool->partially_used); - if 
(bulk_free_fcs_amount == bulk->bulk_len) { - list_del(&bulk->pool_list); - if (fc_pool->available_fcs > fc_pool->threshold) - mlx5_fc_pool_free_bulk(fc_pool, bulk); - else - list_add(&bulk->pool_list, &fc_pool->unused); - } - -unlock: - mutex_unlock(&fc_pool->pool_lock); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c new file mode 100644 index 000000000000..b891d7b9e3e0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#include +#include "fs_pool.h" + +int mlx5_fs_bulk_init(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk, + int bulk_len) +{ + int i; + + fs_bulk->bitmask = kvcalloc(BITS_TO_LONGS(bulk_len), sizeof(unsigned long), + GFP_KERNEL); + if (!fs_bulk->bitmask) + return -ENOMEM; + + fs_bulk->bulk_len = bulk_len; + for (i = 0; i < bulk_len; i++) + set_bit(i, fs_bulk->bitmask); + + return 0; +} + +void mlx5_fs_bulk_cleanup(struct mlx5_fs_bulk *fs_bulk) +{ + kvfree(fs_bulk->bitmask); +} + +int mlx5_fs_bulk_get_free_amount(struct mlx5_fs_bulk *bulk) +{ + return bitmap_weight(bulk->bitmask, bulk->bulk_len); +} + +static int mlx5_fs_bulk_acquire_index(struct mlx5_fs_bulk *fs_bulk, + struct mlx5_fs_pool_index *pool_index) +{ + int free_index = find_first_bit(fs_bulk->bitmask, fs_bulk->bulk_len); + + WARN_ON_ONCE(!pool_index || !fs_bulk); + if (free_index >= fs_bulk->bulk_len) + return -ENOSPC; + + clear_bit(free_index, fs_bulk->bitmask); + pool_index->fs_bulk = fs_bulk; + pool_index->index = free_index; + return 0; +} + +static int mlx5_fs_bulk_release_index(struct mlx5_fs_bulk *fs_bulk, int index) +{ + if (test_bit(index, fs_bulk->bitmask)) + return -EINVAL; + + set_bit(index, fs_bulk->bitmask); + return 0; +} + +void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev, + const struct mlx5_fs_pool_ops *ops) +{ + 
WARN_ON_ONCE(!ops || !ops->bulk_destroy || !ops->bulk_create || + !ops->update_threshold); + pool->dev = dev; + mutex_init(&pool->pool_lock); + INIT_LIST_HEAD(&pool->fully_used); + INIT_LIST_HEAD(&pool->partially_used); + INIT_LIST_HEAD(&pool->unused); + pool->available_units = 0; + pool->used_units = 0; + pool->threshold = 0; + pool->ops = ops; +} + +void mlx5_fs_pool_cleanup(struct mlx5_fs_pool *pool) +{ + struct mlx5_core_dev *dev = pool->dev; + struct mlx5_fs_bulk *bulk; + struct mlx5_fs_bulk *tmp; + + list_for_each_entry_safe(bulk, tmp, &pool->fully_used, pool_list) + pool->ops->bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &pool->partially_used, pool_list) + pool->ops->bulk_destroy(dev, bulk); + list_for_each_entry_safe(bulk, tmp, &pool->unused, pool_list) + pool->ops->bulk_destroy(dev, bulk); +} + +static struct mlx5_fs_bulk * +mlx5_fs_pool_alloc_new_bulk(struct mlx5_fs_pool *fs_pool) +{ + struct mlx5_core_dev *dev = fs_pool->dev; + struct mlx5_fs_bulk *new_bulk; + + new_bulk = fs_pool->ops->bulk_create(dev); + if (new_bulk) + fs_pool->available_units += new_bulk->bulk_len; + fs_pool->ops->update_threshold(fs_pool); + return new_bulk; +} + +static void +mlx5_fs_pool_free_bulk(struct mlx5_fs_pool *fs_pool, struct mlx5_fs_bulk *bulk) +{ + struct mlx5_core_dev *dev = fs_pool->dev; + + fs_pool->available_units -= bulk->bulk_len; + fs_pool->ops->bulk_destroy(dev, bulk); + fs_pool->ops->update_threshold(fs_pool); +} + +static int +mlx5_fs_pool_acquire_from_list(struct list_head *src_list, + struct list_head *next_list, + bool move_non_full_bulk, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *fs_bulk; + int err; + + if (list_empty(src_list)) + return -ENODATA; + + fs_bulk = list_first_entry(src_list, struct mlx5_fs_bulk, pool_list); + err = mlx5_fs_bulk_acquire_index(fs_bulk, pool_index); + if (move_non_full_bulk || mlx5_fs_bulk_get_free_amount(fs_bulk) == 0) + list_move(&fs_bulk->pool_list, next_list); + return err; +} + +int 
mlx5_fs_pool_acquire_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *new_bulk; + int err; + + mutex_lock(&fs_pool->pool_lock); + + err = mlx5_fs_pool_acquire_from_list(&fs_pool->partially_used, + &fs_pool->fully_used, false, + pool_index); + if (err) + err = mlx5_fs_pool_acquire_from_list(&fs_pool->unused, + &fs_pool->partially_used, + true, pool_index); + if (err) { + new_bulk = mlx5_fs_pool_alloc_new_bulk(fs_pool); + if (!new_bulk) { + err = -ENOENT; + goto out; + } + err = mlx5_fs_bulk_acquire_index(new_bulk, pool_index); + WARN_ON_ONCE(err); + list_add(&new_bulk->pool_list, &fs_pool->partially_used); + } + fs_pool->available_units--; + fs_pool->used_units++; + +out: + mutex_unlock(&fs_pool->pool_lock); + return err; +} + +int mlx5_fs_pool_release_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index) +{ + struct mlx5_fs_bulk *bulk = pool_index->fs_bulk; + int bulk_free_amount; + int err; + + mutex_lock(&fs_pool->pool_lock); + + /* TBD would rather return void if there was no warn here in original code */ + err = mlx5_fs_bulk_release_index(bulk, pool_index->index); + if (err) + goto unlock; + + fs_pool->available_units++; + fs_pool->used_units--; + + bulk_free_amount = mlx5_fs_bulk_get_free_amount(bulk); + if (bulk_free_amount == 1) + list_move_tail(&bulk->pool_list, &fs_pool->partially_used); + if (bulk_free_amount == bulk->bulk_len) { + list_del(&bulk->pool_list); + if (fs_pool->available_units > fs_pool->threshold) + mlx5_fs_pool_free_bulk(fs_pool, bulk); + else + list_add(&bulk->pool_list, &fs_pool->unused); + } + +unlock: + mutex_unlock(&fs_pool->pool_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h new file mode 100644 index 000000000000..3b149863260c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_pool.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ 
+/* Copyright (c) 2024 NVIDIA Corporation & Affiliates */ + +#ifndef __MLX5_FS_POOL_H__ +#define __MLX5_FS_POOL_H__ + +#include + +struct mlx5_fs_bulk { + struct list_head pool_list; + int bulk_len; + unsigned long *bitmask; +}; + +struct mlx5_fs_pool_index { + struct mlx5_fs_bulk *fs_bulk; + int index; +}; + +struct mlx5_fs_pool; + +struct mlx5_fs_pool_ops { + int (*bulk_destroy)(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *bulk); + struct mlx5_fs_bulk * (*bulk_create)(struct mlx5_core_dev *dev); + void (*update_threshold)(struct mlx5_fs_pool *pool); +}; + +struct mlx5_fs_pool { + struct mlx5_core_dev *dev; + void *pool_ctx; + const struct mlx5_fs_pool_ops *ops; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_units; + int used_units; + int threshold; +}; + +int mlx5_fs_bulk_init(struct mlx5_core_dev *dev, struct mlx5_fs_bulk *fs_bulk, + int bulk_len); +void mlx5_fs_bulk_cleanup(struct mlx5_fs_bulk *fs_bulk); +int mlx5_fs_bulk_get_free_amount(struct mlx5_fs_bulk *bulk); + +void mlx5_fs_pool_init(struct mlx5_fs_pool *pool, struct mlx5_core_dev *dev, + const struct mlx5_fs_pool_ops *ops); +void mlx5_fs_pool_cleanup(struct mlx5_fs_pool *pool); +int mlx5_fs_pool_acquire_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index); +int mlx5_fs_pool_release_index(struct mlx5_fs_pool *fs_pool, + struct mlx5_fs_pool_index *pool_index); + +#endif /* __MLX5_FS_POOL_H__ */ From 586face88106481e8c527675a837da8a3ab6677d Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 19 Dec 2024 19:58:35 +0200 Subject: [PATCH 0624/1386] net/mlx5: fs, retry insertion to hash table on EBUSY When inserting into an rhashtable faster than it can grow, an -EBUSY error may be encountered. Modify the insertion logic to retry on -EBUSY until either a successful insertion or a genuine error is returned. 
Signed-off-by: Mark Bloch Signed-off-by: Tariq Toukan Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20241219175841.1094544-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f781f8f169b9..ae1a5705b26d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -821,11 +821,17 @@ static int insert_fte(struct mlx5_flow_group *fg, struct fs_fte *fte) return index; fte->index = index + fg->start_index; +retry_insert: ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); - if (ret) + if (ret) { + if (ret == -EBUSY) { + cond_resched(); + goto retry_insert; + } goto err_ida_remove; + } tree_add_node(&fte->node, &fg->node); list_add_tail(&fte->node.list, &fg->node.children); From 9a0155a709fadaab468a24abca7996c5fdf0507b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 19 Dec 2024 19:58:36 +0200 Subject: [PATCH 0625/1386] net/mlx5: HWS, no need to expose mlx5hws_send_queues_open/close No need to have mlx5hws_send_queue_open/close in the header. Make them static and remove them from the header.
Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 8 ++++---- .../net/ethernet/mellanox/mlx5/core/steering/hws/send.h | 6 ------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 883b4ed30892..b68b0c368771 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -896,15 +896,15 @@ close_cq: return err; } -void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) +static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) { hws_send_ring_close(queue); kfree(queue->completed.entries); } -int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, - struct mlx5hws_send_engine *queue, - u16 queue_size) +static int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, + struct mlx5hws_send_engine *queue, + u16 queue_size) { int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h index b50825d6dc53..f833092235c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.h @@ -189,12 +189,6 @@ void mlx5hws_send_abort_new_dep_wqe(struct mlx5hws_send_engine *queue); void mlx5hws_send_all_dep_wqe(struct mlx5hws_send_engine *queue); -void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue); - -int mlx5hws_send_queue_open(struct mlx5hws_context *ctx, - struct mlx5hws_send_engine *queue, - u16 queue_size); - void mlx5hws_send_queues_close(struct mlx5hws_context *ctx); int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, From 
429776b6019bbdcf04dcd49706fe7de6a280078b Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 19 Dec 2024 19:58:37 +0200 Subject: [PATCH 0626/1386] net/mlx5: HWS, do not initialize native API queues HWS has two types of APIs: - Native: fastest and slimmest, async API. The user of this API is required to manage rule handles memory, and to poll for completion for each rule. - BWC: backward compatible API, similar semantics to SWS API. This layer is implemented above native API and it does all the work for the user, so that it is easy to switch between SWS and HWS. Right now the existing users of HWS require only BWC API. Therefore, in order to not waste resources, this patch disables send queues allocation for native API. If in the future support for faster HWS rule insertion will be required (such as for Connection Tracking), native queues can be enabled. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 6 ++++-- .../mellanox/mlx5/core/steering/hws/context.c | 6 ++++-- .../mellanox/mlx5/core/steering/hws/context.h | 6 ++++++ .../mellanox/mlx5/core/steering/hws/mlx5hws.h | 1 - .../ethernet/mellanox/mlx5/core/steering/hws/send.c | 13 +++++++++++-- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 0b745968e21e..3d4965213b01 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -60,9 +60,11 @@ void mlx5hws_bwc_rule_fill_attr(struct mlx5hws_bwc_matcher *bwc_matcher, static inline u16 mlx5hws_bwc_queues(struct mlx5hws_context *ctx) { /* Besides the control queue, half of the queues are - * reguler HWS queues, and the other 
half are BWC queues. + * regular HWS queues, and the other half are BWC queues. */ - return (ctx->queues - 1) / 2; + if (mlx5hws_context_bwc_supported(ctx)) + return (ctx->queues - 1) / 2; + return 0; } static inline u16 mlx5hws_bwc_get_queue_id(struct mlx5hws_context *ctx, u16 idx) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c index fd48b05e91e0..4a8928f33bb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c @@ -161,8 +161,10 @@ static int hws_context_init_hws(struct mlx5hws_context *ctx, if (ret) goto uninit_pd; - if (attr->bwc) - ctx->flags |= MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; + /* Context has support for backward compatible API, + * and does not have support for native HWS API. + */ + ctx->flags |= MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; ret = mlx5hws_send_queues_open(ctx, attr->queues, attr->queue_size); if (ret) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h index 47f5cc8de73f..1c9cc4fba083 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h @@ -8,6 +8,7 @@ enum mlx5hws_context_flags { MLX5HWS_CONTEXT_FLAG_HWS_SUPPORT = 1 << 0, MLX5HWS_CONTEXT_FLAG_PRIVATE_PD = 1 << 1, MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT = 1 << 2, + MLX5HWS_CONTEXT_FLAG_NATIVE_SUPPORT = 1 << 3, }; enum mlx5hws_context_shared_stc_type { @@ -58,6 +59,11 @@ static inline bool mlx5hws_context_bwc_supported(struct mlx5hws_context *ctx) return ctx->flags & MLX5HWS_CONTEXT_FLAG_BWC_SUPPORT; } +static inline bool mlx5hws_context_native_supported(struct mlx5hws_context *ctx) +{ + return ctx->flags & MLX5HWS_CONTEXT_FLAG_NATIVE_SUPPORT; +} + bool mlx5hws_context_cap_dynamic_reparse(struct mlx5hws_context *ctx); u8 
mlx5hws_context_get_reparse_mode(struct mlx5hws_context *ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index f39d636ff39a..5121951f2778 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -70,7 +70,6 @@ enum mlx5hws_send_queue_actions { struct mlx5hws_context_attr { u16 queues; u16 queue_size; - bool bwc; /* add support for backward compatible API*/ }; struct mlx5hws_table_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index b68b0c368771..20fe126ffd22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -898,6 +898,9 @@ close_cq: static void mlx5hws_send_queue_close(struct mlx5hws_send_engine *queue) { + if (!queue->num_entries) + return; /* this queue wasn't initialized */ + hws_send_ring_close(queue); kfree(queue->completed.entries); } @@ -1005,7 +1008,7 @@ int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, u16 queue_size) { int err = 0; - u32 i; + int i = 0; /* Open one extra queue for control path */ ctx->queues = queues + 1; @@ -1021,7 +1024,13 @@ int mlx5hws_send_queues_open(struct mlx5hws_context *ctx, goto free_bwc_locks; } - for (i = 0; i < ctx->queues; i++) { + /* If native API isn't supported, skip the unused native queues: + * initialize BWC queues and control queue only. 
+ */ + if (!mlx5hws_context_native_supported(ctx)) + i = mlx5hws_bwc_get_queue_id(ctx, 0); + + for (; i < ctx->queues; i++) { err = mlx5hws_send_queue_open(ctx, &ctx->send_queue[i], queue_size); if (err) goto close_send_queues; From aa90a30804a563763eb78f00f56f759b72b91cb0 Mon Sep 17 00:00:00 2001 From: Itamar Gozlan Date: Thu, 19 Dec 2024 19:58:38 +0200 Subject: [PATCH 0627/1386] net/mlx5: DR, expand SWS STE callbacks and consolidate common structs Expand SWS STE callbacks to support ConnectX-8 hardware. Move common enums and structures to a shared header file. Signed-off-by: Itamar Gozlan Signed-off-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/sws/dr_ste.c | 4 +- .../mellanox/mlx5/core/steering/sws/dr_ste.h | 18 +- .../mlx5/core/steering/sws/dr_ste_v0.c | 6 +- .../mlx5/core/steering/sws/dr_ste_v1.c | 207 ++++-------------- .../mlx5/core/steering/sws/dr_ste_v1.h | 147 ++++++++++++- .../mlx5/core/steering/sws/dr_ste_v2.c | 169 +------------- .../mlx5/core/steering/sws/dr_ste_v2.h | 168 ++++++++++++++ 7 files changed, 377 insertions(+), 342 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c index e94fbb015efa..01ba8eae2983 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c @@ -555,7 +555,7 @@ void mlx5dr_ste_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_tx(dmn, action_type_set, ste_ctx->actions_caps, + ste_ctx->set_actions_tx(ste_ctx, dmn, action_type_set, ste_ctx->actions_caps, hw_ste_arr, attr, added_stes); } @@ -566,7 +566,7 @@ void mlx5dr_ste_set_actions_rx(struct 
mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes) { - ste_ctx->set_actions_rx(dmn, action_type_set, ste_ctx->actions_caps, + ste_ctx->set_actions_rx(ste_ctx, dmn, action_type_set, ste_ctx->actions_caps, hw_ste_arr, attr, added_stes); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index 54a6619c3ecb..b6ec8d30d990 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -160,13 +160,15 @@ struct mlx5dr_ste_ctx { /* Actions */ u32 actions_caps; - void (*set_actions_rx)(struct mlx5dr_domain *dmn, + void (*set_actions_rx)(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *hw_ste_arr, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); - void (*set_actions_tx)(struct mlx5dr_domain *dmn, + void (*set_actions_tx)(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *hw_ste_arr, @@ -197,7 +199,17 @@ struct mlx5dr_ste_ctx { u16 *used_hw_action_num); int (*alloc_modify_hdr_chunk)(struct mlx5dr_action *action); void (*dealloc_modify_hdr_chunk)(struct mlx5dr_action *action); - + /* Actions bit set */ + void (*set_encap)(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size); + void (*set_push_vlan)(u8 *ste, u8 *d_action, + u32 vlan_hdr); + void (*set_pop_vlan)(u8 *hw_ste_p, u8 *s_action, + u8 vlans_num); + void (*set_rx_decap)(u8 *hw_ste_p, u8 *s_action); + void (*set_encap_l3)(u8 *hw_ste_p, u8 *frst_s_action, + u8 *scnd_d_action, u32 reformat_id, + int size); /* Send */ void (*prepare_for_postsend)(u8 *hw_ste_p, u32 ste_size); }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c index e9f6c7ed7a7b..42536bee55e2 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v0.c @@ -406,7 +406,8 @@ static void dr_ste_v0_arr_init_next(u8 **last_ste, } static void -dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, +dr_ste_v0_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -476,7 +477,8 @@ dr_ste_v0_set_actions_tx(struct mlx5dr_domain *dmn, } static void -dr_ste_v0_set_actions_rx(struct mlx5dr_domain *dmn, +dr_ste_v0_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c index 1d49704b9542..7f83d77c43ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.c @@ -5,136 +5,6 @@ #include "mlx5_ifc_dr_ste_v1.h" #include "dr_ste_v1.h" -#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ - ((inner) ? 
DR_STE_V1_LU_TYPE_##lookup_type##_I : \ - DR_STE_V1_LU_TYPE_##lookup_type##_O) - -enum dr_ste_v1_entry_format { - DR_STE_V1_TYPE_BWC_BYTE = 0x0, - DR_STE_V1_TYPE_BWC_DW = 0x1, - DR_STE_V1_TYPE_MATCH = 0x2, - DR_STE_V1_TYPE_MATCH_RANGES = 0x7, -}; - -/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ -enum { - DR_STE_V1_LU_TYPE_NOP = 0x0000, - DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, - DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, - DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, - DR_STE_V1_LU_TYPE_IBL4 = 0x0103, - DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, - DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, - DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, - DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, - DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, - DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, - DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, - DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, - DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, - DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, - DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, - DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, - DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, - DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, - DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, - DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, - DR_STE_V1_LU_TYPE_GRE = 0x010d, - DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, - DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, - DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, - DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, - DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, - DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, - DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, - DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, - DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, - DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, - DR_STE_V1_LU_TYPE_INVALID = 0x00ff, - DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, -}; - -enum dr_ste_v1_header_anchors { - 
DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, - DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, - DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, - DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, - DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, -}; - -enum dr_ste_v1_action_size { - DR_STE_ACTION_SINGLE_SZ = 4, - DR_STE_ACTION_DOUBLE_SZ = 8, - DR_STE_ACTION_TRIPLE_SZ = 12, -}; - -enum dr_ste_v1_action_insert_ptr_attr { - DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. push vlan) */ - DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ - DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ -}; - -enum dr_ste_v1_action_id { - DR_STE_V1_ACTION_ID_NOP = 0x00, - DR_STE_V1_ACTION_ID_COPY = 0x05, - DR_STE_V1_ACTION_ID_SET = 0x06, - DR_STE_V1_ACTION_ID_ADD = 0x07, - DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, - DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, - DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, - DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, - DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, - DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, - DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, - DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, - DR_STE_V1_ACTION_ID_ASO = 0x12, - DR_STE_V1_ACTION_ID_TRAILER = 0x13, - DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, - DR_STE_V1_ACTION_ID_MAX = 0x21, - /* use for special cases */ - DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, -}; - -enum { - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, - DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, - DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, - DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, - DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, - DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, - DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, - DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, - DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, - 
DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, - DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, - DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, - DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, - DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, - DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, - DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, - DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, - DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, -}; - -enum dr_ste_v1_aso_ctx_type { - DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, -}; - static const struct mlx5dr_ste_action_modify_field dr_ste_v1_action_modify_field_arr[] = { [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, @@ -379,13 +249,12 @@ static void dr_ste_v1_set_counter_id(u8 *hw_ste_p, u32 ctr_id) MLX5_SET(ste_match_bwc_v1, hw_ste_p, counter_id, ctr_id); } -static void dr_ste_v1_set_reparse(u8 *hw_ste_p) +void dr_ste_v1_set_reparse(u8 *hw_ste_p) { MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); } -static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, int size) +void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@ -432,8 +301,7 @@ static void dr_ste_v1_set_remove_hdr(u8 *hw_ste_p, u8 *s_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, - u32 vlan_hdr) +void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr) { 
MLX5_SET(ste_double_action_insert_with_inline_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); @@ -446,7 +314,7 @@ static void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) +void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) { MLX5_SET(ste_single_action_remove_header_size_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); @@ -459,11 +327,8 @@ static void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num) dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, - u8 *frst_s_action, - u8 *scnd_d_action, - u32 reformat_id, - int size) +void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, + u32 reformat_id, int size) { /* Remove L2 headers */ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, @@ -483,7 +348,7 @@ static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, dr_ste_v1_set_reparse(hw_ste_p); } -static void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action) { MLX5_SET(ste_single_action_remove_header_v1, s_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); @@ -620,7 +485,8 @@ static void dr_ste_v1_arr_init_next_match_range(u8 **last_ste, dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH_RANGES); } -void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, +void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -640,7 +506,7 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + ste_ctx->set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; @@ -677,8 +543,8 @@ void 
dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_push_vlan(last_ste, action, - attr->vlans.headers[i]); + ste_ctx->set_push_vlan(last_ste, action, + attr->vlans.headers[i]); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } @@ -691,9 +557,9 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_encap(last_ste, action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { @@ -706,10 +572,10 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, } d_action = action + DR_STE_ACTION_SINGLE_SZ; - dr_ste_v1_set_encap_l3(last_ste, - action, d_action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; action += DR_STE_ACTION_TRIPLE_SZ; } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { @@ -776,7 +642,8 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } -void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, +void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, + struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, u8 *last_ste, @@ -799,7 +666,7 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, allow_modify_hdr = false; allow_ctr = false; } else if (action_type_set[DR_ACTION_TYP_TNL_L2_TO_L2]) { - dr_ste_v1_set_rx_decap(last_ste, action); + ste_ctx->set_rx_decap(last_ste, action); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; allow_modify_hdr = false; @@ -827,7 +694,7 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain 
*dmn, action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_pop_vlan(last_ste, action, attr->vlans.count); + ste_ctx->set_pop_vlan(last_ste, action, attr->vlans.count); action_sz -= DR_STE_ACTION_SINGLE_SZ; action += DR_STE_ACTION_SINGLE_SZ; allow_ctr = false; @@ -868,8 +735,8 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_push_vlan(last_ste, action, - attr->vlans.headers[i]); + ste_ctx->set_push_vlan(last_ste, action, + attr->vlans.headers[i]); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } @@ -895,9 +762,9 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); action_sz = DR_STE_ACTION_TRIPLE_SZ; } - dr_ste_v1_set_encap(last_ste, action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; allow_modify_hdr = false; @@ -912,10 +779,10 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, d_action = action + DR_STE_ACTION_SINGLE_SZ; - dr_ste_v1_set_encap_l3(last_ste, - action, d_action, - attr->reformat.id, - attr->reformat.size); + ste_ctx->set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; allow_modify_hdr = false; } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { @@ -1027,9 +894,6 @@ void dr_ste_v1_set_action_copy(u8 *d_action, MLX5_SET(ste_double_action_copy_v1, d_action, source_right_shifter, src_shifter); } -#define DR_STE_DECAP_L3_ACTION_NUM 8 -#define DR_STE_L2_HDR_MAX_SZ 20 - int dr_ste_v1_set_action_decap_l3_list(void *data, u32 data_sz, u8 *hw_action, @@ -2330,7 +2194,12 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, .alloc_modify_hdr_chunk = 
&dr_ste_v1_alloc_modify_hdr_ptrn_arg, .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, - + /* Actions bit set */ + .set_encap = &dr_ste_v1_set_encap, + .set_push_vlan = &dr_ste_v1_set_push_vlan, + .set_pop_vlan = &dr_ste_v1_set_pop_vlan, + .set_rx_decap = &dr_ste_v1_set_rx_decap, + .set_encap_l3 = &dr_ste_v1_set_encap_l3, /* Send */ .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h index e2fc69867088..a8d9e308d339 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v1.h @@ -7,6 +7,138 @@ #include "dr_types.h" #include "dr_ste.h" +#define DR_STE_DECAP_L3_ACTION_NUM 8 +#define DR_STE_L2_HDR_MAX_SZ 20 +#define DR_STE_CALC_DFNR_TYPE(lookup_type, inner) \ + ((inner) ? DR_STE_V1_LU_TYPE_##lookup_type##_I : \ + DR_STE_V1_LU_TYPE_##lookup_type##_O) + +enum dr_ste_v1_entry_format { + DR_STE_V1_TYPE_BWC_BYTE = 0x0, + DR_STE_V1_TYPE_BWC_DW = 0x1, + DR_STE_V1_TYPE_MATCH = 0x2, + DR_STE_V1_TYPE_MATCH_RANGES = 0x7, +}; + +/* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ +enum { + DR_STE_V1_LU_TYPE_NOP = 0x0000, + DR_STE_V1_LU_TYPE_ETHL2_TNL = 0x0002, + DR_STE_V1_LU_TYPE_IBL3_EXT = 0x0102, + DR_STE_V1_LU_TYPE_ETHL2_O = 0x0003, + DR_STE_V1_LU_TYPE_IBL4 = 0x0103, + DR_STE_V1_LU_TYPE_ETHL2_I = 0x0004, + DR_STE_V1_LU_TYPE_SRC_QP_GVMI = 0x0104, + DR_STE_V1_LU_TYPE_ETHL2_SRC_O = 0x0005, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_O = 0x0105, + DR_STE_V1_LU_TYPE_ETHL2_SRC_I = 0x0006, + DR_STE_V1_LU_TYPE_ETHL2_HEADERS_I = 0x0106, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_O = 0x0007, + DR_STE_V1_LU_TYPE_IPV6_DES_O = 0x0107, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_5_TUPLE_I = 0x0008, + DR_STE_V1_LU_TYPE_IPV6_DES_I = 0x0108, + DR_STE_V1_LU_TYPE_ETHL4_O = 0x0009, + DR_STE_V1_LU_TYPE_IPV6_SRC_O = 0x0109, + 
DR_STE_V1_LU_TYPE_ETHL4_I = 0x000a, + DR_STE_V1_LU_TYPE_IPV6_SRC_I = 0x010a, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_O = 0x000b, + DR_STE_V1_LU_TYPE_MPLS_O = 0x010b, + DR_STE_V1_LU_TYPE_ETHL2_SRC_DST_I = 0x000c, + DR_STE_V1_LU_TYPE_MPLS_I = 0x010c, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_O = 0x000d, + DR_STE_V1_LU_TYPE_GRE = 0x010d, + DR_STE_V1_LU_TYPE_FLEX_PARSER_TNL_HEADER = 0x000e, + DR_STE_V1_LU_TYPE_GENERAL_PURPOSE = 0x010e, + DR_STE_V1_LU_TYPE_ETHL3_IPV4_MISC_I = 0x000f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_0 = 0x010f, + DR_STE_V1_LU_TYPE_STEERING_REGISTERS_1 = 0x0110, + DR_STE_V1_LU_TYPE_FLEX_PARSER_OK = 0x0011, + DR_STE_V1_LU_TYPE_FLEX_PARSER_0 = 0x0111, + DR_STE_V1_LU_TYPE_FLEX_PARSER_1 = 0x0112, + DR_STE_V1_LU_TYPE_ETHL4_MISC_O = 0x0113, + DR_STE_V1_LU_TYPE_ETHL4_MISC_I = 0x0114, + DR_STE_V1_LU_TYPE_INVALID = 0x00ff, + DR_STE_V1_LU_TYPE_DONT_CARE = MLX5DR_STE_LU_TYPE_DONT_CARE, +}; + +enum dr_ste_v1_header_anchors { + DR_STE_HEADER_ANCHOR_START_OUTER = 0x00, + DR_STE_HEADER_ANCHOR_1ST_VLAN = 0x02, + DR_STE_HEADER_ANCHOR_IPV6_IPV4 = 0x07, + DR_STE_HEADER_ANCHOR_INNER_MAC = 0x13, + DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4 = 0x19, +}; + +enum dr_ste_v1_action_size { + DR_STE_ACTION_SINGLE_SZ = 4, + DR_STE_ACTION_DOUBLE_SZ = 8, + DR_STE_ACTION_TRIPLE_SZ = 12, +}; + +enum dr_ste_v1_action_insert_ptr_attr { + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE = 0, /* Regular push header (e.g. 
push vlan) */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP = 1, /* Encapsulation / Tunneling */ + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ESP = 2, /* IPsec */ +}; + +enum dr_ste_v1_action_id { + DR_STE_V1_ACTION_ID_NOP = 0x00, + DR_STE_V1_ACTION_ID_COPY = 0x05, + DR_STE_V1_ACTION_ID_SET = 0x06, + DR_STE_V1_ACTION_ID_ADD = 0x07, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE = 0x08, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER = 0x09, + DR_STE_V1_ACTION_ID_INSERT_INLINE = 0x0a, + DR_STE_V1_ACTION_ID_INSERT_POINTER = 0x0b, + DR_STE_V1_ACTION_ID_FLOW_TAG = 0x0c, + DR_STE_V1_ACTION_ID_QUEUE_ID_SEL = 0x0d, + DR_STE_V1_ACTION_ID_ACCELERATED_LIST = 0x0e, + DR_STE_V1_ACTION_ID_MODIFY_LIST = 0x0f, + DR_STE_V1_ACTION_ID_ASO = 0x12, + DR_STE_V1_ACTION_ID_TRAILER = 0x13, + DR_STE_V1_ACTION_ID_COUNTER_ID = 0x14, + DR_STE_V1_ACTION_ID_MAX = 0x21, + /* use for special cases */ + DR_STE_V1_ACTION_ID_SPECIAL_ENCAP_L3 = 0x22, +}; + +enum { + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V1_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V1_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V1_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V1_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + 
DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_0 = 0x8c, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2_1 = 0x8d, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_0 = 0x8e, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_1_1 = 0x8f, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_0 = 0x90, + DR_STE_V1_ACTION_MDFY_FLD_REGISTER_0_1 = 0x91, +}; + +enum dr_ste_v1_aso_ctx_type { + DR_STE_V1_ASO_CTX_TYPE_POLICERS = 0x2, +}; + bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p); void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); @@ -17,11 +149,18 @@ u16 dr_ste_v1_get_next_lu_type(u8 *hw_ste_p); void dr_ste_v1_set_hit_addr(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); void dr_ste_v1_init(u8 *hw_ste_p, u16 lu_type, bool is_rx, u16 gvmi); void dr_ste_v1_prepare_for_postsend(u8 *hw_ste_p, u32 ste_size); -void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, - u32 actions_caps, u8 *last_ste, +void dr_ste_v1_set_reparse(u8 *hw_ste_p); +void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, u32 reformat_id, int size); +void dr_ste_v1_set_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr); +void dr_ste_v1_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, u8 vlans_num); +void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, u8 *frst_s_action, u8 *scnd_d_action, + u32 reformat_id, int size); +void dr_ste_v1_set_rx_decap(u8 *hw_ste_p, u8 *s_action); +void dr_ste_v1_set_actions_tx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, + u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); -void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, u8 *action_type_set, - u32 actions_caps, u8 *last_ste, +void dr_ste_v1_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, struct mlx5dr_domain *dmn, + u8 *action_type_set, u32 actions_caps, u8 *last_ste, struct mlx5dr_ste_actions_attr *attr, u32 *added_stes); void dr_ste_v1_set_action_set(u8 
*d_action, u8 hw_field, u8 shifter, u8 length, u32 data); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c index 808b013cf48c..0882dba0f64b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.c @@ -2,167 +2,7 @@ /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "dr_ste_v1.h" - -enum { - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, - DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, - DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, - DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, - DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, - DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, - DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, - DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, - DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, - DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, - DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, - DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, - DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, - DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, - DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, - DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, - DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, -}; - -static const struct 
mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { - [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, - }, - [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, - }, - [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, - }, - [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, - .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, - }, - 
[MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, - }, - [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, - .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, - }, - 
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, - }, - [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, - }, - [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { - .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, - }, -}; +#include "dr_ste_v2.h" static struct mlx5dr_ste_ctx ste_ctx_v2 = { /* Builders */ @@ -223,7 +63,12 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .set_action_decap_l3_list = &dr_ste_v1_set_action_decap_l3_list, .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, - + /* Actions bit set */ + .set_encap = &dr_ste_v1_set_encap, + .set_push_vlan = &dr_ste_v1_set_push_vlan, + .set_pop_vlan = &dr_ste_v1_set_pop_vlan, + .set_rx_decap = &dr_ste_v1_set_rx_decap, + .set_encap_l3 = &dr_ste_v1_set_encap_l3, /* Send */ .prepare_for_postsend 
= &dr_ste_v1_prepare_for_postsend, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h new file mode 100644 index 000000000000..d853fde49cfc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v2.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef _DR_STE_V2_ +#define _DR_STE_V2_ + +enum { + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0 = 0x00, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1 = 0x01, + DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2 = 0x02, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0 = 0x08, + DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1 = 0x09, + DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0 = 0x0e, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0 = 0x18, + DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1 = 0x19, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0 = 0x40, + DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1 = 0x41, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0 = 0x44, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1 = 0x45, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2 = 0x46, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3 = 0x47, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0 = 0x4c, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1 = 0x4d, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2 = 0x4e, + DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, + DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, + DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, + DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0 = 0x90, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1 = 0x91, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0 = 0x92, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1 = 0x93, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0 = 0x94, + DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1 = 0x95, +}; + +static 
const struct mlx5dr_ste_action_modify_field dr_ste_v2_action_modify_field_arr[] = { + [MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_SRC_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_1, .start = 16, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_DSCP] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 18, .end = 23, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_1, .start = 16, .end = 24, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_TCP, + }, + [MLX5_ACTION_IN_FIELD_OUT_IP_TTL] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_IPV6_HOPLIMIT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L3_OUT_0, .start = 8, .end = 15, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 16, .end = 31, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + [MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L4_OUT_0, .start = 0, .end = 15, + .l4_type = DR_STE_ACTION_MDFY_TYPE_L4_UDP, + }, + 
[MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_SRC_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_2, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV6_DST_OUT_3, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV6, + }, + [MLX5_ACTION_IN_FIELD_OUT_SIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_0, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_OUT_DIPV4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_IPV4_OUT_1, .start = 0, .end = 31, + .l3_type = DR_STE_ACTION_MDFY_TYPE_L3_IPV4, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_A] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_GNRL_PURPOSE, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_B] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_METADATA_2_CQE, .start = 0, .end = 31, + }, + 
[MLX5_ACTION_IN_FIELD_METADATA_REG_C_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_1] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_2] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_3] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_1_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_4] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_METADATA_REG_C_5] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_REGISTER_2_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_0, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_TCP_MISC_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V2_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, +}; + +#endif /* _DR_STE_V2_ */ From 4d617b57574f8ac04c997bdf9127a4c703a5f1f0 Mon Sep 17 00:00:00 2001 From: Itamar Gozlan Date: Thu, 19 Dec 2024 19:58:39 +0200 Subject: [PATCH 0628/1386] net/mlx5: DR, add support for ConnectX-8 steering Add support for a new steering format version that is implemented by ConnectX-8. Except for several differences, the STEv3 is identical to STEv2, so for most callbacks STEv3 context struct will call STEv2 functions. 
Signed-off-by: Itamar Gozlan Signed-off-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-10-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 1 + .../mlx5/core/steering/sws/dr_domain.c | 2 +- .../mellanox/mlx5/core/steering/sws/dr_ste.c | 2 + .../mellanox/mlx5/core/steering/sws/dr_ste.h | 1 + .../mlx5/core/steering/sws/dr_ste_v3.c | 221 ++++++++++++++++++ .../mlx5/core/steering/sws/mlx5_ifc_dr.h | 40 ++++ .../mellanox/mlx5/core/steering/sws/mlx5dr.h | 2 +- 7 files changed, 267 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 79fe09de0a9f..10a763e668ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -123,6 +123,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/sws/dr_domain.o \ steering/sws/dr_ste_v0.o \ steering/sws/dr_ste_v1.o \ steering/sws/dr_ste_v2.o \ + steering/sws/dr_ste_v3.o \ steering/sws/dr_cmd.o \ steering/sws/dr_fw.o \ steering/sws/dr_action.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c index 49f22cad92bf..60cb4527588a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c @@ -8,7 +8,7 @@ #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ ((dmn)->info.caps.dmn_type##_sw_owner || \ ((dmn)->info.caps.dmn_type##_sw_owner_v2 && \ - (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_7)) + (dmn)->info.caps.sw_format_ver <= MLX5_STEERING_FORMAT_CONNECTX_8)) bool mlx5dr_domain_is_support_ptrn_arg(struct mlx5dr_domain *dmn) { diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c index 01ba8eae2983..c8b8ff80c7c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.c @@ -1458,6 +1458,8 @@ struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx(u8 version) return mlx5dr_ste_get_ctx_v1(); else if (version == MLX5_STEERING_FORMAT_CONNECTX_7) return mlx5dr_ste_get_ctx_v2(); + else if (version == MLX5_STEERING_FORMAT_CONNECTX_8) + return mlx5dr_ste_get_ctx_v3(); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h index b6ec8d30d990..5f409dc30aca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste.h @@ -217,5 +217,6 @@ struct mlx5dr_ste_ctx { struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v0(void); struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v1(void); struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v2(void); +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v3(void); #endif /* _DR_STE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c new file mode 100644 index 000000000000..cc60ce1d274e --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_ste_v3.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include "dr_ste_v1.h" +#include "dr_ste_v2.h" + +static void dr_ste_v3_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_push_vlan(u8 *ste, u8 *d_action, + u32 vlan_hdr) +{ + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to vlan header in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, start_offset, + HDR_LEN_L2_MACS >> 1); + MLX5_SET(ste_double_action_insert_with_inline_v3, d_action, inline_data, vlan_hdr); + dr_ste_v1_set_reparse(ste); +} + +static void dr_ste_v3_set_pop_vlan(u8 *hw_ste_p, u8 *s_action, + u8 vlans_num) +{ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + start_anchor, DR_STE_HEADER_ANCHOR_1ST_VLAN); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_single_action_remove_header_size_v3, s_action, + remove_size, (HDR_LEN_L2_VLAN >> 1) * vlans_num); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) +{ + /* Remove L2 headers */ + MLX5_SET(ste_single_action_remove_header_v3, frst_s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, frst_s_action, end_anchor, + 
DR_STE_HEADER_ANCHOR_IPV6_IPV4); + + /* Encapsulate with given reformat ID */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_POINTER); + /* The hardware expects here size in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v3, scnd_d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_ENCAP); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static void dr_ste_v3_set_rx_decap(u8 *hw_ste_p, u8 *s_action) +{ + MLX5_SET(ste_single_action_remove_header_v3, s_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, s_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v3, s_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v3, s_action, end_anchor, + DR_STE_HEADER_ANCHOR_INNER_MAC); + + dr_ste_v1_set_reparse(hw_ste_p); +} + +static int +dr_ste_v3_set_action_decap_l3_list(void *data, u32 data_sz, + u8 *hw_action, u32 hw_action_sz, + uint16_t *used_hw_action_num) +{ + u8 padded_data[DR_STE_L2_HDR_MAX_SZ] = {}; + void *data_ptr = padded_data; + u16 used_actions = 0; + u32 inline_data_sz; + u32 i; + + if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) + return -EINVAL; + + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v3, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz); + + /* Remove L2L3 outer headers */ + MLX5_SET(ste_single_action_remove_header_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_HEADER_TO_HEADER); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, decap, 1); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, vni_to_cqe, 1); + MLX5_SET(ste_single_action_remove_header_v3, hw_action, end_anchor, + 
DR_STE_HEADER_ANCHOR_INNER_IPV6_IPV4); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; /* Remove and NOP are a single double action */ + + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz; + + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. + */ + for (i = 0; i < data_sz / inline_data_sz + 1; i++) { + void *addr_inline; + + MLX5_SET(ste_double_action_insert_with_inline_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_INSERT_INLINE); + /* The hardware expects here offset to words (2 bytes) */ + MLX5_SET(ste_double_action_insert_with_inline_v3, hw_action, start_offset, 0); + + /* Copy bytes one by one to avoid endianness problem */ + addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v3, + hw_action, inline_data); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); + hw_action += DR_STE_ACTION_DOUBLE_SZ; + used_actions++; + } + + /* Remove first 2 extra bytes */ + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, action_id, + DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, start_offset, 0); + /* The hardware expects here size in words (2 bytes) */ + MLX5_SET(ste_single_action_remove_header_size_v3, hw_action, remove_size, 1); + used_actions++; + + *used_hw_action_num = used_actions; + + return 0; +} + +static struct mlx5dr_ste_ctx ste_ctx_v3 = { + /* Builders */ + .build_eth_l2_src_dst_init = &dr_ste_v1_build_eth_l2_src_dst_init, + .build_eth_l3_ipv6_src_init = &dr_ste_v1_build_eth_l3_ipv6_src_init, + .build_eth_l3_ipv6_dst_init = &dr_ste_v1_build_eth_l3_ipv6_dst_init, + .build_eth_l3_ipv4_5_tuple_init = &dr_ste_v1_build_eth_l3_ipv4_5_tuple_init, + .build_eth_l2_src_init = &dr_ste_v1_build_eth_l2_src_init, + 
.build_eth_l2_dst_init = &dr_ste_v1_build_eth_l2_dst_init, + .build_eth_l2_tnl_init = &dr_ste_v1_build_eth_l2_tnl_init, + .build_eth_l3_ipv4_misc_init = &dr_ste_v1_build_eth_l3_ipv4_misc_init, + .build_eth_ipv6_l3_l4_init = &dr_ste_v1_build_eth_ipv6_l3_l4_init, + .build_mpls_init = &dr_ste_v1_build_mpls_init, + .build_tnl_gre_init = &dr_ste_v1_build_tnl_gre_init, + .build_tnl_mpls_init = &dr_ste_v1_build_tnl_mpls_init, + .build_tnl_mpls_over_udp_init = &dr_ste_v1_build_tnl_mpls_over_udp_init, + .build_tnl_mpls_over_gre_init = &dr_ste_v1_build_tnl_mpls_over_gre_init, + .build_icmp_init = &dr_ste_v1_build_icmp_init, + .build_general_purpose_init = &dr_ste_v1_build_general_purpose_init, + .build_eth_l4_misc_init = &dr_ste_v1_build_eth_l4_misc_init, + .build_tnl_vxlan_gpe_init = &dr_ste_v1_build_flex_parser_tnl_vxlan_gpe_init, + .build_tnl_geneve_init = &dr_ste_v1_build_flex_parser_tnl_geneve_init, + .build_tnl_geneve_tlv_opt_init = &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_init, + .build_tnl_geneve_tlv_opt_exist_init = + &dr_ste_v1_build_flex_parser_tnl_geneve_tlv_opt_exist_init, + .build_register_0_init = &dr_ste_v1_build_register_0_init, + .build_register_1_init = &dr_ste_v1_build_register_1_init, + .build_src_gvmi_qpn_init = &dr_ste_v1_build_src_gvmi_qpn_init, + .build_flex_parser_0_init = &dr_ste_v1_build_flex_parser_0_init, + .build_flex_parser_1_init = &dr_ste_v1_build_flex_parser_1_init, + .build_tnl_gtpu_init = &dr_ste_v1_build_flex_parser_tnl_gtpu_init, + .build_tnl_header_0_1_init = &dr_ste_v1_build_tnl_header_0_1_init, + .build_tnl_gtpu_flex_parser_0_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_0_init, + .build_tnl_gtpu_flex_parser_1_init = &dr_ste_v1_build_tnl_gtpu_flex_parser_1_init, + + /* Getters and Setters */ + .ste_init = &dr_ste_v1_init, + .set_next_lu_type = &dr_ste_v1_set_next_lu_type, + .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, + .set_miss_addr = &dr_ste_v1_set_miss_addr, + 
.get_miss_addr = &dr_ste_v1_get_miss_addr, + .set_hit_addr = &dr_ste_v1_set_hit_addr, + .set_byte_mask = &dr_ste_v1_set_byte_mask, + .get_byte_mask = &dr_ste_v1_get_byte_mask, + + /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_TX_POP | + DR_STE_CTX_ACTION_CAP_RX_PUSH | + DR_STE_CTX_ACTION_CAP_RX_ENCAP, + .set_actions_rx = &dr_ste_v1_set_actions_rx, + .set_actions_tx = &dr_ste_v1_set_actions_tx, + .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v2_action_modify_field_arr), + .modify_field_arr = dr_ste_v2_action_modify_field_arr, + .set_action_set = &dr_ste_v1_set_action_set, + .set_action_add = &dr_ste_v1_set_action_add, + .set_action_copy = &dr_ste_v1_set_action_copy, + .set_action_decap_l3_list = &dr_ste_v3_set_action_decap_l3_list, + .alloc_modify_hdr_chunk = &dr_ste_v1_alloc_modify_hdr_ptrn_arg, + .dealloc_modify_hdr_chunk = &dr_ste_v1_free_modify_hdr_ptrn_arg, + /* Actions bit set */ + .set_encap = &dr_ste_v3_set_encap, + .set_push_vlan = &dr_ste_v3_set_push_vlan, + .set_pop_vlan = &dr_ste_v3_set_pop_vlan, + .set_rx_decap = &dr_ste_v3_set_rx_decap, + .set_encap_l3 = &dr_ste_v3_set_encap_l3, + /* Send */ + .prepare_for_postsend = &dr_ste_v1_prepare_for_postsend, +}; + +struct mlx5dr_ste_ctx *mlx5dr_ste_get_ctx_v3(void) +{ + return &ste_ctx_v3; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h index fb078fa0f0cc..898c3618ff26 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5_ifc_dr.h @@ -600,4 +600,44 @@ struct mlx5_ifc_ste_double_action_aso_v1_bits { }; }; +struct mlx5_ifc_ste_single_action_remove_header_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 end_anchor[0x7]; + u8 reserved_at_16[0x1]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_18[0x4]; + u8 decap[0x1]; + u8 vni_to_cqe[0x1]; + u8 qos_profile[0x2]; +}; + +struct 
mlx5_ifc_ste_single_action_remove_header_size_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 outer_l4_remove[0x1]; + u8 reserved_at_18[0x2]; + u8 remove_size[0x6]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_inline_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 reserved_at_17[0x9]; + + u8 inline_data[0x20]; +}; + +struct mlx5_ifc_ste_double_action_insert_with_ptr_v3_bits { + u8 action_id[0x8]; + u8 start_anchor[0x7]; + u8 start_offset[0x8]; + u8 size[0x6]; + u8 attributes[0x3]; + + u8 pointer[0x20]; +}; + #endif /* MLX5_IFC_DR_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h index 3ac7dc67509f..0bb3724c10c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h @@ -160,7 +160,7 @@ mlx5dr_is_supported(struct mlx5_core_dev *dev) (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_7))); + MLX5_STEERING_FORMAT_CONNECTX_8))); } /* buddy functions & structure */ From f440d69a21f75af1acfdad16d3804750a360613c Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 19 Dec 2024 19:58:40 +0200 Subject: [PATCH 0629/1386] net/mlx5: Remove PTM support log message The absence of Precision Time Measurement support should not emit a message, as it can be misleading in contexts where PTM is not required. Remove the log message indicating the lack of PCIe PTM support. 
Signed-off-by: Carolina Jubran Reviewed-by: Dragos Tatulea Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-11-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 869bfecdd8ff..a108d8c726f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -945,9 +945,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev, mlx5_pci_vsc_init(dev); - err = pci_enable_ptm(pdev, NULL); - if (err) - mlx5_core_info(dev, "PTM is not supported by PCIe\n"); + pci_enable_ptm(pdev, NULL); return 0; From ef1749d5066984881b3af7a3007c2af91668dd5b Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 19 Dec 2024 19:58:41 +0200 Subject: [PATCH 0630/1386] net/mlx5: fs, Add support for RDMA RX steering over IB link layer Relax the capability check for creating the RDMA RX steering domain by considering only the capabilities reported by the firmware as necessary for its creation, which in turn allows RDMA RX creation over devices with IB link layer as well. The table_miss_action_domain capability is required only for a specific priority, which is handled in mlx5_rdma_enable_roce_steering(). The additional capability check for this case is already in place. 
Signed-off-by: Patrisious Haddad Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241219175841.1094544-12-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 6bf0aade69d7..ae20c061e0fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -217,7 +217,8 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, int err; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - underlay_qpn == 0) + underlay_qpn == 0 && + (ft->type != FS_FT_RDMA_RX && ft->type != FS_FT_RDMA_TX)) return 0; if (ft->type == FS_FT_FDB && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index ae1a5705b26d..41b5e98a0495 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -3665,8 +3665,7 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } - if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && - MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)) { + if (MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support)) { err = init_rdma_rx_root_ns(steering); if (err) goto err; From 7d0bf493b1352ba269f5fefe02dda2b06013f8b5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:32 -0800 Subject: [PATCH 0631/1386] eth: fbnic: reorder ethtool code Define ethtool callback handlers in order in which they are defined in the ops struct. It doesn't really matter what the order is, but it's good to have an order. 
Reviewed-by: Larysa Zaremba Link: https://patch.msgid.link/20241220025241.1522781-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 160 +++++++++--------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index cc8ca94529ca..777e083acae9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -40,6 +40,68 @@ static const struct fbnic_stat fbnic_gstrings_hw_stats[] = { #define FBNIC_HW_FIXED_STATS_LEN ARRAY_SIZE(fbnic_gstrings_hw_stats) #define FBNIC_HW_STATS_LEN FBNIC_HW_FIXED_STATS_LEN +static void +fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + fbnic_get_fw_ver_commit_str(fbd, drvinfo->fw_version, + sizeof(drvinfo->fw_version)); +} + +static int fbnic_get_regs_len(struct net_device *netdev) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + return fbnic_csr_regs_len(fbn->fbd) * sizeof(u32); +} + +static void fbnic_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *data) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + + fbnic_csr_get_regs(fbn->fbd, data, &regs->version); +} + +static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) +{ + int i; + + switch (sset) { + case ETH_SS_STATS: + for (i = 0; i < FBNIC_HW_STATS_LEN; i++) + ethtool_puts(&data, fbnic_gstrings_hw_stats[i].string); + break; + } +} + +static void fbnic_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct fbnic_net *fbn = netdev_priv(dev); + const struct fbnic_stat *stat; + int i; + + fbnic_get_hw_stats(fbn->fbd); + + for (i = 0; i < FBNIC_HW_STATS_LEN; i++) { + stat = &fbnic_gstrings_hw_stats[i]; + data[i] = *(u64 *)((u8 *)&fbn->fbd->hw_stats + stat->offset); + } +} + +static
int fbnic_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return FBNIC_HW_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + static int fbnic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *tsinfo) @@ -69,14 +131,27 @@ fbnic_get_ts_info(struct net_device *netdev, return 0; } -static void -fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +static void fbnic_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) { struct fbnic_net *fbn = netdev_priv(netdev); - struct fbnic_dev *fbd = fbn->fbd; + u64 ts_packets, ts_lost; + struct fbnic_ring *ring; + unsigned int start; + int i; - fbnic_get_fw_ver_commit_str(fbd, drvinfo->fw_version, - sizeof(drvinfo->fw_version)); + ts_stats->pkts = fbn->tx_stats.ts_packets; + ts_stats->lost = fbn->tx_stats.ts_lost; + for (i = 0; i < fbn->num_tx_queues; i++) { + ring = fbn->tx[i]; + do { + start = u64_stats_fetch_begin(&ring->stats.syncp); + ts_packets = ring->stats.ts_packets; + ts_lost = ring->stats.ts_lost; + } while (u64_stats_fetch_retry(&ring->stats.syncp, start)); + ts_stats->pkts += ts_packets; + ts_stats->lost += ts_lost; + } } static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter) @@ -85,43 +160,6 @@ static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter) *stat = counter->value; } -static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) -{ - int i; - - switch (sset) { - case ETH_SS_STATS: - for (i = 0; i < FBNIC_HW_STATS_LEN; i++) - ethtool_puts(&data, fbnic_gstrings_hw_stats[i].string); - break; - } -} - -static int fbnic_get_sset_count(struct net_device *dev, int sset) -{ - switch (sset) { - case ETH_SS_STATS: - return FBNIC_HW_STATS_LEN; - default: - return -EOPNOTSUPP; - } -} - -static void fbnic_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct fbnic_net *fbn = netdev_priv(dev); - const struct 
fbnic_stat *stat; - int i; - - fbnic_get_hw_stats(fbn->fbd); - - for (i = 0; i < FBNIC_HW_STATS_LEN; i++) { - stat = &fbnic_gstrings_hw_stats[i]; - data[i] = *(u64 *)((u8 *)&fbn->fbd->hw_stats + stat->offset); - } -} - static void fbnic_get_eth_mac_stats(struct net_device *netdev, struct ethtool_eth_mac_stats *eth_mac_stats) @@ -164,44 +202,6 @@ fbnic_get_eth_mac_stats(struct net_device *netdev, &mac_stats->eth_mac.FrameTooLongErrors); } -static void fbnic_get_ts_stats(struct net_device *netdev, - struct ethtool_ts_stats *ts_stats) -{ - struct fbnic_net *fbn = netdev_priv(netdev); - u64 ts_packets, ts_lost; - struct fbnic_ring *ring; - unsigned int start; - int i; - - ts_stats->pkts = fbn->tx_stats.ts_packets; - ts_stats->lost = fbn->tx_stats.ts_lost; - for (i = 0; i < fbn->num_tx_queues; i++) { - ring = fbn->tx[i]; - do { - start = u64_stats_fetch_begin(&ring->stats.syncp); - ts_packets = ring->stats.ts_packets; - ts_lost = ring->stats.ts_lost; - } while (u64_stats_fetch_retry(&ring->stats.syncp, start)); - ts_stats->pkts += ts_packets; - ts_stats->lost += ts_lost; - } -} - -static void fbnic_get_regs(struct net_device *netdev, - struct ethtool_regs *regs, void *data) -{ - struct fbnic_net *fbn = netdev_priv(netdev); - - fbnic_csr_get_regs(fbn->fbd, data, &regs->version); -} - -static int fbnic_get_regs_len(struct net_device *netdev) -{ - struct fbnic_net *fbn = netdev_priv(netdev); - - return fbnic_csr_regs_len(fbn->fbd) * sizeof(u32); -} - static const struct ethtool_ops fbnic_ethtool_ops = { .get_drvinfo = fbnic_get_drvinfo, .get_regs_len = fbnic_get_regs_len, From 7cb06a6a777cf5a98d6f4edcde5b3937f324efb5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Dec 2024 18:52:33 -0800 Subject: [PATCH 0632/1386] eth: fbnic: support querying RSS config The initial driver submission already added all the RSS state, as part of multi-queue support. Expose the configuration via the ethtool APIs.
Signed-off-by: Alexander Duyck Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20241220025241.1522781-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 777e083acae9..e71ae6abb0f5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -102,6 +102,105 @@ static int fbnic_get_sset_count(struct net_device *dev, int sset) } } +static int fbnic_get_rss_hash_idx(u32 flow_type) +{ + switch (flow_type & ~(FLOW_EXT | FLOW_MAC_EXT | FLOW_RSS)) { + case TCP_V4_FLOW: + return FBNIC_TCP4_HASH_OPT; + case TCP_V6_FLOW: + return FBNIC_TCP6_HASH_OPT; + case UDP_V4_FLOW: + return FBNIC_UDP4_HASH_OPT; + case UDP_V6_FLOW: + return FBNIC_UDP6_HASH_OPT; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_ESP_V4_FLOW: + case SCTP_V4_FLOW: + case IPV4_FLOW: + case IPV4_USER_FLOW: + return FBNIC_IPV4_HASH_OPT; + case AH_V6_FLOW: + case ESP_V6_FLOW: + case AH_ESP_V6_FLOW: + case SCTP_V6_FLOW: + case IPV6_FLOW: + case IPV6_USER_FLOW: + return FBNIC_IPV6_HASH_OPT; + case ETHER_FLOW: + return FBNIC_ETHER_HASH_OPT; + } + + return -1; +} + +static int +fbnic_get_rss_hash_opts(struct fbnic_net *fbn, struct ethtool_rxnfc *cmd) +{ + int hash_opt_idx = fbnic_get_rss_hash_idx(cmd->flow_type); + + if (hash_opt_idx < 0) + return -EINVAL; + + /* Report options from rss_en table in fbn */ + cmd->data = fbn->rss_flow_hash[hash_opt_idx]; + + return 0; +} + +static int fbnic_get_rxnfc(struct net_device *netdev, + struct ethtool_rxnfc *cmd, u32 *rule_locs) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = fbn->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXFH: + ret = fbnic_get_rss_hash_opts(fbn, cmd); + break; + } + + return 
ret; +} + +static u32 fbnic_get_rxfh_key_size(struct net_device *netdev) +{ + return FBNIC_RPC_RSS_KEY_BYTE_LEN; +} + +static u32 fbnic_get_rxfh_indir_size(struct net_device *netdev) +{ + return FBNIC_RPC_RSS_TBL_SIZE; +} + +static int +fbnic_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + unsigned int i; + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (rxfh->key) { + for (i = 0; i < FBNIC_RPC_RSS_KEY_BYTE_LEN; i++) { + u32 rss_key = fbn->rss_key[i / 4] << ((i % 4) * 8); + + rxfh->key[i] = rss_key >> 24; + } + } + + if (rxfh->indir) { + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) + rxfh->indir[i] = fbn->indir_tbl[0][i]; + } + + return 0; +} + static int fbnic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *tsinfo) @@ -209,6 +308,10 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_strings = fbnic_get_strings, .get_ethtool_stats = fbnic_get_ethtool_stats, .get_sset_count = fbnic_get_sset_count, + .get_rxnfc = fbnic_get_rxnfc, + .get_rxfh_key_size = fbnic_get_rxfh_key_size, + .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, + .get_rxfh = fbnic_get_rxfh, .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, .get_eth_mac_stats = fbnic_get_eth_mac_stats, From ef1c28817bf90aab3a6365ec81c30c09a3b18ece Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:34 -0800 Subject: [PATCH 0633/1386] eth: fbnic: don't reset the secondary RSS indir table Secondary RSS indirection table is for additional contexts. It can / should be initialized when such context is created. Since we don't support creating RSS contexts, yet, this change has no user visible effect. 
Link: https://patch.msgid.link/20241220025241.1522781-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic_rpc.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index 908c098cd59e..b99c890ac43f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -13,10 +13,8 @@ void fbnic_reset_indir_tbl(struct fbnic_net *fbn) unsigned int num_rx = fbn->num_rx_queues; unsigned int i; - for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) { + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) fbn->indir_tbl[0][i] = ethtool_rxfh_indir_default(i, num_rx); - fbn->indir_tbl[1][i] = ethtool_rxfh_indir_default(i, num_rx); - } } void fbnic_rss_key_fill(u32 *buffer) From 31ab733e999edbc4070d8386c608d9f0b73267c5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Dec 2024 18:52:35 -0800 Subject: [PATCH 0634/1386] eth: fbnic: support setting RSS configuration Let the user program the RSS indirection table and the RSS key. Straightforward implementation. Track the changes and don't bother poking the HW if user asked for a config identical to what's already programmed. The device only supports Toeplitz hash. Similarly to the GET support - all the real code that does the programming was part of initial driver submission, already. 
Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/20241220025241.1522781-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index e71ae6abb0f5..5523803c8edd 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -201,6 +201,60 @@ fbnic_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) return 0; } +static unsigned int +fbnic_set_indir(struct fbnic_net *fbn, unsigned int idx, const u32 *indir) +{ + unsigned int i, changes = 0; + + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) { + if (fbn->indir_tbl[idx][i] == indir[i]) + continue; + + fbn->indir_tbl[idx][i] = indir[i]; + changes++; + } + + return changes; +} + +static int +fbnic_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + unsigned int i, changes = 0; + + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EINVAL; + + if (rxfh->key) { + u32 rss_key = 0; + + for (i = FBNIC_RPC_RSS_KEY_BYTE_LEN; i--;) { + rss_key >>= 8; + rss_key |= (u32)(rxfh->key[i]) << 24; + + if (i % 4) + continue; + + if (fbn->rss_key[i / 4] == rss_key) + continue; + + fbn->rss_key[i / 4] = rss_key; + changes++; + } + } + + if (rxfh->indir) + changes += fbnic_set_indir(fbn, 0, rxfh->indir); + + if (changes && netif_running(netdev)) + fbnic_rss_reinit_hw(fbn->fbd, fbn); + + return 0; +} + static int fbnic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *tsinfo) @@ -312,6 +366,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_rxfh_key_size = fbnic_get_rxfh_key_size, .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, .get_rxfh = fbnic_get_rxfh, + .set_rxfh = 
fbnic_set_rxfh, .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, .get_eth_mac_stats = fbnic_get_eth_mac_stats, From c23a1461bfee0a6f158795a58c768911c49d6cd0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Dec 2024 18:52:36 -0800 Subject: [PATCH 0635/1386] eth: fbnic: let user control the RSS hash fields Support setting the fields over which RSS computes its hash. Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/20241220025241.1522781-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index 5523803c8edd..d1be8fc30404 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -167,6 +167,55 @@ static int fbnic_get_rxnfc(struct net_device *netdev, return ret; } +#define FBNIC_L2_HASH_OPTIONS \ + (RXH_L2DA | RXH_DISCARD) +#define FBNIC_L3_HASH_OPTIONS \ + (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST) +#define FBNIC_L4_HASH_OPTIONS \ + (FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3) + +static int +fbnic_set_rss_hash_opts(struct fbnic_net *fbn, const struct ethtool_rxnfc *cmd) +{ + int hash_opt_idx; + + /* Verify the type requested is correct */ + hash_opt_idx = fbnic_get_rss_hash_idx(cmd->flow_type); + if (hash_opt_idx < 0) + return -EINVAL; + + /* Verify the fields asked for can actually be assigned based on type */ + if (cmd->data & ~FBNIC_L4_HASH_OPTIONS || + (hash_opt_idx > FBNIC_L4_HASH_OPT && + cmd->data & ~FBNIC_L3_HASH_OPTIONS) || + (hash_opt_idx > FBNIC_IP_HASH_OPT && + cmd->data & ~FBNIC_L2_HASH_OPTIONS)) + return -EINVAL; + + fbn->rss_flow_hash[hash_opt_idx] = cmd->data; + + if (netif_running(fbn->netdev)) { + fbnic_rss_reinit(fbn->fbd, fbn); + fbnic_write_rules(fbn->fbd); + } + + return 0; +} + +static int fbnic_set_rxnfc(struct 
net_device *netdev, struct ethtool_rxnfc *cmd) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + ret = fbnic_set_rss_hash_opts(fbn, cmd); + break; + } + + return ret; +} + static u32 fbnic_get_rxfh_key_size(struct net_device *netdev) { return FBNIC_RPC_RSS_KEY_BYTE_LEN; @@ -363,6 +412,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_ethtool_stats = fbnic_get_ethtool_stats, .get_sset_count = fbnic_get_sset_count, .get_rxnfc = fbnic_get_rxnfc, + .set_rxnfc = fbnic_set_rxnfc, .get_rxfh_key_size = fbnic_get_rxfh_key_size, .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, .get_rxfh = fbnic_get_rxfh, From db7159c400ffbbf3e0df1f3ef6b847b7b62186a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:37 -0800 Subject: [PATCH 0636/1386] eth: fbnic: store NAPIs in an array instead of the list We will need an array for storing NAPIs in the upcoming IRQ handler reuse rework. Replace the current list we have, so that we are able to reuse it later. In a few places replace i as the iterator with t when we iterate over triads, this seems slightly less confusing than having i, j, k variables. 
Link: https://patch.msgid.link/20241220025241.1522781-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_netdev.c | 1 - .../net/ethernet/meta/fbnic/fbnic_netdev.h | 6 +- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 117 ++++++++++-------- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 7 +- 4 files changed, 71 insertions(+), 60 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index fc7d80db5fa6..558644c49a4b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -615,7 +615,6 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbn->netdev = netdev; fbn->fbd = fbd; - INIT_LIST_HEAD(&fbn->napis); fbn->txq_size = FBNIC_TXQ_SIZE_DEFAULT; fbn->hpq_size = FBNIC_HPQ_SIZE_DEFAULT; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index b8417b300778..0986c8f120a8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -11,10 +11,14 @@ #include "fbnic_rpc.h" #include "fbnic_txrx.h" +#define FBNIC_MAX_NAPI_VECTORS 128u + struct fbnic_net { struct fbnic_ring *tx[FBNIC_MAX_TXQS]; struct fbnic_ring *rx[FBNIC_MAX_RXQS]; + struct fbnic_napi_vector *napi[FBNIC_MAX_NAPI_VECTORS]; + struct net_device *netdev; struct fbnic_dev *fbd; @@ -56,8 +60,6 @@ struct fbnic_net { /* Time stampinn filter config */ struct kernel_hwtstamp_config hwtstamp_config; - - struct list_head napis; }; int __fbnic_open(struct fbnic_net *fbn); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index b5050fabe8fe..87e4eb03d991 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1116,16 +1116,17 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, fbnic_free_irq(fbd, v_idx, nv); 
page_pool_destroy(nv->page_pool); netif_napi_del(&nv->napi); - list_del(&nv->napis); + fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); } void fbnic_free_napi_vectors(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv, *temp; + int i; - list_for_each_entry_safe(nv, temp, &fbn->napis, napis) - fbnic_free_napi_vector(fbn, nv); + for (i = 0; i < fbn->num_napi; i++) + if (fbn->napi[i]) + fbnic_free_napi_vector(fbn, fbn->napi[i]); } static void fbnic_name_napi_vector(struct fbnic_napi_vector *nv) @@ -1222,7 +1223,7 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, nv->v_idx = v_idx; /* Tie napi to netdev */ - list_add(&nv->napis, &fbn->napis); + fbn->napi[fbnic_napi_idx(nv)] = nv; netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); /* Record IRQ to NAPI struct */ @@ -1307,7 +1308,7 @@ pp_destroy: page_pool_destroy(nv->page_pool); napi_del: netif_napi_del(&nv->napi); - list_del(&nv->napis); + fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); return err; } @@ -1612,19 +1613,18 @@ free_resources: void fbnic_free_resources(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) - fbnic_free_nv_resources(fbn, nv); + for (i = 0; i < fbn->num_napi; i++) + fbnic_free_nv_resources(fbn, fbn->napi[i]); } int fbnic_alloc_resources(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; - int err = -ENODEV; + int i, err = -ENODEV; - list_for_each_entry(nv, &fbn->napis, napis) { - err = fbnic_alloc_nv_resources(fbn, nv); + for (i = 0; i < fbn->num_napi; i++) { + err = fbnic_alloc_nv_resources(fbn, fbn->napi[i]); if (err) goto free_resources; } @@ -1632,8 +1632,8 @@ int fbnic_alloc_resources(struct fbnic_net *fbn) return 0; free_resources: - list_for_each_entry_continue_reverse(nv, &fbn->napis, napis) - fbnic_free_nv_resources(fbn, nv); + while (i--) + fbnic_free_nv_resources(fbn, fbn->napi[i]); return err; } @@ -1670,33 +1670,34 @@ static void fbnic_disable_rcq(struct fbnic_ring *rxr) void 
fbnic_napi_disable(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) { - napi_disable(&nv->napi); + for (i = 0; i < fbn->num_napi; i++) { + napi_disable(&fbn->napi[i]->napi); - fbnic_nv_irq_disable(nv); + fbnic_nv_irq_disable(fbn->napi[i]); } } void fbnic_disable(struct fbnic_net *fbn) { struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; - int i, j; + int i, j, t; + + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; - list_for_each_entry(nv, &fbn->napis, napis) { /* Disable Tx queue triads */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; fbnic_disable_twq0(&qt->sub0); fbnic_disable_tcq(&qt->cmpl); } /* Disable Rx queue triads */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; fbnic_disable_bdq(&qt->sub0, &qt->sub1); fbnic_disable_rcq(&qt->cmpl); @@ -1792,14 +1793,15 @@ int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) void fbnic_flush(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) { - int i, j; + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; + int j, t; /* Flush any processed Tx Queue Triads and drop the rest */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; struct netdev_queue *tx_queue; /* Clean the work queues of unprocessed work */ @@ -1823,8 +1825,8 @@ void fbnic_flush(struct fbnic_net *fbn) } /* Flush any processed Rx Queue Triads and drop the rest */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (j = 0; j < nv->rxt_count; j++, t++) { 
+ struct fbnic_q_triad *qt = &nv->qt[t]; /* Clean the work queues of unprocessed work */ fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail); @@ -1845,14 +1847,15 @@ void fbnic_flush(struct fbnic_net *fbn) void fbnic_fill(struct fbnic_net *fbn) { - struct fbnic_napi_vector *nv; + int i; - list_for_each_entry(nv, &fbn->napis, napis) { - int i, j; + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; + int j, t; /* Configure NAPI mapping for Tx */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; /* Nothing to do if Tx queue is disabled */ if (qt->sub0.flags & FBNIC_RING_F_DISABLED) @@ -1866,8 +1869,8 @@ void fbnic_fill(struct fbnic_net *fbn) /* Configure NAPI mapping and populate pages * in the BDQ rings to use for Rx */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; /* Associate Rx queue with NAPI */ netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, @@ -2025,21 +2028,23 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, void fbnic_enable(struct fbnic_net *fbn) { struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; - int i, j; + int i; + + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; + int j, t; - list_for_each_entry(nv, &fbn->napis, napis) { /* Setup Tx Queue Triads */ - for (i = 0; i < nv->txt_count; i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; fbnic_enable_twq0(&qt->sub0); fbnic_enable_tcq(nv, &qt->cmpl); } /* Setup Rx Queue Triads */ - for (j = 0; j < nv->rxt_count; j++, i++) { - struct fbnic_q_triad *qt = &nv->qt[i]; + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; fbnic_enable_bdq(&qt->sub0, &qt->sub1); 
fbnic_config_drop_mode_rcq(nv, &qt->cmpl); @@ -2064,10 +2069,11 @@ void fbnic_napi_enable(struct fbnic_net *fbn) { u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {}; struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; int i; - list_for_each_entry(nv, &fbn->napis, napis) { + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; + napi_enable(&nv->napi); fbnic_nv_irq_enable(nv); @@ -2096,17 +2102,18 @@ void fbnic_napi_depletion_check(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); u32 irqs[FBNIC_MAX_MSIX_VECS / 32] = {}; struct fbnic_dev *fbd = fbn->fbd; - struct fbnic_napi_vector *nv; - int i, j; + int i, j, t; + + for (i = 0; i < fbn->num_napi; i++) { + struct fbnic_napi_vector *nv = fbn->napi[i]; - list_for_each_entry(nv, &fbn->napis, napis) { /* Find RQs which are completely out of pages */ - for (i = nv->txt_count, j = 0; j < nv->rxt_count; j++, i++) { + for (t = nv->txt_count, j = 0; j < nv->rxt_count; j++, t++) { /* Assume 4 pages is always enough to fit a packet * and therefore generate a completion and an IRQ. 
*/ - if (fbnic_desc_used(&nv->qt[i].sub0) < 4 || - fbnic_desc_used(&nv->qt[i].sub1) < 4) + if (fbnic_desc_used(&nv->qt[t].sub0) < 4 || + fbnic_desc_used(&nv->qt[t].sub1) < 4) irqs[nv->v_idx / 32] |= BIT(nv->v_idx % 32); } } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 8d626287c3f4..1965d1fa38a2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -110,8 +110,6 @@ struct fbnic_napi_vector { u8 txt_count; u8 rxt_count; - struct list_head napis; - struct fbnic_q_triad qt[]; }; @@ -137,4 +135,9 @@ void fbnic_fill(struct fbnic_net *fbn); void fbnic_napi_depletion_check(struct net_device *netdev); int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail); +static inline int fbnic_napi_idx(const struct fbnic_napi_vector *nv) +{ + return nv->v_idx - FBNIC_NON_NAPI_VECTORS; +} + #endif /* _FBNIC_TXRX_H_ */ From 3a856ab347261870d2bb6f3cab95325f27eee104 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:38 -0800 Subject: [PATCH 0637/1386] eth: fbnic: add IRQ reuse support Change our method of swapping NAPIs without disturbing existing config. This is primarily needed for "live reconfiguration" such as changing the channel count when interface is already up. Previously we were planning to use a trick of using shared interrupts. We would install a second IRQ handler for the new NAPI, and make it return IRQ_NONE until we were ready for it to take over. This works fine functionally but breaks IRQ naming. The IRQ subsystem uses the IRQ name to create the procfs entry, since both handlers used the same name the second handler wouldn't get a proc directory registered. When first one gets removed on successful ring count change it would remove its directory and we would be left with none. New approach uses a double pointer to the NAPI. The IRQ handler needs to know how to locate the NAPI to schedule.
We register a single IRQ handler and give it a pointer to a pointer. We can then change what it points to without re-registering. This may have a tiny perf impact, but really really negligible. Link: https://patch.msgid.link/20241220025241.1522781-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic.h | 14 +++++++ drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 42 +++++++++++++++++++ .../net/ethernet/meta/fbnic/fbnic_netdev.c | 2 + drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 25 ++--------- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 2 +- 5 files changed, 63 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index 706ae6104c8e..ed527209b30c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -16,6 +16,10 @@ #include "fbnic_mac.h" #include "fbnic_rpc.h" +struct fbnic_napi_vector; + +#define FBNIC_MAX_NAPI_VECTORS 128u + struct fbnic_dev { struct device *dev; struct net_device *netdev; @@ -29,6 +33,11 @@ struct fbnic_dev { unsigned int pcs_msix_vector; unsigned short num_irqs; + struct { + u8 users; + char name[IFNAMSIZ + 9]; + } napi_irq[FBNIC_MAX_NAPI_VECTORS]; + struct delayed_work service_task; struct fbnic_fw_mbx mbx[FBNIC_IPC_MBX_INDICES]; @@ -148,6 +157,11 @@ void fbnic_hwmon_unregister(struct fbnic_dev *fbd); int fbnic_pcs_irq_enable(struct fbnic_dev *fbd); void fbnic_pcs_irq_disable(struct fbnic_dev *fbd); +void fbnic_napi_name_irqs(struct fbnic_dev *fbd); +int fbnic_napi_request_irq(struct fbnic_dev *fbd, + struct fbnic_napi_vector *nv); +void fbnic_napi_free_irq(struct fbnic_dev *fbd, + struct fbnic_napi_vector *nv); int fbnic_request_irq(struct fbnic_dev *dev, int nr, irq_handler_t handler, unsigned long flags, const char *name, void *data); void fbnic_free_irq(struct fbnic_dev *dev, int nr, void *data); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c 
b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index 914362195920..a8ea7b6774a8 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -169,6 +169,48 @@ void fbnic_free_irq(struct fbnic_dev *fbd, int nr, void *data) free_irq(irq, data); } +void fbnic_napi_name_irqs(struct fbnic_dev *fbd) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(fbd->napi_irq); i++) + snprintf(fbd->napi_irq[i].name, + sizeof(fbd->napi_irq[i].name), + "%s-TxRx-%u", fbd->netdev->name, i); +} + +int fbnic_napi_request_irq(struct fbnic_dev *fbd, + struct fbnic_napi_vector *nv) +{ + struct fbnic_net *fbn = netdev_priv(fbd->netdev); + int i = fbnic_napi_idx(nv); + int err; + + if (!fbd->napi_irq[i].users) { + err = fbnic_request_irq(fbd, nv->v_idx, + fbnic_msix_clean_rings, 0, + fbd->napi_irq[i].name, + &fbn->napi[i]); + if (err) + return err; + } + + fbd->napi_irq[i].users++; + return 0; +} + +void fbnic_napi_free_irq(struct fbnic_dev *fbd, + struct fbnic_napi_vector *nv) +{ + struct fbnic_net *fbn = netdev_priv(fbd->netdev); + int i = fbnic_napi_idx(nv); + + if (--fbd->napi_irq[i].users) + return; + + fbnic_free_irq(fbd, nv->v_idx, &fbn->napi[i]); +} + void fbnic_free_irqs(struct fbnic_dev *fbd) { struct pci_dev *pdev = to_pci_dev(fbd->dev); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 558644c49a4b..2f19144e4410 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -74,6 +74,8 @@ static int fbnic_open(struct net_device *netdev) struct fbnic_net *fbn = netdev_priv(netdev); int err; + fbnic_napi_name_irqs(fbn->fbd); + err = __fbnic_open(fbn); if (!err) fbnic_up(fbn); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 87e4eb03d991..75b491b8e1ca 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ 
-1036,9 +1036,9 @@ static int fbnic_poll(struct napi_struct *napi, int budget) return 0; } -static irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) +irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) { - struct fbnic_napi_vector *nv = data; + struct fbnic_napi_vector *nv = *(void **)data; napi_schedule_irqoff(&nv->napi); @@ -1099,7 +1099,6 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { struct fbnic_dev *fbd = nv->fbd; - u32 v_idx = nv->v_idx; int i, j; for (i = 0; i < nv->txt_count; i++) { @@ -1113,7 +1112,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, fbnic_remove_rx_ring(fbn, &nv->qt[i].cmpl); } - fbnic_free_irq(fbd, v_idx, nv); + fbnic_napi_free_irq(fbd, nv); page_pool_destroy(nv->page_pool); netif_napi_del(&nv->napi); fbn->napi[fbnic_napi_idx(nv)] = NULL; @@ -1129,18 +1128,6 @@ void fbnic_free_napi_vectors(struct fbnic_net *fbn) fbnic_free_napi_vector(fbn, fbn->napi[i]); } -static void fbnic_name_napi_vector(struct fbnic_napi_vector *nv) -{ - unsigned char *dev_name = nv->napi.dev->name; - - if (!nv->rxt_count) - snprintf(nv->name, sizeof(nv->name), "%s-Tx-%u", dev_name, - nv->v_idx - FBNIC_NON_NAPI_VECTORS); - else - snprintf(nv->name, sizeof(nv->name), "%s-TxRx-%u", dev_name, - nv->v_idx - FBNIC_NON_NAPI_VECTORS); -} - #define FBNIC_PAGE_POOL_FLAGS \ (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) @@ -1240,12 +1227,8 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, goto napi_del; } - /* Initialize vector name */ - fbnic_name_napi_vector(nv); - /* Request the IRQ for napi vector */ - err = fbnic_request_irq(fbd, v_idx, &fbnic_msix_clean_rings, - IRQF_SHARED, nv->name, nv); + err = fbnic_napi_request_irq(fbd, nv); if (err) goto pp_destroy; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 1965d1fa38a2..c8d908860ab0 100644 --- 
a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -104,7 +104,6 @@ struct fbnic_napi_vector { struct device *dev; /* Device for DMA unmapping */ struct page_pool *page_pool; struct fbnic_dev *fbd; - char name[IFNAMSIZ + 9]; u16 v_idx; u8 txt_count; @@ -125,6 +124,7 @@ int fbnic_alloc_napi_vectors(struct fbnic_net *fbn); void fbnic_free_napi_vectors(struct fbnic_net *fbn); int fbnic_alloc_resources(struct fbnic_net *fbn); void fbnic_free_resources(struct fbnic_net *fbn); +irqreturn_t fbnic_msix_clean_rings(int irq, void *data); void fbnic_napi_enable(struct fbnic_net *fbn); void fbnic_napi_disable(struct fbnic_net *fbn); void fbnic_enable(struct fbnic_net *fbn); From 557d02238e05eb66b9aba9a1f90f3a2131c6c887 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Dec 2024 18:52:39 -0800 Subject: [PATCH 0638/1386] eth: fbnic: centralize the queue count and NAPI<>queue setting To simplify dealing with RTNL_ASSERT() requirements further down the line, move setting queue count and NAPI<>queue association to their own helpers. 
Signed-off-by: Alexander Duyck Link: https://patch.msgid.link/20241220025241.1522781-9-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_netdev.c | 9 +- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 92 +++++++++++++------ drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 2 + 3 files changed, 70 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 2f19144e4410..7a96b6ee773f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -23,13 +23,7 @@ int __fbnic_open(struct fbnic_net *fbn) if (err) goto free_napi_vectors; - err = netif_set_real_num_tx_queues(fbn->netdev, - fbn->num_tx_queues); - if (err) - goto free_resources; - - err = netif_set_real_num_rx_queues(fbn->netdev, - fbn->num_rx_queues); + err = fbnic_set_netif_queues(fbn); if (err) goto free_resources; @@ -93,6 +87,7 @@ static int fbnic_stop(struct net_device *netdev) fbnic_time_stop(fbn); fbnic_fw_xmit_ownership_msg(fbn->fbd, false); + fbnic_reset_netif_queues(fbn); fbnic_free_resources(fbn); fbnic_free_napi_vectors(fbn); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 75b491b8e1ca..92fc1ad6ed6f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1621,6 +1621,71 @@ free_resources: return err; } +static void fbnic_set_netif_napi(struct fbnic_napi_vector *nv) +{ + int i, j; + + /* Associate Tx queue with NAPI */ + for (i = 0; i < nv->txt_count; i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, + NETDEV_QUEUE_TYPE_TX, &nv->napi); + } + + /* Associate Rx queue with NAPI */ + for (j = 0; j < nv->rxt_count; j++, i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, + NETDEV_QUEUE_TYPE_RX, &nv->napi); + } +} + 
+static void fbnic_reset_netif_napi(struct fbnic_napi_vector *nv) +{ + int i, j; + + /* Disassociate Tx queue from NAPI */ + for (i = 0; i < nv->txt_count; i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, + NETDEV_QUEUE_TYPE_TX, NULL); + } + + /* Disassociate Rx queue from NAPI */ + for (j = 0; j < nv->rxt_count; j++, i++) { + struct fbnic_q_triad *qt = &nv->qt[i]; + + netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, + NETDEV_QUEUE_TYPE_RX, NULL); + } +} + +int fbnic_set_netif_queues(struct fbnic_net *fbn) +{ + int i, err; + + err = netif_set_real_num_queues(fbn->netdev, fbn->num_tx_queues, + fbn->num_rx_queues); + if (err) + return err; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_set_netif_napi(fbn->napi[i]); + + return 0; +} + +void fbnic_reset_netif_queues(struct fbnic_net *fbn) +{ + int i; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_reset_netif_napi(fbn->napi[i]); +} + static void fbnic_disable_twq0(struct fbnic_ring *txr) { u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ0_CTL); @@ -1801,10 +1866,6 @@ void fbnic_flush(struct fbnic_net *fbn) tx_queue = netdev_get_tx_queue(nv->napi.dev, qt->sub0.q_idx); netdev_tx_reset_queue(tx_queue); - - /* Disassociate Tx queue from NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, - NETDEV_QUEUE_TYPE_TX, NULL); } /* Flush any processed Rx Queue Triads and drop the rest */ @@ -1820,10 +1881,6 @@ void fbnic_flush(struct fbnic_net *fbn) fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); qt->cmpl.pkt->buff.data_hard_start = NULL; - - /* Disassociate Rx queue from NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, - NETDEV_QUEUE_TYPE_RX, NULL); } } } @@ -1836,29 +1893,12 @@ void fbnic_fill(struct fbnic_net *fbn) struct fbnic_napi_vector *nv = fbn->napi[i]; int j, t; - /* Configure NAPI mapping for Tx */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; - - /* Nothing to do if Tx queue is disabled */ - if (qt->sub0.flags & 
FBNIC_RING_F_DISABLED) - continue; - - /* Associate Tx queue with NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->sub0.q_idx, - NETDEV_QUEUE_TYPE_TX, &nv->napi); - } - /* Configure NAPI mapping and populate pages * in the BDQ rings to use for Rx */ - for (j = 0; j < nv->rxt_count; j++, t++) { + for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { struct fbnic_q_triad *qt = &nv->qt[t]; - /* Associate Rx queue with NAPI */ - netif_queue_set_napi(nv->napi.dev, qt->cmpl.q_idx, - NETDEV_QUEUE_TYPE_RX, &nv->napi); - /* Populate the header and payload BDQs */ fbnic_fill_bdq(nv, &qt->sub0); fbnic_fill_bdq(nv, &qt->sub1); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index c8d908860ab0..92c671135ad7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -124,6 +124,8 @@ int fbnic_alloc_napi_vectors(struct fbnic_net *fbn); void fbnic_free_napi_vectors(struct fbnic_net *fbn); int fbnic_alloc_resources(struct fbnic_net *fbn); void fbnic_free_resources(struct fbnic_net *fbn); +int fbnic_set_netif_queues(struct fbnic_net *fbn); +void fbnic_reset_netif_queues(struct fbnic_net *fbn); irqreturn_t fbnic_msix_clean_rings(int irq, void *data); void fbnic_napi_enable(struct fbnic_net *fbn); void fbnic_napi_disable(struct fbnic_net *fbn); From 3a481cc72673b2fbb18271acf2d9b43f6a920ec4 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:40 -0800 Subject: [PATCH 0639/1386] eth: fbnic: support ring channel get and set while down Trivial implementation of ethtool channel get and set. Set is only supported when device is closed, next patch will add code for live reconfig. Asymmetric configurations are supported (combined + extra Tx or Rx), so are configurations with independent IRQs for Rx and Tx. Having all 3 NAPI types (combined, Tx, Rx) is not supported. We used to only call fbnic_reset_indir_tbl() during init. 
Now that we call it after the device has been registered, we must be careful not to override user config. Link: https://patch.msgid.link/20241220025241.1522781-10-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 64 +++++++++++++++++++ drivers/net/ethernet/meta/fbnic/fbnic_rpc.c | 3 + 2 files changed, 67 insertions(+) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index d1be8fc30404..d2fe97ae6a71 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -304,6 +304,68 @@ fbnic_set_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh, return 0; } +static void fbnic_get_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + + ch->max_rx = fbd->max_num_queues; + ch->max_tx = fbd->max_num_queues; + ch->max_combined = min(ch->max_rx, ch->max_tx); + ch->max_other = FBNIC_NON_NAPI_VECTORS; + + if (fbn->num_rx_queues > fbn->num_napi || + fbn->num_tx_queues > fbn->num_napi) + ch->combined_count = min(fbn->num_rx_queues, + fbn->num_tx_queues); + else + ch->combined_count = + fbn->num_rx_queues + fbn->num_tx_queues - fbn->num_napi; + ch->rx_count = fbn->num_rx_queues - ch->combined_count; + ch->tx_count = fbn->num_tx_queues - ch->combined_count; + ch->other_count = FBNIC_NON_NAPI_VECTORS; +} + +static void fbnic_set_queues(struct fbnic_net *fbn, struct ethtool_channels *ch, + unsigned int max_napis) +{ + fbn->num_rx_queues = ch->rx_count + ch->combined_count; + fbn->num_tx_queues = ch->tx_count + ch->combined_count; + fbn->num_napi = min(ch->rx_count + ch->tx_count + ch->combined_count, + max_napis); +} + +static int fbnic_set_channels(struct net_device *netdev, + struct ethtool_channels *ch) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + unsigned int max_napis, standalone; + struct fbnic_dev *fbd = 
fbn->fbd; + + max_napis = fbd->num_irqs - FBNIC_NON_NAPI_VECTORS; + standalone = ch->rx_count + ch->tx_count; + + /* Limits for standalone queues: + * - each queue has it's own NAPI (num_napi >= rx + tx + combined) + * - combining queues (combined not 0, rx or tx must be 0) + */ + if ((ch->rx_count && ch->tx_count && ch->combined_count) || + (standalone && standalone + ch->combined_count > max_napis) || + ch->rx_count + ch->combined_count > fbd->max_num_queues || + ch->tx_count + ch->combined_count > fbd->max_num_queues || + ch->other_count != FBNIC_NON_NAPI_VECTORS) + return -EINVAL; + + if (!netif_running(netdev)) { + fbnic_set_queues(fbn, ch, max_napis); + fbnic_reset_indir_tbl(fbn); + return 0; + } + + return -EBUSY; +} + static int fbnic_get_ts_info(struct net_device *netdev, struct kernel_ethtool_ts_info *tsinfo) @@ -417,6 +479,8 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_rxfh_indir_size = fbnic_get_rxfh_indir_size, .get_rxfh = fbnic_get_rxfh, .set_rxfh = fbnic_set_rxfh, + .get_channels = fbnic_get_channels, + .set_channels = fbnic_set_channels, .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, .get_eth_mac_stats = fbnic_get_eth_mac_stats, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index b99c890ac43f..c25bd300b902 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -13,6 +13,9 @@ void fbnic_reset_indir_tbl(struct fbnic_net *fbn) unsigned int num_rx = fbn->num_rx_queues; unsigned int i; + if (netif_is_rxfh_configured(fbn->netdev)) + return; + for (i = 0; i < FBNIC_RPC_RSS_TBL_SIZE; i++) fbn->indir_tbl[0][i] = ethtool_rxfh_indir_default(i, num_rx); } From 52dc722db0d98bcdf40927dd1719468f7d08bd59 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 19 Dec 2024 18:52:41 -0800 Subject: [PATCH 0640/1386] eth: fbnic: support ring channel set while up Implement the channel count changes. 
Copy the netdev priv, allocate new channels using it. Stop, swap, start. Then free the copy of the priv along with the channels it holds, which are now the channels that used to be on the real priv. Reviewed-by: Przemek Kitszel Link: https://patch.msgid.link/20241220025241.1522781-11-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/meta/fbnic/fbnic.h | 1 + .../net/ethernet/meta/fbnic/fbnic_ethtool.c | 121 +++++++++++++++++- drivers/net/ethernet/meta/fbnic/fbnic_irq.c | 11 ++ .../net/ethernet/meta/fbnic/fbnic_netdev.h | 1 + drivers/net/ethernet/meta/fbnic/fbnic_pci.c | 2 +- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 8 +- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 5 + 7 files changed, 143 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index ed527209b30c..14751f16e125 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -162,6 +162,7 @@ int fbnic_napi_request_irq(struct fbnic_dev *fbd, struct fbnic_napi_vector *nv); void fbnic_napi_free_irq(struct fbnic_dev *fbd, struct fbnic_napi_vector *nv); +void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr); int fbnic_request_irq(struct fbnic_dev *dev, int nr, irq_handler_t handler, unsigned long flags, const char *name, void *data); void fbnic_free_irq(struct fbnic_dev *dev, int nr, void *data); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index d2fe97ae6a71..20cd9f5f89e2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -65,6 +65,76 @@ static void fbnic_get_regs(struct net_device *netdev, fbnic_csr_get_regs(fbn->fbd, data, &regs->version); } +static struct fbnic_net *fbnic_clone_create(struct fbnic_net *orig) +{ + struct fbnic_net *clone; + + clone = kmemdup(orig, sizeof(*orig), GFP_KERNEL); + if (!clone) + return NULL; + + 
memset(clone->tx, 0, sizeof(clone->tx)); + memset(clone->rx, 0, sizeof(clone->rx)); + memset(clone->napi, 0, sizeof(clone->napi)); + return clone; +} + +static void fbnic_clone_swap_cfg(struct fbnic_net *orig, + struct fbnic_net *clone) +{ + swap(clone->rcq_size, orig->rcq_size); + swap(clone->hpq_size, orig->hpq_size); + swap(clone->ppq_size, orig->ppq_size); + swap(clone->txq_size, orig->txq_size); + swap(clone->num_rx_queues, orig->num_rx_queues); + swap(clone->num_tx_queues, orig->num_tx_queues); + swap(clone->num_napi, orig->num_napi); +} + +static void fbnic_aggregate_vector_counters(struct fbnic_net *fbn, + struct fbnic_napi_vector *nv) +{ + int i, j; + + for (i = 0; i < nv->txt_count; i++) { + fbnic_aggregate_ring_tx_counters(fbn, &nv->qt[i].sub0); + fbnic_aggregate_ring_tx_counters(fbn, &nv->qt[i].sub1); + fbnic_aggregate_ring_tx_counters(fbn, &nv->qt[i].cmpl); + } + + for (j = 0; j < nv->rxt_count; j++, i++) { + fbnic_aggregate_ring_rx_counters(fbn, &nv->qt[i].sub0); + fbnic_aggregate_ring_rx_counters(fbn, &nv->qt[i].sub1); + fbnic_aggregate_ring_rx_counters(fbn, &nv->qt[i].cmpl); + } +} + +static void fbnic_clone_swap(struct fbnic_net *orig, + struct fbnic_net *clone) +{ + struct fbnic_dev *fbd = orig->fbd; + unsigned int i; + + for (i = 0; i < max(clone->num_napi, orig->num_napi); i++) + fbnic_synchronize_irq(fbd, FBNIC_NON_NAPI_VECTORS + i); + for (i = 0; i < orig->num_napi; i++) + fbnic_aggregate_vector_counters(orig, orig->napi[i]); + + fbnic_clone_swap_cfg(orig, clone); + + for (i = 0; i < ARRAY_SIZE(orig->napi); i++) + swap(clone->napi[i], orig->napi[i]); + for (i = 0; i < ARRAY_SIZE(orig->tx); i++) + swap(clone->tx[i], orig->tx[i]); + for (i = 0; i < ARRAY_SIZE(orig->rx); i++) + swap(clone->rx[i], orig->rx[i]); +} + +static void fbnic_clone_free(struct fbnic_net *clone) +{ + kfree(clone); +} + static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) { int i; @@ -342,6 +412,8 @@ static int fbnic_set_channels(struct net_device 
*netdev, struct fbnic_net *fbn = netdev_priv(netdev); unsigned int max_napis, standalone; struct fbnic_dev *fbd = fbn->fbd; + struct fbnic_net *clone; + int err; max_napis = fbd->num_irqs - FBNIC_NON_NAPI_VECTORS; standalone = ch->rx_count + ch->tx_count; @@ -363,7 +435,54 @@ static int fbnic_set_channels(struct net_device *netdev, return 0; } - return -EBUSY; + clone = fbnic_clone_create(fbn); + if (!clone) + return -ENOMEM; + + fbnic_set_queues(clone, ch, max_napis); + + err = fbnic_alloc_napi_vectors(clone); + if (err) + goto err_free_clone; + + err = fbnic_alloc_resources(clone); + if (err) + goto err_free_napis; + + fbnic_down_noidle(fbn); + err = fbnic_wait_all_queues_idle(fbn->fbd, true); + if (err) + goto err_start_stack; + + err = fbnic_set_netif_queues(clone); + if (err) + goto err_start_stack; + + /* Nothing can fail past this point */ + fbnic_flush(fbn); + + fbnic_clone_swap(fbn, clone); + + /* Reset RSS indirection table */ + fbnic_reset_indir_tbl(fbn); + + fbnic_up(fbn); + + fbnic_free_resources(clone); + fbnic_free_napi_vectors(clone); + fbnic_clone_free(clone); + + return 0; + +err_start_stack: + fbnic_flush(fbn); + fbnic_up(fbn); + fbnic_free_resources(clone); +err_free_napis: + fbnic_free_napi_vectors(clone); +err_free_clone: + fbnic_clone_free(clone); + return err; } static int diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c index a8ea7b6774a8..1bbc0e56f3a0 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_irq.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_irq.c @@ -146,6 +146,17 @@ void fbnic_pcs_irq_disable(struct fbnic_dev *fbd) free_irq(fbd->pcs_msix_vector, fbd); } +void fbnic_synchronize_irq(struct fbnic_dev *fbd, int nr) +{ + struct pci_dev *pdev = to_pci_dev(fbd->dev); + int irq = pci_irq_vector(pdev, nr); + + if (irq < 0) + return; + + synchronize_irq(irq); +} + int fbnic_request_irq(struct fbnic_dev *fbd, int nr, irq_handler_t handler, unsigned long flags, const char *name, void 
*data) { diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index 0986c8f120a8..a392ac1cc4f2 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -65,6 +65,7 @@ struct fbnic_net { int __fbnic_open(struct fbnic_net *fbn); void fbnic_up(struct fbnic_net *fbn); void fbnic_down(struct fbnic_net *fbn); +void fbnic_down_noidle(struct fbnic_net *fbn); struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd); void fbnic_netdev_free(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index 32702dc4a066..6cbbc2ee3e1f 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -145,7 +145,7 @@ void fbnic_up(struct fbnic_net *fbn) fbnic_service_task_start(fbn); } -static void fbnic_down_noidle(struct fbnic_net *fbn) +void fbnic_down_noidle(struct fbnic_net *fbn) { fbnic_service_task_stop(fbn); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index 92fc1ad6ed6f..bb54ce5f5787 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1045,8 +1045,8 @@ irqreturn_t fbnic_msix_clean_rings(int __always_unused irq, void *data) return IRQ_HANDLED; } -static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, - struct fbnic_ring *rxr) +void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, + struct fbnic_ring *rxr) { struct fbnic_queue_stats *stats = &rxr->stats; @@ -1056,8 +1056,8 @@ static void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.dropped += stats->dropped; } -static void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, - struct fbnic_ring *txr) +void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, + struct fbnic_ring *txr) { struct fbnic_queue_stats *stats = 
&txr->stats; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 92c671135ad7..c2a94f31f71b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -120,6 +120,11 @@ netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); +void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, + struct fbnic_ring *rxr); +void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, + struct fbnic_ring *txr); + int fbnic_alloc_napi_vectors(struct fbnic_net *fbn); void fbnic_free_napi_vectors(struct fbnic_net *fbn); int fbnic_alloc_resources(struct fbnic_net *fbn); From 4c61d809cf608842112c77880f50810a564cd9cb Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Fri, 20 Dec 2024 09:37:40 +0100 Subject: [PATCH 0641/1386] net: ethtool: Fix suspicious rcu_dereference usage The __ethtool_get_ts_info function can be called with or without the rtnl lock held. When the rtnl lock is not held, using rtnl_dereference() triggers a warning due to the lack of lock context. Add an rcu_read_lock() to ensure the lock is acquired and to maintain synchronization. 
Reported-by: syzbot+a344326c05c98ba19682@syzkaller.appspotmail.com Closes: https://lore.kernel.org/netdev/676147f8.050a0220.37aaf.0154.GAE@google.com/ Fixes: b9e3f7dc9ed9 ("net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology") Signed-off-by: Kory Maincent Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20241220083741.175329-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- net/ethtool/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 02f941f667dd..2607aea1fbfb 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -870,7 +870,8 @@ int __ethtool_get_ts_info(struct net_device *dev, { struct hwtstamp_provider *hwprov; - hwprov = rtnl_dereference(dev->hwprov); + rcu_read_lock(); + hwprov = rcu_dereference(dev->hwprov); /* No provider specified, use default behavior */ if (!hwprov) { const struct ethtool_ops *ops = dev->ethtool_ops; @@ -887,9 +888,11 @@ int __ethtool_get_ts_info(struct net_device *dev, info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; + rcu_read_unlock(); return err; } + rcu_read_unlock(); return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc); } From c71b59690aa12daf3edbb4dd02b8821490dc727e Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:40 +0100 Subject: [PATCH 0642/1386] net: sparx5: do some preparation work The sparx5_port_init() does initial configuration of a variety of different features and options for each port. Some are shared for all types of devices, some are not. As it is now, common configuration is done after configuration of low-speed devices. This will not work when adding RGMII support in a subsequent patch. In preparation for lan969x RGMII support, move a block of code, that configures 2g5 devices, down. This ensures that the configuration common to all devices is done before configuration of 2g5, 5g, 10g and 25g devices. 
Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-1-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_port.c | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index f9d1a6bb9bff..f39bf4878e11 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -1067,24 +1067,6 @@ int sparx5_port_init(struct sparx5 *sparx5, if (err) return err; - /* Configure MAC vlan awareness */ - err = sparx5_port_max_tags_set(sparx5, port); - if (err) - return err; - - /* Set Max Length */ - spx5_rmw(DEV2G5_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN), - DEV2G5_MAC_MAXLEN_CFG_MAX_LEN, - sparx5, - DEV2G5_MAC_MAXLEN_CFG(port->portno)); - - /* 1G/2G5: Signal Detect configuration */ - spx5_wr(DEV2G5_PCS1G_SD_CFG_SD_POL_SET(sd_pol) | - DEV2G5_PCS1G_SD_CFG_SD_SEL_SET(sd_sel) | - DEV2G5_PCS1G_SD_CFG_SD_ENA_SET(sd_ena), - sparx5, - DEV2G5_PCS1G_SD_CFG(port->portno)); - /* Set Pause WM hysteresis */ spx5_rmw(QSYS_PAUSE_CFG_PAUSE_START_SET(pause_start) | QSYS_PAUSE_CFG_PAUSE_STOP_SET(pause_stop) | @@ -1108,6 +1090,24 @@ int sparx5_port_init(struct sparx5 *sparx5, ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS, sparx5, ANA_CL_FILTER_CTRL(port->portno)); + /* Configure MAC vlan awareness */ + err = sparx5_port_max_tags_set(sparx5, port); + if (err) + return err; + + /* Set Max Length */ + spx5_rmw(DEV2G5_MAC_MAXLEN_CFG_MAX_LEN_SET(ETH_MAXLEN), + DEV2G5_MAC_MAXLEN_CFG_MAX_LEN, + sparx5, + DEV2G5_MAC_MAXLEN_CFG(port->portno)); + + /* 1G/2G5: Signal Detect configuration */ + spx5_wr(DEV2G5_PCS1G_SD_CFG_SD_POL_SET(sd_pol) | + DEV2G5_PCS1G_SD_CFG_SD_SEL_SET(sd_sel) | + DEV2G5_PCS1G_SD_CFG_SD_ENA_SET(sd_ena), + sparx5, + 
DEV2G5_PCS1G_SD_CFG(port->portno)); + if (conf->portmode == PHY_INTERFACE_MODE_QSGMII || conf->portmode == PHY_INTERFACE_MODE_SGMII) { err = sparx5_serdes_set(sparx5, port, conf); From dd2baee1084034b8666290cbcc02cb32fe5a8666 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:41 +0100 Subject: [PATCH 0643/1386] net: sparx5: add function for RGMII port check The lan969x device contains two RGMII port interfaces, sitting at port 28 and 29. Add function: is_port_rgmii() to the match data ops, that checks if a given port is an RGMII port or not. For Sparx5, this function always returns false. Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-2-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c | 1 + drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h | 5 +++++ drivers/net/ethernet/microchip/sparx5/sparx5_main.c | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_main.h | 1 + drivers/net/ethernet/microchip/sparx5/sparx5_port.h | 5 +++++ 5 files changed, 13 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c index c2afa2176b08..76f0c8635eb9 100644 --- a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c @@ -329,6 +329,7 @@ static const struct sparx5_ops lan969x_ops = { .is_port_5g = &lan969x_port_is_5g, .is_port_10g = &lan969x_port_is_10g, .is_port_25g = &lan969x_port_is_25g, + .is_port_rgmii = &lan969x_port_is_rgmii, .get_port_dev_index = &lan969x_port_dev_mapping, .get_port_dev_bit = &lan969x_get_dev_mode_bit, .get_hsch_max_group_rate = &lan969x_get_hsch_max_group_rate, diff --git a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h 
index 2489d0d32dfd..4b91c47d6d21 100644 --- a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h @@ -59,6 +59,11 @@ static inline bool lan969x_port_is_25g(int portno) return false; } +static inline bool lan969x_port_is_rgmii(int portno) +{ + return portno == 28 || portno == 29; +} + /* lan969x_calendar.c */ int lan969x_dsm_calendar_calc(struct sparx5 *sparx5, u32 taxi, struct sparx5_calendar_data *data); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index f61aa15beab7..4be717ba7d37 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -1072,6 +1072,7 @@ static const struct sparx5_ops sparx5_ops = { .is_port_5g = &sparx5_port_is_5g, .is_port_10g = &sparx5_port_is_10g, .is_port_25g = &sparx5_port_is_25g, + .is_port_rgmii = &sparx5_port_is_rgmii, .get_port_dev_index = &sparx5_port_dev_mapping, .get_port_dev_bit = &sparx5_port_dev_mapping, .get_hsch_max_group_rate = &sparx5_get_hsch_max_group_rate, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index d5dd953b0a71..c58d7841638e 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -313,6 +313,7 @@ struct sparx5_ops { bool (*is_port_5g)(int portno); bool (*is_port_10g)(int portno); bool (*is_port_25g)(int portno); + bool (*is_port_rgmii)(int portno); u32 (*get_port_dev_index)(struct sparx5 *sparx5, int port); u32 (*get_port_dev_bit)(struct sparx5 *sparx5, int port); u32 (*get_hsch_max_group_rate)(int grp); diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.h b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h index 9b9bcc6834bc..c8a37468a3d1 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.h +++ 
b/drivers/net/ethernet/microchip/sparx5/sparx5_port.h @@ -40,6 +40,11 @@ static inline bool sparx5_port_is_25g(int portno) return portno >= 56 && portno <= 63; } +static inline bool sparx5_port_is_rgmii(int portno) +{ + return false; +} + static inline u32 sparx5_to_high_dev(struct sparx5 *sparx5, int port) { const struct sparx5_ops *ops = sparx5->data->ops; From 05bda8a1bdedd5dfc21522d732c3bf9413d70eb3 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:42 +0100 Subject: [PATCH 0644/1386] net: sparx5: use is_port_rgmii() throughout Now that we can check if a given port is an RGMII port, use it in the following cases: - To set RGMII PHY modes for RGMII port devices. - To avoid checking for a SerDes node in the devicetree, when the port is an RGMII port. - To bail out of sparx5_port_init() when the common configuration is done. Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-3-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../ethernet/microchip/sparx5/sparx5_main.c | 28 +++++++++++++------ .../ethernet/microchip/sparx5/sparx5_port.c | 3 ++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index 4be717ba7d37..e68277c38adc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -313,10 +313,13 @@ static int sparx5_create_port(struct sparx5 *sparx5, struct initial_port_config *config) { struct sparx5_port *spx5_port; + const struct sparx5_ops *ops; struct net_device *ndev; struct phylink *phylink; int err; + ops = sparx5->data->ops; + ndev = sparx5_create_netdev(sparx5, config->portno); if (IS_ERR(ndev)) { dev_err(sparx5->dev, "Could not create net device: %02u\n", @@ -357,6 +360,9 @@ static int 
sparx5_create_port(struct sparx5 *sparx5, MAC_SYM_PAUSE | MAC_10 | MAC_100 | MAC_1000FD | MAC_2500FD | MAC_5000FD | MAC_10000FD | MAC_25000FD; + if (ops->is_port_rgmii(spx5_port->portno)) + phy_interface_set_rgmii(spx5_port->phylink_config.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_SGMII, spx5_port->phylink_config.supported_interfaces); __set_bit(PHY_INTERFACE_MODE_QSGMII, @@ -830,6 +836,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) struct initial_port_config *configs, *config; struct device_node *np = pdev->dev.of_node; struct device_node *ports, *portnp; + const struct sparx5_ops *ops; struct reset_control *reset; struct sparx5 *sparx5; int idx = 0, err = 0; @@ -851,6 +858,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) return -EINVAL; regs = sparx5->data->regs; + ops = sparx5->data->ops; /* Do switch core reset if available */ reset = devm_reset_control_get_optional_shared(&pdev->dev, "switch"); @@ -880,7 +888,7 @@ static int mchp_sparx5_probe(struct platform_device *pdev) for_each_available_child_of_node(ports, portnp) { struct sparx5_port_config *conf; - struct phy *serdes; + struct phy *serdes = NULL; u32 portno; err = of_property_read_u32(portnp, "reg", &portno); @@ -910,13 +918,17 @@ static int mchp_sparx5_probe(struct platform_device *pdev) conf->sd_sgpio = ~0; else sparx5->sd_sgpio_remapping = true; - serdes = devm_of_phy_get(sparx5->dev, portnp, NULL); - if (IS_ERR(serdes)) { - err = dev_err_probe(sparx5->dev, PTR_ERR(serdes), - "port %u: missing serdes\n", - portno); - of_node_put(portnp); - goto cleanup_config; + /* There is no SerDes node for RGMII ports. 
*/ + if (!ops->is_port_rgmii(portno)) { + serdes = devm_of_phy_get(sparx5->dev, portnp, NULL); + if (IS_ERR(serdes)) { + err = dev_err_probe(sparx5->dev, + PTR_ERR(serdes), + "port %u: missing serdes\n", + portno); + of_node_put(portnp); + goto cleanup_config; + } } config->portno = portno; config->node = portnp; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index f39bf4878e11..996dc4343019 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -1090,6 +1090,9 @@ int sparx5_port_init(struct sparx5 *sparx5, ANA_CL_FILTER_CTRL_FILTER_SMAC_MC_DIS, sparx5, ANA_CL_FILTER_CTRL(port->portno)); + if (ops->is_port_rgmii(port->portno)) + return 0; /* RGMII device - nothing more to configure */ + /* Configure MAC vlan awareness */ err = sparx5_port_max_tags_set(sparx5, port); if (err) From d9450934f915a97b09f035866acd5da302f0dc12 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:43 +0100 Subject: [PATCH 0645/1386] net: sparx5: skip low-speed configuration when port is RGMII When doing a port config, we configure low-speed port devices, among other things. We have a check to ensure that the device is indeed a low-speed device, and not a high-speed device. Add an additional check to ensure that the device is not an RGMII device.
Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-4-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 996dc4343019..0a1374422ccb 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -994,6 +994,7 @@ int sparx5_port_config(struct sparx5 *sparx5, struct sparx5_port *port, struct sparx5_port_config *conf) { + bool rgmii = phy_interface_mode_is_rgmii(conf->phy_mode); bool high_speed_dev = sparx5_is_baser(conf->portmode); const struct sparx5_ops *ops = sparx5->data->ops; int err, urgency, stop_wm; @@ -1003,7 +1004,7 @@ int sparx5_port_config(struct sparx5 *sparx5, return err; /* high speed device is already configured */ - if (!high_speed_dev) + if (!rgmii && !high_speed_dev) sparx5_port_config_low_set(sparx5, port, conf); /* Configure flow control */ From 9b8d70ecfef7abcabe265be4faeb07e552383520 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:44 +0100 Subject: [PATCH 0646/1386] net: sparx5: only return PCS for modes that require it The RGMII ports have no PCS to configure. Make sure we only return the PCS for port modes that require it. 
Reviewed-by: Russell King (Oracle) Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-5-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/sparx5/sparx5_phylink.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c index f8562c1a894d..035d2f1bea0d 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_phylink.c @@ -32,7 +32,19 @@ sparx5_phylink_mac_select_pcs(struct phylink_config *config, { struct sparx5_port *port = netdev_priv(to_net_dev(config->dev)); - return &port->phylink_pcs; + /* Return the PCS for all the modes that require it. */ + switch (interface) { + case PHY_INTERFACE_MODE_SGMII: + case PHY_INTERFACE_MODE_QSGMII: + case PHY_INTERFACE_MODE_1000BASEX: + case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_5GBASER: + case PHY_INTERFACE_MODE_10GBASER: + case PHY_INTERFACE_MODE_25GBASER: + return &port->phylink_pcs; + default: + return NULL; + } } static void sparx5_phylink_mac_config(struct phylink_config *config, From 95e467b85e6930d34093b7770c7ed964113589b0 Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:45 +0100 Subject: [PATCH 0647/1386] net: sparx5: verify RGMII speeds When doing a port config, we verify the port speed against the PHY mode and supported speeds of that PHY mode. Add checks for the four RGMII phy modes: RGMII, RGMII_ID, RGMII_TXID and RGMII_RXID. 
Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-6-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microchip/sparx5/sparx5_port.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 0a1374422ccb..86d6c9e9ec7c 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -257,6 +257,15 @@ static int sparx5_port_verify_speed(struct sparx5 *sparx5, conf->speed != SPEED_25000)) return sparx5_port_error(port, conf, SPX5_PERR_SPEED); break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: + if (conf->speed != SPEED_1000 && + conf->speed != SPEED_100 && + conf->speed != SPEED_10) + return sparx5_port_error(port, conf, SPX5_PERR_SPEED); + break; default: return sparx5_port_error(port, conf, SPX5_PERR_IFTYPE); } From fb6ac1829bb5865768e75517aefb416a3a19569e Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:46 +0100 Subject: [PATCH 0648/1386] net: lan969x: add RGMII registers Configuration of RGMII is done by configuring the GPIO and clock settings in the HSIOWRAP target, and configuring the RGMII port devices in the DEVRGMII target. Both targets contain registers replicated for the number of RGMII port devices, which is two. Add said targets and register macros required to configure RGMII. 
Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-7-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../microchip/sparx5/lan969x/lan969x.c | 3 + .../microchip/sparx5/sparx5_main_regs.h | 145 ++++++++++++++++++ 2 files changed, 148 insertions(+) diff --git a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c index 76f0c8635eb9..be49a99556fe 100644 --- a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c @@ -90,9 +90,12 @@ static const struct sparx5_main_io_resource lan969x_main_iomap[] = { { TARGET_DEV2G5 + 27, 0x30d8000, 1 }, /* 0xe30d8000 */ { TARGET_DEV10G + 9, 0x30dc000, 1 }, /* 0xe30dc000 */ { TARGET_PCS10G_BR + 9, 0x30e0000, 1 }, /* 0xe30e0000 */ + { TARGET_DEVRGMII, 0x30e4000, 1 }, /* 0xe30e4000 */ + { TARGET_DEVRGMII + 1, 0x30e8000, 1 }, /* 0xe30e8000 */ { TARGET_DSM, 0x30ec000, 1 }, /* 0xe30ec000 */ { TARGET_PORT_CONF, 0x30f0000, 1 }, /* 0xe30f0000 */ { TARGET_ASM, 0x3200000, 1 }, /* 0xe3200000 */ + { TARGET_HSIO_WRAP, 0x3408000, 1 }, /* 0xe3408000 */ }; static struct sparx5_sdlb_group lan969x_sdlb_groups[LAN969X_SDLB_GRP_CNT] = { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h index 561344f19062..d9ef4ef137b8 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main_regs.h @@ -37,6 +37,7 @@ enum sparx5_target { TARGET_FDMA = 117, TARGET_GCB = 118, TARGET_HSCH = 119, + TARGET_HSIO_WRAP = 120, TARGET_LRN = 122, TARGET_PCEP = 129, TARGET_PCS10G_BR = 132, @@ -54,6 +55,7 @@ enum sparx5_target { TARGET_VCAP_SUPER = 326, TARGET_VOP = 327, TARGET_XQS = 331, + TARGET_DEVRGMII = 392, NUM_TARGETS = 517 }; @@ -5367,6 +5369,69 @@ extern const struct 
sparx5_regs *regs; #define HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY_GET(x)\ FIELD_GET(HSCH_TAS_STATEMACHINE_CFG_REVISIT_DLY, x) +/* LAN969X ONLY */ +/* HSIOWRAP:XMII_CFG:XMII_CFG */ +#define HSIO_WRAP_XMII_CFG(g) \ + __REG(TARGET_HSIO_WRAP, 0, 1, 116, g, 2, 20, 0, 0, 1, 4) + +#define HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG GENMASK(2, 1) +#define HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG_SET(x)\ + FIELD_PREP(HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG, x) +#define HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG_GET(x)\ + FIELD_GET(HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG, x) + +/* LAN969X ONLY */ +/* HSIOWRAP:XMII_CFG:RGMII_CFG */ +#define HSIO_WRAP_RGMII_CFG(g) \ + __REG(TARGET_HSIO_WRAP, 0, 1, 116, g, 2, 20, 4, 0, 1, 4) + +#define HSIO_WRAP_RGMII_CFG_TX_CLK_CFG GENMASK(4, 2) +#define HSIO_WRAP_RGMII_CFG_TX_CLK_CFG_SET(x)\ + FIELD_PREP(HSIO_WRAP_RGMII_CFG_TX_CLK_CFG, x) +#define HSIO_WRAP_RGMII_CFG_TX_CLK_CFG_GET(x)\ + FIELD_GET(HSIO_WRAP_RGMII_CFG_TX_CLK_CFG, x) + +#define HSIO_WRAP_RGMII_CFG_RGMII_TX_RST BIT(1) +#define HSIO_WRAP_RGMII_CFG_RGMII_TX_RST_SET(x)\ + FIELD_PREP(HSIO_WRAP_RGMII_CFG_RGMII_TX_RST, x) +#define HSIO_WRAP_RGMII_CFG_RGMII_TX_RST_GET(x)\ + FIELD_GET(HSIO_WRAP_RGMII_CFG_RGMII_TX_RST, x) + +#define HSIO_WRAP_RGMII_CFG_RGMII_RX_RST BIT(0) +#define HSIO_WRAP_RGMII_CFG_RGMII_RX_RST_SET(x)\ + FIELD_PREP(HSIO_WRAP_RGMII_CFG_RGMII_RX_RST, x) +#define HSIO_WRAP_RGMII_CFG_RGMII_RX_RST_GET(x)\ + FIELD_GET(HSIO_WRAP_RGMII_CFG_RGMII_RX_RST, x) + +/* LAN969X ONLY */ +/* HSIOWRAP:XMII_CFG:DLL_CFG */ +#define HSIO_WRAP_DLL_CFG(g, r) \ + __REG(TARGET_HSIO_WRAP, 0, 1, 116, g, 2, 20, 12, r, 2, 4) + +#define HSIO_WRAP_DLL_CFG_DLL_ENA BIT(19) +#define HSIO_WRAP_DLL_CFG_DLL_ENA_SET(x)\ + FIELD_PREP(HSIO_WRAP_DLL_CFG_DLL_ENA, x) +#define HSIO_WRAP_DLL_CFG_DLL_ENA_GET(x)\ + FIELD_GET(HSIO_WRAP_DLL_CFG_DLL_ENA, x) + +#define HSIO_WRAP_DLL_CFG_DLL_CLK_ENA BIT(18) +#define HSIO_WRAP_DLL_CFG_DLL_CLK_ENA_SET(x)\ + FIELD_PREP(HSIO_WRAP_DLL_CFG_DLL_CLK_ENA, x) +#define HSIO_WRAP_DLL_CFG_DLL_CLK_ENA_GET(x)\ + 
FIELD_GET(HSIO_WRAP_DLL_CFG_DLL_CLK_ENA, x) + +#define HSIO_WRAP_DLL_CFG_DLL_CLK_SEL GENMASK(17, 15) +#define HSIO_WRAP_DLL_CFG_DLL_CLK_SEL_SET(x)\ + FIELD_PREP(HSIO_WRAP_DLL_CFG_DLL_CLK_SEL, x) +#define HSIO_WRAP_DLL_CFG_DLL_CLK_SEL_GET(x)\ + FIELD_GET(HSIO_WRAP_DLL_CFG_DLL_CLK_SEL, x) + +#define HSIO_WRAP_DLL_CFG_DLL_RST BIT(0) +#define HSIO_WRAP_DLL_CFG_DLL_RST_SET(x)\ + FIELD_PREP(HSIO_WRAP_DLL_CFG_DLL_RST, x) +#define HSIO_WRAP_DLL_CFG_DLL_RST_GET(x)\ + FIELD_GET(HSIO_WRAP_DLL_CFG_DLL_RST, x) + /* LRN:COMMON:COMMON_ACCESS_CTRL */ #define LRN_COMMON_ACCESS_CTRL \ __REG(TARGET_LRN, 0, 1, 0, 0, 1, 72, 0, 0, 1, 4) @@ -8110,4 +8175,84 @@ extern const struct sparx5_regs *regs; #define XQS_CNT(g) \ __REG(TARGET_XQS, 0, 1, 0, g, 1024, 4, 0, 0, 1, 4) +/* LAN969X ONLY */ +/* DEV1G:DEV_CFG_STATUS:DEV_RST_CTRL */ +#define DEVRGMII_DEV_RST_CTRL(t) \ + __REG(TARGET_DEVRGMII, t, 2, 0, 0, 1, 36, 0, 0, 1, 4) + +#define DEVRGMII_DEV_RST_CTRL_SPEED_SEL GENMASK(22, 20) +#define DEVRGMII_DEV_RST_CTRL_SPEED_SEL_SET(x)\ + FIELD_PREP(DEVRGMII_DEV_RST_CTRL_SPEED_SEL, x) +#define DEVRGMII_DEV_RST_CTRL_SPEED_SEL_GET(x)\ + FIELD_GET(DEVRGMII_DEV_RST_CTRL_SPEED_SEL, x) + +/* LAN969X ONLY */ +/* DEV1G:MAC_CFG_STATUS:MAC_ENA_CFG */ +#define DEVRGMII_MAC_ENA_CFG(t) \ + __REG(TARGET_DEVRGMII, t, 2, 36, 0, 1, 36, 0, 0, 1, 4) + +#define DEVRGMII_MAC_ENA_CFG_RX_ENA BIT(4) +#define DEVRGMII_MAC_ENA_CFG_RX_ENA_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_ENA_CFG_RX_ENA, x) +#define DEVRGMII_MAC_ENA_CFG_RX_ENA_GET(x)\ + FIELD_GET(DEVRGMII_MAC_ENA_CFG_RX_ENA, x) + +#define DEVRGMII_MAC_ENA_CFG_TX_ENA BIT(0) +#define DEVRGMII_MAC_ENA_CFG_TX_ENA_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_ENA_CFG_TX_ENA, x) +#define DEVRGMII_MAC_ENA_CFG_TX_ENA_GET(x)\ + FIELD_GET(DEVRGMII_MAC_ENA_CFG_TX_ENA, x) + +/* LAN969X ONLY */ +/* DEV1G:MAC_CFG_STATUS:MAC_TAGS_CFG */ +#define DEVRGMII_MAC_TAGS_CFG(t) \ + __REG(TARGET_DEVRGMII, t, 2, 36, 0, 1, 36, 12, 0, 1, 4) + +#define DEVRGMII_MAC_TAGS_CFG_TAG_ID GENMASK(31, 16) +#define 
DEVRGMII_MAC_TAGS_CFG_TAG_ID_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_TAGS_CFG_TAG_ID, x) +#define DEVRGMII_MAC_TAGS_CFG_TAG_ID_GET(x)\ + FIELD_GET(DEVRGMII_MAC_TAGS_CFG_TAG_ID, x) + +#define DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA BIT(3) +#define DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, x) +#define DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_GET(x)\ + FIELD_GET(DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA, x) + +#define DEVRGMII_MAC_TAGS_CFG_PB_ENA GENMASK(2, 1) +#define DEVRGMII_MAC_TAGS_CFG_PB_ENA_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_TAGS_CFG_PB_ENA, x) +#define DEVRGMII_MAC_TAGS_CFG_PB_ENA_GET(x)\ + FIELD_GET(DEVRGMII_MAC_TAGS_CFG_PB_ENA, x) + +#define DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA BIT(0) +#define DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA, x) +#define DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA_GET(x)\ + FIELD_GET(DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA, x) + +/* LAN969X ONLY */ +/* DEV1G:MAC_CFG_STATUS:MAC_IFG_CFG */ +#define DEVRGMII_MAC_IFG_CFG(t) \ + __REG(TARGET_DEVRGMII, t, 2, 36, 0, 1, 36, 24, 0, 1, 4) + +#define DEVRGMII_MAC_IFG_CFG_TX_IFG GENMASK(12, 8) +#define DEVRGMII_MAC_IFG_CFG_TX_IFG_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_IFG_CFG_TX_IFG, x) +#define DEVRGMII_MAC_IFG_CFG_TX_IFG_GET(x)\ + FIELD_GET(DEVRGMII_MAC_IFG_CFG_TX_IFG, x) + +#define DEVRGMII_MAC_IFG_CFG_RX_IFG2 GENMASK(7, 4) +#define DEVRGMII_MAC_IFG_CFG_RX_IFG2_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_IFG_CFG_RX_IFG2, x) +#define DEVRGMII_MAC_IFG_CFG_RX_IFG2_GET(x)\ + FIELD_GET(DEVRGMII_MAC_IFG_CFG_RX_IFG2, x) + +#define DEVRGMII_MAC_IFG_CFG_RX_IFG1 GENMASK(3, 0) +#define DEVRGMII_MAC_IFG_CFG_RX_IFG1_SET(x)\ + FIELD_PREP(DEVRGMII_MAC_IFG_CFG_RX_IFG1, x) +#define DEVRGMII_MAC_IFG_CFG_RX_IFG1_GET(x)\ + FIELD_GET(DEVRGMII_MAC_IFG_CFG_RX_IFG1, x) + #endif /* _SPARX5_MAIN_REGS_H_ */ From 010fe5dff1644f60520302fd43776a54402b623f Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:47 +0100 Subject: [PATCH 
0649/1386] net: lan969x: add RGMII implementation The lan969x switch device includes two RGMII port interfaces (port 28 and 29) supporting data speeds of 1 Gbps, 100 Mbps and 10 Mbps. MAC level delays are configurable through the HSIO_WRAP target, by choosing a phase shift selector, corresponding to a certain time delay in nano seconds. Add new file: lan969x_rgmii.c that contains the implementation for configuring the RGMII port devices. MAC level delays are configured using the "{rx,tx}-internal-delay-ps" properties. These properties must be specified independently of the phy-mode. If missing, or set to zero, the MAC will not apply any delay. Reviewed-by: Steen Hegelund Reviewed-by: Horatiu Vultur Tested-by: Robert Marko Signed-off-by: Daniel Machon Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-8-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/microchip/sparx5/Makefile | 3 +- .../microchip/sparx5/lan969x/lan969x.c | 1 + .../microchip/sparx5/lan969x/lan969x.h | 5 + .../microchip/sparx5/lan969x/lan969x_rgmii.c | 224 ++++++++++++++++++ .../ethernet/microchip/sparx5/sparx5_main.h | 2 + .../ethernet/microchip/sparx5/sparx5_port.c | 6 + 6 files changed, 240 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/microchip/sparx5/lan969x/lan969x_rgmii.c diff --git a/drivers/net/ethernet/microchip/sparx5/Makefile b/drivers/net/ethernet/microchip/sparx5/Makefile index 4bf2a885a9da..3f34e83246a0 100644 --- a/drivers/net/ethernet/microchip/sparx5/Makefile +++ b/drivers/net/ethernet/microchip/sparx5/Makefile @@ -20,7 +20,8 @@ sparx5-switch-$(CONFIG_LAN969X_SWITCH) += lan969x/lan969x_regs.o \ lan969x/lan969x.o \ lan969x/lan969x_calendar.o \ lan969x/lan969x_vcap_ag_api.o \ - lan969x/lan969x_vcap_impl.o + lan969x/lan969x_vcap_impl.o \ + lan969x/lan969x_rgmii.o # Provide include files ccflags-y += -I$(srctree)/drivers/net/ethernet/microchip/vcap diff --git 
a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c index be49a99556fe..396f76b6eea5 100644 --- a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.c @@ -340,6 +340,7 @@ static const struct sparx5_ops lan969x_ops = { .set_port_mux = &lan969x_port_mux_set, .ptp_irq_handler = &lan969x_ptp_irq_handler, .dsm_calendar_calc = &lan969x_dsm_calendar_calc, + .port_config_rgmii = &lan969x_port_config_rgmii, }; const struct sparx5_match_data lan969x_desc = { diff --git a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h index 4b91c47d6d21..9a7ddebecf1e 100644 --- a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x.h @@ -67,4 +67,9 @@ static inline bool lan969x_port_is_rgmii(int portno) /* lan969x_calendar.c */ int lan969x_dsm_calendar_calc(struct sparx5 *sparx5, u32 taxi, struct sparx5_calendar_data *data); + +/* lan969x_rgmii.c */ +int lan969x_port_config_rgmii(struct sparx5_port *port, + struct sparx5_port_config *conf); + #endif diff --git a/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x_rgmii.c b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x_rgmii.c new file mode 100644 index 000000000000..4e422ca50828 --- /dev/null +++ b/drivers/net/ethernet/microchip/sparx5/lan969x/lan969x_rgmii.c @@ -0,0 +1,224 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Microchip lan969x Switch driver + * + * Copyright (c) 2024 Microchip Technology Inc. and its subsidiaries. 
+ */ + +#include "lan969x.h" + +/* Tx clock selectors */ +#define LAN969X_RGMII_TX_CLK_SEL_125MHZ 1 /* 1000Mbps */ +#define LAN969X_RGMII_TX_CLK_SEL_25MHZ 2 /* 100Mbps */ +#define LAN969X_RGMII_TX_CLK_SEL_2M5MHZ 3 /* 10Mbps */ + +/* Port speed selectors */ +#define LAN969X_RGMII_SPEED_SEL_10 0 /* Select 10Mbps speed */ +#define LAN969X_RGMII_SPEED_SEL_100 1 /* Select 100Mbps speed */ +#define LAN969X_RGMII_SPEED_SEL_1000 2 /* Select 1000Mbps speed */ + +/* Clock delay selectors */ +#define LAN969X_RGMII_CLK_DELAY_SEL_1_0_NS 2 /* Phase shift 45deg */ +#define LAN969X_RGMII_CLK_DELAY_SEL_1_7_NS 3 /* Phase shift 77deg */ +#define LAN969X_RGMII_CLK_DELAY_SEL_2_0_NS 4 /* Phase shift 90deg */ +#define LAN969X_RGMII_CLK_DELAY_SEL_2_5_NS 5 /* Phase shift 112deg */ +#define LAN969X_RGMII_CLK_DELAY_SEL_3_0_NS 6 /* Phase shift 135deg */ +#define LAN969X_RGMII_CLK_DELAY_SEL_3_3_NS 7 /* Phase shift 147deg */ + +#define LAN969X_RGMII_PORT_START_IDX 28 /* Index of the first RGMII port */ +#define LAN969X_RGMII_IFG_TX 4 /* TX Inter Frame Gap value */ +#define LAN969X_RGMII_IFG_RX1 5 /* RX1 Inter Frame Gap value */ +#define LAN969X_RGMII_IFG_RX2 1 /* RX2 Inter Frame Gap value */ + +#define RGMII_PORT_IDX(port) ((port)->portno - LAN969X_RGMII_PORT_START_IDX) + +/* Get the tx clock selector based on the port speed. */ +static int lan969x_rgmii_get_clk_sel(int speed) +{ + return (speed == SPEED_10 ? LAN969X_RGMII_TX_CLK_SEL_2M5MHZ : + speed == SPEED_100 ? LAN969X_RGMII_TX_CLK_SEL_25MHZ : + LAN969X_RGMII_TX_CLK_SEL_125MHZ); +} + +/* Get the port speed selector based on the port speed. */ +static int lan969x_rgmii_get_speed_sel(int speed) +{ + return (speed == SPEED_10 ? LAN969X_RGMII_SPEED_SEL_10 : + speed == SPEED_100 ? LAN969X_RGMII_SPEED_SEL_100 : + LAN969X_RGMII_SPEED_SEL_1000); +} + +/* Get the clock delay selector based on the clock delay in picoseconds. 
*/ +static int lan969x_rgmii_get_clk_delay_sel(struct sparx5_port *port, + u32 delay_ps, u32 *clk_delay_sel) +{ + switch (delay_ps) { + case 0: + /* Hardware default selector. */ + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_2_5_NS; + break; + case 1000: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_1_0_NS; + break; + case 1700: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_1_7_NS; + break; + case 2000: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_2_0_NS; + break; + case 2500: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_2_5_NS; + break; + case 3000: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_3_0_NS; + break; + case 3300: + *clk_delay_sel = LAN969X_RGMII_CLK_DELAY_SEL_3_3_NS; + break; + default: + dev_err(port->sparx5->dev, "Invalid RGMII delay: %u", delay_ps); + return -EINVAL; + } + + return 0; +} + +/* Configure the RGMII tx clock frequency. */ +static void lan969x_rgmii_tx_clk_config(struct sparx5_port *port, + struct sparx5_port_config *conf) +{ + u32 clk_sel = lan969x_rgmii_get_clk_sel(conf->speed); + u32 idx = RGMII_PORT_IDX(port); + + /* Take the RGMII clock domain out of reset and set tx clock + * frequency. + */ + spx5_rmw(HSIO_WRAP_RGMII_CFG_TX_CLK_CFG_SET(clk_sel) | + HSIO_WRAP_RGMII_CFG_RGMII_TX_RST_SET(0) | + HSIO_WRAP_RGMII_CFG_RGMII_RX_RST_SET(0), + HSIO_WRAP_RGMII_CFG_TX_CLK_CFG | + HSIO_WRAP_RGMII_CFG_RGMII_TX_RST | + HSIO_WRAP_RGMII_CFG_RGMII_RX_RST, + port->sparx5, HSIO_WRAP_RGMII_CFG(idx)); +} + +/* Configure the RGMII port device. */ +static void lan969x_rgmii_port_device_config(struct sparx5_port *port, + struct sparx5_port_config *conf) +{ + u32 dtag, dotag, etype, speed_sel, idx = RGMII_PORT_IDX(port); + + speed_sel = lan969x_rgmii_get_speed_sel(conf->speed); + + etype = (port->vlan_type == SPX5_VLAN_PORT_TYPE_S_CUSTOM ? + port->custom_etype : + port->vlan_type == SPX5_VLAN_PORT_TYPE_C ? 
+ ETH_P_8021Q : ETH_P_8021AD); + + dtag = port->max_vlan_tags == SPX5_PORT_MAX_TAGS_TWO; + dotag = port->max_vlan_tags != SPX5_PORT_MAX_TAGS_NONE; + + /* Enable the MAC. */ + spx5_wr(DEVRGMII_MAC_ENA_CFG_RX_ENA_SET(1) | + DEVRGMII_MAC_ENA_CFG_TX_ENA_SET(1), + port->sparx5, DEVRGMII_MAC_ENA_CFG(idx)); + + /* Configure the Inter Frame Gap. */ + spx5_wr(DEVRGMII_MAC_IFG_CFG_TX_IFG_SET(LAN969X_RGMII_IFG_TX) | + DEVRGMII_MAC_IFG_CFG_RX_IFG1_SET(LAN969X_RGMII_IFG_RX1) | + DEVRGMII_MAC_IFG_CFG_RX_IFG2_SET(LAN969X_RGMII_IFG_RX2), + port->sparx5, DEVRGMII_MAC_IFG_CFG(idx)); + + /* Configure port data rate. */ + spx5_wr(DEVRGMII_DEV_RST_CTRL_SPEED_SEL_SET(speed_sel), + port->sparx5, DEVRGMII_DEV_RST_CTRL(idx)); + + /* Configure VLAN awareness. */ + spx5_wr(DEVRGMII_MAC_TAGS_CFG_TAG_ID_SET(etype) | + DEVRGMII_MAC_TAGS_CFG_PB_ENA_SET(dtag) | + DEVRGMII_MAC_TAGS_CFG_VLAN_AWR_ENA_SET(dotag) | + DEVRGMII_MAC_TAGS_CFG_VLAN_LEN_AWR_ENA_SET(dotag), + port->sparx5, + DEVRGMII_MAC_TAGS_CFG(idx)); +} + +/* Configure the RGMII delay lines in the MAC. + * + * We use the "rx-internal-delay-ps" and "tx-internal-delay-ps" properties to + * configure the rx and tx delays for the MAC. If these properties are missing + * or set to zero, the MAC will not apply any delay. + * + * The PHY side delays are determined by the PHY mode + * (e.g. PHY_INTERFACE_MODE_RGMII_{ID, RXID, TXID}), and ignored by the MAC side + * entirely.
+ */ +static int lan969x_rgmii_delay_config(struct sparx5_port *port, + struct sparx5_port_config *conf) +{ + u32 tx_clk_sel, rx_clk_sel, tx_delay_ps = 0, rx_delay_ps = 0; + u32 idx = RGMII_PORT_IDX(port); + int err; + + of_property_read_u32(port->of_node, "rx-internal-delay-ps", + &rx_delay_ps); + + of_property_read_u32(port->of_node, "tx-internal-delay-ps", + &tx_delay_ps); + + err = lan969x_rgmii_get_clk_delay_sel(port, rx_delay_ps, &rx_clk_sel); + if (err) + return err; + + err = lan969x_rgmii_get_clk_delay_sel(port, tx_delay_ps, &tx_clk_sel); + if (err) + return err; + + /* Configure rx delay. */ + spx5_rmw(HSIO_WRAP_DLL_CFG_DLL_RST_SET(0) | + HSIO_WRAP_DLL_CFG_DLL_ENA_SET(1) | + HSIO_WRAP_DLL_CFG_DLL_CLK_ENA_SET(!!rx_delay_ps) | + HSIO_WRAP_DLL_CFG_DLL_CLK_SEL_SET(rx_clk_sel), + HSIO_WRAP_DLL_CFG_DLL_RST | + HSIO_WRAP_DLL_CFG_DLL_ENA | + HSIO_WRAP_DLL_CFG_DLL_CLK_ENA | + HSIO_WRAP_DLL_CFG_DLL_CLK_SEL, + port->sparx5, HSIO_WRAP_DLL_CFG(idx, 0)); + + /* Configure tx delay. */ + spx5_rmw(HSIO_WRAP_DLL_CFG_DLL_RST_SET(0) | + HSIO_WRAP_DLL_CFG_DLL_ENA_SET(1) | + HSIO_WRAP_DLL_CFG_DLL_CLK_ENA_SET(!!tx_delay_ps) | + HSIO_WRAP_DLL_CFG_DLL_CLK_SEL_SET(tx_clk_sel), + HSIO_WRAP_DLL_CFG_DLL_RST | + HSIO_WRAP_DLL_CFG_DLL_ENA | + HSIO_WRAP_DLL_CFG_DLL_CLK_ENA | + HSIO_WRAP_DLL_CFG_DLL_CLK_SEL, + port->sparx5, HSIO_WRAP_DLL_CFG(idx, 1)); + + return 0; +} + +/* Configure GPIO's to be used as RGMII interface. */ +static void lan969x_rgmii_gpio_config(struct sparx5_port *port) +{ + u32 idx = RGMII_PORT_IDX(port); + + /* Enable the RGMII on the GPIOs. 
*/ + spx5_wr(HSIO_WRAP_XMII_CFG_GPIO_XMII_CFG_SET(1), port->sparx5, + HSIO_WRAP_XMII_CFG(!idx)); +} + +int lan969x_port_config_rgmii(struct sparx5_port *port, + struct sparx5_port_config *conf) +{ + int err; + + err = lan969x_rgmii_delay_config(port, conf); + if (err) + return err; + + lan969x_rgmii_tx_clk_config(port, conf); + lan969x_rgmii_gpio_config(port); + lan969x_rgmii_port_device_config(port, conf); + + return 0; +} diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h index c58d7841638e..3ae760da17e2 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.h +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.h @@ -324,6 +324,8 @@ struct sparx5_ops { irqreturn_t (*ptp_irq_handler)(int irq, void *args); int (*dsm_calendar_calc)(struct sparx5 *sparx5, u32 taxi, struct sparx5_calendar_data *data); + int (*port_config_rgmii)(struct sparx5_port *port, + struct sparx5_port_config *conf); }; struct sparx5_main_io_resource { diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c index 86d6c9e9ec7c..04bc8fffaf96 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_port.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_port.c @@ -1012,6 +1012,12 @@ int sparx5_port_config(struct sparx5 *sparx5, if (err) return err; + if (rgmii) { + err = ops->port_config_rgmii(port, conf); + if (err) + return err; + } + /* high speed device is already configured */ if (!rgmii && !high_speed_dev) sparx5_port_config_low_set(sparx5, port, conf); From f0706c04721becee4e0576f0c56e871c11b1e84e Mon Sep 17 00:00:00 2001 From: Daniel Machon Date: Fri, 20 Dec 2024 14:48:48 +0100 Subject: [PATCH 0650/1386] dt-bindings: net: sparx5: document RGMII delays The lan969x switch device supports two RGMII port interfaces that can be configured for MAC level rx and tx delays. 
Document two new properties {rx,tx}-internal-delay-ps in the bindings, used to select these delays. Tested-by: Robert Marko Reviewed-by: Rob Herring (Arm) Signed-off-by: Daniel Machon Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/20241220-sparx5-lan969x-switch-driver-4-v5-9-fa8ba5dff732@microchip.com Signed-off-by: Jakub Kicinski --- .../bindings/net/microchip,sparx5-switch.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml index dedfad526666..a73fc5036905 100644 --- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -129,6 +129,24 @@ properties: minimum: 0 maximum: 383 + rx-internal-delay-ps: + description: + RGMII Receive Clock Delay defined in pico seconds, used to select + the DLL phase shift between 1000 ps (45 degree shift at 1Gbps) and + 3300 ps (147 degree shift at 1Gbps). A value of 0 ps will disable + any delay. The Default is no delay. + enum: [0, 1000, 1700, 2000, 2500, 3000, 3300] + default: 0 + + tx-internal-delay-ps: + description: + RGMII Transmit Clock Delay defined in pico seconds, used to select + the DLL phase shift between 1000 ps (45 degree shift at 1Gbps) and + 3300 ps (147 degree shift at 1Gbps). A value of 0 ps will disable + any delay. The Default is no delay. + enum: [0, 1000, 1700, 2000, 2500, 3000, 3300] + default: 0 + required: - reg - phys From f8bcfb2076331388c794e6cd1800132cac6fd965 Mon Sep 17 00:00:00 2001 From: Chih-Kang Chang Date: Thu, 19 Dec 2024 09:45:11 +0800 Subject: [PATCH 0651/1386] wifi: rtw89: adjust thermal protection step and more RTL8852BE-VT ID To have better tuning of TX duty for fields usage, adjust thermal protection step to 5, then TX duty reduce to 75% at most. Add RTL8852BE-VT ID to enable this function by the way. 
Signed-off-by: Chih-Kang Chang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241219014511.9056-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/core.h | 2 +- drivers/net/wireless/realtek/rtw89/rtw8852bte.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/realtek/rtw89/core.h b/drivers/net/wireless/realtek/rtw89/core.h index 155538370a89..37722a0b6a8b 100644 --- a/drivers/net/wireless/realtek/rtw89/core.h +++ b/drivers/net/wireless/realtek/rtw89/core.h @@ -4728,7 +4728,7 @@ enum rtw89_dm_type { }; #define RTW89_THERMAL_PROT_LV_MAX 5 -#define RTW89_THERMAL_PROT_STEP 19 /* -19% for each level */ +#define RTW89_THERMAL_PROT_STEP 5 /* -5% for each level */ struct rtw89_hal { u32 rx_fltr; diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852bte.c b/drivers/net/wireless/realtek/rtw89/rtw8852bte.c index e4f40c2e287d..d1eebecfcd73 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852bte.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852bte.c @@ -9,6 +9,12 @@ #include "reg.h" #include "rtw8852bt.h" +static const struct rtw89_pci_ssid_quirk rtw8852bt_pci_ssid_quirks[] = { + {RTW89_PCI_SSID(PCI_VENDOR_ID_REALTEK, 0xB520, 0x103C, 0x88E9, HP), + .bitmap = BIT(RTW89_QUIRK_THERMAL_PROT_110C)}, + {}, +}; + static const struct rtw89_pci_info rtw8852bt_pci_info = { .gen_def = &rtw89_pci_gen_ax, .txbd_trunc_mode = MAC_AX_BD_TRUNC, @@ -61,7 +67,7 @@ static const struct rtw89_pci_info rtw8852bt_pci_info = { .disable_intr = rtw89_pci_disable_intr, .recognize_intrs = rtw89_pci_recognize_intrs, - .ssid_quirks = NULL, + .ssid_quirks = rtw8852bt_pci_ssid_quirks, }; static const struct rtw89_driver_info rtw89_8852bte_info = { From 2927cb7b3b4ffd56d08749a673a463853f743205 Mon Sep 17 00:00:00 2001 From: Zong-Zhe Yang Date: Thu, 19 Dec 2024 09:45:45 +0800 Subject: [PATCH 0652/1386] wifi: rtw89: debug: print regd for QATAR/UK/THAILAND The regd enum was extended for these countries. 
But, the dbgfs doesn't recognize them yet. So, add them. Signed-off-by: Zong-Zhe Yang Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241219014545.9157-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/debug.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/debug.c b/drivers/net/wireless/realtek/rtw89/debug.c index fd86752d86f3..09fa977a6e6d 100644 --- a/drivers/net/wireless/realtek/rtw89/debug.c +++ b/drivers/net/wireless/realtek/rtw89/debug.c @@ -812,6 +812,9 @@ static void __print_regd(struct seq_file *m, struct rtw89_dev *rtwdev, case_REGD(MEXICO); case_REGD(UKRAINE); case_REGD(CN); + case_REGD(QATAR); + case_REGD(UK); + case_REGD(THAILAND); } } From 93b3a45645f13290745ef58bf99ad0877af29381 Mon Sep 17 00:00:00 2001 From: Bitterblue Smith Date: Mon, 23 Dec 2024 17:01:57 +0200 Subject: [PATCH 0653/1386] wifi: rtl8xxxu: Fix RTL8188EU firmware upload block size A user reports that the firmware upload consistently fails when it's uploaded in chunks of 128 bytes, but it works when uploaded in chunks of 196 bytes. The official driver uses 196 bytes also. 
Link: https://github.com/a5a5aa555oo/rtl8xxxu/issues/2 Signed-off-by: Bitterblue Smith Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/cba8e2f2-32c4-4174-90ba-0219f29dbdde@gmail.com --- drivers/net/wireless/realtek/rtl8xxxu/8188e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/8188e.c b/drivers/net/wireless/realtek/rtl8xxxu/8188e.c index 3d04df0f5bf4..766a7a7c7d28 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/8188e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/8188e.c @@ -1860,7 +1860,7 @@ struct rtl8xxxu_fileops rtl8188eu_fops = { .set_crystal_cap = rtl8188f_set_crystal_cap, .cck_rssi = rtl8188e_cck_rssi, .led_classdev_brightness_set = rtl8188eu_led_brightness_set, - .writeN_block_size = 128, + .writeN_block_size = 196, .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc16), .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), .has_tx_report = 1, From bddfe23be8f84e66b1920140a6e11400fae4f74a Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 25 Dec 2024 01:24:23 +0000 Subject: [PATCH 0654/1386] net: mac802154: Remove unused ieee802154_mlme_tx_one ieee802154_mlme_tx_one() was added in 2022 by commit ddd9ee7cda12 ("net: mac802154: Introduce a synchronous API for MLME commands") but has remained unused. Remove it. Note, there's still a ieee802154_mlme_tx_one_locked() variant that is used. Signed-off-by: Dr. 
David Alan Gilbert Acked-by: Miquel Raynal Link: https://lore.kernel.org/20241225012423.439229-1-linux@treblig.org Signed-off-by: Stefan Schmidt --- net/mac802154/ieee802154_i.h | 3 --- net/mac802154/tx.c | 13 ------------- 2 files changed, 16 deletions(-) diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h index 08dd521a51a5..8f2bff268392 100644 --- a/net/mac802154/ieee802154_i.h +++ b/net/mac802154/ieee802154_i.h @@ -194,9 +194,6 @@ int ieee802154_mlme_tx_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, struct sk_buff *skb); void ieee802154_mlme_op_post(struct ieee802154_local *local); -int ieee802154_mlme_tx_one(struct ieee802154_local *local, - struct ieee802154_sub_if_data *sdata, - struct sk_buff *skb); int ieee802154_mlme_tx_one_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, struct sk_buff *skb); diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c index 337d6faf0d2a..4d13f18f6f2c 100644 --- a/net/mac802154/tx.c +++ b/net/mac802154/tx.c @@ -178,19 +178,6 @@ void ieee802154_mlme_op_post(struct ieee802154_local *local) ieee802154_release_queue(local); } -int ieee802154_mlme_tx_one(struct ieee802154_local *local, - struct ieee802154_sub_if_data *sdata, - struct sk_buff *skb) -{ - int ret; - - ieee802154_mlme_op_pre(local); - ret = ieee802154_mlme_tx(local, sdata, skb); - ieee802154_mlme_op_post(local); - - return ret; -} - int ieee802154_mlme_tx_one_locked(struct ieee802154_local *local, struct ieee802154_sub_if_data *sdata, struct sk_buff *skb) From 7bce3f75189c1c2d7b43af8184b284d783e51375 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 27 Dec 2024 10:19:12 +0100 Subject: [PATCH 0655/1386] MAINTAINERS: update email address of Marek Lindner Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- .mailmap | 2 ++ Documentation/networking/batman-adv.rst | 2 +- MAINTAINERS | 2 +- net/batman-adv/main.h | 2 +- 4 files changed, 5 insertions(+),
3 deletions(-) diff --git a/.mailmap b/.mailmap index 5ff0e5d681e7..53e038f5f654 100644 --- a/.mailmap +++ b/.mailmap @@ -427,6 +427,8 @@ Marcin Nowakowski Marc Zyngier Marek Behún Marek Behún Marek Behun +Marek Lindner +Marek Lindner Mark Brown Mark Starovoytov Markus Schneider-Pargmann diff --git a/Documentation/networking/batman-adv.rst b/Documentation/networking/batman-adv.rst index 8a0dcb1894b4..44b9b5cc0e24 100644 --- a/Documentation/networking/batman-adv.rst +++ b/Documentation/networking/batman-adv.rst @@ -164,5 +164,5 @@ Mailing-list: You can also contact the Authors: -* Marek Lindner +* Marek Lindner * Simon Wunderlich diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b1..c5e909a759e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3867,7 +3867,7 @@ S: Maintained F: drivers/platform/x86/barco-p50-gpio.c BATMAN ADVANCED -M: Marek Lindner +M: Marek Lindner M: Simon Wunderlich M: Antonio Quartulli M: Sven Eckelmann diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 1fbe3a4dd965..964f3088af5b 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -7,7 +7,7 @@ #ifndef _NET_BATMAN_ADV_MAIN_H_ #define _NET_BATMAN_ADV_MAIN_H_ -#define BATADV_DRIVER_AUTHOR "Marek Lindner , " \ +#define BATADV_DRIVER_AUTHOR "Marek Lindner , " \ "Simon Wunderlich " #define BATADV_DRIVER_DESC "B.A.T.M.A.N. advanced" #define BATADV_DRIVER_DEVICE "batman-adv" From 1f5f7ff46435226f34112e0d179f6cbd9eb30b04 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Tue, 31 Dec 2024 10:31:29 +0100 Subject: [PATCH 0656/1386] mailmap: add entries for Simon Wunderlich Map the defunct mail addresses to the currently used mail address (listed in MAINTAINERS). 
Signed-off-by: Simon Wunderlich --- .mailmap | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.mailmap b/.mailmap index 53e038f5f654..efabe03f7849 100644 --- a/.mailmap +++ b/.mailmap @@ -642,6 +642,11 @@ Simona Vetter Simon Horman Simon Horman Simon Kelley +Simon Wunderlich +Simon Wunderlich +Simon Wunderlich +Simon Wunderlich +Simon Wunderlich Sricharan Ramabadhran Srinivas Ramana Sriram R From 285c72be944089e333ae5f49211b38e460f9528d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Tue, 31 Dec 2024 10:31:29 +0100 Subject: [PATCH 0657/1386] mailmap: add entries for Sven Eckelmann Map the defunct mail addresses to the currently used mail address (listed in MAINTAINERS). Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- .mailmap | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.mailmap b/.mailmap index efabe03f7849..41aca254671d 100644 --- a/.mailmap +++ b/.mailmap @@ -667,6 +667,11 @@ Sudarshan Rajagopalan Sudeep Holla Sudeep KarkadaNagesha Sumit Semwal Surabhi Vishnoi +Sven Eckelmann +Sven Eckelmann +Sven Eckelmann +Sven Eckelmann +Sven Eckelmann Takashi YOSHII Tamizh Chelvam Raja Taniya Das From c83ca5a4df7cf0ce9ccc25e8481043e05aed6ad0 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 20 Dec 2024 23:02:06 +0100 Subject: [PATCH 0658/1386] net: phy: fix phy_disable_eee genphy_c45_write_eee_adv() becomes a no-op if phydev->supported_eee is cleared. That's not what we want because this function is still needed to clear the EEE advertisement register(s). Fill phydev->eee_broken_modes instead to ensure that userspace can't re-enable EEE advertising. 
Fixes: b55498ff14bd ("net: phy: add phy_disable_eee") Signed-off-by: Heiner Kallweit Link: https://patch.msgid.link/57e2ae5f-4319-413c-b5c4-ebc8d049bc23@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 928dc3c509b6..bdc997f59779 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -3012,10 +3012,11 @@ EXPORT_SYMBOL(phy_support_eee); */ void phy_disable_eee(struct phy_device *phydev) { - linkmode_zero(phydev->supported_eee); linkmode_zero(phydev->advertising_eee); phydev->eee_cfg.tx_lpi_enabled = false; phydev->eee_cfg.eee_enabled = false; + /* don't let userspace re-enable EEE advertisement */ + linkmode_fill(phydev->eee_broken_modes); } EXPORT_SYMBOL_GPL(phy_disable_eee); From be16b46f9ebd3270a4c4da81e5d7bb34b6f43384 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Sun, 22 Dec 2024 21:08:20 +0100 Subject: [PATCH 0659/1386] ptp: ocp: constify 'struct bin_attribute' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysfs core now allows instances of 'struct bin_attribute' to be moved into read-only memory. Make use of that to protect them against accidental or malicious modifications. 
Signed-off-by: Thomas Weißschuh Reviewed-by: Vadim Fedorenko Link: https://patch.msgid.link/20241222-sysfs-const-bin_attr-ptp-v1-1-5c1f3ee246fb@weissschuh.net Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_ocp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 5feecaadde8e..7f08c70d8123 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -3692,7 +3692,7 @@ DEVICE_FREQ_GROUP(freq4, 3); static ssize_t disciplining_config_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct ptp_ocp *bp = dev_get_drvdata(kobj_to_dev(kobj)); @@ -3727,7 +3727,7 @@ out: static ssize_t disciplining_config_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct ptp_ocp *bp = dev_get_drvdata(kobj_to_dev(kobj)); @@ -3750,11 +3750,11 @@ disciplining_config_write(struct file *filp, struct kobject *kobj, return err; } -static BIN_ATTR_RW(disciplining_config, OCP_ART_CONFIG_SIZE); +static const BIN_ATTR_RW(disciplining_config, OCP_ART_CONFIG_SIZE); static ssize_t temperature_table_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct ptp_ocp *bp = dev_get_drvdata(kobj_to_dev(kobj)); @@ -3789,7 +3789,7 @@ out: static ssize_t temperature_table_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buf, + const struct bin_attribute *bin_attr, char *buf, loff_t off, size_t count) { struct ptp_ocp *bp = dev_get_drvdata(kobj_to_dev(kobj)); @@ -3812,7 +3812,7 @@ temperature_table_write(struct file *filp, struct kobject *kobj, return err; } -static BIN_ATTR_RW(temperature_table, OCP_ART_TEMP_TABLE_SIZE); 
+static const BIN_ATTR_RW(temperature_table, OCP_ART_TEMP_TABLE_SIZE); static struct attribute *fb_timecard_attrs[] = { &dev_attr_serialnum.attr, @@ -3867,7 +3867,7 @@ static struct attribute *art_timecard_attrs[] = { NULL, }; -static struct bin_attribute *bin_art_timecard_attrs[] = { +static const struct bin_attribute *const bin_art_timecard_attrs[] = { &bin_attr_disciplining_config, &bin_attr_temperature_table, NULL, @@ -3875,7 +3875,7 @@ static struct bin_attribute *bin_art_timecard_attrs[] = { static const struct attribute_group art_timecard_group = { .attrs = art_timecard_attrs, - .bin_attrs = bin_art_timecard_attrs, + .bin_attrs_new = bin_art_timecard_attrs, }; static const struct ocp_attr_group art_timecard_groups[] = { From bb70b0d48d8eab20644ca0101fedfe23f8a26c59 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 24 Dec 2024 20:37:06 +0200 Subject: [PATCH 0660/1386] devlink: Improve the port attributes description Current PF number description is vague, sometimes interpreted as some PF index. VF number in the PCI specification starts at 1; however in kernel, it starts at 0 for representor model. Improve the description of devlink port attributes PF, VF and SF numbers with these details. Reviewed-by: Sridhar Samudrala Reviewed-by: Shay Drory Reviewed-by: Mark Bloch Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Link: https://patch.msgid.link/20241224183706.26571-1-parav@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/devlink.h | 11 ++++++----- net/devlink/port.c | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/devlink.h b/include/net/devlink.h index 58e33959c852..fc79fe2297a1 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -35,7 +35,7 @@ struct devlink_port_phys_attrs { /** * struct devlink_port_pci_pf_attrs - devlink port's PCI PF attributes * @controller: Associated controller number - * @pf: Associated PCI PF number for this port. 
+ * @pf: associated PCI function number for the devlink port instance * @external: when set, indicates if a port is for an external controller */ struct devlink_port_pci_pf_attrs { @@ -47,8 +47,9 @@ struct devlink_port_pci_pf_attrs { /** * struct devlink_port_pci_vf_attrs - devlink port's PCI VF attributes * @controller: Associated controller number - * @pf: Associated PCI PF number for this port. - * @vf: Associated PCI VF for of the PCI PF for this port. + * @pf: associated PCI function number for the devlink port instance + * @vf: associated PCI VF number of a PF for the devlink port instance; + * VF number starts from 0 for the first PCI virtual function * @external: when set, indicates if a port is for an external controller */ struct devlink_port_pci_vf_attrs { @@ -61,8 +62,8 @@ struct devlink_port_pci_vf_attrs { /** * struct devlink_port_pci_sf_attrs - devlink port's PCI SF attributes * @controller: Associated controller number - * @sf: Associated PCI SF for of the PCI PF for this port. - * @pf: Associated PCI PF number for this port. 
+ * @sf: associated SF number of a PF for the devlink port instance + * @pf: associated PCI function number for the devlink port instance * @external: when set, indicates if a port is for an external controller */ struct devlink_port_pci_sf_attrs { diff --git a/net/devlink/port.c b/net/devlink/port.c index be9158b4453c..939081a0e615 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -1376,7 +1376,7 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_set); * * @devlink_port: devlink port * @controller: associated controller number for the devlink port instance - * @pf: associated PF for the devlink port instance + * @pf: associated PCI function number for the devlink port instance * @external: indicates if the port is for an external controller */ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 controller, @@ -1402,8 +1402,9 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_pf_set); * * @devlink_port: devlink port * @controller: associated controller number for the devlink port instance - * @pf: associated PF for the devlink port instance - * @vf: associated VF of a PF for the devlink port instance + * @pf: associated PCI function number for the devlink port instance + * @vf: associated PCI VF number of a PF for the devlink port instance; + * VF number starts from 0 for the first PCI virtual function * @external: indicates if the port is for an external controller */ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 controller, @@ -1430,8 +1431,8 @@ EXPORT_SYMBOL_GPL(devlink_port_attrs_pci_vf_set); * * @devlink_port: devlink port * @controller: associated controller number for the devlink port instance - * @pf: associated PF for the devlink port instance - * @sf: associated SF of a PF for the devlink port instance + * @pf: associated PCI function number for the devlink port instance + * @sf: associated SF number of a PF for the devlink port instance * @external: indicates if the port is for an external controller */ void 
devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, From 5df7ca0b827dac9fc44b42a4b695adcaa6d07ffb Mon Sep 17 00:00:00 2001 From: Yu Tian Date: Tue, 31 Dec 2024 10:36:10 +0800 Subject: [PATCH 0661/1386] ipv4: remove useless arg The "struct sock *sk" parameter in ip_rcv_finish_core is unused, which leads the compiler to optimize it out. As a result, the "struct sk_buff *skb" parameter is passed using x1. And this makes kprobe hard to use. Signed-off-by: Yu Tian Link: https://patch.msgid.link/20241231023610.1657926-1-tianyu2@kernelsoft.com Signed-off-by: Jakub Kicinski --- net/ipv4/ip_input.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index f0a4dda246ab..30a5e9460d00 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -314,7 +314,7 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_early_demux(struct sk_buff *skb); int udp_v4_early_demux(struct sk_buff *skb); -static int ip_rcv_finish_core(struct net *net, struct sock *sk, +static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) { @@ -442,7 +442,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) if (!skb) return NET_RX_SUCCESS; - ret = ip_rcv_finish_core(net, sk, skb, dev, NULL); + ret = ip_rcv_finish_core(net, skb, dev, NULL); if (ret != NET_RX_DROP) ret = dst_input(skb); return ret; @@ -589,8 +589,7 @@ static struct sk_buff *ip_extract_route_hint(const struct net *net, return skb; } -static void ip_list_rcv_finish(struct net *net, struct sock *sk, - struct list_head *head) +static void ip_list_rcv_finish(struct net *net, struct list_head *head) { struct sk_buff *skb, *next, *hint = NULL; struct dst_entry *curr_dst = NULL; @@ -607,7 +606,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, skb = l3mdev_ip_rcv(skb); if (!skb) continue; - if 
(ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP) + if (ip_rcv_finish_core(net, skb, dev, hint) == NET_RX_DROP) continue; dst = skb_dst(skb); @@ -633,7 +632,7 @@ static void ip_sublist_rcv(struct list_head *head, struct net_device *dev, { NF_HOOK_LIST(NFPROTO_IPV4, NF_INET_PRE_ROUTING, net, NULL, head, dev, NULL, ip_rcv_finish); - ip_list_rcv_finish(net, NULL, head); + ip_list_rcv_finish(net, head); } /* Receive a list of IP packets */ From 94c16fd4df9089931f674fb9aaec41ea20b0fd7a Mon Sep 17 00:00:00 2001 From: Mathieu Othacehe Date: Fri, 27 Dec 2024 10:59:22 +0100 Subject: [PATCH 0662/1386] net: dwmac-imx: add imx93 clock input support in RMII mode If the rmii_refclk_ext boolean is set, configure the ENET QOS TX_CLK pin direction to input. Otherwise, it defaults to output. That mirrors what is already happening for the imx8mp in the imx8mp_set_intf_mode function. Signed-off-by: Mathieu Othacehe Link: https://patch.msgid.link/20241227095923.4414-1-othacehe@gnu.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 43e0fbba4f77..4ac7a78f4b14 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -36,6 +36,8 @@ #define MX93_GPR_ENET_QOS_INTF_SEL_RMII (0x4 << 1) #define MX93_GPR_ENET_QOS_INTF_SEL_RGMII (0x1 << 1) #define MX93_GPR_ENET_QOS_CLK_GEN_EN (0x1 << 0) +#define MX93_GPR_ENET_QOS_CLK_SEL_MASK BIT_MASK(0) +#define MX93_GPR_CLK_SEL_OFFSET (4) #define DMA_BUS_MODE 0x00001000 #define DMA_BUS_MODE_SFT_RESET (0x1 << 0) @@ -108,13 +110,21 @@ imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat) static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) { struct imx_priv_data *dwmac = plat_dat->bsp_priv; - int val; + int val, ret; switch (plat_dat->mac_interface) { 
case PHY_INTERFACE_MODE_MII: val = MX93_GPR_ENET_QOS_INTF_SEL_MII; break; case PHY_INTERFACE_MODE_RMII: + if (dwmac->rmii_refclk_ext) { + ret = regmap_clear_bits(dwmac->intf_regmap, + dwmac->intf_reg_off + + MX93_GPR_CLK_SEL_OFFSET, + MX93_GPR_ENET_QOS_CLK_SEL_MASK); + if (ret) + return ret; + } val = MX93_GPR_ENET_QOS_INTF_SEL_RMII; break; case PHY_INTERFACE_MODE_RGMII: From 3fff5da4ca2164bb4d0f1e6cd33f6eb8a0e73e50 Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Mon, 30 Dec 2024 12:56:47 -0800 Subject: [PATCH 0663/1386] team: prevent adding a device which is already a team device lower Prevent adding a device which is already a team device lower, e.g. adding veth0 if vlan1 was already added and veth0 is a lower of vlan1. This is not useful in practice and can lead to recursive locking: $ ip link add veth0 type veth peer name veth1 $ ip link set veth0 up $ ip link set veth1 up $ ip link add link veth0 name veth0.1 type vlan protocol 802.1Q id 1 $ ip link add team0 type team $ ip link set veth0.1 down $ ip link set veth0.1 master team0 team0: Port device veth0.1 added $ ip link set veth0 down $ ip link set veth0 master team0 ============================================ WARNING: possible recursive locking detected 6.13.0-rc2-virtme-00441-ga14a429069bb #46 Not tainted -------------------------------------------- ip/7684 is trying to acquire lock: ffff888016848e00 (team->team_lock_key){+.+.}-{4:4}, at: team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) but task is already holding lock: ffff888016848e00 (team->team_lock_key){+.+.}-{4:4}, at: team_add_slave (drivers/net/team/team_core.c:1147 drivers/net/team/team_core.c:1977) other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(team->team_lock_key); lock(team->team_lock_key); *** DEADLOCK *** May be due to missing lock nesting notation 2 locks held by ip/7684: stack backtrace: CPU: 3 UID: 0 PID: 7684 Comm: 
ip Not tainted 6.13.0-rc2-virtme-00441-ga14a429069bb #46 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 Call Trace: dump_stack_lvl (lib/dump_stack.c:122) print_deadlock_bug.cold (kernel/locking/lockdep.c:3040) __lock_acquire (kernel/locking/lockdep.c:3893 kernel/locking/lockdep.c:5226) ? netlink_broadcast_filtered (net/netlink/af_netlink.c:1548) lock_acquire.part.0 (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5851) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) ? trace_lock_acquire (./include/trace/events/lock.h:24 (discriminator 2)) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) ? lock_acquire (kernel/locking/lockdep.c:5822) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) __mutex_lock (kernel/locking/mutex.c:587 kernel/locking/mutex.c:735) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) ? fib_sync_up (net/ipv4/fib_semantics.c:2167) ? team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) team_device_event (drivers/net/team/team_core.c:2928 drivers/net/team/team_core.c:2951 drivers/net/team/team_core.c:2973) notifier_call_chain (kernel/notifier.c:85) call_netdevice_notifiers_info (net/core/dev.c:1996) __dev_notify_flags (net/core/dev.c:8993) ? __dev_change_flags (net/core/dev.c:8975) dev_change_flags (net/core/dev.c:9027) vlan_device_event (net/8021q/vlan.c:85 net/8021q/vlan.c:470) ? 
br_device_event (net/bridge/br.c:143) notifier_call_chain (kernel/notifier.c:85) call_netdevice_notifiers_info (net/core/dev.c:1996) dev_open (net/core/dev.c:1519 net/core/dev.c:1505) team_add_slave (drivers/net/team/team_core.c:1219 drivers/net/team/team_core.c:1977) ? __pfx_team_add_slave (drivers/net/team/team_core.c:1972) do_set_master (net/core/rtnetlink.c:2917) do_setlink.isra.0 (net/core/rtnetlink.c:3117) Reported-by: syzbot+3c47b5843403a45aef57@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=3c47b5843403a45aef57 Fixes: 3d249d4ca7d0 ("net: introduce ethernet teaming device") Signed-off-by: Octavian Purdila Reviewed-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/team/team_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/team/team_core.c b/drivers/net/team/team_core.c index c7690adec8db..dc7cbd6a9798 100644 --- a/drivers/net/team/team_core.c +++ b/drivers/net/team/team_core.c @@ -1175,6 +1175,13 @@ static int team_port_add(struct team *team, struct net_device *port_dev, return -EBUSY; } + if (netdev_has_upper_dev(port_dev, dev)) { + NL_SET_ERR_MSG(extack, "Device is already a lower device of the team interface"); + netdev_err(dev, "Device %s is already a lower device of the team interface\n", + portname); + return -EBUSY; + } + if (port_dev->features & NETIF_F_VLAN_CHALLENGED && vlan_uses_dev(dev)) { NL_SET_ERR_MSG(extack, "Device is VLAN challenged and team device has VLAN set up"); From a003c38d9bbbacd26b2354795bddb8d25631b0b5 Mon Sep 17 00:00:00 2001 From: Daniel Golle Date: Thu, 2 Jan 2025 12:41:21 +0000 Subject: [PATCH 0664/1386] net: pcs: pcs-mtk-lynxi: correctly report in-band status capabilities Neither does the LynxI PCS support QSGMII, nor is in-band-status supported in 2500Base-X mode. Fix the pcs_inband_caps() method accordingly. 
Fixes: 520d29bdda86 ("net: pcs: pcs-mtk-lynxi: implement pcs_inband_caps() method") Signed-off-by: Daniel Golle Link: https://patch.msgid.link/Z3aJccb1vW14aukg@pidgin.makrotopia.org Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-mtk-lynxi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index 7de804535229..ed91cd7a406a 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -93,11 +93,12 @@ static unsigned int mtk_pcs_lynxi_inband_caps(struct phylink_pcs *pcs, { switch (interface) { case PHY_INTERFACE_MODE_1000BASEX: - case PHY_INTERFACE_MODE_2500BASEX: case PHY_INTERFACE_MODE_SGMII: - case PHY_INTERFACE_MODE_QSGMII: return LINK_INBAND_DISABLE | LINK_INBAND_ENABLE; + case PHY_INTERFACE_MODE_2500BASEX: + return LINK_INBAND_DISABLE; + default: return 0; } From 3569399994384f7e409a560910613edc2ad4a779 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Fri, 20 Dec 2024 16:07:26 +0800 Subject: [PATCH 0665/1386] net: stmmac: TSO: Simplify the code flow of DMA descriptor allocations The TCP Segmentation Offload (TSO) engine is an optional function in DWMAC cores, it is implemented for dwmac4 and dwxgmac2 only, ancient dwmac100 and dwmac1000 are not supported by hardware. Current driver code checks priv->dma_cap.tsoen which is read from MAC_HW_Feature1 register to determine if TSO is enabled in hardware configurations, if (!priv->dma_cap.tsoen) driver never sets NETIF_F_TSO for net_device. This patch never affects dwmac100/dwmac1000 and their stmmac_desc_ops: ndesc_ops/enh_desc_ops, since TSO is never supported by them two. The DMA AXI address width of DWMAC cores can be configured to 32-bit/40-bit/48-bit, then the format of DMA transmit descriptors get a little different between 32-bit and 40-bit/48-bit. Current driver code checks priv->dma_cap.addr64 to use certain format with certain configuration. 
This patch converts the format of DMA transmit descriptors on dwmac4 and dwxgmac2 cores whose DMA AXI address width is configured to 32-bit (as described by the function comments of stmmac_tso_xmit() in the current code) to a more generic format (see the updated function comments after this patch), which is actually already used on 40-bit/48-bit platforms, to provide better compatibility and make the code flow cleaner in the TSO TX routine. Another interesting finding: struct stmmac_desc_ops is a common abstract interface to maintain descriptors, so we should avoid the direct assignment of descriptor members (e.g. desc->des0); stmmac_set_desc_addr() is the proper method instead. This patch improves this along the way. Tested and verified on: DWMAC CORE 5.00a with 32-bit DMA AXI address width DWMAC CORE 5.10a with 32-bit DMA AXI address width DWXGMAC CORE 3.20a with 40-bit DMA AXI address width Signed-off-by: Furong Xu <0x1207@gmail.com> Link: https://patch.msgid.link/20241220080726.1733837-1-0x1207@gmail.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/stmicro/stmmac/stmmac_main.c | 60 ++++++++----------- 1 file changed, 24 insertions(+), 36 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6bc10ffe7a2b..99eaec8bac4a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -4116,11 +4116,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, dma_addr_t des, desc = &tx_q->dma_tx[tx_q->cur_tx]; curr_addr = des + (total_len - tmp_len); - if (priv->dma_cap.addr64 <= 32) - desc->des0 = cpu_to_le32(curr_addr); - else - stmmac_set_desc_addr(priv, desc, curr_addr); - + stmmac_set_desc_addr(priv, desc, curr_addr); buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ? 
TSO_MAX_BUFF_SIZE : tmp_len; @@ -4166,17 +4162,27 @@ static void stmmac_flush_tx_descriptors(struct stmmac_priv *priv, int queue) * First Descriptor * -------- * | DES0 |---> buffer1 = L2/L3/L4 header - * | DES1 |---> TCP Payload (can continue on next descr...) - * | DES2 |---> buffer 1 and 2 len + * | DES1 |---> can be used as buffer2 for TCP Payload if the DMA AXI address + * | | width is 32-bit, but we never use it. + * | | Also can be used as the most-significant 8-bits or 16-bits of + * | | buffer1 address pointer if the DMA AXI address width is 40-bit + * | | or 48-bit, and we always use it. + * | DES2 |---> buffer1 len * | DES3 |---> must set TSE, TCP hdr len-> [22:19]. TCP payload len [17:0] * -------- + * -------- + * | DES0 |---> buffer1 = TCP Payload (can continue on next descr...) + * | DES1 |---> same as the First Descriptor + * | DES2 |---> buffer1 len + * | DES3 | + * -------- * | * ... * | * -------- - * | DES0 | --| Split TCP Payload on Buffers 1 and 2 - * | DES1 | --| - * | DES2 | --> buffer 1 and 2 len + * | DES0 |---> buffer1 = Split TCP Payload + * | DES1 |---> same as the First Descriptor + * | DES2 |---> buffer1 len * | DES3 | * -------- * @@ -4186,15 +4192,14 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) { struct dma_desc *desc, *first, *mss_desc = NULL; struct stmmac_priv *priv = netdev_priv(dev); - int tmp_pay_len = 0, first_tx, nfrags; unsigned int first_entry, tx_packets; struct stmmac_txq_stats *txq_stats; struct stmmac_tx_queue *tx_q; u32 pay_len, mss, queue; - dma_addr_t tso_des, des; + int i, first_tx, nfrags; u8 proto_hdr_len, hdr; + dma_addr_t des; bool set_ic; - int i; /* Always insert VLAN tag to SKB payload for TSO frames. 
* @@ -4279,24 +4284,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) if (dma_mapping_error(priv->device, des)) goto dma_map_err; - if (priv->dma_cap.addr64 <= 32) { - first->des0 = cpu_to_le32(des); - - /* Fill start of payload in buff2 of first descriptor */ - if (pay_len) - first->des1 = cpu_to_le32(des + proto_hdr_len); - - /* If needed take extra descriptors to fill the remaining payload */ - tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE; - tso_des = des; - } else { - stmmac_set_desc_addr(priv, first, des); - tmp_pay_len = pay_len; - tso_des = des + proto_hdr_len; - pay_len = 0; - } - - stmmac_tso_allocator(priv, tso_des, tmp_pay_len, (nfrags == 0), queue); + stmmac_set_desc_addr(priv, first, des); + stmmac_tso_allocator(priv, des + proto_hdr_len, pay_len, + (nfrags == 0), queue); /* In case two or more DMA transmit descriptors are allocated for this * non-paged SKB data, the DMA buffer address should be saved to @@ -4400,11 +4390,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev) } /* Complete the first descriptor before granting the DMA */ - stmmac_prepare_tso_tx_desc(priv, first, 1, - proto_hdr_len, - pay_len, - 1, tx_q->tx_skbuff_dma[first_entry].last_segment, - hdr / 4, (skb->len - proto_hdr_len)); + stmmac_prepare_tso_tx_desc(priv, first, 1, proto_hdr_len, 0, 1, + tx_q->tx_skbuff_dma[first_entry].last_segment, + hdr / 4, (skb->len - proto_hdr_len)); /* If context desc is used to change MSS */ if (mss_desc) { From da0a090a3c6220772801b791845e408ae7579914 Mon Sep 17 00:00:00 2001 From: tuqiang Date: Mon, 25 Nov 2024 20:26:34 +0800 Subject: [PATCH 0666/1386] netfilter: nf_tables: remove the genmask parameter The genmask parameter is not used within the nf_tables_addchain function body. It should be removed to simplify the function parameter list. 
Signed-off-by: tuqiang Signed-off-by: Jiang Kun Reviewed-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0b9f1e8dfe49..f7ca7165e66e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2598,9 +2598,8 @@ int nft_chain_add(struct nft_table *table, struct nft_chain *chain) static u64 chain_id; -static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, - u8 policy, u32 flags, - struct netlink_ext_ack *extack) +static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 policy, + u32 flags, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; struct nft_table *table = ctx->table; @@ -3038,7 +3037,7 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info, extack); } - return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack); + return nf_tables_addchain(&ctx, family, policy, flags, extack); } static int nft_delchain_hook(struct nft_ctx *ctx, From 178883fd039d38a708cc56555489533d9a9c07df Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 3 Dec 2024 12:08:30 +0100 Subject: [PATCH 0667/1386] ipvs: speed up reads from ip_vs_conn proc file Reading is very slow because ->start() performs a linear re-scan of the entire hash table until it finds the successor to the last dumped element. The current implementation uses 'pos' as the number of elements to skip, then does a linear iteration until it has skipped 'pos' entries. Store the last bucket and the number of elements to skip in that bucket instead, so we can resume from bucket b directly. 
Before this patch, it's possible to read ~35k entries in one second, but each read() gets slower as the number of entries to skip grows: time timeout 60 cat /proc/net/ip_vs_conn > /tmp/all; wc -l /tmp/all real 1m0.007s user 0m0.003s sys 0m59.956s 140386 /tmp/all Only ~100k more got read in the remaining 59s, and it did not get anywhere near the 1m entries that are stored at the time. After this patch, the dump completes very quickly: time cat /proc/net/ip_vs_conn > /tmp/all; wc -l /tmp/all real 0m2.286s user 0m0.004s sys 0m2.281s 1000001 /tmp/all Signed-off-by: Florian Westphal Acked-by: Julian Anastasov Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_conn.c | 50 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c0289f83f96d..20a1727e2457 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1046,28 +1046,35 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, #ifdef CONFIG_PROC_FS struct ip_vs_iter_state { struct seq_net_private p; - struct hlist_head *l; + unsigned int bucket; + unsigned int skip_elems; }; -static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) +static void *ip_vs_conn_array(struct ip_vs_iter_state *iter) { int idx; struct ip_vs_conn *cp; - struct ip_vs_iter_state *iter = seq->private; - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { + for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) { + unsigned int skip = 0; + hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { /* __ip_vs_conn_get() is not needed by * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show */ - if (pos-- == 0) { - iter->l = &ip_vs_conn_tab[idx]; + if (skip >= iter->skip_elems) { + iter->bucket = idx; return cp; } + + ++skip; } + + iter->skip_elems = 0; cond_resched_rcu(); } + iter->bucket = idx; return NULL; } @@ -1076,9 +1083,14 @@ static void *ip_vs_conn_seq_start(struct 
seq_file *seq, loff_t *pos) { struct ip_vs_iter_state *iter = seq->private; - iter->l = NULL; rcu_read_lock(); - return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN; + if (*pos == 0) { + iter->skip_elems = 0; + iter->bucket = 0; + return SEQ_START_TOKEN; + } + + return ip_vs_conn_array(iter); } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -1086,28 +1098,22 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; struct hlist_node *e; - struct hlist_head *l = iter->l; - int idx; ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_conn_array(seq, 0); + return ip_vs_conn_array(iter); /* more on same hash chain? */ e = rcu_dereference(hlist_next_rcu(&cp->c_list)); - if (e) + if (e) { + iter->skip_elems++; return hlist_entry(e, struct ip_vs_conn, c_list); - - idx = l - ip_vs_conn_tab; - while (++idx < ip_vs_conn_tab_size) { - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - iter->l = &ip_vs_conn_tab[idx]; - return cp; - } - cond_resched_rcu(); } - iter->l = NULL; - return NULL; + + iter->skip_elems = 0; + iter->bucket++; + + return ip_vs_conn_array(iter); } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) From 95f1c1e98db36ddb9ea79e5c61ec11ec43ebbbaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Dec 2024 11:38:39 +0000 Subject: [PATCH 0668/1386] netfilter: xt_hashlimit: htable_selective_cleanup() optimization I have seen syzbot reports hinting at xt_hashlimit abuse: [ 105.783066][ T4331] xt_hashlimit: max too large, truncated to 1048576 [ 105.811405][ T4331] xt_hashlimit: size too large, truncated to 1048576 And worker threads using up to 1 second per htable_selective_cleanup() invocation. [ 269.734496][ C1] [] ? __local_bh_enable_ip+0x1a0/0x1a0 [ 269.734513][ C1] [] ? lockdep_hardirqs_on_prepare+0x740/0x740 [ 269.734533][ C1] [] ? htable_selective_cleanup+0x25f/0x310 [ 269.734549][ C1] [] ? 
__lock_acquire+0x2060/0x2060 [ 269.734567][ C1] [] ? do_raw_spin_lock+0x14a/0x370 [ 269.734583][ C1] [] ? htable_selective_cleanup+0x25f/0x310 [ 269.734599][ C1] [] __local_bh_enable_ip+0x167/0x1a0 [ 269.734616][ C1] [] ? _local_bh_enable+0xa0/0xa0 [ 269.734634][ C1] [] ? htable_selective_cleanup+0x25f/0x310 [ 269.734651][ C1] [] htable_selective_cleanup+0x25f/0x310 [ 269.734670][ C1] [] ? process_one_work+0x7a9/0x1170 [ 269.734685][ C1] [] htable_gc+0x1b/0xa0 [ 269.734700][ C1] [] ? process_one_work+0x7a9/0x1170 [ 269.734714][ C1] [] process_one_work+0x8a9/0x1170 [ 269.734733][ C1] [] ? worker_detach_from_pool+0x260/0x260 [ 269.734749][ C1] [] ? _raw_spin_lock_irq+0xb7/0xf0 [ 269.734763][ C1] [] ? _raw_spin_lock_irqsave+0x100/0x100 [ 269.734777][ C1] [] ? wq_worker_sleeping+0x5f/0x270 [ 269.734800][ C1] [] worker_thread+0xa47/0x1200 [ 269.734815][ C1] [] ? _raw_spin_lock+0x40/0x40 [ 269.734835][ C1] [] kthread+0x25a/0x2e0 [ 269.734853][ C1] [] ? worker_clr_flags+0x190/0x190 [ 269.734866][ C1] [] ? kthread_blkcg+0xd0/0xd0 [ 269.734885][ C1] [] ret_from_fork+0x3a/0x50 We can skip over empty buckets, avoiding the lockdep penalty for debug kernels, and avoid atomic operations on non debug ones. 
Signed-off-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 0859b8f76764..fa02aab56724 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -363,11 +363,15 @@ static void htable_selective_cleanup(struct xt_hashlimit_htable *ht, bool select unsigned int i; for (i = 0; i < ht->cfg.size; i++) { + struct hlist_head *head = &ht->hash[i]; struct dsthash_ent *dh; struct hlist_node *n; + if (hlist_empty(head)) + continue; + spin_lock_bh(&ht->lock); - hlist_for_each_entry_safe(dh, n, &ht->hash[i], node) { + hlist_for_each_entry_safe(dh, n, head, node) { if (time_after_eq(jiffies, dh->expires) || select_all) dsthash_free(ht, dh); } From b9ed315d3c4c0c294a4348edb6874d489bac47fa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Dec 2024 00:46:56 +0100 Subject: [PATCH 0669/1386] netkit: Allow for configuring needed_{head,tail}room Allow the user to configure needed_{head,tail}room for both netkit devices. The idea is similar to 163e529200af ("veth: implement ndo_set_rx_headroom") with the difference that the two parameters can be specified upon device creation. By default the current behavior stays as is which is needed_{head,tail}room is 0. In case of Cilium, for example, the netkit devices are not enslaved into a bridge or openvswitch device (rather, BPF-based redirection is used out of tcx), and as such these parameters are not propagated into the Pod's netns via peer device. Given Cilium can run in vxlan/geneve tunneling mode (needed_headroom) and/or be used in combination with WireGuard (needed_{head,tail}room), allow the Cilium CNI plugin to specify these two upon netkit device creation. 
Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-1-daniel@iogearbox.net --- drivers/net/netkit.c | 66 +++++++++++++++++++----------- include/uapi/linux/if_link.h | 2 + tools/include/uapi/linux/if_link.h | 2 + 3 files changed, 47 insertions(+), 23 deletions(-) diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c index c1d881dc6409..1e1b00756be7 100644 --- a/drivers/net/netkit.c +++ b/drivers/net/netkit.c @@ -338,6 +338,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT; enum netkit_mode mode = NETKIT_L3; unsigned char ifname_assign_type; + u16 headroom = 0, tailroom = 0; struct ifinfomsg *ifmp = NULL; struct net_device *peer; char ifname[IFNAMSIZ]; @@ -371,6 +372,10 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, if (err < 0) return err; } + if (data[IFLA_NETKIT_HEADROOM]) + headroom = nla_get_u16(data[IFLA_NETKIT_HEADROOM]); + if (data[IFLA_NETKIT_TAILROOM]) + tailroom = nla_get_u16(data[IFLA_NETKIT_TAILROOM]); } if (ifmp && tbp[IFLA_IFNAME]) { @@ -390,6 +395,14 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, return PTR_ERR(peer); netif_inherit_tso_max(peer, dev); + if (headroom) { + peer->needed_headroom = headroom; + dev->needed_headroom = headroom; + } + if (tailroom) { + peer->needed_tailroom = tailroom; + dev->needed_tailroom = tailroom; + } if (mode == NETKIT_L2 && !(ifmp && tbp[IFLA_ADDRESS])) eth_hw_addr_random(peer); @@ -401,6 +414,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, nk->policy = policy_peer; nk->scrub = scrub_peer; nk->mode = mode; + nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); err = register_netdevice(peer); @@ -426,6 +440,7 @@ static int netkit_new_link(struct net *peer_net, struct net_device *dev, nk->policy = policy_prim; nk->scrub = scrub_prim; nk->mode = mode; + 
nk->headroom = headroom; bpf_mprog_bundle_init(&nk->bundle); err = register_netdevice(dev); @@ -850,7 +865,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], struct net_device *peer = rtnl_dereference(nk->peer); enum netkit_action policy; struct nlattr *attr; - int err; + int err, i; + static const struct { + u32 attr; + char *name; + } fixed_params[] = { + { IFLA_NETKIT_MODE, "operating mode" }, + { IFLA_NETKIT_SCRUB, "scrubbing" }, + { IFLA_NETKIT_PEER_SCRUB, "peer scrubbing" }, + { IFLA_NETKIT_PEER_INFO, "peer info" }, + { IFLA_NETKIT_HEADROOM, "headroom" }, + { IFLA_NETKIT_TAILROOM, "tailroom" }, + }; if (!nk->primary) { NL_SET_ERR_MSG(extack, @@ -858,28 +884,14 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[], return -EACCES; } - if (data[IFLA_NETKIT_MODE]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_MODE], - "netkit link operating mode cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_SCRUB]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB], - "netkit scrubbing cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_PEER_SCRUB]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB], - "netkit scrubbing cannot be changed after device creation"); - return -EACCES; - } - - if (data[IFLA_NETKIT_PEER_INFO]) { - NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO], - "netkit peer info cannot be changed after device creation"); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(fixed_params); i++) { + attr = data[fixed_params[i].attr]; + if (attr) { + NL_SET_ERR_MSG_ATTR_FMT(extack, attr, + "netkit link %s cannot be changed after device creation", + fixed_params[i].name); + return -EACCES; + } } if (data[IFLA_NETKIT_POLICY]) { @@ -914,6 +926,8 @@ static size_t netkit_get_size(const struct net_device *dev) nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */ nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */ 
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */ + nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_HEADROOM */ + nla_total_size(sizeof(u16)) + /* IFLA_NETKIT_TAILROOM */ 0; } @@ -930,6 +944,10 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub)) return -EMSGSIZE; + if (nla_put_u16(skb, IFLA_NETKIT_HEADROOM, dev->needed_headroom)) + return -EMSGSIZE; + if (nla_put_u16(skb, IFLA_NETKIT_TAILROOM, dev->needed_tailroom)) + return -EMSGSIZE; if (peer) { nk = netkit_priv(peer); @@ -947,6 +965,8 @@ static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = { [IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3), [IFLA_NETKIT_POLICY] = { .type = NLA_U32 }, [IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 }, + [IFLA_NETKIT_HEADROOM] = { .type = NLA_U16 }, + [IFLA_NETKIT_TAILROOM] = { .type = NLA_U16 }, [IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT), [IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2575e0cd9b48..2fa2c265dcba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8516c1ccd57a..7e46ca4cd31b 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -1315,6 +1315,8 @@ enum { IFLA_NETKIT_MODE, IFLA_NETKIT_SCRUB, IFLA_NETKIT_PEER_SCRUB, + IFLA_NETKIT_HEADROOM, + IFLA_NETKIT_TAILROOM, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) From cc529a33d559cc75eb7250a4f4e2b9e431761312 Mon Sep 17 00:00:00 2001 From: Daniel 
Borkmann Date: Sat, 21 Dec 2024 00:46:57 +0100 Subject: [PATCH 0670/1386] netkit: Add netkit {head,tail}room to rt_link.yaml Add netkit {head,tail}room attribute support to the rt_link.yaml spec file. Example: # ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/rt_link.yaml \ --do getlink --json '{"ifname": "nk0"}' --output-json | jq [...] "linkinfo": { "kind": "netkit", "data": { "primary": 0, "policy": "forward", "mode": "l3", "scrub": "default", "headroom": 0, "tailroom": 0, "peer-policy": "forward", "peer-scrub": "default" } }, [...] Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-2-daniel@iogearbox.net --- Documentation/netlink/specs/rt_link.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index 9ffa13b77dcf..dbeae6b1c548 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -2166,6 +2166,12 @@ attribute-sets: name: peer-scrub type: u32 enum: netkit-scrub + - + name: headroom + type: u16 + - + name: tailroom + type: u16 sub-messages: - From 058268e23fcadc2bdb9297c6dff3a010c70f9762 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 21 Dec 2024 00:46:58 +0100 Subject: [PATCH 0671/1386] selftests/bpf: Extend netkit tests to validate set {head,tail}room Extend the netkit selftests to specify and validate the {head,tail}room on the netdevice: # ./vmtest.sh -- ./test_progs -t netkit [...] ./test_progs -t netkit [ 1.174147] bpf_testmod: loading out-of-tree module taints kernel.
[ 1.174585] bpf_testmod: module verification failed: signature and/or required key missing - tainting kernel [ 1.422307] tsc: Refined TSC clocksource calibration: 3407.983 MHz [ 1.424511] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x311fc3e5084, max_idle_ns: 440795359833 ns [ 1.428092] clocksource: Switched to clocksource tsc #363 tc_netkit_basic:OK #364 tc_netkit_device:OK #365 tc_netkit_multi_links:OK #366 tc_netkit_multi_opts:OK #367 tc_netkit_neigh_links:OK #368 tc_netkit_pkt_type:OK #369 tc_netkit_scrub:OK Summary: 7/0 PASSED, 0 SKIPPED, 0 FAILED Signed-off-by: Daniel Borkmann Acked-by: Nikolay Aleksandrov Link: https://lore.kernel.org/bpf/20241220234658.490686-3-daniel@iogearbox.net --- .../selftests/bpf/prog_tests/tc_netkit.c | 49 ++++++++++++------- .../selftests/bpf/progs/test_tc_link.c | 15 ++++++ 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index 151a4210028f..2461d183dee5 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,10 +14,16 @@ #include "netlink_helpers.h" #include "tc_helpers.h" +#define NETKIT_HEADROOM 32 +#define NETKIT_TAILROOM 8 + #define MARK 42 #define PRIO 0xeb9f #define ICMP_ECHO 8 +#define FLAG_ADJUST_ROOM (1 << 0) +#define FLAG_SAME_NETNS (1 << 1) + struct icmphdr { __u8 type; __u8 code; @@ -35,7 +41,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns, int scrub, int peer_scrub) + int scrub, int peer_scrub, __u32 flags) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, 
mode); + if (flags & FLAG_ADJUST_ROOM) { + addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM); + addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM); + } addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, " addr ee:ff:bb:cc:aa:dd"), "set hwaddress"); } - if (same_netns) { + if (flags & FLAG_SAME_NETNS) { ASSERT_OK(system("ip link set dev " netkit_peer " up"), "up peer"); ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"), @@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, NETKIT_SCRUB_DEFAULT, - 
NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, 0); if (err) return; @@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true, NETKIT_SCRUB_DEFAULT, - NETKIT_SCRUB_DEFAULT); + &ifindex, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS); if (err) return; @@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } -static void serial_test_tc_netkit_scrub_type(int scrub) +static void serial_test_tc_netkit_scrub_type(int scrub, bool room) { LIBBPF_OPTS(bpf_netkit_opts, optl); struct test_tc_link *skel; @@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false, scrub, scrub); + &ifindex, scrub, scrub, + room ? FLAG_ADJUST_ROOM : 0); if (err) return; @@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub) ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); + ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom"); + ASSERT_EQ(skel->bss->tailroom, room ? 
NETKIT_TAILROOM : 0, "tailroom"); cleanup: test_tc_link__destroy(skel); @@ -852,6 +865,6 @@ cleanup: void serial_test_tc_netkit_scrub(void) { - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); - serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index 10d825928499..630f12e51b07 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -8,6 +8,7 @@ #include #include #include +#include char LICENSE[] SEC("license") = "GPL"; @@ -27,6 +28,7 @@ bool seen_host; bool seen_mcast; int mark, prio; +unsigned short headroom, tailroom; SEC("tc/ingress") int tc1(struct __sk_buff *skb) @@ -104,11 +106,24 @@ out: return TCX_PASS; } +struct sk_buff { + struct net_device *dev; +}; + +struct net_device { + unsigned short needed_headroom; + unsigned short needed_tailroom; +}; + SEC("tc/egress") int tc8(struct __sk_buff *skb) { + struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev); + seen_tc8 = true; mark = skb->mark; prio = skb->priority; + headroom = BPF_CORE_READ(dev, needed_headroom); + tailroom = BPF_CORE_READ(dev, needed_tailroom); return TCX_PASS; } From 59ec698d01ebb5bae4865f6083bb9f398e39d63b Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:09 +0000 Subject: [PATCH 0672/1386] i40e: Deadcode i40e_aq_* i40e_aq_add_mirrorrule(), i40e_aq_delete_mirrorrule() and i40e_aq_set_vsi_vlan_promisc() were added in 2016 by commit 7bd6875bef70 ("i40e: APIs to Add/remove port mirroring rules") but haven't been used. They were the last user of i40e_mirrorrule_op(). i40e_aq_rearrange_nvm() was added in 2018 by commit f05798b4ff82 ("i40e: Add AQ command for rearrange NVM structure") but hasn't been used. 
i40e_aq_restore_lldp() was added in 2019 by commit c65e78f87f81 ("i40e: Further implementation of LLDP") but hasn't been used. Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-2-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 234 ------------------ .../net/ethernet/intel/i40e/i40e_prototype.h | 17 -- 2 files changed, 251 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index e8031f1a9b4f..47e71f72d87b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1804,37 +1804,6 @@ int i40e_aq_set_vsi_broadcast(struct i40e_hw *hw, return status; } -/** - * i40e_aq_set_vsi_vlan_promisc - control the VLAN promiscuous setting - * @hw: pointer to the hw struct - * @seid: vsi number - * @enable: set MAC L2 layer unicast promiscuous enable/disable for a given VLAN - * @cmd_details: pointer to command details structure or NULL - **/ -int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, - u16 seid, bool enable, - struct i40e_asq_cmd_details *cmd_details) -{ - struct i40e_aq_desc desc; - struct i40e_aqc_set_vsi_promiscuous_modes *cmd = - (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw; - u16 flags = 0; - int status; - - i40e_fill_default_direct_cmd_desc(&desc, - i40e_aqc_opc_set_vsi_promiscuous_modes); - if (enable) - flags |= I40E_AQC_SET_VSI_PROMISC_VLAN; - - cmd->promiscuous_flags = cpu_to_le16(flags); - cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_VLAN); - cmd->seid = cpu_to_le16(seid); - - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - - return status; -} - /** * i40e_aq_get_vsi_params - get VSI configuration info * @hw: pointer to the hw struct @@ -2435,136 +2404,6 @@ i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, cmd_details, true, aq_status); } -/** - * 
i40e_mirrorrule_op - Internal helper function to add/delete mirror rule - * @hw: pointer to the hw struct - * @opcode: AQ opcode for add or delete mirror rule - * @sw_seid: Switch SEID (to which rule refers) - * @rule_type: Rule Type (ingress/egress/VLAN) - * @id: Destination VSI SEID or Rule ID - * @count: length of the list - * @mr_list: list of mirrored VSI SEIDs or VLAN IDs - * @cmd_details: pointer to command details structure or NULL - * @rule_id: Rule ID returned from FW - * @rules_used: Number of rules used in internal switch - * @rules_free: Number of rules free in internal switch - * - * Add/Delete a mirror rule to a specific switch. Mirror rules are supported for - * VEBs/VEPA elements only - **/ -static int i40e_mirrorrule_op(struct i40e_hw *hw, - u16 opcode, u16 sw_seid, u16 rule_type, u16 id, - u16 count, __le16 *mr_list, - struct i40e_asq_cmd_details *cmd_details, - u16 *rule_id, u16 *rules_used, u16 *rules_free) -{ - struct i40e_aq_desc desc; - struct i40e_aqc_add_delete_mirror_rule *cmd = - (struct i40e_aqc_add_delete_mirror_rule *)&desc.params.raw; - struct i40e_aqc_add_delete_mirror_rule_completion *resp = - (struct i40e_aqc_add_delete_mirror_rule_completion *)&desc.params.raw; - u16 buf_size; - int status; - - buf_size = count * sizeof(*mr_list); - - /* prep the rest of the request */ - i40e_fill_default_direct_cmd_desc(&desc, opcode); - cmd->seid = cpu_to_le16(sw_seid); - cmd->rule_type = cpu_to_le16(rule_type & - I40E_AQC_MIRROR_RULE_TYPE_MASK); - cmd->num_entries = cpu_to_le16(count); - /* Dest VSI for add, rule_id for delete */ - cmd->destination = cpu_to_le16(id); - if (mr_list) { - desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | - I40E_AQ_FLAG_RD)); - if (buf_size > I40E_AQ_LARGE_BUF) - desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB); - } - - status = i40e_asq_send_command(hw, &desc, mr_list, buf_size, - cmd_details); - if (!status || - hw->aq.asq_last_status == I40E_AQ_RC_ENOSPC) { - if (rule_id) - *rule_id = 
le16_to_cpu(resp->rule_id); - if (rules_used) - *rules_used = le16_to_cpu(resp->mirror_rules_used); - if (rules_free) - *rules_free = le16_to_cpu(resp->mirror_rules_free); - } - return status; -} - -/** - * i40e_aq_add_mirrorrule - add a mirror rule - * @hw: pointer to the hw struct - * @sw_seid: Switch SEID (to which rule refers) - * @rule_type: Rule Type (ingress/egress/VLAN) - * @dest_vsi: SEID of VSI to which packets will be mirrored - * @count: length of the list - * @mr_list: list of mirrored VSI SEIDs or VLAN IDs - * @cmd_details: pointer to command details structure or NULL - * @rule_id: Rule ID returned from FW - * @rules_used: Number of rules used in internal switch - * @rules_free: Number of rules free in internal switch - * - * Add mirror rule. Mirror rules are supported for VEBs or VEPA elements only - **/ -int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, - u16 rule_type, u16 dest_vsi, u16 count, - __le16 *mr_list, - struct i40e_asq_cmd_details *cmd_details, - u16 *rule_id, u16 *rules_used, u16 *rules_free) -{ - if (!(rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS || - rule_type == I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS)) { - if (count == 0 || !mr_list) - return -EINVAL; - } - - return i40e_mirrorrule_op(hw, i40e_aqc_opc_add_mirror_rule, sw_seid, - rule_type, dest_vsi, count, mr_list, - cmd_details, rule_id, rules_used, rules_free); -} - -/** - * i40e_aq_delete_mirrorrule - delete a mirror rule - * @hw: pointer to the hw struct - * @sw_seid: Switch SEID (to which rule refers) - * @rule_type: Rule Type (ingress/egress/VLAN) - * @count: length of the list - * @rule_id: Rule ID that is returned in the receive desc as part of - * add_mirrorrule. - * @mr_list: list of mirrored VLAN IDs to be removed - * @cmd_details: pointer to command details structure or NULL - * @rules_used: Number of rules used in internal switch - * @rules_free: Number of rules free in internal switch - * - * Delete a mirror rule. 
Mirror rules are supported for VEBs/VEPA elements only - **/ -int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, - u16 rule_type, u16 rule_id, u16 count, - __le16 *mr_list, - struct i40e_asq_cmd_details *cmd_details, - u16 *rules_used, u16 *rules_free) -{ - /* Rule ID has to be valid except rule_type: INGRESS VLAN mirroring */ - if (rule_type == I40E_AQC_MIRROR_RULE_TYPE_VLAN) { - /* count and mr_list shall be valid for rule_type INGRESS VLAN - * mirroring. For other rule_type, count and rule_type should - * not matter. - */ - if (count == 0 || !mr_list) - return -EINVAL; - } - - return i40e_mirrorrule_op(hw, i40e_aqc_opc_delete_mirror_rule, sw_seid, - rule_type, rule_id, count, mr_list, - cmd_details, NULL, rules_used, rules_free); -} - /** * i40e_aq_send_msg_to_vf * @hw: pointer to the hardware structure @@ -3179,41 +3018,6 @@ i40e_aq_update_nvm_exit: return status; } -/** - * i40e_aq_rearrange_nvm - * @hw: pointer to the hw struct - * @rearrange_nvm: defines direction of rearrangement - * @cmd_details: pointer to command details structure or NULL - * - * Rearrange NVM structure, available only for transition FW - **/ -int i40e_aq_rearrange_nvm(struct i40e_hw *hw, - u8 rearrange_nvm, - struct i40e_asq_cmd_details *cmd_details) -{ - struct i40e_aqc_nvm_update *cmd; - struct i40e_aq_desc desc; - int status; - - cmd = (struct i40e_aqc_nvm_update *)&desc.params.raw; - - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update); - - rearrange_nvm &= (I40E_AQ_NVM_REARRANGE_TO_FLAT | - I40E_AQ_NVM_REARRANGE_TO_STRUCT); - - if (!rearrange_nvm) { - status = -EINVAL; - goto i40e_aq_rearrange_nvm_exit; - } - - cmd->command_flags |= rearrange_nvm; - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - -i40e_aq_rearrange_nvm_exit: - return status; -} - /** * i40e_aq_get_lldp_mib * @hw: pointer to the hw struct @@ -3334,44 +3138,6 @@ int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw, return status; } -/** - * i40e_aq_restore_lldp - * 
@hw: pointer to the hw struct - * @setting: pointer to factory setting variable or NULL - * @restore: True if factory settings should be restored - * @cmd_details: pointer to command details structure or NULL - * - * Restore LLDP Agent factory settings if @restore set to True. In other case - * only returns factory setting in AQ response. - **/ -int -i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore, - struct i40e_asq_cmd_details *cmd_details) -{ - struct i40e_aq_desc desc; - struct i40e_aqc_lldp_restore *cmd = - (struct i40e_aqc_lldp_restore *)&desc.params.raw; - int status; - - if (!test_bit(I40E_HW_CAP_FW_LLDP_PERSISTENT, hw->caps)) { - i40e_debug(hw, I40E_DEBUG_ALL, - "Restore LLDP not supported by current FW version.\n"); - return -ENODEV; - } - - i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_restore); - - if (restore) - cmd->command |= I40E_AQ_LLDP_AGENT_RESTORE; - - status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); - - if (setting) - *setting = cmd->command & 1; - - return status; -} - /** * i40e_aq_stop_lldp * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 5a0699ca7ce5..29f6a903a30c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -141,9 +141,6 @@ int i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw, int i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw, u16 seid, bool enable, u16 vid, struct i40e_asq_cmd_details *cmd_details); -int i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw, - u16 seid, bool enable, - struct i40e_asq_cmd_details *cmd_details); int i40e_aq_get_vsi_params(struct i40e_hw *hw, struct i40e_vsi_context *vsi_ctx, struct i40e_asq_cmd_details *cmd_details); @@ -176,14 +173,6 @@ i40e_aq_remove_macvlan_v2(struct i40e_hw *hw, u16 seid, struct i40e_aqc_remove_macvlan_element_data *mv_list, u16 count, struct 
i40e_asq_cmd_details *cmd_details, enum i40e_admin_queue_err *aq_status); -int i40e_aq_add_mirrorrule(struct i40e_hw *hw, u16 sw_seid, - u16 rule_type, u16 dest_vsi, u16 count, __le16 *mr_list, - struct i40e_asq_cmd_details *cmd_details, - u16 *rule_id, u16 *rules_used, u16 *rules_free); -int i40e_aq_delete_mirrorrule(struct i40e_hw *hw, u16 sw_seid, - u16 rule_type, u16 rule_id, u16 count, __le16 *mr_list, - struct i40e_asq_cmd_details *cmd_details, - u16 *rules_used, u16 *rules_free); int i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid, u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen, @@ -220,9 +209,6 @@ int i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer, u32 offset, u16 length, void *data, bool last_command, u8 preservation_flags, struct i40e_asq_cmd_details *cmd_details); -int i40e_aq_rearrange_nvm(struct i40e_hw *hw, - u8 rearrange_nvm, - struct i40e_asq_cmd_details *cmd_details); int i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type, u8 mib_type, void *buff, u16 buff_size, u16 *local_len, u16 *remote_len, @@ -234,9 +220,6 @@ i40e_aq_set_lldp_mib(struct i40e_hw *hw, int i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw, bool enable_update, struct i40e_asq_cmd_details *cmd_details); -int -i40e_aq_restore_lldp(struct i40e_hw *hw, u8 *setting, bool restore, - struct i40e_asq_cmd_details *cmd_details); int i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent, bool persist, struct i40e_asq_cmd_details *cmd_details); From 39cabb01d26d2d27bd4794c62e67349d86f8b1df Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:10 +0000 Subject: [PATCH 0673/1386] i40e: Remove unused i40e_blink_phy_link_led i40e_blink_phy_link_led() was added in 2016 by commit fd077cd3399b ("i40e: Add functions to blink led on 10GBaseT PHY") but hasn't been used. Remove it. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Kalesh AP Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-3-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 74 ------------------- .../net/ethernet/intel/i40e/i40e_prototype.h | 4 - 2 files changed, 78 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 47e71f72d87b..ba780a949a47 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -4428,80 +4428,6 @@ u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num) return (u8)(reg_val >> ((dev_num + 1) * 5)) & 0x1f; } -/** - * i40e_blink_phy_link_led - * @hw: pointer to the HW structure - * @time: time how long led will blinks in secs - * @interval: gap between LED on and off in msecs - * - * Blinks PHY link LED - **/ -int i40e_blink_phy_link_led(struct i40e_hw *hw, - u32 time, u32 interval) -{ - u16 led_addr = I40E_PHY_LED_PROV_REG_1; - u16 gpio_led_port; - u8 phy_addr = 0; - int status = 0; - u16 led_ctl; - u8 port_num; - u16 led_reg; - u32 i; - - i = rd32(hw, I40E_PFGEN_PORTNUM); - port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK); - phy_addr = i40e_get_phy_address(hw, port_num); - - for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++, - led_addr++) { - status = i40e_read_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - &led_reg); - if (status) - goto phy_blinking_end; - led_ctl = led_reg; - if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) { - led_reg = 0; - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, - led_reg); - if (status) - goto phy_blinking_end; - break; - } - } - - if (time > 0 && interval > 0) { - for (i = 0; i < time * 1000; i += interval) { - status = i40e_read_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, &led_reg); - if (status) - goto restore_config; - if 
(led_reg & I40E_PHY_LED_MANUAL_ON) - led_reg = 0; - else - led_reg = I40E_PHY_LED_MANUAL_ON; - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_reg); - if (status) - goto restore_config; - msleep(interval); - } - } - -restore_config: - status = i40e_write_phy_register_clause45(hw, - I40E_PHY_COM_REG_PAGE, - led_addr, phy_addr, led_ctl); - -phy_blinking_end: - return status; -} - /** * i40e_led_get_reg - read LED register * @hw: pointer to the HW structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index 29f6a903a30c..c0a4bd53501c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -72,8 +72,6 @@ int i40e_led_set_phy(struct i40e_hw *hw, bool on, u16 led_addr, u32 mode); int i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr, u16 *val); -int i40e_blink_phy_link_led(struct i40e_hw *hw, - u32 time, u32 interval); /* admin send queue commands */ @@ -446,8 +444,6 @@ int i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg, int i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 value); u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); -int i40e_blink_phy_link_led(struct i40e_hw *hw, - u32 time, u32 interval); int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff, u16 buff_size, u32 track_id, u32 *error_offset, u32 *error_info, From 8cc51e28ecce4c8b3a96d7802b543553d11f682c Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:11 +0000 Subject: [PATCH 0674/1386] i40e: Remove unused i40e_(read|write)_phy_register i40e_read_phy_register() and i40e_write_phy_register() were added in 2016 by commit f62ba91458b5 ("i40e: Add functions which apply correct PHY access method for read and write operation") but haven't been used. Remove them. (There are more specific _clause* variants of these functions that are still used.) 
Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-4-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 78 ------------------- .../net/ethernet/intel/i40e/i40e_prototype.h | 4 - 2 files changed, 82 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index ba780a949a47..6779e281a648 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -4335,84 +4335,6 @@ phy_write_end: return status; } -/** - * i40e_write_phy_register - * @hw: pointer to the HW structure - * @page: registers page number - * @reg: register address in the page - * @phy_addr: PHY address on MDIO interface - * @value: PHY register value - * - * Writes value to specified PHY register - **/ -int i40e_write_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, u16 value) -{ - int status; - - switch (hw->device_id) { - case I40E_DEV_ID_1G_BASE_T_X722: - status = i40e_write_phy_register_clause22(hw, reg, phy_addr, - value); - break; - case I40E_DEV_ID_1G_BASE_T_BC: - case I40E_DEV_ID_5G_BASE_T_BC: - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_10G_BASE_T4: - case I40E_DEV_ID_10G_BASE_T_BC: - case I40E_DEV_ID_10G_BASE_T_X722: - case I40E_DEV_ID_25G_B: - case I40E_DEV_ID_25G_SFP28: - status = i40e_write_phy_register_clause45(hw, page, reg, - phy_addr, value); - break; - default: - status = -EIO; - break; - } - - return status; -} - -/** - * i40e_read_phy_register - * @hw: pointer to the HW structure - * @page: registers page number - * @reg: register address in the page - * @phy_addr: PHY address on MDIO interface - * @value: PHY register value - * - * Reads specified PHY register value - **/ -int i40e_read_phy_register(struct i40e_hw *hw, - u8 page, u16 reg, u8 phy_addr, u16 *value) -{ - int status; - - switch (hw->device_id) { - case 
I40E_DEV_ID_1G_BASE_T_X722: - status = i40e_read_phy_register_clause22(hw, reg, phy_addr, - value); - break; - case I40E_DEV_ID_1G_BASE_T_BC: - case I40E_DEV_ID_5G_BASE_T_BC: - case I40E_DEV_ID_10G_BASE_T: - case I40E_DEV_ID_10G_BASE_T4: - case I40E_DEV_ID_10G_BASE_T_BC: - case I40E_DEV_ID_10G_BASE_T_X722: - case I40E_DEV_ID_25G_B: - case I40E_DEV_ID_25G_SFP28: - status = i40e_read_phy_register_clause45(hw, page, reg, - phy_addr, value); - break; - default: - status = -EIO; - break; - } - - return status; -} - /** * i40e_get_phy_address * @hw: pointer to the HW structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index c0a4bd53501c..bfebe18c0041 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -439,10 +439,6 @@ int i40e_read_phy_register_clause45(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 *value); int i40e_write_phy_register_clause45(struct i40e_hw *hw, u8 page, u16 reg, u8 phy_addr, u16 value); -int i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg, - u8 phy_addr, u16 *value); -int i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg, - u8 phy_addr, u16 value); u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num); int i40e_aq_write_ddp(struct i40e_hw *hw, void *buff, u16 buff_size, u32 track_id, From 81d6bb2012e1d6410bc88dcb331113126a13a6ee Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:12 +0000 Subject: [PATCH 0675/1386] i40e: Deadcode profile code i40e_add_pinfo_to_list() was added in 2017 by commit 1d5c960c5ef5 ("i40e: new AQ commands") i40e_find_section_in_profile() was added in 2019 by commit cdc594e00370 ("i40e: Implement DDP support in i40e driver") Neither have been used. Remove them. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-5-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 72 ------------------- .../net/ethernet/intel/i40e/i40e_prototype.h | 8 --- 2 files changed, 80 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 6779e281a648..370b4bddee44 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -4882,39 +4882,6 @@ i40e_find_segment_in_package(u32 segment_type, #define I40E_SECTION_HEADER(profile, offset) \ (struct i40e_profile_section_header *)((u8 *)(profile) + (offset)) -/** - * i40e_find_section_in_profile - * @section_type: the section type to search for (i.e., SECTION_TYPE_NOTE) - * @profile: pointer to the i40e segment header to be searched - * - * This function searches i40e segment for a particular section type. On - * success it returns a pointer to the section header, otherwise it will - * return NULL. 
- **/ -struct i40e_profile_section_header * -i40e_find_section_in_profile(u32 section_type, - struct i40e_profile_segment *profile) -{ - struct i40e_profile_section_header *sec; - struct i40e_section_table *sec_tbl; - u32 sec_off; - u32 i; - - if (profile->header.type != SEGMENT_TYPE_I40E) - return NULL; - - I40E_SECTION_TABLE(profile, sec_tbl); - - for (i = 0; i < sec_tbl->section_count; i++) { - sec_off = sec_tbl->section_offset[i]; - sec = I40E_SECTION_HEADER(profile, sec_off); - if (sec->section.type == section_type) - return sec; - } - - return NULL; -} - /** * i40e_ddp_exec_aq_section - Execute generic AQ for DDP * @hw: pointer to the hw struct @@ -5137,45 +5104,6 @@ i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *profile, return status; } -/** - * i40e_add_pinfo_to_list - * @hw: pointer to the hardware structure - * @profile: pointer to the profile segment of the package - * @profile_info_sec: buffer for information section - * @track_id: package tracking id - * - * Register a profile to the list of loaded profiles. 
- */ -int -i40e_add_pinfo_to_list(struct i40e_hw *hw, - struct i40e_profile_segment *profile, - u8 *profile_info_sec, u32 track_id) -{ - struct i40e_profile_section_header *sec = NULL; - struct i40e_profile_info *pinfo; - u32 offset = 0, info = 0; - int status = 0; - - sec = (struct i40e_profile_section_header *)profile_info_sec; - sec->tbl_size = 1; - sec->data_end = sizeof(struct i40e_profile_section_header) + - sizeof(struct i40e_profile_info); - sec->section.type = SECTION_TYPE_INFO; - sec->section.offset = sizeof(struct i40e_profile_section_header); - sec->section.size = sizeof(struct i40e_profile_info); - pinfo = (struct i40e_profile_info *)(profile_info_sec + - sec->section.offset); - pinfo->track_id = track_id; - pinfo->version = profile->version; - pinfo->op = I40E_DDP_ADD_TRACKID; - memcpy(pinfo->name, profile->name, I40E_DDP_NAME_SIZE); - - status = i40e_aq_write_ddp(hw, (void *)sec, sec->data_end, - track_id, &offset, &info, NULL); - - return status; -} - /** * i40e_aq_add_cloud_filters * @hw: pointer to the hardware structure diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index bfebe18c0041..ccb8af472cd7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -452,20 +452,12 @@ int i40e_aq_get_ddp_list(struct i40e_hw *hw, void *buff, struct i40e_generic_seg_header * i40e_find_segment_in_package(u32 segment_type, struct i40e_package_header *pkg_header); -struct i40e_profile_section_header * -i40e_find_section_in_profile(u32 section_type, - struct i40e_profile_segment *profile); int i40e_write_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg, u32 track_id); int i40e_rollback_profile(struct i40e_hw *hw, struct i40e_profile_segment *i40e_seg, u32 track_id); -int -i40e_add_pinfo_to_list(struct i40e_hw *hw, - struct i40e_profile_segment *profile, - u8 *profile_info_sec, u32 track_id); - /* i40e_ddp */ int 
i40e_ddp_flash(struct net_device *netdev, struct ethtool_flash *flash); From 3eb24a9e0af3a336da8af0bf37140203f742b493 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:13 +0000 Subject: [PATCH 0676/1386] i40e: Remove unused i40e_get_cur_guaranteed_fd_count The last use of i40e_get_cur_guaranteed_fd_count() was removed in 2015 by commit 04294e38a451 ("i40e: FD filters flush policy changes") Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Kalesh AP Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-6-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 - drivers/net/ethernet/intel/i40e/i40e_main.c | 13 ------------- 2 files changed, 14 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index d4255c2706fa..5d9738b746f4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1189,7 +1189,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add); void i40e_fdir_check_and_reenable(struct i40e_pf *pf); u32 i40e_get_current_fd_count(struct i40e_pf *pf); -u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf); u32 i40e_get_current_atr_cnt(struct i40e_pf *pf); u32 i40e_get_global_fd_count(struct i40e_pf *pf); bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 0e1d9e2fbf38..83ba1effe8ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -9628,19 +9628,6 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf, i40e_reset_vf(vf, false); } -/** - * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters - * @pf: board private structure - **/ -u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf) -{ - u32 val, 
fcnt_prog; - - val = rd32(&pf->hw, I40E_PFQF_FDSTAT); - fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK); - return fcnt_prog; -} - /** * i40e_get_current_fd_count - Get total FD filters programmed for this PF * @pf: board private structure From 38dfb07d9a65dd408bc50f0cc8e49a5381bc40f5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:14 +0000 Subject: [PATCH 0677/1386] i40e: Remove unused i40e_del_filter The last use of i40e_del_filter() was removed in 2016 by commit 9569a9a4547d ("i40e: when adding or removing MAC filters, correctly handle VLANs") Remove it. Fix up a comment that referenced it. Note: The __ version of this function is still used. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-7-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 - drivers/net/ethernet/intel/i40e/i40e_main.c | 28 ++------------------- 2 files changed, 2 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 5d9738b746f4..399a5dbf3506 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1196,7 +1196,6 @@ void i40e_set_ethtool_ops(struct net_device *netdev); struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan); void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f); -void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan); int i40e_sync_vsi_filters(struct i40e_vsi *vsi); struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, u16 uplink, u32 param1); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 83ba1effe8ba..276dde0bc1d4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1666,9 +1666,8 @@ struct i40e_mac_filter 
*i40e_add_filter(struct i40e_vsi *vsi, * @vsi: VSI to remove from * @f: the filter to remove from the list * - * This function should be called instead of i40e_del_filter only if you know - * the exact filter you will remove already, such as via i40e_find_filter or - * i40e_find_mac. + * This function requires you've found the exact filter you will remove + * already, such as via i40e_find_filter or i40e_find_mac. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. @@ -1697,29 +1696,6 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f) set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state); } -/** - * i40e_del_filter - Remove a MAC/VLAN filter from the VSI - * @vsi: the VSI to be searched - * @macaddr: the MAC address - * @vlan: the VLAN - * - * NOTE: This function is expected to be called with mac_filter_hash_lock - * being held. - * ANOTHER NOTE: This function MUST be called from within the context of - * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() - * instead of list_for_each_entry(). - **/ -void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan) -{ - struct i40e_mac_filter *f; - - if (!vsi || !macaddr) - return; - - f = i40e_find_filter(vsi, macaddr, vlan); - __i40e_del_filter(vsi, f); -} - /** * i40e_add_mac_filter - Add a MAC filter for all active VLANs * @vsi: the VSI to be searched From a324484ac855a6770c6e7220b2ce09810a625f75 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:15 +0000 Subject: [PATCH 0678/1386] i40e: Remove unused i40e_commit_partition_bw_setting i40e_commit_partition_bw_setting() was added in 2017 by commit 4fc8c6763957 ("i40e: genericize the partition bandwidth control") but hasn't been used. Remove it. Signed-off-by: Dr.
David Alan Gilbert Reviewed-by: Kalesh AP Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-8-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 - drivers/net/ethernet/intel/i40e/i40e_main.c | 83 --------------------- 2 files changed, 84 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 399a5dbf3506..ce63a7cfe955 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1311,7 +1311,6 @@ int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); int i40e_get_partition_bw_setting(struct i40e_pf *pf); int i40e_set_partition_bw_setting(struct i40e_pf *pf); -int i40e_commit_partition_bw_setting(struct i40e_pf *pf); void i40e_print_link_message(struct i40e_vsi *vsi, bool isup); void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 276dde0bc1d4..8a333d0e2218 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -12576,89 +12576,6 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf) return status; } -/** - * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition - * @pf: board private structure - **/ -int i40e_commit_partition_bw_setting(struct i40e_pf *pf) -{ - /* Commit temporary BW setting to permanent NVM image */ - enum i40e_admin_queue_err last_aq_status; - u16 nvm_word; - int ret; - - if (pf->hw.partition_id != 1) { - dev_info(&pf->pdev->dev, - "Commit BW only works on partition 1! 
This is partition %d", - pf->hw.partition_id); - ret = -EOPNOTSUPP; - goto bw_commit_out; - } - - /* Acquire NVM for read access */ - ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ); - last_aq_status = pf->hw.aq.asq_last_status; - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot acquire NVM for read access, err %pe aq_err %s\n", - ERR_PTR(ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - - /* Read word 0x10 of NVM - SW compatibility word 1 */ - ret = i40e_aq_read_nvm(&pf->hw, - I40E_SR_NVM_CONTROL_WORD, - 0x10, sizeof(nvm_word), &nvm_word, - false, NULL); - /* Save off last admin queue command status before releasing - * the NVM - */ - last_aq_status = pf->hw.aq.asq_last_status; - i40e_release_nvm(&pf->hw); - if (ret) { - dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n", - ERR_PTR(ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - - /* Wait a bit for NVM release to complete */ - msleep(50); - - /* Acquire NVM for write access */ - ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE); - last_aq_status = pf->hw.aq.asq_last_status; - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot acquire NVM for write access, err %pe aq_err %s\n", - ERR_PTR(ret), - i40e_aq_str(&pf->hw, last_aq_status)); - goto bw_commit_out; - } - /* Write it back out unchanged to initiate update NVM, - * which will force a write of the shadow (alt) RAM to - * the NVM - thus storing the bandwidth values permanently. 
- */ - ret = i40e_aq_update_nvm(&pf->hw, - I40E_SR_NVM_CONTROL_WORD, - 0x10, sizeof(nvm_word), - &nvm_word, true, 0, NULL); - /* Save off last admin queue command status before releasing - * the NVM - */ - last_aq_status = pf->hw.aq.asq_last_status; - i40e_release_nvm(&pf->hw); - if (ret) - dev_info(&pf->pdev->dev, - "BW settings NOT SAVED, err %pe aq_err %s\n", - ERR_PTR(ret), - i40e_aq_str(&pf->hw, last_aq_status)); -bw_commit_out: - - return ret; -} - /** * i40e_is_total_port_shutdown_enabled - read NVM and return value * if total port shutdown feature is enabled for this PF From d424b93f35a61dc1147ef816cb3ae151395af656 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:16 +0000 Subject: [PATCH 0679/1386] i40e: Remove unused i40e_asq_send_command_v2 i40e_asq_send_command_v2() was added in 2022 by commit 74073848b0d7 ("i40e: Add new versions of send ASQ command functions") but hasn't been used. Remove it. (The _atomic_v2 version of the function is used, so leave it). Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-9-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_adminq.c | 10 ---------- drivers/net/ethernet/intel/i40e/i40e_prototype.h | 7 ------- 2 files changed, 17 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c index f73f5930fc58..175c1320c143 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c @@ -1016,16 +1016,6 @@ i40e_asq_send_command_atomic_v2(struct i40e_hw *hw, return status; } -int -i40e_asq_send_command_v2(struct i40e_hw *hw, struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - enum i40e_admin_queue_err *aq_status) -{ - return i40e_asq_send_command_atomic_v2(hw, desc, buff, buff_size, - cmd_details, true, aq_status); -} - /** * i40e_fill_default_direct_cmd_desc - AQ descriptor helper function * @desc: pointer to the temp descriptor (non DMA mem) diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index ccb8af472cd7..099bb8ab7d70 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -27,13 +27,6 @@ i40e_asq_send_command(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details); int -i40e_asq_send_command_v2(struct i40e_hw *hw, - struct i40e_aq_desc *desc, - void *buff, /* can be NULL */ - u16 buff_size, - struct i40e_asq_cmd_details *cmd_details, - enum i40e_admin_queue_err *aq_status); -int i40e_asq_send_command_atomic(struct i40e_hw *hw, struct i40e_aq_desc *desc, void *buff, /* can be NULL */ u16 buff_size, struct i40e_asq_cmd_details *cmd_details, From 47ea5d4e6f40446eddaf308eed942a3d3a9397e9 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Thu, 2 Jan 2025 17:37:17 +0000 Subject: [PATCH 0680/1386] i40e: Remove unused i40e_dcb_hw_get_num_tc The last use of i40e_dcb_hw_get_num_tc() was removed in 2022 by commit fe20371578ef ("Revert "i40e: Fix reset bw limit when DCB enabled with 1 TC"") Remove it. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Kalesh AP Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102173717.200359-10-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_dcb.c | 13 ------------- drivers/net/ethernet/intel/i40e/i40e_dcb.h | 1 - 2 files changed, 14 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c index 8db1eb0c1768..352e957443fd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c @@ -1490,19 +1490,6 @@ void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc) wr32(hw, I40E_PRTDCB_GENC, reg); } -/** - * i40e_dcb_hw_get_num_tc - * @hw: pointer to the hw struct - * - * Returns number of traffic classes configured in HW - **/ -u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw) -{ - u32 reg = rd32(hw, I40E_PRTDCB_GENC); - - return FIELD_GET(I40E_PRTDCB_GENC_NUMTC_MASK, reg); -} - /** * i40e_dcb_hw_rx_ets_bw_config * @hw: pointer to the hw struct diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.h b/drivers/net/ethernet/intel/i40e/i40e_dcb.h index d76497566e40..d5662c639c41 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_dcb.h +++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.h @@ -253,7 +253,6 @@ void i40e_dcb_hw_rx_cmd_monitor_config(struct i40e_hw *hw, void i40e_dcb_hw_pfc_config(struct i40e_hw *hw, u8 pfc_en, u8 *prio_tc); void i40e_dcb_hw_set_num_tc(struct i40e_hw *hw, u8 num_tc); -u8 i40e_dcb_hw_get_num_tc(struct i40e_hw *hw); void i40e_dcb_hw_rx_ets_bw_config(struct i40e_hw *hw, u8 *bw_share, u8 *mode, u8 *prio_type); void i40e_dcb_hw_rx_up2tc_config(struct i40e_hw *hw, u8 *prio_tc); From
b37dba891b17703bd249b4b7a8c4eb04482e2692 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:41:40 +0000 Subject: [PATCH 0681/1386] igc: Remove unused igc_acquire/release_nvm igc_acquire_nvm() and igc_release_nvm() were added in 2018 as part of commit ab4056126813 ("igc: Add NVM support") but never used. Remove them. igc_i225.c has its own specific implementations. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102174142.200700-2-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_nvm.c | 50 ------------------------ drivers/net/ethernet/intel/igc/igc_nvm.h | 2 - 2 files changed, 52 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c index 58f81aba0144..efd121c03967 100644 --- a/drivers/net/ethernet/intel/igc/igc_nvm.c +++ b/drivers/net/ethernet/intel/igc/igc_nvm.c @@ -35,56 +35,6 @@ static s32 igc_poll_eerd_eewr_done(struct igc_hw *hw, int ee_reg) return ret_val; } -/** - * igc_acquire_nvm - Generic request for access to EEPROM - * @hw: pointer to the HW structure - * - * Set the EEPROM access request bit and wait for EEPROM access grant bit. - * Return successful if access grant bit set, else clear the request for - * EEPROM access and return -IGC_ERR_NVM (-1).
- */ -s32 igc_acquire_nvm(struct igc_hw *hw) -{ - s32 timeout = IGC_NVM_GRANT_ATTEMPTS; - u32 eecd = rd32(IGC_EECD); - s32 ret_val = 0; - - wr32(IGC_EECD, eecd | IGC_EECD_REQ); - eecd = rd32(IGC_EECD); - - while (timeout) { - if (eecd & IGC_EECD_GNT) - break; - udelay(5); - eecd = rd32(IGC_EECD); - timeout--; - } - - if (!timeout) { - eecd &= ~IGC_EECD_REQ; - wr32(IGC_EECD, eecd); - hw_dbg("Could not acquire NVM grant\n"); - ret_val = -IGC_ERR_NVM; - } - - return ret_val; -} - -/** - * igc_release_nvm - Release exclusive access to EEPROM - * @hw: pointer to the HW structure - * - * Stop any current commands to the EEPROM and clear the EEPROM request bit. - */ -void igc_release_nvm(struct igc_hw *hw) -{ - u32 eecd; - - eecd = rd32(IGC_EECD); - eecd &= ~IGC_EECD_REQ; - wr32(IGC_EECD, eecd); -} - /** * igc_read_nvm_eerd - Reads EEPROM using EERD register * @hw: pointer to the HW structure diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.h b/drivers/net/ethernet/intel/igc/igc_nvm.h index f9fc2e9cfb03..ab78d0c64547 100644 --- a/drivers/net/ethernet/intel/igc/igc_nvm.h +++ b/drivers/net/ethernet/intel/igc/igc_nvm.h @@ -4,8 +4,6 @@ #ifndef _IGC_NVM_H_ #define _IGC_NVM_H_ -s32 igc_acquire_nvm(struct igc_hw *hw); -void igc_release_nvm(struct igc_hw *hw); s32 igc_read_mac_addr(struct igc_hw *hw); s32 igc_read_nvm_eerd(struct igc_hw *hw, u16 offset, u16 words, u16 *data); s32 igc_validate_nvm_checksum(struct igc_hw *hw); From 121c3c6bc661039104119a18ad0730540b353eaf Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Thu, 2 Jan 2025 17:41:41 +0000 Subject: [PATCH 0682/1386] igc: Remove unused igc_read/write_pci_cfg wrappers igc_read_pci_cfg() and igc_write_pci_cfg were added in 2018 as part of commit 146740f9abc4 ("igc: Add support for PF") but have remained unused. Remove them. Signed-off-by: Dr. 
David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102174142.200700-3-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_hw.h | 2 -- drivers/net/ethernet/intel/igc/igc_main.c | 14 -------------- 2 files changed, 16 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index d9d1a1a11daf..7ec7e395020b 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -281,7 +281,5 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw); s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); -void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value); -void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value); #endif /* _IGC_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 27872bdea9bd..9c92673a7240 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6780,20 +6780,6 @@ static const struct net_device_ops igc_netdev_ops = { }; /* PCIe configuration access */ -void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - pci_read_config_word(adapter->pdev, reg, value); -} - -void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - pci_write_config_word(adapter->pdev, reg, *value); -} - s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) { struct igc_adapter *adapter = hw->back; From c758890813665edca9b66e020c52646c84b7b694 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Thu, 2 Jan 2025 17:41:42 +0000 Subject: [PATCH 0683/1386] igc: Remove unused igc_read/write_pcie_cap_reg The last uses of igc_read_pcie_cap_reg() and igc_write_pcie_cap_reg() were removed in 2019 by commit 16ecd8d9af26 ("igc: Remove the obsolete workaround"). Remove them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Tony Nguyen Link: https://patch.msgid.link/20250102174142.200700-4-linux@treblig.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/igc/igc_hw.h | 3 --- drivers/net/ethernet/intel/igc/igc_main.c | 25 ----------------------- 2 files changed, 28 deletions(-) diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 7ec7e395020b..be8a49a86d09 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -279,7 +279,4 @@ struct net_device *igc_get_hw_dev(struct igc_hw *hw); #define hw_dbg(format, arg...) \ netdev_dbg(igc_get_hw_dev(hw), format, ##arg) -s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); -s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value); - #endif /* _IGC_HW_H_ */ diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 9c92673a7240..f58cd6940434 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6779,31 +6779,6 @@ static const struct net_device_ops igc_netdev_ops = { .ndo_get_tstamp = igc_get_tstamp, }; -/* PCIe configuration access */ -s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - if (!pci_is_pcie(adapter->pdev)) - return -IGC_ERR_CONFIG; - - pcie_capability_read_word(adapter->pdev, reg, value); - - return IGC_SUCCESS; -} - -s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value) -{ - struct igc_adapter *adapter = hw->back; - - if (!pci_is_pcie(adapter->pdev)) - return -IGC_ERR_CONFIG; - -
pcie_capability_write_word(adapter->pdev, reg, *value); - - return IGC_SUCCESS; -} - u32 igc_rd32(struct igc_hw *hw, u32 reg) { struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw); From 3f9f5cd005f5b5243eaa2647d40b9857fa1a901d Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Thu, 2 Jan 2025 17:34:18 +0100 Subject: [PATCH 0684/1386] sctp: Prepare sctp_v4_get_dst() to dscp_t conversion. Define inet_sk_dscp() to get a dscp_t value from struct inet_sock, so that sctp_v4_get_dst() can easily set ->flowi4_tos from a dscp_t variable. For the SCTP_DSCP_SET_MASK case, we can just use inet_dsfield_to_dscp() to get a dscp_t value. Then, when converting ->flowi4_tos from __u8 to dscp_t, we'll just have to drop the inet_dscp_to_dsfield() conversion function. Signed-off-by: Guillaume Nault Acked-by: Xin Long Reviewed-by: Ido Schimmel Link: https://patch.msgid.link/1a645f4a0bc60ad18e7c0916642883ce8a43c013.1735835456.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 6 ++++++ net/sctp/protocol.c | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 3ccbad881d74..1086256549fa 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -302,6 +303,11 @@ static inline unsigned long inet_cmsg_flags(const struct inet_sock *inet) return READ_ONCE(inet->inet_flags) & IP_CMSG_ALL; } +static inline dscp_t inet_sk_dscp(const struct inet_sock *inet) +{ + return inet_dsfield_to_dscp(READ_ONCE(inet->tos)); +} + #define inet_test_bit(nr, sk) \ test_bit(INET_FLAGS_##nr, &inet_sk(sk)->inet_flags) #define inet_set_bit(nr, sk) \ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 8b9a1b96695e..29727ed1008e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -427,16 +428,19 @@ static void 
sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct dst_entry *dst = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; - u8 tos = READ_ONCE(inet_sk(sk)->tos); + dscp_t dscp; if (t->dscp & SCTP_DSCP_SET_MASK) - tos = t->dscp & SCTP_DSCP_VAL_MASK; + dscp = inet_dsfield_to_dscp(t->dscp); + else + dscp = inet_sk_dscp(inet_sk(sk)); + memset(&_fl, 0x0, sizeof(_fl)); fl4->daddr = daddr->v4.sin_addr.s_addr; fl4->fl4_dport = daddr->v4.sin_port; fl4->flowi4_proto = IPPROTO_SCTP; if (asoc) { - fl4->flowi4_tos = tos & INET_DSCP_MASK; + fl4->flowi4_tos = inet_dscp_to_dsfield(dscp); fl4->flowi4_scope = ip_sock_rt_scope(asoc->base.sk); fl4->flowi4_oif = asoc->base.sk->sk_bound_dev_if; fl4->fl4_sport = htons(asoc->base.bind_addr.port); From 95fc45d1dea8e1253f8ec58abc5befb71553d666 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Jan 2025 21:05:14 +0000 Subject: [PATCH 0685/1386] ax25: rcu protect dev->ax25_ptr syzbot found a lockdep issue [1]. We should remove ax25 RTNL dependency in ax25_setsockopt(). This should also fix a variety of possible UAF in ax25. [1] WARNING: possible circular locking dependency detected 6.13.0-rc3-syzkaller-00762-g9268abe611b0 #0 Not tainted ------------------------------------------------------ syz.5.1818/12806 is trying to acquire lock: ffffffff8fcb3988 (rtnl_mutex){+.+.}-{4:4}, at: ax25_setsockopt+0xa55/0xe90 net/ax25/af_ax25.c:680 but task is already holding lock: ffff8880617ac258 (sk_lock-AF_AX25){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1618 [inline] ffff8880617ac258 (sk_lock-AF_AX25){+.+.}-{0:0}, at: ax25_setsockopt+0x209/0xe90 net/ax25/af_ax25.c:574 which lock already depends on the new lock.
the existing dependency chain (in reverse order) is: -> #1 (sk_lock-AF_AX25){+.+.}-{0:0}: lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 lock_sock_nested+0x48/0x100 net/core/sock.c:3642 lock_sock include/net/sock.h:1618 [inline] ax25_kill_by_device net/ax25/af_ax25.c:101 [inline] ax25_device_event+0x24d/0x580 net/ax25/af_ax25.c:146 notifier_call_chain+0x1a5/0x3f0 kernel/notifier.c:85 __dev_notify_flags+0x207/0x400 dev_change_flags+0xf0/0x1a0 net/core/dev.c:9026 dev_ifsioc+0x7c8/0xe70 net/core/dev_ioctl.c:563 dev_ioctl+0x719/0x1340 net/core/dev_ioctl.c:820 sock_do_ioctl+0x240/0x460 net/socket.c:1234 sock_ioctl+0x626/0x8e0 net/socket.c:1339 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:906 [inline] __se_sys_ioctl+0xf5/0x170 fs/ioctl.c:892 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f -> #0 (rtnl_mutex){+.+.}-{4:4}: check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x1ac/0xee0 kernel/locking/mutex.c:735 ax25_setsockopt+0xa55/0xe90 net/ax25/af_ax25.c:680 do_sock_setsockopt+0x3af/0x720 net/socket.c:2324 __sys_setsockopt net/socket.c:2349 [inline] __do_sys_setsockopt net/socket.c:2355 [inline] __se_sys_setsockopt net/socket.c:2352 [inline] __x64_sys_setsockopt+0x1ee/0x280 net/socket.c:2352 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(sk_lock-AF_AX25); lock(rtnl_mutex); lock(sk_lock-AF_AX25); lock(rtnl_mutex); *** DEADLOCK *** 1 lock held by syz.5.1818/12806: #0: 
ffff8880617ac258 (sk_lock-AF_AX25){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1618 [inline] #0: ffff8880617ac258 (sk_lock-AF_AX25){+.+.}-{0:0}, at: ax25_setsockopt+0x209/0xe90 net/ax25/af_ax25.c:574 stack backtrace: CPU: 1 UID: 0 PID: 12806 Comm: syz.5.1818 Not tainted 6.13.0-rc3-syzkaller-00762-g9268abe611b0 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_circular_bug+0x13a/0x1b0 kernel/locking/lockdep.c:2074 check_noncircular+0x36a/0x4a0 kernel/locking/lockdep.c:2206 check_prev_add kernel/locking/lockdep.c:3161 [inline] check_prevs_add kernel/locking/lockdep.c:3280 [inline] validate_chain+0x18ef/0x5920 kernel/locking/lockdep.c:3904 __lock_acquire+0x1397/0x2100 kernel/locking/lockdep.c:5226 lock_acquire+0x1ed/0x550 kernel/locking/lockdep.c:5849 __mutex_lock_common kernel/locking/mutex.c:585 [inline] __mutex_lock+0x1ac/0xee0 kernel/locking/mutex.c:735 ax25_setsockopt+0xa55/0xe90 net/ax25/af_ax25.c:680 do_sock_setsockopt+0x3af/0x720 net/socket.c:2324 __sys_setsockopt net/socket.c:2349 [inline] __do_sys_setsockopt net/socket.c:2355 [inline] __se_sys_setsockopt net/socket.c:2352 [inline] __x64_sys_setsockopt+0x1ee/0x280 net/socket.c:2352 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7f7b62385d29 Fixes: c433570458e4 ("ax25: fix a use-after-free in ax25_fillin_cb()") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250103210514.87290-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 2 +- include/net/ax25.h | 10 +++++----- net/ax25/af_ax25.c | 12 ++++++------ net/ax25/ax25_dev.c | 4 ++-- net/ax25/ax25_ip.c | 3 ++- net/ax25/ax25_out.c | 22 +++++++++++++++++----- net/ax25/ax25_route.c | 2 ++ 7 files changed, 35 
insertions(+), 20 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2593019ad5b1..e84602e0226c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2261,7 +2261,7 @@ struct net_device { void *atalk_ptr; #endif #if IS_ENABLED(CONFIG_AX25) - void *ax25_ptr; + struct ax25_dev __rcu *ax25_ptr; #endif #if IS_ENABLED(CONFIG_CFG80211) struct wireless_dev *ieee80211_ptr; diff --git a/include/net/ax25.h b/include/net/ax25.h index cb622d84cd0c..4ee141aae0a2 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -231,6 +231,7 @@ typedef struct ax25_dev { #endif refcount_t refcount; bool device_up; + struct rcu_head rcu; } ax25_dev; typedef struct ax25_cb { @@ -290,9 +291,8 @@ static inline void ax25_dev_hold(ax25_dev *ax25_dev) static inline void ax25_dev_put(ax25_dev *ax25_dev) { - if (refcount_dec_and_test(&ax25_dev->refcount)) { - kfree(ax25_dev); - } + if (refcount_dec_and_test(&ax25_dev->refcount)) + kfree_rcu(ax25_dev, rcu); } static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { @@ -335,9 +335,9 @@ void ax25_digi_invert(const ax25_digi *, ax25_digi *); extern spinlock_t ax25_dev_lock; #if IS_ENABLED(CONFIG_AX25) -static inline ax25_dev *ax25_dev_ax25dev(struct net_device *dev) +static inline ax25_dev *ax25_dev_ax25dev(const struct net_device *dev) { - return dev->ax25_ptr; + return rcu_dereference_rtnl(dev->ax25_ptr); } #endif diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d6f9fae06a9d..aa6c714892ec 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -467,7 +467,7 @@ einval_put: goto out_put; } -static void ax25_fillin_cb_from_dev(ax25_cb *ax25, ax25_dev *ax25_dev) +static void ax25_fillin_cb_from_dev(ax25_cb *ax25, const ax25_dev *ax25_dev) { ax25->rtt = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]) / 2; ax25->t1 = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_T1]); @@ -677,22 +677,22 @@ static int ax25_setsockopt(struct socket *sock, int level, int 
optname, break; } - rtnl_lock(); - dev = __dev_get_by_name(&init_net, devname); + rcu_read_lock(); + dev = dev_get_by_name_rcu(&init_net, devname); if (!dev) { - rtnl_unlock(); + rcu_read_unlock(); res = -ENODEV; break; } ax25->ax25_dev = ax25_dev_ax25dev(dev); if (!ax25->ax25_dev) { - rtnl_unlock(); + rcu_read_unlock(); res = -ENODEV; break; } ax25_fillin_cb(ax25, ax25->ax25_dev); - rtnl_unlock(); + rcu_read_unlock(); break; default: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 9efd6690b344..3733c0254a50 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -90,7 +90,7 @@ void ax25_dev_device_up(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); list_add(&ax25_dev->list, &ax25_dev_list); - dev->ax25_ptr = ax25_dev; + rcu_assign_pointer(dev->ax25_ptr, ax25_dev); spin_unlock_bh(&ax25_dev_lock); ax25_register_dev_sysctl(ax25_dev); @@ -125,7 +125,7 @@ void ax25_dev_device_down(struct net_device *dev) } } - dev->ax25_ptr = NULL; + RCU_INIT_POINTER(dev->ax25_ptr, NULL); spin_unlock_bh(&ax25_dev_lock); netdev_put(dev, &ax25_dev->dev_tracker); ax25_dev_put(ax25_dev); diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index 36249776c021..215d4ccf12b9 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -122,6 +122,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) if (dev == NULL) dev = skb->dev; + rcu_read_lock(); if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) { kfree_skb(skb); goto put; @@ -202,7 +203,7 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) ax25_queue_xmit(skb, dev); put: - + rcu_read_unlock(); ax25_route_lock_unuse(); return NETDEV_TX_OK; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index 3db76d2470e9..8bca2ace98e5 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -39,10 +39,14 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr * specified. 
*/ if (paclen == 0) { - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) + rcu_read_lock(); + ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25_dev) { + rcu_read_unlock(); return NULL; - + } paclen = ax25_dev->values[AX25_VALUES_PACLEN]; + rcu_read_unlock(); } /* @@ -53,13 +57,19 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, const ax25_address *sr return ax25; /* It already existed */ } - if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) + rcu_read_lock(); + ax25_dev = ax25_dev_ax25dev(dev); + if (!ax25_dev) { + rcu_read_unlock(); return NULL; + } - if ((ax25 = ax25_create_cb()) == NULL) + if ((ax25 = ax25_create_cb()) == NULL) { + rcu_read_unlock(); return NULL; - + } ax25_fillin_cb(ax25, ax25_dev); + rcu_read_unlock(); ax25->source_addr = *src; ax25->dest_addr = *dest; @@ -358,7 +368,9 @@ void ax25_queue_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned char *ptr; + rcu_read_lock(); skb->protocol = ax25_type_trans(skb, ax25_fwd_dev(dev)); + rcu_read_unlock(); ptr = skb_push(skb, 1); *ptr = 0x00; /* KISS */ diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b7c4d656a94b..69de75db0c9c 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -406,6 +406,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) ax25_route_lock_unuse(); return -EHOSTUNREACH; } + rcu_read_lock(); if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) { err = -EHOSTUNREACH; goto put; @@ -442,6 +443,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr) } put: + rcu_read_unlock(); ax25_route_lock_unuse(); return err; } From 4475d56145f368d065b05da3a5599d5620ca9408 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Jan 2025 10:11:48 +0000 Subject: [PATCH 0686/1386] net: hsr: remove one synchronize_rcu() from hsr_del_port() Use kfree_rcu() instead of synchronize_rcu()+kfree(). This might allow syzbot to fuzz HSR a bit faster... 
Signed-off-by: Eric Dumazet Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250103101148.3594545-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/hsr/hsr_main.h | 1 + net/hsr/hsr_slave.c | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index fcfeb79bb040..7d7551e6f0b0 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -163,6 +163,7 @@ struct hsr_port { struct net_device *dev; struct hsr_priv *hsr; enum hsr_port_type type; + struct rcu_head rcu; }; struct hsr_frame_info; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 464f683e016d..006d6ef97e53 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -235,7 +235,5 @@ void hsr_del_port(struct hsr_port *port) netdev_upper_dev_unlink(port->dev, master->dev); } - synchronize_rcu(); - - kfree(port); + kfree_rcu(port, rcu); } From fbb9a9d263a68f60a16c8ba5a51d6198d67171cd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:31 +0000 Subject: [PATCH 0687/1386] net: phylink: add support for PCS supported_interfaces bitmap Add support for the PCS to specify which interfaces it supports, which can be used by MAC drivers to build the main supported_interfaces bitmap. Phylink also validates that the PCS returned by the MAC driver supports the interface that the MAC was asked for. An empty supported_interfaces bitmap from the PCS indicates that it does not provide this information, and we handle that appropriately. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tTffL-007RoD-1Y@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 11 +++++++++++ include/linux/phylink.h | 3 +++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6d50c2fdb190..31754d5fd659 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -691,6 +691,17 @@ static int phylink_validate_mac_and_pcs(struct phylink *pl, return -EINVAL; } + /* Ensure that this PCS supports the interface which the MAC + * returned it for. It is an error for the MAC to return a PCS + * that does not support the interface mode. + */ + if (!phy_interface_empty(pcs->supported_interfaces) && + !test_bit(state->interface, pcs->supported_interfaces)) { + phylink_err(pl, "MAC returned PCS which does not support %s\n", + phy_modes(state->interface)); + return -EINVAL; + } + /* Validate the link parameters with the PCS */ if (pcs->ops->pcs_validate) { ret = pcs->ops->pcs_validate(pcs, supported, state); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5462cc6a37dc..4b7a20620b49 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -393,6 +393,8 @@ struct phylink_pcs_ops; /** * struct phylink_pcs - PHYLINK PCS instance + * @supported_interfaces: describing which PHY_INTERFACE_MODE_xxx + * are supported by this PCS. * @ops: a pointer to the &struct phylink_pcs_ops structure * @phylink: pointer to &struct phylink_config * @neg_mode: provide PCS neg mode via "mode" argument @@ -409,6 +411,7 @@ struct phylink_pcs_ops; * the PCS driver. 
*/ struct phylink_pcs { + DECLARE_PHY_INTERFACE_MASK(supported_interfaces); const struct phylink_pcs_ops *ops; struct phylink *phylink; bool neg_mode; From 906909fabb81dedf93a786c2d7247cab12e0a232 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:36 +0000 Subject: [PATCH 0688/1386] net: pcs: xpcs: fill in PCS supported_interfaces Fill in the new PCS supported_interfaces member with the interfaces that XPCS supports. Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tTffQ-007RoJ-4u@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index f70ca39f0905..cf41dc5e74e8 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -1446,6 +1446,8 @@ static struct dw_xpcs *xpcs_create(struct mdio_device *mdiodev) if (ret) goto out_clear_clks; + xpcs_get_interfaces(xpcs, xpcs->pcs.supported_interfaces); + if (xpcs->info.pma == WX_TXGBE_XPCS_PMA_10G_ID) xpcs->pcs.poll = false; else From b87d4ee16bb4f6335032839a1173d8bb177939a9 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:41 +0000 Subject: [PATCH 0689/1386] net: pcs: mtk-lynxi: fill in PCS supported_interfaces Fill in the new PCS supported_interfaces member with the interfaces that the Mediatek LynxI supports. 
Signed-off-by: Russell King (Oracle) Acked-by: Daniel Golle Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tTffV-007RoP-8D@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-mtk-lynxi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/pcs/pcs-mtk-lynxi.c b/drivers/net/pcs/pcs-mtk-lynxi.c index ed91cd7a406a..4fe0fb6d12a4 100644 --- a/drivers/net/pcs/pcs-mtk-lynxi.c +++ b/drivers/net/pcs/pcs-mtk-lynxi.c @@ -307,6 +307,10 @@ struct phylink_pcs *mtk_pcs_lynxi_create(struct device *dev, mpcs->pcs.poll = true; mpcs->interface = PHY_INTERFACE_MODE_NA; + __set_bit(PHY_INTERFACE_MODE_SGMII, mpcs->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_1000BASEX, mpcs->pcs.supported_interfaces); + __set_bit(PHY_INTERFACE_MODE_2500BASEX, mpcs->pcs.supported_interfaces); + return &mpcs->pcs; } EXPORT_SYMBOL(mtk_pcs_lynxi_create); From b0f88c1b9a539dc91b83ac90345999273ee7dfd0 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:46 +0000 Subject: [PATCH 0690/1386] net: pcs: lynx: fill in PCS supported_interfaces Fill in the new PCS supported_interfaces member with the interfaces that Lynx supports. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tTffa-007RoV-Bo@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-lynx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/net/pcs/pcs-lynx.c b/drivers/net/pcs/pcs-lynx.c index 767a8c0714ac..6457190ec6e7 100644 --- a/drivers/net/pcs/pcs-lynx.c +++ b/drivers/net/pcs/pcs-lynx.c @@ -334,9 +334,19 @@ static const struct phylink_pcs_ops lynx_pcs_phylink_ops = { .pcs_link_up = lynx_pcs_link_up, }; +static const phy_interface_t lynx_interfaces[] = { + PHY_INTERFACE_MODE_SGMII, + PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_1000BASEX, + PHY_INTERFACE_MODE_2500BASEX, + PHY_INTERFACE_MODE_10GBASER, + PHY_INTERFACE_MODE_USXGMII, +}; + static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio) { struct lynx_pcs *lynx; + int i; lynx = kzalloc(sizeof(*lynx), GFP_KERNEL); if (!lynx) @@ -348,6 +358,9 @@ static struct phylink_pcs *lynx_pcs_create(struct mdio_device *mdio) lynx->pcs.neg_mode = true; lynx->pcs.poll = true; + for (i = 0; i < ARRAY_SIZE(lynx_interfaces); i++) + __set_bit(lynx_interfaces[i], lynx->pcs.supported_interfaces); + return lynx_to_phylink_pcs(lynx); } From d13cefbb108e2e3362587b93ab5adc31c6a8589e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:51 +0000 Subject: [PATCH 0691/1386] net: stmmac: use PCS supported_interfaces Use the PCS' supported_interfaces member to build the MAC level supported_interfaces bitmap. 
Reviewed-by: Maxime Chevallier Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tTfff-007Roc-Ff@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 99eaec8bac4a..2f518ec845ec 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1203,6 +1203,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) struct stmmac_mdio_bus_data *mdio_bus_data; int mode = priv->plat->phy_interface; struct fwnode_handle *fwnode; + struct phylink_pcs *pcs; struct phylink *phylink; priv->phylink_config.dev = &priv->dev->dev; @@ -1224,8 +1225,14 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) /* If we have an xpcs, it defines which PHY interfaces are supported. */ if (priv->hw->xpcs) - xpcs_get_interfaces(priv->hw->xpcs, - priv->phylink_config.supported_interfaces); + pcs = xpcs_to_phylink_pcs(priv->hw->xpcs); + else + pcs = priv->hw->phylink_pcs; + + if (pcs) + phy_interface_or(priv->phylink_config.supported_interfaces, + priv->phylink_config.supported_interfaces, + pcs->supported_interfaces); fwnode = priv->plat->port_node; if (!fwnode) From 2410719cdd49d9b062e87dddaf5ec990edafc6e3 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Fri, 3 Jan 2025 11:16:56 +0000 Subject: [PATCH 0692/1386] net: pcs: xpcs: make xpcs_get_interfaces() static xpcs_get_interfaces() should no longer be used outside of the XPCS code, so make it static. 
Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Link: https://patch.msgid.link/E1tTffk-007Roi-JM@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/pcs/pcs-xpcs.c | 3 +-- include/linux/pcs/pcs-xpcs.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c index cf41dc5e74e8..c06b66f40022 100644 --- a/drivers/net/pcs/pcs-xpcs.c +++ b/drivers/net/pcs/pcs-xpcs.c @@ -594,14 +594,13 @@ static unsigned int xpcs_inband_caps(struct phylink_pcs *pcs, } } -void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) +static void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces) { const struct dw_xpcs_compat *compat; for (compat = xpcs->desc->compat; compat->supported; compat++) __set_bit(compat->interface, interfaces); } -EXPORT_SYMBOL_GPL(xpcs_get_interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable) { diff --git a/include/linux/pcs/pcs-xpcs.h b/include/linux/pcs/pcs-xpcs.h index b5b5d17998b8..733f4ddd2ef1 100644 --- a/include/linux/pcs/pcs-xpcs.h +++ b/include/linux/pcs/pcs-xpcs.h @@ -50,7 +50,6 @@ struct dw_xpcs; struct phylink_pcs *xpcs_to_phylink_pcs(struct dw_xpcs *xpcs); int xpcs_get_an_mode(struct dw_xpcs *xpcs, phy_interface_t interface); -void xpcs_get_interfaces(struct dw_xpcs *xpcs, unsigned long *interfaces); int xpcs_config_eee(struct dw_xpcs *xpcs, int mult_fact_100ns, int enable); struct dw_xpcs *xpcs_create_mdiodev(struct mii_bus *bus, int addr); From 020ca0abae4c1f69e71507981844fe99ae154424 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:00 +0200 Subject: [PATCH 0693/1386] net/mlx5: HWS, remove the use of duplicated structs Remove definition in HWS of structs that are already defined in mlx5_ifc.h, and fix the usage of these structs. 
Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/cmd.c | 20 ++++----- .../mellanox/mlx5/core/steering/hws/prm.h | 42 ------------------- 2 files changed, 10 insertions(+), 52 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index c00c138c3366..13689c0c1a44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -622,12 +622,12 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, u32 pd, u32 *arg_id) { + u32 in[MLX5_ST_SZ_DW(create_modify_header_arg_in)] = {0}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u32 in[MLX5_ST_SZ_DW(create_arg_in)] = {0}; void *attr; int ret; - attr = MLX5_ADDR_OF(create_arg_in, in, hdr); + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, hdr); MLX5_SET(general_obj_in_cmd_hdr, attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, @@ -635,8 +635,8 @@ int mlx5hws_cmd_arg_create(struct mlx5_core_dev *mdev, MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.log_obj_range, log_obj_range); - attr = MLX5_ADDR_OF(create_arg_in, in, arg); - MLX5_SET(arg, attr, access_pd, pd); + attr = MLX5_ADDR_OF(create_modify_header_arg_in, in, arg); + MLX5_SET(modify_header_arg, attr, access_pd, pd); ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (ret) { @@ -812,7 +812,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_packet_reformat_create_attr *attr, u32 *reformat_id) { - u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_out)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {0}; size_t insz, cmd_data_sz, cmd_total_sz; void *prctx; void *pdata; @@ -845,7 
+845,7 @@ int mlx5hws_cmd_packet_reformat_create(struct mlx5_core_dev *mdev, goto out; } - *reformat_id = MLX5_GET(alloc_packet_reformat_out, out, packet_reformat_id); + *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id); out: kfree(in); return ret; @@ -854,13 +854,13 @@ out: int mlx5hws_cmd_packet_reformat_destroy(struct mlx5_core_dev *mdev, u32 reformat_id) { - u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {0}; int ret; - MLX5_SET(dealloc_packet_reformat_in, in, opcode, + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); - MLX5_SET(dealloc_packet_reformat_in, in, + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, reformat_id); ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h index de92cecbeb92..271490a51b96 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/prm.h @@ -390,11 +390,6 @@ struct mlx5_ifc_definer_bits { u8 match_mask[0x160]; }; -struct mlx5_ifc_arg_bits { - u8 rsvd0[0x88]; - u8 access_pd[0x18]; -}; - struct mlx5_ifc_header_modify_pattern_in_bits { u8 modify_field_select[0x40]; @@ -428,11 +423,6 @@ struct mlx5_ifc_create_definer_in_bits { struct mlx5_ifc_definer_bits definer; }; -struct mlx5_ifc_create_arg_in_bits { - struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; - struct mlx5_ifc_arg_bits arg; -}; - struct mlx5_ifc_create_header_modify_pattern_in_bits { struct mlx5_ifc_general_obj_in_cmd_hdr_bits hdr; struct mlx5_ifc_header_modify_pattern_in_bits pattern; @@ -479,36 +469,4 @@ enum { 
MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL = 1, }; -struct mlx5_ifc_alloc_packet_reformat_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 packet_reformat_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_dealloc_packet_reformat_in_bits { - u8 opcode[0x10]; - u8 reserved_at_10[0x10]; - - u8 reserved_at_20[0x10]; - u8 op_mod[0x10]; - - u8 packet_reformat_id[0x20]; - - u8 reserved_at_60[0x20]; -}; - -struct mlx5_ifc_dealloc_packet_reformat_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 reserved_at_40[0x40]; -}; - #endif /* MLX5_PRM_H_ */ From 0647f27a5facedf6842c67b9909a23f577bd3d08 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:01 +0200 Subject: [PATCH 0694/1386] net/mlx5: HWS, remove implementation of unused FW commands Remove functions that manage alias objects - they are not used. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/cmd.c | 67 ------------------- .../mellanox/mlx5/core/steering/hws/cmd.h | 11 --- 2 files changed, 78 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 13689c0c1a44..6fd7747f08ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -889,73 +889,6 @@ int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn) return ret; } -int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_allow_other_vhca_access_attr *attr) -{ - u32 out[MLX5_ST_SZ_DW(allow_other_vhca_access_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(allow_other_vhca_access_in)] = {0}; - void *key; - int ret; - - 
MLX5_SET(allow_other_vhca_access_in, - in, opcode, MLX5_CMD_OP_ALLOW_OTHER_VHCA_ACCESS); - MLX5_SET(allow_other_vhca_access_in, - in, object_type_to_be_accessed, attr->obj_type); - MLX5_SET(allow_other_vhca_access_in, - in, object_id_to_be_accessed, attr->obj_id); - - key = MLX5_ADDR_OF(allow_other_vhca_access_in, in, access_key); - memcpy(key, attr->access_key, sizeof(attr->access_key)); - - ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (ret) - mlx5_core_err(mdev, "Failed to execute ALLOW_OTHER_VHCA_ACCESS command\n"); - - return ret; -} - -int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, - u32 *obj_id) -{ - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0}; - u32 in[MLX5_ST_SZ_DW(create_alias_obj_in)] = {0}; - void *attr; - void *key; - int ret; - - attr = MLX5_ADDR_OF(create_alias_obj_in, in, hdr); - MLX5_SET(general_obj_in_cmd_hdr, - attr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, - attr, obj_type, alias_attr->obj_type); - MLX5_SET(general_obj_in_cmd_hdr, attr, op_param.create.alias_object, 1); - - attr = MLX5_ADDR_OF(create_alias_obj_in, in, alias_ctx); - MLX5_SET(alias_context, attr, vhca_id_to_be_accessed, alias_attr->vhca_id); - MLX5_SET(alias_context, attr, object_id_to_be_accessed, alias_attr->obj_id); - - key = MLX5_ADDR_OF(alias_context, attr, access_key); - memcpy(key, alias_attr->access_key, sizeof(alias_attr->access_key)); - - ret = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (ret) { - mlx5_core_err(mdev, "Failed to create ALIAS OBJ\n"); - goto out; - } - - *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); -out: - return ret; -} - -int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, - u16 obj_type, - u32 obj_id) -{ - return hws_cmd_general_obj_destroy(mdev, obj_type, obj_id); -} - int mlx5hws_cmd_generate_wqe(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_generate_wqe_attr *attr, struct 
mlx5_cqe64 *ret_cqe) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index 434f62b0904e..038f58890785 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -334,14 +334,6 @@ mlx5hws_cmd_forward_tbl_create(struct mlx5_core_dev *mdev, void mlx5hws_cmd_forward_tbl_destroy(struct mlx5_core_dev *mdev, struct mlx5hws_cmd_forward_tbl *tbl); -int mlx5hws_cmd_alias_obj_create(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_alias_obj_create_attr *alias_attr, - u32 *obj_id); - -int mlx5hws_cmd_alias_obj_destroy(struct mlx5_core_dev *mdev, - u16 obj_type, - u32 obj_id); - int mlx5hws_cmd_sq_modify_rdy(struct mlx5_core_dev *mdev, u32 sqn); int mlx5hws_cmd_query_caps(struct mlx5_core_dev *mdev, @@ -352,9 +344,6 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, enum mlx5hws_table_type type, struct mlx5hws_cmd_ft_modify_attr *ft_attr); -int mlx5hws_cmd_allow_other_vhca_access(struct mlx5_core_dev *mdev, - struct mlx5hws_cmd_allow_other_vhca_access_attr *attr); - int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, u16 vport_number, u16 *gvmi); From 0a1ef807a403b2f386a571133eb35e25c6511808 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:02 +0200 Subject: [PATCH 0695/1386] net/mlx5: HWS, denote how refcounts are protected Some HWS structs have refcounts that are just u32. Comment how they are protected and add '__must_hold()' annotation where applicable. 
Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h | 2 +- .../net/ethernet/mellanox/mlx5/core/steering/hws/definer.h | 2 +- .../net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c | 3 ++- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index e8f562c31826..4669c9fbcfb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -70,12 +70,12 @@ struct mlx5hws_action_default_stc { struct mlx5hws_pool_chunk nop_dw6; struct mlx5hws_pool_chunk nop_dw7; struct mlx5hws_pool_chunk default_hit; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_action_shared_stc { struct mlx5hws_pool_chunk stc_chunk; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_actions_apply_data { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h index 038f58890785..610c63d81ad9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.h @@ -63,7 +63,7 @@ struct mlx5hws_cmd_forward_tbl { u8 type; u32 ft_id; u32 fg_id; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ }; struct mlx5hws_cmd_rtc_create_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h index 
9432d5084def..5c1a2086efba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h @@ -785,7 +785,7 @@ struct mlx5hws_definer_cache { struct mlx5hws_definer_cache_item { struct mlx5hws_definer definer; - u32 refcount; + u32 refcount; /* protected by context ctrl lock */ struct list_head list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h index 27ca93385b08..8ddb51980044 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.h @@ -31,7 +31,7 @@ struct mlx5hws_pattern_cache_item { u8 *data; u16 num_of_actions; } mh_data; - u32 refcount; + u32 refcount; /* protected by pattern_cache lock */ struct list_head ptrn_list_node; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 9576e02d00c3..5b183739d5fd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -37,6 +37,7 @@ static void hws_table_set_cap_attr(struct mlx5hws_table *tbl, } static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) +__must_hold(&tbl->ctx->ctrl_lock) { struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; @@ -70,7 +71,6 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) return -EINVAL; } - /* ctx->ctrl_lock must be held here */ ctx->common_res[tbl_type].default_miss = default_miss; ctx->common_res[tbl_type].default_miss->refcount++; @@ -79,6 +79,7 @@ static int hws_table_up_default_fdb_miss_tbl(struct mlx5hws_table *tbl) /* Called under ctx->ctrl_lock */ static void hws_table_down_default_fdb_miss_tbl(struct mlx5hws_table *tbl) +__must_hold(&tbl->ctx->ctrl_lock) 
{ struct mlx5hws_cmd_forward_tbl *default_miss; struct mlx5hws_context *ctx = tbl->ctx; From c86963aae5b83a865a552408b40e743c3610bd9f Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:03 +0200 Subject: [PATCH 0696/1386] net/mlx5: HWS, simplify allocations as we support only FDB In pools, STCs and actions: no need to allocate array for various table types, as HWS is used to manage only FDB flow tables. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/action.c | 107 +++++++++--------- .../mellanox/mlx5/core/steering/hws/action.h | 2 +- .../mellanox/mlx5/core/steering/hws/cmd.c | 2 +- .../mellanox/mlx5/core/steering/hws/context.c | 29 ++--- .../mellanox/mlx5/core/steering/hws/context.h | 4 +- .../mellanox/mlx5/core/steering/hws/debug.c | 36 +++--- .../mellanox/mlx5/core/steering/hws/matcher.c | 4 +- .../mellanox/mlx5/core/steering/hws/rule.c | 2 +- .../mellanox/mlx5/core/steering/hws/table.c | 13 +-- 9 files changed, 87 insertions(+), 112 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index a897cdc60fdb..67d4f40cbd83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -11,31 +11,29 @@ /* This is the longest supported action sequence for FDB table: * DECAP, POP_VLAN, MODIFY, CTR, ASO, PUSH_VLAN, MODIFY, ENCAP, Term. 
*/ -static const u32 action_order_arr[MLX5HWS_TABLE_TYPE_MAX][MLX5HWS_ACTION_TYP_MAX] = { - [MLX5HWS_TABLE_TYPE_FDB] = { - BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2), - BIT(MLX5HWS_ACTION_TYP_POP_VLAN), - BIT(MLX5HWS_ACTION_TYP_POP_VLAN), - BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), - BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), - BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), - BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) | - BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3), - BIT(MLX5HWS_ACTION_TYP_CTR), - BIT(MLX5HWS_ACTION_TYP_TAG), - BIT(MLX5HWS_ACTION_TYP_ASO_METER), - BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), - BIT(MLX5HWS_ACTION_TYP_TBL) | - BIT(MLX5HWS_ACTION_TYP_VPORT) | - BIT(MLX5HWS_ACTION_TYP_DROP) | - BIT(MLX5HWS_ACTION_TYP_SAMPLER) | - BIT(MLX5HWS_ACTION_TYP_RANGE) | - BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY), - BIT(MLX5HWS_ACTION_TYP_LAST), - }, +static const u32 action_order_arr[MLX5HWS_ACTION_TYP_MAX] = { + BIT(MLX5HWS_ACTION_TYP_REMOVE_HEADER) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L2_TO_L2) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2), + BIT(MLX5HWS_ACTION_TYP_POP_VLAN), + BIT(MLX5HWS_ACTION_TYP_POP_VLAN), + BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), + BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), + BIT(MLX5HWS_ACTION_TYP_PUSH_VLAN), + BIT(MLX5HWS_ACTION_TYP_INSERT_HEADER) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L2) | + BIT(MLX5HWS_ACTION_TYP_REFORMAT_L2_TO_TNL_L3), + BIT(MLX5HWS_ACTION_TYP_CTR), + BIT(MLX5HWS_ACTION_TYP_TAG), + BIT(MLX5HWS_ACTION_TYP_ASO_METER), + BIT(MLX5HWS_ACTION_TYP_MODIFY_HDR), + BIT(MLX5HWS_ACTION_TYP_TBL) | + BIT(MLX5HWS_ACTION_TYP_VPORT) | + BIT(MLX5HWS_ACTION_TYP_DROP) | + BIT(MLX5HWS_ACTION_TYP_SAMPLER) | + BIT(MLX5HWS_ACTION_TYP_RANGE) | + BIT(MLX5HWS_ACTION_TYP_DEST_ARRAY), + BIT(MLX5HWS_ACTION_TYP_LAST), }; static const char * const mlx5hws_action_type_str[] = { @@ -83,8 +81,8 @@ static int hws_action_get_shared_stc_nic(struct 
mlx5hws_context *ctx, int ret; mutex_lock(&ctx->ctrl_lock); - if (ctx->common_res[tbl_type].shared_stc[stc_type]) { - ctx->common_res[tbl_type].shared_stc[stc_type]->refcount++; + if (ctx->common_res.shared_stc[stc_type]) { + ctx->common_res.shared_stc[stc_type]->refcount++; mutex_unlock(&ctx->ctrl_lock); return 0; } @@ -124,8 +122,8 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, goto free_shared_stc; } - ctx->common_res[tbl_type].shared_stc[stc_type] = shared_stc; - ctx->common_res[tbl_type].shared_stc[stc_type]->refcount = 1; + ctx->common_res.shared_stc[stc_type] = shared_stc; + ctx->common_res.shared_stc[stc_type]->refcount = 1; mutex_unlock(&ctx->ctrl_lock); @@ -178,16 +176,16 @@ static void hws_action_put_shared_stc(struct mlx5hws_action *action, } mutex_lock(&ctx->ctrl_lock); - if (--ctx->common_res[tbl_type].shared_stc[stc_type]->refcount) { + if (--ctx->common_res.shared_stc[stc_type]->refcount) { mutex_unlock(&ctx->ctrl_lock); return; } - shared_stc = ctx->common_res[tbl_type].shared_stc[stc_type]; + shared_stc = ctx->common_res.shared_stc[stc_type]; mlx5hws_action_free_single_stc(ctx, tbl_type, &shared_stc->stc_chunk); kfree(shared_stc); - ctx->common_res[tbl_type].shared_stc[stc_type] = NULL; + ctx->common_res.shared_stc[stc_type] = NULL; mutex_unlock(&ctx->ctrl_lock); } @@ -206,10 +204,10 @@ bool mlx5hws_action_check_combo(struct mlx5hws_context *ctx, enum mlx5hws_action_type *user_actions, enum mlx5hws_table_type table_type) { - const u32 *order_arr = action_order_arr[table_type]; + const u32 *order_arr = action_order_arr; + bool valid_combo; u8 order_idx = 0; u8 user_idx = 0; - bool valid_combo; if (table_type >= MLX5HWS_TABLE_TYPE_MAX) { mlx5hws_err(ctx, "Invalid table_type %d", table_type); @@ -321,8 +319,8 @@ int mlx5hws_action_alloc_single_stc(struct mlx5hws_context *ctx, __must_hold(&ctx->ctrl_lock) { struct mlx5hws_cmd_stc_modify_attr cleanup_stc_attr = {0}; - struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; 
struct mlx5hws_cmd_stc_modify_attr fixup_stc_attr = {0}; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; bool use_fixup; u32 obj_0_id; int ret; @@ -387,8 +385,8 @@ void mlx5hws_action_free_single_stc(struct mlx5hws_context *ctx, struct mlx5hws_pool_chunk *stc) __must_hold(&ctx->ctrl_lock) { - struct mlx5hws_pool *stc_pool = ctx->stc_pool[table_type]; struct mlx5hws_cmd_stc_modify_attr stc_attr = {0}; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; u32 obj_id; /* Modify the STC not to point to an object */ @@ -561,7 +559,7 @@ hws_action_create_stcs(struct mlx5hws_action *action, u32 obj_id) if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) { ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); if (ret) goto out_err; } @@ -585,7 +583,7 @@ hws_action_destroy_stcs(struct mlx5hws_action *action) if (action->flags & MLX5HWS_ACTION_FLAG_HWS_FDB) mlx5hws_action_free_single_stc(ctx, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); mutex_unlock(&ctx->ctrl_lock); } @@ -1639,8 +1637,8 @@ hws_action_create_dest_match_range_table(struct mlx5hws_context *ctx, rtc_attr.table_type = mlx5hws_table_get_res_fw_ft_type(MLX5HWS_TABLE_TYPE_FDB, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool[MLX5HWS_TABLE_TYPE_FDB]; - default_stc = ctx->common_res[MLX5HWS_TABLE_TYPE_FDB].default_stc; + stc_pool = ctx->stc_pool; + default_stc = ctx->common_res.default_stc; obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); rtc_attr.stc_base = obj_id; @@ -1731,7 +1729,7 @@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, ste_attr.used_id_rtc_0 = &used_rtc_0_id; ste_attr.used_id_rtc_1 = &used_rtc_1_id; - common_res = &ctx->common_res[MLX5HWS_TABLE_TYPE_FDB]; + common_res = &ctx->common_res; /* init an empty match STE which will always hit */ ste_attr.wqe_ctrl = &wqe_ctrl; @@ -1750,7 +1748,7 
@@ hws_action_create_dest_match_range_fill_table(struct mlx5hws_context *ctx, wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_CTRL] |= htonl(MLX5HWS_ACTION_STC_IDX_LAST_COMBO2 << 29); wqe_ctrl.stc_ix[MLX5HWS_ACTION_STC_IDX_HIT] = - htonl(hit_ft_action->stc[MLX5HWS_TABLE_TYPE_FDB].offset); + htonl(hit_ft_action->stc.offset); wqe_data_arr = (__force __be32 *)&range_wqe_data; @@ -1843,7 +1841,7 @@ mlx5hws_action_create_dest_match_range(struct mlx5hws_context *ctx, stc_attr.ste_table.match_definer_id = ctx->caps->trivial_match_definer; ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, MLX5HWS_TABLE_TYPE_FDB, - &action->stc[MLX5HWS_TABLE_TYPE_FDB]); + &action->stc); if (ret) goto error_unlock; @@ -1970,8 +1968,8 @@ __must_hold(&ctx->ctrl_lock) struct mlx5hws_action_default_stc *default_stc; int ret; - if (ctx->common_res[tbl_type].default_stc) { - ctx->common_res[tbl_type].default_stc->refcount++; + if (ctx->common_res.default_stc) { + ctx->common_res.default_stc->refcount++; return 0; } @@ -2023,8 +2021,8 @@ __must_hold(&ctx->ctrl_lock) goto free_nop_dw7; } - ctx->common_res[tbl_type].default_stc = default_stc; - ctx->common_res[tbl_type].default_stc->refcount++; + ctx->common_res.default_stc = default_stc; + ctx->common_res.default_stc->refcount++; return 0; @@ -2046,9 +2044,7 @@ __must_hold(&ctx->ctrl_lock) { struct mlx5hws_action_default_stc *default_stc; - default_stc = ctx->common_res[tbl_type].default_stc; - - default_stc = ctx->common_res[tbl_type].default_stc; + default_stc = ctx->common_res.default_stc; if (--default_stc->refcount) return; @@ -2058,7 +2054,7 @@ __must_hold(&ctx->ctrl_lock) mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_dw5); mlx5hws_action_free_single_stc(ctx, tbl_type, &default_stc->nop_ctr); kfree(default_stc); - ctx->common_res[tbl_type].default_stc = NULL; + ctx->common_res.default_stc = NULL; } static void hws_action_modify_write(struct mlx5hws_send_engine *queue, @@ -2150,8 +2146,7 @@ hws_action_apply_stc(struct 
mlx5hws_actions_apply_data *apply, { struct mlx5hws_action *action = apply->rule_action[action_idx].action; - apply->wqe_ctrl->stc_ix[stc_idx] = - htonl(action->stc[apply->tbl_type].offset); + apply->wqe_ctrl->stc_ix[stc_idx] = htonl(action->stc.offset); } static void @@ -2181,7 +2176,7 @@ hws_action_setter_modify_header(struct mlx5hws_actions_apply_data *apply, rule_action = &apply->rule_action[setter->idx_double]; action = rule_action->action; - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; @@ -2240,7 +2235,7 @@ hws_action_setter_insert_ptr(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; @@ -2272,7 +2267,7 @@ hws_action_setter_tnl_l3_to_l2(struct mlx5hws_actions_apply_data *apply, apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW6] = 0; apply->wqe_data[MLX5HWS_ACTION_OFFSET_DW7] = htonl(arg_idx); - stc_idx = htonl(action->stc[apply->tbl_type].offset); + stc_idx = htonl(action->stc.offset); apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW6] = stc_idx; apply->wqe_ctrl->stc_ix[MLX5HWS_ACTION_STC_IDX_DW7] = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index 4669c9fbcfb2..6d1592c49e0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -124,7 +124,7 @@ struct mlx5hws_action { struct mlx5hws_context *ctx; union { struct { - struct mlx5hws_pool_chunk stc[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_pool_chunk stc; 
union { struct { u32 pat_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 6fd7747f08ec..9b71ff80831d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -359,7 +359,7 @@ void mlx5hws_cmd_set_attr_connect_miss_tbl(struct mlx5hws_context *ctx, ft_attr->type = fw_ft_type; ft_attr->table_miss_action = MLX5_IFC_MODIFY_FLOW_TABLE_MISS_ACTION_GOTO_TBL; - default_miss_tbl = ctx->common_res[type].default_miss->ft_id; + default_miss_tbl = ctx->common_res.default_miss->ft_id; if (!default_miss_tbl) { pr_warn("HWS: no flow table ID for default miss\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c index 4a8928f33bb9..9cda2774fd64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.c @@ -23,7 +23,6 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) struct mlx5hws_pool_attr pool_attr = {0}; u8 max_log_sz; int ret; - int i; ret = mlx5hws_pat_init_pattern_cache(&ctx->pattern_cache); if (ret) @@ -39,23 +38,17 @@ static int hws_context_pools_init(struct mlx5hws_context *ctx) max_log_sz = min(MLX5HWS_POOL_STC_LOG_SZ, ctx->caps->stc_alloc_log_max); pool_attr.alloc_log_sz = max(max_log_sz, ctx->caps->stc_alloc_log_gran); - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - pool_attr.table_type = i; - ctx->stc_pool[i] = mlx5hws_pool_create(ctx, &pool_attr); - if (!ctx->stc_pool[i]) { - mlx5hws_err(ctx, "Failed to allocate STC pool [%d]", i); - ret = -ENOMEM; - goto free_stc_pools; - } + pool_attr.table_type = MLX5HWS_TABLE_TYPE_FDB; + ctx->stc_pool = mlx5hws_pool_create(ctx, &pool_attr); + if (!ctx->stc_pool) { + mlx5hws_err(ctx, "Failed to allocate STC pool\n"); + ret = -ENOMEM; + goto uninit_cache; } 
return 0; -free_stc_pools: - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) - if (ctx->stc_pool[i]) - mlx5hws_pool_destroy(ctx->stc_pool[i]); - +uninit_cache: mlx5hws_definer_uninit_cache(ctx->definer_cache); uninit_pat_cache: mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); @@ -64,12 +57,8 @@ uninit_pat_cache: static void hws_context_pools_uninit(struct mlx5hws_context *ctx) { - int i; - - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - if (ctx->stc_pool[i]) - mlx5hws_pool_destroy(ctx->stc_pool[i]); - } + if (ctx->stc_pool) + mlx5hws_pool_destroy(ctx->stc_pool); mlx5hws_definer_uninit_cache(ctx->definer_cache); mlx5hws_pat_uninit_pattern_cache(ctx->pattern_cache); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h index 1c9cc4fba083..38c3647444ad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/context.h @@ -38,8 +38,8 @@ struct mlx5hws_context { struct mlx5_core_dev *mdev; struct mlx5hws_cmd_query_caps *caps; u32 pd_num; - struct mlx5hws_pool *stc_pool[MLX5HWS_TABLE_TYPE_MAX]; - struct mlx5hws_context_common_res common_res[MLX5HWS_TABLE_TYPE_MAX]; + struct mlx5hws_pool *stc_pool; + struct mlx5hws_context_common_res common_res; struct mlx5hws_pattern_cache *pattern_cache; struct mlx5hws_definer_cache *definer_cache; struct mutex ctrl_lock; /* control lock to protect the whole context */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c index 5b200b4bc1a8..60ada3143d60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/debug.c @@ -368,9 +368,10 @@ static int hws_debug_dump_context_info(struct seq_file *f, struct mlx5hws_contex static int hws_debug_dump_context_stc_resource(struct seq_file *f, struct mlx5hws_context *ctx, - 
u32 tbl_type, struct mlx5hws_pool_resource *resource) { + u32 tbl_type = MLX5HWS_TABLE_TYPE_BASE + MLX5HWS_TABLE_TYPE_FDB; + seq_printf(f, "%d,0x%llx,%u,%u\n", MLX5HWS_DEBUG_RES_TYPE_CONTEXT_STC, HWS_PTR_TO_ID(ctx), @@ -382,31 +383,22 @@ static int hws_debug_dump_context_stc_resource(struct seq_file *f, static int hws_debug_dump_context_stc(struct seq_file *f, struct mlx5hws_context *ctx) { - struct mlx5hws_pool *stc_pool; - u32 table_type; + struct mlx5hws_pool *stc_pool = ctx->stc_pool; int ret; - int i; - for (i = 0; i < MLX5HWS_TABLE_TYPE_MAX; i++) { - stc_pool = ctx->stc_pool[i]; - table_type = MLX5HWS_TABLE_TYPE_BASE + i; + if (!stc_pool) + return 0; - if (!stc_pool) - continue; + if (stc_pool->resource[0]) { + ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->resource[0]); + if (ret) + return ret; + } - if (stc_pool->resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, - stc_pool->resource[0]); - if (ret) - return ret; - } - - if (i == MLX5HWS_TABLE_TYPE_FDB && stc_pool->mirror_resource[0]) { - ret = hws_debug_dump_context_stc_resource(f, ctx, table_type, - stc_pool->mirror_resource[0]); - if (ret) - return ret; - } + if (stc_pool->mirror_resource[0]) { + ret = hws_debug_dump_context_stc_resource(f, ctx, stc_pool->mirror_resource[0]); + if (ret) + return ret; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index 1bb3a6f8c3cd..e40193f30c54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -318,8 +318,8 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, hws_matcher_set_rtc_attr_sz(matcher, &rtc_attr, rtc_type, false); /* STC is a single resource (obj_id), use any STC for the ID */ - stc_pool = ctx->stc_pool[tbl->type]; - default_stc = ctx->common_res[tbl->type].default_stc; + stc_pool = ctx->stc_pool; + 
default_stc = ctx->common_res.default_stc; obj_id = mlx5hws_pool_chunk_get_base_id(stc_pool, &default_stc->default_hit); rtc_attr.stc_base = obj_id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c index e20c67a04203..14f6307a1772 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/rule.c @@ -315,7 +315,7 @@ static void hws_rule_create_init(struct mlx5hws_rule *rule, /* Init default action apply */ apply->tbl_type = tbl->type; - apply->common_res = &ctx->common_res[tbl->type]; + apply->common_res = &ctx->common_res; apply->jump_to_action_stc = matcher->action_ste[0].stc.offset; apply->require_dep = 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 5b183739d5fd..967d67ec10e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -49,8 +49,8 @@ __must_hold(&tbl->ctx->ctrl_lock) if (tbl->type != MLX5HWS_TABLE_TYPE_FDB) return 0; - if (ctx->common_res[tbl_type].default_miss) { - ctx->common_res[tbl_type].default_miss->refcount++; + if (ctx->common_res.default_miss) { + ctx->common_res.default_miss->refcount++; return 0; } @@ -71,8 +71,8 @@ __must_hold(&tbl->ctx->ctrl_lock) return -EINVAL; } - ctx->common_res[tbl_type].default_miss = default_miss; - ctx->common_res[tbl_type].default_miss->refcount++; + ctx->common_res.default_miss = default_miss; + ctx->common_res.default_miss->refcount++; return 0; } @@ -83,17 +83,16 @@ __must_hold(&tbl->ctx->ctrl_lock) { struct mlx5hws_cmd_forward_tbl *default_miss; struct mlx5hws_context *ctx = tbl->ctx; - u8 tbl_type = tbl->type; if (tbl->type != MLX5HWS_TABLE_TYPE_FDB) return; - default_miss = ctx->common_res[tbl_type].default_miss; + default_miss = ctx->common_res.default_miss; if 
(--default_miss->refcount) return; mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, default_miss); - ctx->common_res[tbl_type].default_miss = NULL; + ctx->common_res.default_miss = NULL; } static int hws_table_connect_to_default_miss_tbl(struct mlx5hws_table *tbl, u32 ft_id) From cc611ab6c712eaa1ed3fd4321d91e66cfe3245a3 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:04 +0200 Subject: [PATCH 0697/1386] net/mlx5: HWS, add error message on failure to move rules Add error message for failure to move rules from old matcher to new one during rehash. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-6-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../ethernet/mellanox/mlx5/core/steering/hws/bwc.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index baacf662c0ab..af8ab8750c70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -615,8 +615,12 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match ret = hws_bwc_queue_poll(ctx, rule_attr.queue_id, &pending_rules[i], false); - if (unlikely(ret)) + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Moving BWC rule failed during rehash (%d)\n", + ret); goto free_bwc_rules; + } } } } while (!all_done); @@ -629,8 +633,11 @@ static int hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_match mlx5hws_send_engine_flush_queue(&ctx->send_queue[queue_id]); ret = hws_bwc_queue_poll(ctx, queue_id, &pending_rules[i], true); - if (unlikely(ret)) + if (unlikely(ret)) { + mlx5hws_err(ctx, + "Moving BWC rule failed during rehash (%d)\n", ret); goto free_bwc_rules; + } } } From 1ce840c7a659aa53a31ef49f0271b4fd0dc10296 Mon Sep 17 
00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:05 +0200 Subject: [PATCH 0698/1386] net/mlx5: HWS, change error flow on matcher disconnect Currently, when firmware failure occurs during matcher disconnect flow, the error flow of the function reconnects the matcher back and returns an error, which continues running the calling function and eventually frees the matcher that is being disconnected. This leads to a case where we have a freed matcher on the matchers list, which in turn leads to use-after-free and eventual crash. This patch fixes that by not trying to reconnect the matcher back when some FW command fails during disconnect. Note that we're dealing here with FW error. We can't overcome this problem. This might lead to bad steering state (e.g. wrong connection between matchers), and will also lead to resource leakage, as it is the case with any other error handling during resource destruction. However, the goal here is to allow the driver to continue and not crash the machine with use-after-free error. 
Signed-off-by: Yevgeny Kliteynik Signed-off-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/matcher.c | 24 +++++++------------ 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index e40193f30c54..fea2a945b0db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -165,14 +165,14 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) next->match_ste.rtc_0_id, next->match_ste.rtc_1_id); if (ret) { - mlx5hws_err(tbl->ctx, "Failed to disconnect matcher\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect matcher\n"); + return ret; } } else { ret = mlx5hws_table_connect_to_miss_table(tbl, tbl->default_miss.miss_tbl); if (ret) { - mlx5hws_err(tbl->ctx, "Failed to disconnect last matcher\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, "Fatal error, failed to disconnect last matcher\n"); + return ret; } } @@ -180,27 +180,19 @@ static int hws_matcher_disconnect(struct mlx5hws_matcher *matcher) if (prev_ft_id == tbl->ft_id) { ret = mlx5hws_table_update_connected_miss_tables(tbl); if (ret) { - mlx5hws_err(tbl->ctx, "Fatal error, failed to update connected miss table\n"); - goto matcher_reconnect; + mlx5hws_err(tbl->ctx, + "Fatal error, failed to update connected miss table\n"); + return ret; } } ret = mlx5hws_table_ft_set_default_next_ft(tbl, prev_ft_id); if (ret) { mlx5hws_err(tbl->ctx, "Fatal error, failed to restore matcher ft default miss\n"); - goto matcher_reconnect; + return ret; } return 0; - -matcher_reconnect: - if (list_empty(&tbl->matchers_list) || !prev) - list_add(&matcher->list_node, &tbl->matchers_list); - 
else - /* insert after prev matcher */ - list_add(&matcher->list_node, &prev->list_node); - - return ret; } static void hws_matcher_set_rtc_attr_sz(struct mlx5hws_matcher *matcher, From ad4da6cc36ace35d80a292bfeaac49e63e9e26eb Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:06 +0200 Subject: [PATCH 0699/1386] net/mlx5: HWS, remove wrong deletion of the miss table list Remove wrong cleanup of the old miss table list and simplify the error flow in the function. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Reviewed-by: Itamar Gozlan Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-8-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/table.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c index 967d67ec10e3..ab1297531232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/table.c @@ -478,15 +478,9 @@ int mlx5hws_table_set_default_miss(struct mlx5hws_table *tbl, if (old_miss_tbl) list_del_init(&tbl->default_miss.next); - old_miss_tbl = tbl->default_miss.miss_tbl; - if (old_miss_tbl) - list_del_init(&old_miss_tbl->default_miss.head); - if (miss_tbl) list_add(&tbl->default_miss.next, &miss_tbl->default_miss.head); - mutex_unlock(&ctx->ctrl_lock); - return 0; out: mutex_unlock(&ctx->ctrl_lock); return ret; From 05e3c287b98795cf01d829d29841179cef3fb9ce Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:07 +0200 Subject: [PATCH 0700/1386] net/mlx5: HWS, reduce memory consumption of a matcher struct Instead of having a large array of action templates allocated with kmalloc, have smaller array and allocate it with kvmalloc. The size of the array represents the max number of AT attach operations for the same matcher. 
This number is not expected to be very high. In any case, when the limit is reached, the next attempt to attach new AT will result in creation of a new matcher and moving all the rules to this matcher. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-9-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h | 8 +++++++- .../ethernet/mellanox/mlx5/core/steering/hws/matcher.c | 8 ++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 3d4965213b01..1d27638fa171 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -8,7 +8,13 @@ #define MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP 1 #define MLX5HWS_BWC_MATCHER_REHASH_PERCENT_TH 70 #define MLX5HWS_BWC_MATCHER_REHASH_BURST_TH 32 -#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 255 + +/* Max number of AT attach operations for the same matcher. + * When the limit is reached, next attempt to attach new AT + * will result in creation of a new matcher and moving all + * the rules to this matcher. 
+ */ +#define MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM 8 #define MLX5HWS_BWC_MAX_ACTS 16 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index fea2a945b0db..4419c72ad314 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -999,9 +999,9 @@ hws_matcher_set_templates(struct mlx5hws_matcher *matcher, if (!matcher->mt) return -ENOMEM; - matcher->at = kcalloc(num_of_at + matcher->attr.max_num_of_at_attach, - sizeof(*matcher->at), - GFP_KERNEL); + matcher->at = kvcalloc(num_of_at + matcher->attr.max_num_of_at_attach, + sizeof(*matcher->at), + GFP_KERNEL); if (!matcher->at) { mlx5hws_err(ctx, "Failed to allocate action template array\n"); ret = -ENOMEM; @@ -1027,7 +1027,7 @@ free_mt: static void hws_matcher_unset_templates(struct mlx5hws_matcher *matcher) { - kfree(matcher->at); + kvfree(matcher->at); kfree(matcher->mt); } From 61fb92701b8ac9174857c417cfa988adc24e32c2 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:08 +0200 Subject: [PATCH 0701/1386] net/mlx5: HWS, num_of_rules counter on matcher should be atomic Rule counter in matcher's struct is used in two places: 1. As heuristics to decide when the number of rules have crossed a certain percentage threshold and the matcher should be resized. We don't mind here if the number will be off by 1-2 due to concurrency. 2. When destroying matcher, the counter value is checked and the user is warned if it is not 0. Here we lock all the queues, so the counter will be correct. We don't need to always have *exact* number, but we do need this number to not be corrupted, which is what is happening when the counter isn't atomic, due to update by different threads. 
Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-10-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 17 +++++++++++------ .../mellanox/mlx5/core/steering/hws/bwc.h | 2 +- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index af8ab8750c70..40d688ed6153 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -152,6 +152,8 @@ mlx5hws_bwc_matcher_create(struct mlx5hws_table *table, if (!bwc_matcher) return NULL; + atomic_set(&bwc_matcher->num_of_rules, 0); + /* Check if the required match params can be all matched * in single STE, otherwise complex matcher is needed. */ @@ -199,10 +201,12 @@ int mlx5hws_bwc_matcher_destroy_simple(struct mlx5hws_bwc_matcher *bwc_matcher) int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) { - if (bwc_matcher->num_of_rules) + u32 num_of_rules = atomic_read(&bwc_matcher->num_of_rules); + + if (num_of_rules) mlx5hws_err(bwc_matcher->matcher->tbl->ctx, "BWC matcher destroy: matcher still has %d rules\n", - bwc_matcher->num_of_rules); + num_of_rules); mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); @@ -309,7 +313,7 @@ static void hws_bwc_rule_list_add(struct mlx5hws_bwc_rule *bwc_rule, u16 idx) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - bwc_matcher->num_of_rules++; + atomic_inc(&bwc_matcher->num_of_rules); bwc_rule->bwc_queue_idx = idx; list_add(&bwc_rule->list_node, &bwc_matcher->rules[idx]); } @@ -318,7 +322,7 @@ static void hws_bwc_rule_list_remove(struct mlx5hws_bwc_rule *bwc_rule) { struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - bwc_matcher->num_of_rules--; + 
atomic_dec(&bwc_matcher->num_of_rules); list_del_init(&bwc_rule->list_node); } @@ -711,7 +715,8 @@ hws_bwc_matcher_rehash_size(struct mlx5hws_bwc_matcher *bwc_matcher) * Need to check again if we really need rehash. * If the reason for rehash was size, but not any more - skip rehash. */ - if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, bwc_matcher->num_of_rules)) + if (!hws_bwc_matcher_rehash_size_needed(bwc_matcher, + atomic_read(&bwc_matcher->num_of_rules))) return 0; /* Now we're done all the checking - do the rehash: @@ -804,7 +809,7 @@ int mlx5hws_bwc_rule_create_simple(struct mlx5hws_bwc_rule *bwc_rule, } /* check if number of rules require rehash */ - num_of_rules = bwc_matcher->num_of_rules; + num_of_rules = atomic_read(&bwc_matcher->num_of_rules); if (unlikely(hws_bwc_matcher_rehash_size_needed(bwc_matcher, num_of_rules))) { mutex_unlock(queue_lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 1d27638fa171..06c2a30c0d4e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -25,7 +25,7 @@ struct mlx5hws_bwc_matcher { u8 num_of_at; u16 priority; u8 size_log; - u32 num_of_rules; /* atomically accessed */ + atomic_t num_of_rules; struct list_head *rules; }; From 2f851d1702dcd1b7124aef1680a091ff3f2ef791 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:09 +0200 Subject: [PATCH 0702/1386] net/mlx5: HWS, separate SQ that HWS uses from the usual traffic SQs Mark the HWS SQ as 'non_wire' so that 'Flow Update' flow won't mix with network traffic. 
Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-11-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index 20fe126ffd22..c680b7f984e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -633,6 +633,7 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn, MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, flush_in_error_en, 1); + MLX5_SET(sqc, sqc, non_wire, 1); ts_format = mlx5_is_real_time_sq(mdev) ? MLX5_TIMESTAMP_FORMAT_REAL_TIME : MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; From be482f1d10da781db9445d2753c1e3f1fd82babf Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:10 +0200 Subject: [PATCH 0703/1386] net/mlx5: HWS, fix definer's HWS_SET32 macro for negative offset When bit offset for HWS_SET32 macro is negative, UBSAN complains about the shift-out-of-bounds: UBSAN: shift-out-of-bounds in drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c:177:2 shift exponent -8 is negative Fixes: 74a778b4a63f ("net/mlx5: HWS, added definers handling") Signed-off-by: Yevgeny Kliteynik Reviewed-by: Erez Shitrit Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-12-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index 8fe96eb76baf..10ece7df1cfa 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -70,7 +70,7 @@ u32 second_dw_mask = (mask) & ((1 << _bit_off) - 1); \ _HWS_SET32(p, (v) >> _bit_off, byte_off, 0, (mask) >> _bit_off); \ _HWS_SET32(p, (v) & second_dw_mask, (byte_off) + DW_SIZE, \ - (bit_off) % BITS_IN_DW, second_dw_mask); \ + (bit_off + BITS_IN_DW) % BITS_IN_DW, second_dw_mask); \ } else { \ _HWS_SET32(p, v, byte_off, (bit_off), (mask)); \ } \ From a105db854cf2e495caaa17f00ac0321b503def9b Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 2 Jan 2025 20:14:11 +0200 Subject: [PATCH 0704/1386] net/mlx5: HWS, handle returned error value in pool alloc Handle all negative return values as errors, not just -1. The code previously treated -ENOMEM (and potentially other negative values) as valid segment numbers, leading to incorrect behavior. This fix ensures that any negative return value is treated as an error. Signed-off-by: Vlad Dogaru Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index fed2d913f3b8..50a81d360bb2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -183,7 +183,7 @@ static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool *pool, *seg = -1; /* Find the next free place from the buddy array */ - while (*seg == -1) { + while (*seg < 0) { for (i = 0; i < MLX5HWS_POOL_RESOURCE_ARR_SZ; i++) { buddy = hws_pool_buddy_get_next_buddy(pool, i, order, @@ -194,7 +194,7 @@ static int hws_pool_buddy_get_mem_chunk(struct mlx5hws_pool 
*pool, } *seg = mlx5hws_buddy_alloc_mem(buddy, order); - if (*seg != -1) + if (*seg >= 0) goto found; if (pool->flags & MLX5HWS_POOL_FLAGS_ONE_RESOURCE) { From 85ab9ea32548c0fff1c8e07b4fbfc185f615f9f1 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:12 +0200 Subject: [PATCH 0705/1386] net/mlx5: HWS, use the right size when writing arg data When writing arg data, wrong size was used - fixing this. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-14-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 06db5e4726ae..d9dc4f2d0dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -344,7 +344,7 @@ void mlx5hws_arg_write(struct mlx5hws_send_engine *queue, mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_ctrl, &wqe_len); memset(wqe_ctrl, 0, wqe_len); mlx5hws_send_engine_post_req_wqe(&ctrl, (void *)&wqe_arg, &wqe_len); - memcpy(wqe_arg, arg_data, wqe_len); + memcpy(wqe_arg, arg_data, MLX5HWS_ARG_DATA_SIZE); send_attr.id = arg_idx++; mlx5hws_send_engine_post_end(&ctrl, &send_attr); From 663e61225c4019441cd5c9d3cc35dfc293271482 Mon Sep 17 00:00:00 2001 From: Vlad Dogaru Date: Thu, 2 Jan 2025 20:14:13 +0200 Subject: [PATCH 0706/1386] net/mlx5: HWS, support flow sampler destination Since sampler isn't currently supported via HWS, use a FW island that forwards any packets to the supplied sampler. 
Signed-off-by: Vlad Dogaru Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-15-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/action.c | 52 ++++++++++++++++++- .../mellanox/mlx5/core/steering/hws/action.h | 3 ++ .../mellanox/mlx5/core/steering/hws/cmd.c | 6 +++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index 67d4f40cbd83..b5332c54d4fb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -471,6 +471,7 @@ static void hws_action_fill_stc_attr(struct mlx5hws_action *action, break; case MLX5HWS_ACTION_TYP_TBL: case MLX5HWS_ACTION_TYP_DEST_ARRAY: + case MLX5HWS_ACTION_TYP_SAMPLER: attr->action_type = MLX5_IFC_STC_ACTION_TYPE_JUMP_TO_FT; attr->action_offset = MLX5HWS_ACTION_OFFSET_HIT; attr->dest_table_id = obj_id; @@ -1873,7 +1874,50 @@ struct mlx5hws_action * mlx5hws_action_create_flow_sampler(struct mlx5hws_context *ctx, u32 sampler_id, u32 flags) { - mlx5hws_err(ctx, "Flow sampler action - unsupported\n"); + struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; + struct mlx5hws_cmd_set_fte_attr fte_attr = {0}; + struct mlx5hws_cmd_forward_tbl *fw_island; + struct mlx5hws_cmd_set_fte_dest dest; + struct mlx5hws_action *action; + int ret; + + if (flags != (MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED)) { + mlx5hws_err(ctx, "Unsupported flags for flow sampler\n"); + return NULL; + } + + ft_attr.type = FS_FT_FDB; + ft_attr.level = ctx->caps->fdb_ft.max_level - 1; + + dest.destination_type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; + dest.destination_id = sampler_id; + + fte_attr.dests_num = 1; + fte_attr.dests = &dest; + fte_attr.action_flags = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + 
fte_attr.ignore_flow_level = 1; + + fw_island = mlx5hws_cmd_forward_tbl_create(ctx->mdev, &ft_attr, &fte_attr); + if (!fw_island) + return NULL; + + action = hws_action_create_generic(ctx, flags, + MLX5HWS_ACTION_TYP_SAMPLER); + if (!action) + goto destroy_fw_island; + + ret = hws_action_create_stcs(action, fw_island->ft_id); + if (ret) + goto free_action; + + action->flow_sampler.fw_island = fw_island; + + return action; + +free_action: + kfree(action); +destroy_fw_island: + mlx5hws_cmd_forward_tbl_destroy(ctx->mdev, fw_island); return NULL; } @@ -1912,6 +1956,11 @@ static void hws_action_destroy_hws(struct mlx5hws_action *action) } kfree(action->dest_array.dest_list); break; + case MLX5HWS_ACTION_TYP_SAMPLER: + hws_action_destroy_stcs(action); + mlx5hws_cmd_forward_tbl_destroy(action->ctx->mdev, + action->flow_sampler.fw_island); + break; case MLX5HWS_ACTION_TYP_REFORMAT_TNL_L3_TO_L2: case MLX5HWS_ACTION_TYP_MODIFY_HDR: shared_arg = false; @@ -2429,6 +2478,7 @@ int mlx5hws_action_template_process(struct mlx5hws_action_template *at) case MLX5HWS_ACTION_TYP_DROP: case MLX5HWS_ACTION_TYP_TBL: case MLX5HWS_ACTION_TYP_DEST_ARRAY: + case MLX5HWS_ACTION_TYP_SAMPLER: case MLX5HWS_ACTION_TYP_VPORT: case MLX5HWS_ACTION_TYP_MISS: /* Hit action */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h index 6d1592c49e0c..64b76075f7f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.h @@ -165,6 +165,9 @@ struct mlx5hws_action { size_t num_dest; struct mlx5hws_cmd_set_fte_dest *dest_list; } dest_array; + struct { + struct mlx5hws_cmd_forward_tbl *fw_island; + } flow_sampler; struct { u8 type; u8 start_anchor; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 9b71ff80831d..487e75476b0a 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -257,6 +257,12 @@ int mlx5hws_cmd_set_fte(struct mlx5_core_dev *mdev, dest->ext_reformat_id); } break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER: + MLX5_SET(dest_format, in_dests, + destination_type, ifc_dest_type); + MLX5_SET(dest_format, in_dests, destination_id, + dest->destination_id); + break; default: ret = -EOPNOTSUPP; goto out; From d74ee6e197a2c2c5b1697d737ccdcaf8cc6c199e Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 2 Jan 2025 20:14:14 +0200 Subject: [PATCH 0707/1386] net/mlx5: HWS, set timeout on polling for completion Consolidate BWC polling for completion into one function and set a time limit on the loop that polls for completion. This can happen only if there is some issue with FW/PCI/HW, such as FW being stuck, PCI issue, etc. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Itamar Gozlan Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250102181415.1477316-16-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 24 ++++++++++++------- .../mellanox/mlx5/core/steering/hws/bwc.h | 2 ++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index 40d688ed6153..a8d886e92144 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -219,6 +219,8 @@ static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, u32 *pending_rules, bool drain) { + unsigned long timeout = jiffies + + msecs_to_jiffies(MLX5HWS_BWC_POLLING_TIMEOUT * MSEC_PER_SEC); struct mlx5hws_flow_op_result comp[MLX5HWS_BWC_MATCHER_REHASH_BURST_TH]; u16 burst_th = hws_bwc_get_burst_th(ctx, queue_id); bool got_comp = *pending_rules >= burst_th; @@ -254,6 +256,11 @@ 
static int hws_bwc_queue_poll(struct mlx5hws_context *ctx, } got_comp = !!ret; + + if (unlikely(!got_comp && time_after(jiffies, timeout))) { + mlx5hws_err(ctx, "BWC poll error: polling queue %d - TIMEOUT\n", queue_id); + return -ETIMEDOUT; + } } return err; @@ -338,22 +345,21 @@ hws_bwc_rule_destroy_hws_sync(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_rule_attr *rule_attr) { struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_flow_op_result completion; + u32 expected_completions = 1; int ret; ret = hws_bwc_rule_destroy_hws_async(bwc_rule, rule_attr); if (unlikely(ret)) return ret; - do { - ret = mlx5hws_send_queue_poll(ctx, rule_attr->queue_id, &completion, 1); - } while (ret != 1); + ret = hws_bwc_queue_poll(ctx, rule_attr->queue_id, &expected_completions, true); + if (unlikely(ret)) + return ret; - if (unlikely(completion.status != MLX5HWS_FLOW_OP_SUCCESS || - (bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED && - bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING))) { - mlx5hws_err(ctx, "Failed destroying BWC rule: completion %d, rule status %d\n", - completion.status, bwc_rule->rule->status); + if (unlikely(bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETED && + bwc_rule->rule->status != MLX5HWS_RULE_STATUS_DELETING)) { + mlx5hws_err(ctx, "Failed destroying BWC rule: rule status %d\n", + bwc_rule->rule->status); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index 06c2a30c0d4e..f9f569131dde 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -18,6 +18,8 @@ #define MLX5HWS_BWC_MAX_ACTS 16 +#define MLX5HWS_BWC_POLLING_TIMEOUT 60 + struct mlx5hws_bwc_matcher { struct mlx5hws_matcher *matcher; struct mlx5hws_match_template *mt; From 21a8a77abb4c5b472072a2f695f89a98c8af1654 Mon Sep 17 00:00:00 2001 From: Krzysztof 
Kozlowski Date: Sat, 4 Jan 2025 15:20:43 +0100 Subject: [PATCH 0708/1386] nfc: st21nfca: Drop unneeded null check in st21nfca_tx_work() Variable 'info' is obtained via container_of() of struct work_struct, so it cannot be NULL. Simplify the code and solve Smatch warning: drivers/nfc/st21nfca/dep.c:119 st21nfca_tx_work() warn: can 'info' even be NULL? Signed-off-by: Krzysztof Kozlowski Reviewed-by: Simon Horman Link: https://patch.msgid.link/20250104142043.116045-1-krzysztof.kozlowski@linaro.org Signed-off-by: Jakub Kicinski --- drivers/nfc/st21nfca/dep.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/nfc/st21nfca/dep.c b/drivers/nfc/st21nfca/dep.c index 1ec651e31064..3425b68f0ddc 100644 --- a/drivers/nfc/st21nfca/dep.c +++ b/drivers/nfc/st21nfca/dep.c @@ -116,18 +116,16 @@ static void st21nfca_tx_work(struct work_struct *work) struct nfc_dev *dev; struct sk_buff *skb; - if (info) { - dev = info->hdev->ndev; - skb = info->dep_info.tx_pending; + dev = info->hdev->ndev; + skb = info->dep_info.tx_pending; - device_lock(&dev->dev); + device_lock(&dev->dev); - nfc_hci_send_cmd_async(info->hdev, ST21NFCA_RF_READER_F_GATE, - ST21NFCA_WR_XCHG_DATA, skb->data, skb->len, - info->async_cb, info); - device_unlock(&dev->dev); - kfree_skb(skb); - } + nfc_hci_send_cmd_async(info->hdev, ST21NFCA_RF_READER_F_GATE, + ST21NFCA_WR_XCHG_DATA, skb->data, skb->len, + info->async_cb, info); + device_unlock(&dev->dev); + kfree_skb(skb); } static void st21nfca_im_send_pdu(struct st21nfca_hci_info *info, From 49afc040f4d707a4149a05180edc42bc590641a4 Mon Sep 17 00:00:00 2001 From: Nihar Chaithanya Date: Sat, 4 Jan 2025 22:49:15 +0530 Subject: [PATCH 0709/1386] octeontx2-pf: mcs: Remove dead code and semi-colon from rsrc_name() Every case in the switch-block ends with return statement, and the default: branch handles the cases where rsrc_type is invalid and returns "Unknown", this makes the return statement at the end of the function unreachable 
and redundant. The semi-colon is not required after the switch-block's curly braces. Remove the semi-colon after the switch-block's curly braces and the return statement at the end of the function. This issue was reported by Coverity Scan. Signed-off-by: Nihar Chaithanya Link: https://patch.msgid.link/20250104171905.13293-1-niharchaithanya@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c index 6cc7a78968fc..f3b9daffaec3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c @@ -133,9 +133,7 @@ static const char *rsrc_name(enum mcs_rsrc_type rsrc_type) return "SA"; default: return "Unknown"; - }; - - return "Unknown"; + } } static int cn10k_mcs_alloc_rsrc(struct otx2_nic *pfvf, enum mcs_direction dir, From f69ccbc50a12417c74ddf891d3958ddf609f171c Mon Sep 17 00:00:00 2001 From: Chin-Yen Lee Date: Wed, 25 Dec 2024 20:28:04 +0800 Subject: [PATCH 0710/1386] wifi: rtw89: pci: disable PCI completion timeout control Realtek's chips follow suggestion of PCIe spec to design the max timeout of PCI completion, but some PCIe host reply too slow to meet it and lead PCI AER. Disable PCI completion timeout function via PCI configuration to avoid the AER. 
Signed-off-by: Chin-Yen Lee Signed-off-by: Ping-Ke Shih Link: https://patch.msgid.link/20241225122804.10214-1-pkshih@realtek.com --- drivers/net/wireless/realtek/rtw89/pci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/realtek/rtw89/pci.c b/drivers/net/wireless/realtek/rtw89/pci.c index c3a027735d0f..516160147191 100644 --- a/drivers/net/wireless/realtek/rtw89/pci.c +++ b/drivers/net/wireless/realtek/rtw89/pci.c @@ -4078,6 +4078,15 @@ static void rtw89_pci_l1ss_cfg(struct rtw89_dev *rtwdev) rtw89_pci_l1ss_set(rtwdev, true); } +static void rtw89_pci_cpl_timeout_cfg(struct rtw89_dev *rtwdev) +{ + struct rtw89_pci *rtwpci = (struct rtw89_pci *)rtwdev->priv; + struct pci_dev *pdev = rtwpci->pdev; + + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_COMP_TMOUT_DIS); +} + static int rtw89_pci_poll_io_idle_ax(struct rtw89_dev *rtwdev) { int ret = 0; @@ -4291,6 +4300,7 @@ void rtw89_pci_basic_cfg(struct rtw89_dev *rtwdev, bool resume) rtw89_pci_disable_eq(rtwdev); rtw89_pci_filter_out(rtwdev); + rtw89_pci_cpl_timeout_cfg(rtwdev); rtw89_pci_link_cfg(rtwdev); rtw89_pci_l1ss_cfg(rtwdev); } From 51cfbed198ca4aa140babde816387db0e71f09e7 Mon Sep 17 00:00:00 2001 From: Furong Xu <0x1207@gmail.com> Date: Fri, 3 Jan 2025 17:37:33 +0800 Subject: [PATCH 0711/1386] net: stmmac: Set dma_sync_size to zero for discarded frames If a frame is going to be discarded by driver, this frame is never touched by driver and the cache lines never become dirty obviously, page_pool_recycle_direct() wastes CPU cycles on unnecessary calling of page_pool_dma_sync_for_device() to sync entire frame. page_pool_put_page() with sync_size setting to 0 is the proper method. 
Signed-off-by: Furong Xu <0x1207@gmail.com> Link: https://patch.msgid.link/20250103093733.3872939-1-0x1207@gmail.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 2f518ec845ec..70c8f60d34f2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5467,7 +5467,7 @@ read_again: if (priv->extend_desc) stmmac_rx_extended_status(priv, &priv->xstats, rx_q->dma_erx + entry); if (unlikely(status == discard_frame)) { - page_pool_recycle_direct(rx_q->page_pool, buf->page); + page_pool_put_page(rx_q->page_pool, buf->page, 0, true); buf->page = NULL; error = 1; if (!priv->hwts_rx_en) From 912d6f6697251b0024e56ed24b7873b4800822e7 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 3 Jan 2025 06:31:14 -0500 Subject: [PATCH 0712/1386] selftests/net: packetdrill: report benign debug flakes as xfail A few recently added packetdrill tests that are known time sensitive (e.g., because testing timestamping) occasionally fail in debug mode: https://netdev.bots.linux.dev/contest.html?executor=vmksft-packetdrill-dbg These failures are well understood. Correctness of the tests is verified in non-debug mode. Continue running in debug mode also, to keep coverage with debug instrumentation. But, only in debug mode, mark these tests with well understood timing issues as XFAIL (known failing) rather than FAIL when failing. Introduce an allow list xfail_list with known cases. Expand the ktap infrastructure with XFAIL support. 
Fixes: eab35989cc37 ("selftests/net: packetdrill: import tcp/fast_recovery, tcp/nagle, tcp/timestamping") Reported-by: Jakub Kicinski Closes: https://lore.kernel.org/netdev/20241218100013.0c698629@kernel.org/ Signed-off-by: Willem de Bruijn Link: https://patch.msgid.link/20250103113142.129251-1-willemdebruijn.kernel@gmail.com Signed-off-by: Paolo Abeni --- .../selftests/kselftest/ktap_helpers.sh | 15 ++++++++++-- .../selftests/net/packetdrill/ksft_runner.sh | 23 +++++++++++++++---- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh index 79a125eb24c2..05a461890671 100644 --- a/tools/testing/selftests/kselftest/ktap_helpers.sh +++ b/tools/testing/selftests/kselftest/ktap_helpers.sh @@ -7,6 +7,7 @@ KTAP_TESTNO=1 KTAP_CNT_PASS=0 KTAP_CNT_FAIL=0 +KTAP_CNT_XFAIL=0 KTAP_CNT_SKIP=0 KSFT_PASS=0 @@ -69,6 +70,16 @@ ktap_test_skip() { KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1)) } +ktap_test_xfail() { + description="$1" + + result="ok" + directive="XFAIL" + __ktap_test "$result" "$description" "$directive" + + KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1)) +} + ktap_test_fail() { description="$1" @@ -99,7 +110,7 @@ ktap_exit_fail_msg() { ktap_finished() { ktap_print_totals - if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then + if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then exit "$KSFT_PASS" else exit "$KSFT_FAIL" @@ -107,5 +118,5 @@ ktap_finished() { } ktap_print_totals() { - echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0" + echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0" } diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index 4071c133f29e..ff989c325eef 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ 
b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0