From 9debb703e14939dfafa5d403f27c4feb2e9f6501 Mon Sep 17 00:00:00 2001 From: Martyna Szapar-Mudlaw Date: Wed, 26 Jun 2024 11:43:42 +0200 Subject: [PATCH 1/5] ice: Fix lldp packets dropping after changing the number of channels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After vsi setup refactor commit 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") ice_cfg_sw_lldp function which removes rx rule directing LLDP packets to vsi is moved from ice_vsi_release to ice_vsi_decfg function. ice_vsi_decfg is used in more cases than just in vsi_release resulting in unnecessary removal of rx lldp packets handling switch rule. This leads to lldp packets being dropped after changing the number of channels via ethtool. This patch moves ice_cfg_sw_lldp function that removes rx lldp sw rule back to ice_vsi_release function. Fixes: 6624e780a577 ("ice: split ice_vsi_setup into smaller functions") Reported-by: Matěj Grégr Closes: https://lore.kernel.org/intel-wired-lan/1be45a76-90af-4813-824f-8398b69745a9@netx.as/T/#u Reviewed-by: Przemek Kitszel Signed-off-by: Martyna Szapar-Mudlaw Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_lib.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 737c00b02dd0..2405e5ed9128 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -2413,13 +2413,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi) struct ice_pf *pf = vsi->back; int err; - /* The Rx rule will only exist to remove if the LLDP FW - * engine is currently stopped - */ - if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF && - !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) - ice_cfg_sw_lldp(vsi, false, false); - ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx); err = 
ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx); if (err) @@ -2764,6 +2757,14 @@ int ice_vsi_release(struct ice_vsi *vsi) ice_rss_clean(vsi); ice_vsi_close(vsi); + + /* The Rx rule will only exist to remove if the LLDP FW + * engine is currently stopped + */ + if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF && + !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + ice_cfg_sw_lldp(vsi, false, false); + ice_vsi_decfg(vsi); /* retain SW VSI data structure since it is needed to unregister and From e843cf7b34fe2e0c1afc55e1f3057375c9b77a14 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 31 Jul 2024 09:55:55 -0700 Subject: [PATCH 2/5] ice: fix accounting for filters shared by multiple VSIs When adding a switch filter (such as a MAC or VLAN filter), it is expected that the driver will detect the case where the filter already exists, and return -EEXIST. This is used by calling code such as ice_vc_add_mac_addr, and ice_vsi_add_vlan to avoid incrementing the accounting fields such as vsi->num_vlan or vf->num_mac. This logic works correctly for the case where only a single VSI has added a given switch filter. When a second VSI adds the same switch filter, the driver converts the existing filter from an ICE_FWD_TO_VSI filter into an ICE_FWD_TO_VSI_LIST filter. This saves switch resources, by ensuring that multiple VSIs can re-use the same filter. The ice_add_update_vsi_list() function is responsible for doing this conversion. When first converting a filter from the FWD_TO_VSI into FWD_TO_VSI_LIST, it checks if the VSI being added is the same as the existing rule's VSI. In such a case it returns -EEXIST. However, when the switch rule has already been converted to a FWD_TO_VSI_LIST, the logic is different. Adding a new VSI in this case just requires extending the VSI list entry. The logic for checking if the rule already exists in this case returns 0 instead of -EEXIST. 
This breaks the accounting logic mentioned above, so the counters for how many MAC and VLAN filters exist for a given VF or VSI no longer accurately reflect the actual count. This breaks other code which relies on these counts. In typical usage this primarily affects such filters generally shared by multiple VSIs such as VLAN 0, or broadcast and multicast MAC addresses. Fix this by correctly reporting -EEXIST in the case of adding the same VSI to a switch rule already converted to ICE_FWD_TO_VSI_LIST. Fixes: 9daf8208dd4d ("ice: Add support for switch filter programming") Signed-off-by: Jacob Keller Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index fe8847184cb1..0160f0bae8d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3194,7 +3194,7 @@ ice_add_update_vsi_list(struct ice_hw *hw, /* A rule already exists with the new VSI being added */ if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map)) - return 0; + return -EEXIST; /* Update the previously created VSI list set with * the new VSI ID passed in From e6501fc38a7590fc014d7bb5c406974d32c0530f Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Fri, 30 Aug 2024 15:44:11 +0200 Subject: [PATCH 3/5] ice: stop calling pci_disable_device() as we use pcim Our driver uses devres to manage resources, in particular we call pcim_enable_device(), which also means we express the intent to get an automatic pci_disable_device() call at driver removal. Manual calls to pci_disable_device() misuse the API. Recent commit (see "Fixes" tag) has changed the removal action from conditional (silent ignore of double call to pci_disable_device()) to unconditional, but able to catch unwanted redundant calls; see cited "Fixes" commit for details. 
Since then, unloading the driver yields the following warn+splat: [70633.628490] ice 0000:af:00.7: disabling already-disabled device [70633.628512] WARNING: CPU: 52 PID: 33890 at drivers/pci/pci.c:2250 pci_disable_device+0xf4/0x100 ... [70633.628744] ? pci_disable_device+0xf4/0x100 [70633.628752] release_nodes+0x4a/0x70 [70633.628759] devres_release_all+0x8b/0xc0 [70633.628768] device_unbind_cleanup+0xe/0x70 [70633.628774] device_release_driver_internal+0x208/0x250 [70633.628781] driver_detach+0x47/0x90 [70633.628786] bus_remove_driver+0x80/0x100 [70633.628791] pci_unregister_driver+0x2a/0xb0 [70633.628799] ice_module_exit+0x11/0x3a [ice] Note that this is the only Intel ethernet driver that needs such a fix. Fixes: f748a07a0b64 ("PCI: Remove legacy pcim_release()") Reviewed-by: Larysa Zaremba Reviewed-by: Philipp Stanner Signed-off-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c7db88b517da..ea780d468579 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -5363,7 +5363,6 @@ err_load: ice_deinit(pf); err_init: ice_adapter_put(pdev); - pci_disable_device(pdev); return err; } @@ -5470,7 +5469,6 @@ static void ice_remove(struct pci_dev *pdev) ice_set_wake(pf); ice_adapter_put(pdev); - pci_disable_device(pdev); } /** From d2940002b0aa42898de815a1453b29d440292386 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 4 Sep 2024 11:39:22 +0200 Subject: [PATCH 4/5] ice: fix VSI lists confusion when adding VLANs The description of function ice_find_vsi_list_entry says: Search VSI list map with VSI count 1 However, since the blamed commit (see Fixes below), the function no longer checks vsi_count. 
This causes a problem in ice_add_vlan_internal, where the decision to share VSI lists between filter rules relies on the vsi_count of the found existing VSI list being 1. The reproducing steps: 1. Have a PF and two VFs. There will be a filter rule for VLAN 0, referring to a VSI list containing VSIs: 0 (PF), 2 (VF#0), 3 (VF#1). 2. Add VLAN 1234 to VF#0. ice will make the wrong decision to share the VSI list with the new rule. The wrong behavior may not be immediately apparent, but it can be observed with debug prints. 3. Add VLAN 1234 to VF#1. ice will unshare the VSI list for the VLAN 1234 rule. Due to the earlier bad decision, the newly created VSI list will contain VSIs 0 (PF) and 3 (VF#1), instead of expected 2 (VF#0) and 3 (VF#1). 4. Try pinging a network peer over the VLAN interface on VF#0. This fails. Reproducer script at: https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/test-vlan-vsi-list-confusion.sh Commented debug trace: https://gitlab.com/mschmidt2/repro/-/blob/master/RHEL-46814/ice-vlan-vsi-lists-debug.txt Patch adding the debug prints: https://gitlab.com/mschmidt2/linux/-/commit/f8a8814623944a45091a77c6094c40bfe726bfdb (Unsafe, by the way. Lacks rule_lock when dumping in ice_remove_vlan.) Michal Swiatkowski added to the explanation that the bug is caused by reusing a VSI list created for VLAN 0. All created VFs' VSIs are added to VLAN 0 filter. When a non-zero VLAN is created on a VF which is already in VLAN 0 (normal case), the VSI list from VLAN 0 is reused. It leads to a problem because all VFs (VSIs to be specific) that are subscribed to VLAN 0 will now receive a new VLAN tag traffic. This is one bug, another is the bug described above. Removing filters from one VF will remove VLAN filter from the previous VF. This happens when a VF is reset. 
Example: - creation of 3 VFs - we have VSI list (used for VLAN 0) [0 (pf), 2 (vf1), 3 (vf2), 4 (vf3)] - we are adding VLAN 100 on VF1, we are reusing the previous list because 2 is there - VLAN traffic works fine, but VLAN 100 tagged traffic can be received on all VSIs from the list (for example broadcast or unicast) - trust is turning on VF2, VF2 is resetting, all filters from VF2 are removed; the VLAN 100 filter is also removed because 3 is on the list - VLAN traffic to VF1 isn't working anymore, there is a need to recreate VLAN interface to readd VLAN filter One thing I'm not certain about is the implications for the LAG feature, which is another caller of ice_find_vsi_list_entry. I don't have a LAG-capable card at hand to test. Fixes: 23ccae5ce15f ("ice: changes to the interface with the HW and FW for SRIOV_VF+LAG") Reviewed-by: Michal Swiatkowski Signed-off-by: Michal Schmidt Reviewed-by: Dave Ertman Tested-by: Rafal Romanowski Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_switch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 0160f0bae8d6..79d91e95358c 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -3264,7 +3264,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle, list_head = &sw->recp_list[recp_id].filt_rules; list_for_each_entry(list_itr, list_head, list_entry) { - if (list_itr->vsi_list_info) { + if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) { map_info = list_itr->vsi_list_info; if (test_bit(vsi_handle, map_info->vsi_map)) { *vsi_list_id = map_info->vsi_list_id; From 27717f8b17c098c4373ddb8fe89e1a1899c7779d Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Thu, 22 Aug 2024 09:42:07 +0200 Subject: [PATCH 5/5] igb: Always call igb_xdp_ring_update_tail() under Tx lock Always call igb_xdp_ring_update_tail() under 
__netif_tx_lock, add a comment and lockdep assert to indicate that. This is needed to share the same TX ring between XDP, XSK and slow paths. Furthermore, the current XDP implementation is racy on tail updates. Fixes: 9cbc948b5a20 ("igb: add XDP support") Signed-off-by: Sriram Yagnaraman [Kurt: Add lockdep assert and fixes tag] Signed-off-by: Kurt Kanzenbach Acked-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/igb/igb_main.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 9dc7c60838ed..1ef4cb871452 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef CONFIG_IGB_DCA #include #endif @@ -2914,8 +2915,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) } } +/* This function assumes __netif_tx_lock is held by the caller. */ static void igb_xdp_ring_update_tail(struct igb_ring *ring) { + lockdep_assert_held(&txring_txq(ring)->_xmit_lock); + /* Force memory writes to complete before letting h/w know there * are new descriptors to fetch. 
*/ @@ -3000,11 +3004,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n, nxmit++; } - __netif_tx_unlock(nq); - if (unlikely(flags & XDP_XMIT_FLUSH)) igb_xdp_ring_update_tail(tx_ring); + __netif_tx_unlock(nq); + return nxmit; } @@ -8864,12 +8868,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring, static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) { + unsigned int total_bytes = 0, total_packets = 0; struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *rx_ring = q_vector->rx.ring; - struct sk_buff *skb = rx_ring->skb; - unsigned int total_bytes = 0, total_packets = 0; u16 cleaned_count = igb_desc_unused(rx_ring); + struct sk_buff *skb = rx_ring->skb; + int cpu = smp_processor_id(); unsigned int xdp_xmit = 0; + struct netdev_queue *nq; struct xdp_buff xdp; u32 frame_sz = 0; int rx_buf_pgcnt; @@ -8997,7 +9003,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) if (xdp_xmit & IGB_XDP_TX) { struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); + nq = txring_txq(tx_ring); + __netif_tx_lock(nq, cpu); igb_xdp_ring_update_tail(tx_ring); + __netif_tx_unlock(nq); } u64_stats_update_begin(&rx_ring->rx_syncp);