Merge branch 'intel-wired-lan-driver-updates-2025-01-06-igb-igc-ixgbe-ixgbevf-i40e-fm10k'

Tony Nguyen says:

====================
Intel Wired LAN Driver Updates 2025-01-06 (igb, igc, ixgbe, ixgbevf, i40e, fm10k)

For igb:

Sriram Yagnaraman and Kurt Kanzenbach add support for AF_XDP
zero-copy.

Original cover letter:
The first couple of patches add helper functions to prepare for AF_XDP
zero-copy support, which comes in the last couple of patches, one each
for the Rx and Tx paths.

As mentioned in the v1 patchset [0], I don't have access to an actual IGB
device to provide real-world performance numbers. I have used the Intel
82576EB emulation in QEMU [1] to test the changes to the IGB driver.

The tests use one isolated vCPU for Rx/Tx and one isolated vCPU for the
xdp-sock application [2]. Hopefully these measurements provide at least
some indication of the performance increase when using zero-copy,
especially in the Tx path. It would be awesome if someone with a real
IGB NIC could test the patch.

AF_XDP performance using 64-byte packets, in Kpps.
Benchmark:	XDP-SKB		XDP-DRV		XDP-DRV(ZC)
rxdrop		220		235		350
txpush		1.000		1.000		410
l2fwd 		1.000		1.000		200

AF_XDP performance using 1500-byte packets, in Kpps.
Benchmark:	XDP-SKB		XDP-DRV		XDP-DRV(ZC)
rxdrop		200		210		310
txpush		1.000		1.000		410
l2fwd 		0.900		1.000		160

[0]: https://lore.kernel.org/intel-wired-lan/20230704095915.9750-1-sriram.yagnaraman@est.tech/
[1]: https://www.qemu.org/docs/master/system/devices/igb.html
[2]: https://github.com/xdp-project/bpf-examples/tree/master/AF_XDP-example

Subsequent changes and information can be found here:
https://lore.kernel.org/intel-wired-lan/20241018-b4-igb_zero_copy-v9-0-da139d78d796@linutronix.de/
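
The core of the Rx wiring is easiest to see in the igb_poll() hunk
further down: the ring caches a pointer to its XSK buffer pool, and the
NAPI poll dispatches to the zero-copy cleanup only when a pool is
attached. A condensed sketch, using the identifiers from that hunk (not
a standalone-buildable excerpt):

	if (q_vector->rx.ring) {
		/* Use the AF_XDP zero-copy path only when an XSK pool
		 * is attached to the ring; otherwise fall back to the
		 * regular page-based Rx cleanup.
		 */
		struct xsk_buff_pool *xsk_pool =
			READ_ONCE(q_vector->rx.ring->xsk_pool);
		int cleaned = xsk_pool ?
			igb_clean_rx_irq_zc(q_vector, xsk_pool, budget) :
			igb_clean_rx_irq(q_vector, budget);

		work_done += cleaned;
		if (cleaned >= budget)
			clean_complete = false;
	}

The Tx side mirrors this: igb_clean_tx_irq() completes XSK descriptors
and calls igb_xmit_zc() under the Tx queue lock when a pool is present.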

Yue Haibing converts the use of an ERR_PTR return to a traditional error
code, which resolves a smatch warning.
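
The same mechanical conversion appears in the igb, igc, ixgbe and
ixgbevf hunks below. As a before/after sketch of the pattern (signature
taken from the igb hunk, bodies elided):

	/* Before: the XDP verdict (IGB_XDP_PASS/TX/REDIR/CONSUMED) was
	 * wrapped in an ERR_PTR so the function could keep an sk_buff *
	 * return type, and callers had to test it with IS_ERR() -- the
	 * usage smatch flagged.
	 */
	static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
					   struct igb_ring *rx_ring,
					   struct xdp_buff *xdp)
	{
		int result = IGB_XDP_PASS;

		/* ... run the attached BPF program, update result ... */
		return ERR_PTR(-result);
	}

	/* After: the verdict is returned directly as an int, so sk_buff
	 * pointers only ever refer to real socket buffers (or NULL).
	 */
	static int igb_run_xdp(struct igb_adapter *adapter,
			       struct igb_ring *rx_ring,
			       struct xdp_buff *xdp)
	{
		int result = IGB_XDP_PASS;

		/* ... run the attached BPF program, update result ... */
		return result;
	}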

For igc:

Song Yoong Siang allows for the XDP program to be hot-swapped.
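
The igc_xdp_set_prog() hunk below captures the idea: the interface is
only closed and reopened when XDP goes from disabled to enabled (or
vice versa); replacing one program with another is just a pointer swap.
A condensed sketch with identifiers from that hunk:

	bool if_running = netif_running(dev);
	bool need_update = !!adapter->xdp_prog != !!prog;

	/* Only bounce the interface when XDP is being enabled or
	 * disabled, not when one program is hot-swapped for another.
	 */
	if (if_running && need_update)
		igc_close(dev);

	old_prog = xchg(&adapter->xdp_prog, prog);
	/* ... release old_prog, update dev->xdp_features ... */

	if (if_running && need_update)
		igc_open(dev);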

Yue Haibing converts the use of an ERR_PTR return to a traditional error
code, which resolves a smatch warning.

Joe Damato links IRQs and queues to NAPI instances to allow for
reporting via the netdev-genl API.
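
The mapping itself is a thin wrapper around two core helpers, as the
igc hunks below show. A condensed sketch (identifiers from the diff):

	/* Tie each Rx/Tx queue to its NAPI instance, and the NAPI
	 * instance to its MSI-X interrupt, so the relationships can be
	 * queried via the netdev netlink family.
	 */
	netif_queue_set_napi(adapter->netdev,
			     q_vector->rx.ring->queue_index,
			     NETDEV_QUEUE_TYPE_RX, napi);
	netif_queue_set_napi(adapter->netdev,
			     q_vector->tx.ring->queue_index,
			     NETDEV_QUEUE_TYPE_TX, napi);
	netif_napi_set_irq(&q_vector->napi,
			   adapter->msix_entries[vector].vector);

The mapping is cleared (set to NULL) around igc_down() and XSK pool
setup and restored afterwards, as the corresponding hunks show.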

For ixgbe:

Yue Haibing converts the use of an ERR_PTR return to a traditional error
code, which resolves a smatch warning.

For ixgbevf:

Yue Haibing converts the use of an ERR_PTR return to a traditional error
code, which resolves a smatch warning.

For i40e:

Alex implements the "mdd-auto-reset-vf" private flag to automatically
reset VFs when a Malicious Driver Detection (MDD) event is encountered.
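
The flag is wired into i40e_handle_mdd_event(), as the hunks below
show: when it is set, an offending VF is logged and then reset (the
reset clears the per-VF event counters, hence logging first); when it
is clear, the VF stays disabled until the operator re-enables it. A
condensed sketch (identifiers from the diff):

	if ((is_mdd_on_tx || is_mdd_on_rx) &&
	    test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
		/* VF MDD event counters are cleared by the reset,
		 * so print the event prior to reset.
		 */
		if (is_mdd_on_rx)
			i40e_print_vf_mdd_event(pf, vf, false);
		if (is_mdd_on_tx)
			i40e_print_vf_mdd_event(pf, vf, true);
		i40e_vc_reset_vf(vf, true);
	}

VF MDD messages are additionally rate limited to one report per second
and deduplicated against the last printed counts; see
i40e_print_vfs_mdd_events() below.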

For fm10k:

Dr. David Alan Gilbert removes an unused function.
====================

Link: https://patch.msgid.link/20250106221929.956999-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Committed by Jakub Kicinski on 2025-01-07 18:16:02 -08:00 as commit 7bf1659bad.
18 changed files with 1002 additions and 287 deletions.


@ -299,6 +299,18 @@ Use ethtool to view and set link-down-on-close, as follows::
ethtool --show-priv-flags ethX
ethtool --set-priv-flags ethX link-down-on-close [on|off]
Setting the mdd-auto-reset-vf Private Flag
------------------------------------------
When the mdd-auto-reset-vf private flag is set to "on", the problematic VF will
be automatically reset if a malformed descriptor is detected. If the flag is
set to "off", the problematic VF will be disabled.
Use ethtool to view and set mdd-auto-reset-vf, as follows::
ethtool --show-priv-flags ethX
ethtool --set-priv-flags ethX mdd-auto-reset-vf [on|off]
Viewing Link Messages
---------------------
Link messages will not be displayed to the console if the distribution is


@ -1179,126 +1179,6 @@ s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid)
return vid;
}
/**
* fm10k_iov_msg_mac_vlan_pf - Message handler for MAC/VLAN request from VF
* @hw: Pointer to hardware structure
* @results: Pointer array to message, results[0] is pointer to message
* @mbx: Pointer to mailbox information structure
*
* This function is a default handler for MAC/VLAN requests from the VF.
* The assumption is that in this case it is acceptable to just directly
* hand off the message from the VF to the underlying shared code.
**/
s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *hw, u32 **results,
struct fm10k_mbx_info *mbx)
{
struct fm10k_vf_info *vf_info = (struct fm10k_vf_info *)mbx;
u8 mac[ETH_ALEN];
u32 *result;
int err = 0;
bool set;
u16 vlan;
u32 vid;
/* we shouldn't be updating rules on a disabled interface */
if (!FM10K_VF_FLAG_ENABLED(vf_info))
err = FM10K_ERR_PARAM;
if (!err && !!results[FM10K_MAC_VLAN_MSG_VLAN]) {
result = results[FM10K_MAC_VLAN_MSG_VLAN];
/* record VLAN id requested */
err = fm10k_tlv_attr_get_u32(result, &vid);
if (err)
return err;
set = !(vid & FM10K_VLAN_CLEAR);
vid &= ~FM10K_VLAN_CLEAR;
/* if the length field has been set, this is a multi-bit
* update request. For multi-bit requests, simply disallow
* them when the pf_vid has been set. In this case, the PF
* should have already cleared the VLAN_TABLE, and if we
* allowed them, it could allow a rogue VF to receive traffic
* on a VLAN it was not assigned. In the single-bit case, we
* need to modify requests for VLAN 0 to use the default PF or
* SW vid when assigned.
*/
if (vid >> 16) {
/* prevent multi-bit requests when PF has
* administratively set the VLAN for this VF
*/
if (vf_info->pf_vid)
return FM10K_ERR_PARAM;
} else {
err = fm10k_iov_select_vid(vf_info, (u16)vid);
if (err < 0)
return err;
vid = err;
}
/* update VSI info for VF in regards to VLAN table */
err = hw->mac.ops.update_vlan(hw, vid, vf_info->vsi, set);
}
if (!err && !!results[FM10K_MAC_VLAN_MSG_MAC]) {
result = results[FM10K_MAC_VLAN_MSG_MAC];
/* record unicast MAC address requested */
err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
if (err)
return err;
/* block attempts to set MAC for a locked device */
if (is_valid_ether_addr(vf_info->mac) &&
!ether_addr_equal(mac, vf_info->mac))
return FM10K_ERR_PARAM;
set = !(vlan & FM10K_VLAN_CLEAR);
vlan &= ~FM10K_VLAN_CLEAR;
err = fm10k_iov_select_vid(vf_info, vlan);
if (err < 0)
return err;
vlan = (u16)err;
/* notify switch of request for new unicast address */
err = hw->mac.ops.update_uc_addr(hw, vf_info->glort,
mac, vlan, set, 0);
}
if (!err && !!results[FM10K_MAC_VLAN_MSG_MULTICAST]) {
result = results[FM10K_MAC_VLAN_MSG_MULTICAST];
/* record multicast MAC address requested */
err = fm10k_tlv_attr_get_mac_vlan(result, mac, &vlan);
if (err)
return err;
/* verify that the VF is allowed to request multicast */
if (!(vf_info->vf_flags & FM10K_VF_FLAG_MULTI_ENABLED))
return FM10K_ERR_PARAM;
set = !(vlan & FM10K_VLAN_CLEAR);
vlan &= ~FM10K_VLAN_CLEAR;
err = fm10k_iov_select_vid(vf_info, vlan);
if (err < 0)
return err;
vlan = (u16)err;
/* notify switch of request for new multicast address */
err = hw->mac.ops.update_mc_addr(hw, vf_info->glort,
mac, vlan, set);
}
return err;
}
/**
* fm10k_iov_supported_xcast_mode_pf - Determine best match for xcast mode
* @vf_info: VF info structure containing capability flags


@ -99,8 +99,6 @@ extern const struct fm10k_tlv_attr fm10k_err_msg_attr[];
s32 fm10k_iov_select_vid(struct fm10k_vf_info *vf_info, u16 vid);
s32 fm10k_iov_msg_msix_pf(struct fm10k_hw *, u32 **, struct fm10k_mbx_info *);
s32 fm10k_iov_msg_mac_vlan_pf(struct fm10k_hw *, u32 **,
struct fm10k_mbx_info *);
s32 fm10k_iov_msg_lport_state_pf(struct fm10k_hw *, u32 **,
struct fm10k_mbx_info *);


@ -88,6 +88,7 @@ enum i40e_state {
__I40E_SERVICE_SCHED,
__I40E_ADMINQ_EVENT_PENDING,
__I40E_MDD_EVENT_PENDING,
__I40E_MDD_VF_PRINT_PENDING,
__I40E_VFLR_EVENT_PENDING,
__I40E_RESET_RECOVERY_PENDING,
__I40E_TIMEOUT_RECOVERY_PENDING,
@ -191,6 +192,7 @@ enum i40e_pf_flags {
*/
I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA,
I40E_FLAG_VF_VLAN_PRUNING_ENA,
I40E_FLAG_MDD_AUTO_RESET_VF,
I40E_PF_FLAGS_NBITS, /* must be last */
};
@ -572,7 +574,7 @@ struct i40e_pf {
int num_alloc_vfs; /* actual number of VFs allocated */
u32 vf_aq_requests;
u32 arq_overflows; /* Not fatal, possibly indicative of problems */
struct ratelimit_state mdd_message_rate_limit;
/* DCBx/DCBNL capability for PF that indicates
* whether DCBx is managed by firmware or host
* based agent (LLDPAD). Also, indicates what


@ -722,7 +722,7 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id)
dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n",
vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs);
dev_info(&pf->pdev->dev, " num MDD=%lld\n",
vf->num_mdd_events);
vf->mdd_tx_events.count + vf->mdd_rx_events.count);
} else {
dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id);
}


@ -459,6 +459,8 @@ static const struct i40e_priv_flags i40e_gstrings_priv_flags[] = {
I40E_PRIV_FLAG("base-r-fec", I40E_FLAG_BASE_R_FEC, 0),
I40E_PRIV_FLAG("vf-vlan-pruning",
I40E_FLAG_VF_VLAN_PRUNING_ENA, 0),
I40E_PRIV_FLAG("mdd-auto-reset-vf",
I40E_FLAG_MDD_AUTO_RESET_VF, 0),
};
#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gstrings_priv_flags)


@ -11179,6 +11179,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
i40e_reset_and_rebuild(pf, false, lock_acquired);
}
/**
* i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
* @pf: board private structure
* @vf: pointer to the VF structure
* @is_tx: true - for Tx event, false - for Rx
*/
static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
bool is_tx)
{
dev_err(&pf->pdev->dev, is_tx ?
"%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
"%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
pf->hw.pf_id,
vf->vf_id,
vf->default_lan_addr.addr,
str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
}
/**
* i40e_print_vfs_mdd_events - print VFs malicious driver detect event
* @pf: pointer to the PF structure
*
* Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
*/
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
{
unsigned int i;
/* check that there are pending MDD events to print */
if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
return;
if (!__ratelimit(&pf->mdd_message_rate_limit))
return;
for (i = 0; i < pf->num_alloc_vfs; i++) {
struct i40e_vf *vf = &pf->vf[i];
bool is_printed = false;
/* only print Rx MDD event message if there are new events */
if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
i40e_print_vf_mdd_event(pf, vf, false);
is_printed = true;
}
/* only print Tx MDD event message if there are new events */
if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
i40e_print_vf_mdd_event(pf, vf, true);
is_printed = true;
}
if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
dev_info(&pf->pdev->dev,
"Use PF Control I/F to re-enable the VF #%d\n",
i);
}
}
/**
* i40e_handle_mdd_event
* @pf: pointer to the PF structure
@ -11193,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
u32 reg;
int i;
if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
/* Since the VF MDD event logging is rate limited, check if
* there are pending MDD events.
*/
i40e_print_vfs_mdd_events(pf);
return;
}
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX);
@ -11238,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
/* see if one of the VFs needs its hand slapped */
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
bool is_mdd_on_tx = false;
bool is_mdd_on_rx = false;
vf = &(pf->vf[i]);
reg = rd32(hw, I40E_VP_MDET_TX(i));
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
vf->num_mdd_events++;
dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
i);
dev_info(&pf->pdev->dev,
"Use PF Control I/F to re-enable the VF\n");
vf->mdd_tx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
is_mdd_on_tx = true;
}
reg = rd32(hw, I40E_VP_MDET_RX(i));
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
vf->num_mdd_events++;
dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
i);
dev_info(&pf->pdev->dev,
"Use PF Control I/F to re-enable the VF\n");
vf->mdd_rx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
is_mdd_on_rx = true;
}
if ((is_mdd_on_tx || is_mdd_on_rx) &&
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
/* VF MDD event counters will be cleared by
* reset, so print the event prior to reset.
*/
if (is_mdd_on_rx)
i40e_print_vf_mdd_event(pf, vf, false);
if (is_mdd_on_tx)
i40e_print_vf_mdd_event(pf, vf, true);
i40e_vc_reset_vf(vf, true);
}
}
/* re-enable mdd interrupt cause */
clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_flush(hw);
i40e_print_vfs_mdd_events(pf);
}
/**
@ -15878,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ERR_PTR(err),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
/* VF MDD event logs are rate limited to one second intervals */
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.


@ -216,7 +216,7 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
* @notify_vf: notify vf about reset or not
* Reset VF handler.
**/
static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf)
{
struct i40e_pf *pf = vf->pf;
int i;


@ -64,6 +64,12 @@ struct i40evf_channel {
u64 max_tx_rate; /* bandwidth rate allocation for VSIs */
};
struct i40e_mdd_vf_events {
u64 count; /* total count of Rx|Tx events */
/* count number of the last printed event */
u64 last_printed;
};
/* VF information structure */
struct i40e_vf {
struct i40e_pf *pf;
@ -92,7 +98,9 @@ struct i40e_vf {
u8 num_queue_pairs; /* num of qps assigned to VF vsis */
u8 num_req_queues; /* num of requested qps */
u64 num_mdd_events; /* num of mdd events detected */
/* num of mdd tx and rx events detected */
struct i40e_mdd_vf_events mdd_rx_events;
struct i40e_mdd_vf_events mdd_tx_events;
unsigned long vf_caps; /* vf's adv. capabilities */
unsigned long vf_states; /* vf's runtime states */
@ -120,6 +128,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs);
int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode,
u32 v_retval, u8 *msg, u16 msglen);
int i40e_vc_process_vflr_event(struct i40e_pf *pf);
void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf);
bool i40e_reset_vf(struct i40e_vf *vf, bool flr);
bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr);
void i40e_vc_notify_vf_reset(struct i40e_vf *vf);


@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
e1000_i210.o igb_ptp.o igb_hwmon.o
e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o


@ -18,8 +18,10 @@
#include <linux/i2c-algo-bit.h>
#include <linux/pci.h>
#include <linux/mdio.h>
#include <linux/lockdep.h>
#include <net/xdp.h>
#include <net/xdp_sock_drv.h>
struct igb_adapter;
@ -86,6 +88,7 @@ struct igb_adapter;
#define IGB_XDP_CONSUMED BIT(0)
#define IGB_XDP_TX BIT(1)
#define IGB_XDP_REDIR BIT(2)
#define IGB_XDP_EXIT BIT(3)
struct vf_data_storage {
unsigned char vf_mac_addresses[ETH_ALEN];
@ -255,6 +258,7 @@ enum igb_tx_flags {
enum igb_tx_buf_type {
IGB_TYPE_SKB = 0,
IGB_TYPE_XDP,
IGB_TYPE_XSK
};
/* wrapper around a pointer to a socket buffer,
@ -320,6 +324,7 @@ struct igb_ring {
union { /* array of buffer info structs */
struct igb_tx_buffer *tx_buffer_info;
struct igb_rx_buffer *rx_buffer_info;
struct xdp_buff **rx_buffer_info_zc;
};
void *desc; /* descriptor ring memory */
unsigned long flags; /* ring specific flags */
@ -357,6 +362,7 @@ struct igb_ring {
};
};
struct xdp_rxq_info xdp_rxq;
struct xsk_buff_pool *xsk_pool;
} ____cacheline_internodealigned_in_smp;
struct igb_q_vector {
@ -384,7 +390,8 @@ enum e1000_ring_flags_t {
IGB_RING_FLAG_RX_SCTP_CSUM,
IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
IGB_RING_FLAG_TX_CTX_IDX,
IGB_RING_FLAG_TX_DETECT_HANG
IGB_RING_FLAG_TX_DETECT_HANG,
IGB_RING_FLAG_TX_DISABLED
};
#define ring_uses_large_buffer(ring) \
@ -731,12 +738,21 @@ int igb_setup_tx_resources(struct igb_ring *);
int igb_setup_rx_resources(struct igb_ring *);
void igb_free_tx_resources(struct igb_ring *);
void igb_free_rx_resources(struct igb_ring *);
void igb_clean_tx_ring(struct igb_ring *tx_ring);
void igb_clean_rx_ring(struct igb_ring *rx_ring);
void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status);
void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
unsigned int bytes);
void igb_setup_tctl(struct igb_adapter *);
void igb_setup_rctl(struct igb_adapter *);
void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *);
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp);
void igb_process_skb_fields(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb);
void igb_alloc_rx_buffers(struct igb_ring *, u16);
void igb_update_stats(struct igb_adapter *);
bool igb_has_link(struct igb_adapter *adapter);
@ -797,6 +813,33 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring)
return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index);
}
/* This function assumes __netif_tx_lock is held by the caller. */
static inline void igb_xdp_ring_update_tail(struct igb_ring *ring)
{
lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
wmb();
writel(ring->next_to_use, ring->tail);
}
static inline struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
{
unsigned int r_idx = smp_processor_id();
if (r_idx >= adapter->num_tx_queues)
r_idx = r_idx % adapter->num_tx_queues;
return adapter->tx_ring[r_idx];
}
static inline bool igb_xdp_is_enabled(struct igb_adapter *adapter)
{
return !!READ_ONCE(adapter->xdp_prog);
}
int igb_add_filter(struct igb_adapter *adapter,
struct igb_nfc_filter *input);
int igb_erase_filter(struct igb_adapter *adapter,
@ -807,4 +850,17 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
int igb_del_mac_steering_filter(struct igb_adapter *adapter,
const u8 *addr, u8 queue, u8 flags);
struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
struct igb_ring *ring);
int igb_xsk_pool_setup(struct igb_adapter *adapter,
struct xsk_buff_pool *pool,
u16 qid);
bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
struct xsk_buff_pool *xsk_pool, u16 count);
void igb_clean_rx_ring_zc(struct igb_ring *rx_ring);
int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
struct xsk_buff_pool *xsk_pool, const int budget);
bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool);
int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
#endif /* _IGB_H_ */


@ -33,7 +33,6 @@
#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h>
#include <linux/etherdevice.h>
#include <linux/lockdep.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
@ -116,8 +115,6 @@ static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(struct timer_list *);
static void igb_watchdog(struct timer_list *);
@ -475,12 +472,17 @@ rx_ring_summary:
for (i = 0; i < rx_ring->count; i++) {
const char *next_desc;
struct igb_rx_buffer *buffer_info;
buffer_info = &rx_ring->rx_buffer_info[i];
dma_addr_t dma = (dma_addr_t)0;
struct igb_rx_buffer *buffer_info = NULL;
rx_desc = IGB_RX_DESC(rx_ring, i);
u0 = (struct my_u0 *)rx_desc;
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
if (!rx_ring->xsk_pool) {
buffer_info = &rx_ring->rx_buffer_info[i];
dma = buffer_info->dma;
}
if (i == rx_ring->next_to_use)
next_desc = " NTU";
else if (i == rx_ring->next_to_clean)
@ -500,11 +502,11 @@ rx_ring_summary:
"R ", i,
le64_to_cpu(u0->a),
le64_to_cpu(u0->b),
(u64)buffer_info->dma,
(u64)dma,
next_desc);
if (netif_msg_pktdata(adapter) &&
buffer_info->dma && buffer_info->page) {
buffer_info && dma && buffer_info->page) {
print_hex_dump(KERN_INFO, "",
DUMP_PREFIX_ADDRESS,
16, 1,
@ -1990,7 +1992,11 @@ static void igb_configure(struct igb_adapter *adapter)
*/
for (i = 0; i < adapter->num_rx_queues; i++) {
struct igb_ring *ring = adapter->rx_ring[i];
igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
if (ring->xsk_pool)
igb_alloc_rx_buffers_zc(ring, ring->xsk_pool,
igb_desc_unused(ring));
else
igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
}
}
@ -2911,37 +2917,20 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
struct igb_adapter *adapter = netdev_priv(dev);
switch (xdp->command) {
case XDP_SETUP_PROG:
return igb_xdp_setup(dev, xdp);
case XDP_SETUP_XSK_POOL:
return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
xdp->xsk.queue_id);
default:
return -EINVAL;
}
}
/* This function assumes __netif_tx_lock is held by the caller. */
static void igb_xdp_ring_update_tail(struct igb_ring *ring)
{
lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
wmb();
writel(ring->next_to_use, ring->tail);
}
static struct igb_ring *igb_xdp_tx_queue_mapping(struct igb_adapter *adapter)
{
unsigned int r_idx = smp_processor_id();
if (r_idx >= adapter->num_tx_queues)
r_idx = r_idx % adapter->num_tx_queues;
return adapter->tx_ring[r_idx];
}
static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
{
struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
int cpu = smp_processor_id();
@ -2955,7 +2944,8 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp)
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
tx_ring = igb_xdp_is_enabled(adapter) ?
igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return IGB_XDP_CONSUMED;
@ -2988,10 +2978,14 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
/* During program transitions its possible adapter->xdp_prog is assigned
* but ring has not been configured yet. In this case simply abort xmit.
*/
tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL;
tx_ring = igb_xdp_is_enabled(adapter) ?
igb_xdp_tx_queue_mapping(adapter) : NULL;
if (unlikely(!tx_ring))
return -ENXIO;
if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
return -ENXIO;
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
@ -3042,6 +3036,7 @@ static const struct net_device_ops igb_netdev_ops = {
.ndo_setup_tc = igb_setup_tc,
.ndo_bpf = igb_xdp,
.ndo_xdp_xmit = igb_xdp_xmit,
.ndo_xsk_wakeup = igb_xsk_wakeup,
};
/**
@ -3338,7 +3333,8 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->priv_flags |= IFF_SUPP_NOFCS;
netdev->priv_flags |= IFF_UNICAST_FLT;
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY;
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
@ -4364,6 +4360,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
u64 tdba = ring->dma;
int reg_idx = ring->reg_idx;
WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
wr32(E1000_TDLEN(reg_idx),
ring->count * sizeof(union e1000_adv_tx_desc));
wr32(E1000_TDBAL(reg_idx),
@ -4424,7 +4422,8 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)
if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index, 0);
rx_ring->queue_index,
rx_ring->q_vector->napi.napi_id);
if (res < 0) {
dev_err(dev, "Failed to register xdp_rxq index %u\n",
rx_ring->queue_index);
@ -4720,12 +4719,17 @@ void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
struct e1000_hw *hw = &adapter->hw;
int reg_idx = ring->reg_idx;
u32 srrctl = 0;
u32 buf_size;
if (ring->xsk_pool)
buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
else if (ring_uses_large_buffer(ring))
buf_size = IGB_RXBUFFER_3072;
else
buf_size = IGB_RXBUFFER_2048;
srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
if (ring_uses_large_buffer(ring))
srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
else
srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT;
srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
if (hw->mac.type >= e1000_82580)
srrctl |= E1000_SRRCTL_TIMESTAMP;
@ -4757,8 +4761,17 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
u32 rxdctl = 0;
xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED, NULL));
WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
if (ring->xsk_pool) {
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_XSK_BUFF_POOL,
NULL));
xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
} else {
WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
NULL));
}
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
@ -4785,9 +4798,12 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
rxdctl |= IGB_RX_HTHRESH << 8;
rxdctl |= IGB_RX_WTHRESH << 16;
/* initialize rx_buffer_info */
memset(ring->rx_buffer_info, 0,
sizeof(struct igb_rx_buffer) * ring->count);
if (ring->xsk_pool)
memset(ring->rx_buffer_info_zc, 0,
sizeof(*ring->rx_buffer_info_zc) * ring->count);
else
memset(ring->rx_buffer_info, 0,
sizeof(*ring->rx_buffer_info) * ring->count);
/* initialize Rx descriptor 0 */
rx_desc = IGB_RX_DESC(ring, 0);
@ -4888,19 +4904,24 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter)
* igb_clean_tx_ring - Free Tx Buffers
* @tx_ring: ring to be cleaned
**/
static void igb_clean_tx_ring(struct igb_ring *tx_ring)
void igb_clean_tx_ring(struct igb_ring *tx_ring)
{
u16 i = tx_ring->next_to_clean;
struct igb_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
u32 xsk_frames = 0;
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
/* Free all the Tx ring sk_buffs or xdp frames */
if (tx_buffer->type == IGB_TYPE_SKB)
if (tx_buffer->type == IGB_TYPE_SKB) {
dev_kfree_skb_any(tx_buffer->skb);
else
} else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
} else if (tx_buffer->type == IGB_TYPE_XSK) {
xsk_frames++;
goto skip_for_xsk;
}
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@ -4931,6 +4952,7 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
DMA_TO_DEVICE);
}
skip_for_xsk:
tx_buffer->next_to_watch = NULL;
/* move us one more past the eop_desc for start of next pkt */
@ -4945,6 +4967,9 @@ static void igb_clean_tx_ring(struct igb_ring *tx_ring)
/* reset BQL for queue */
netdev_tx_reset_queue(txring_txq(tx_ring));
if (tx_ring->xsk_pool && xsk_frames)
xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
/* reset next_to_use and next_to_clean */
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
@ -4975,8 +5000,13 @@ void igb_free_rx_resources(struct igb_ring *rx_ring)
rx_ring->xdp_prog = NULL;
xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
if (rx_ring->xsk_pool) {
vfree(rx_ring->rx_buffer_info_zc);
rx_ring->rx_buffer_info_zc = NULL;
} else {
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
}
/* if not set, then don't free */
if (!rx_ring->desc)
@ -5007,13 +5037,18 @@ static void igb_free_all_rx_resources(struct igb_adapter *adapter)
* igb_clean_rx_ring - Free Rx Buffers per Queue
* @rx_ring: ring to free buffers from
**/
static void igb_clean_rx_ring(struct igb_ring *rx_ring)
void igb_clean_rx_ring(struct igb_ring *rx_ring)
{
u16 i = rx_ring->next_to_clean;
dev_kfree_skb(rx_ring->skb);
rx_ring->skb = NULL;
if (rx_ring->xsk_pool) {
igb_clean_rx_ring_zc(rx_ring);
goto skip_for_xsk;
}
/* Free all the Rx ring sk_buffs */
while (i != rx_ring->next_to_alloc) {
struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
@ -5041,6 +5076,7 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring)
i = 0;
}
skip_for_xsk:
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@ -6467,6 +6503,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
return NETDEV_TX_BUSY;
}
if (unlikely(test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags)))
return NETDEV_TX_BUSY;
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->type = IGB_TYPE_SKB;
@ -6622,7 +6661,7 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
struct igb_adapter *adapter = netdev_priv(netdev);
int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD;
if (adapter->xdp_prog) {
if (igb_xdp_is_enabled(adapter)) {
int i;
for (i = 0; i < adapter->num_rx_queues; i++) {
@ -8195,6 +8234,7 @@ static int igb_poll(struct napi_struct *napi, int budget)
struct igb_q_vector *q_vector = container_of(napi,
struct igb_q_vector,
napi);
struct xsk_buff_pool *xsk_pool;
bool clean_complete = true;
int work_done = 0;
@ -8206,7 +8246,12 @@ static int igb_poll(struct napi_struct *napi, int budget)
clean_complete = igb_clean_tx_irq(q_vector, budget);
if (q_vector->rx.ring) {
int cleaned = igb_clean_rx_irq(q_vector, budget);
int cleaned;
xsk_pool = READ_ONCE(q_vector->rx.ring->xsk_pool);
cleaned = xsk_pool ?
igb_clean_rx_irq_zc(q_vector, xsk_pool, budget) :
igb_clean_rx_irq(q_vector, budget);
work_done += cleaned;
if (cleaned >= budget)
@ -8235,13 +8280,18 @@ static int igb_poll(struct napi_struct *napi, int budget)
**/
static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
{
struct igb_adapter *adapter = q_vector->adapter;
struct igb_ring *tx_ring = q_vector->tx.ring;
struct igb_tx_buffer *tx_buffer;
union e1000_adv_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
struct igb_adapter *adapter = q_vector->adapter;
unsigned int budget = q_vector->tx.work_limit;
struct igb_ring *tx_ring = q_vector->tx.ring;
unsigned int i = tx_ring->next_to_clean;
union e1000_adv_tx_desc *tx_desc;
struct igb_tx_buffer *tx_buffer;
struct xsk_buff_pool *xsk_pool;
int cpu = smp_processor_id();
bool xsk_xmit_done = true;
struct netdev_queue *nq;
u32 xsk_frames = 0;
if (test_bit(__IGB_DOWN, &adapter->state))
return true;
@ -8272,10 +8322,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
total_packets += tx_buffer->gso_segs;
/* free the skb */
if (tx_buffer->type == IGB_TYPE_SKB)
if (tx_buffer->type == IGB_TYPE_SKB) {
napi_consume_skb(tx_buffer->skb, napi_budget);
else
} else if (tx_buffer->type == IGB_TYPE_XDP) {
xdp_return_frame(tx_buffer->xdpf);
} else if (tx_buffer->type == IGB_TYPE_XSK) {
xsk_frames++;
goto skip_for_xsk;
}
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@ -8307,6 +8361,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
skip_for_xsk:
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
tx_desc++;
@ -8335,6 +8390,21 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
q_vector->tx.total_bytes += total_bytes;
q_vector->tx.total_packets += total_packets;
xsk_pool = READ_ONCE(tx_ring->xsk_pool);
if (xsk_pool) {
if (xsk_frames)
xsk_tx_completed(xsk_pool, xsk_frames);
if (xsk_uses_need_wakeup(xsk_pool))
xsk_set_tx_need_wakeup(xsk_pool);
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
/* Avoid transmit queue timeout since we share it with the slow path */
txq_trans_cond_update(nq);
xsk_xmit_done = igb_xmit_zc(tx_ring, xsk_pool);
__netif_tx_unlock(nq);
}
if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
struct e1000_hw *hw = &adapter->hw;
@ -8397,7 +8467,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector, int napi_budget)
}
}
return !!budget;
return !!budget && xsk_xmit_done;
}
/**
@ -8588,9 +8658,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
return skb;
}
static struct sk_buff *igb_run_xdp(struct igb_adapter *adapter,
struct igb_ring *rx_ring,
struct xdp_buff *xdp)
static int igb_run_xdp(struct igb_adapter *adapter, struct igb_ring *rx_ring,
struct xdp_buff *xdp)
{
int err, result = IGB_XDP_PASS;
struct bpf_prog *xdp_prog;
@ -8630,7 +8699,7 @@ out_failure:
break;
}
xdp_out:
return ERR_PTR(-result);
return result;
}
static unsigned int igb_rx_frame_truesize(struct igb_ring *rx_ring,
@ -8756,10 +8825,6 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
if (unlikely((igb_test_staterr(rx_desc,
E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) {
struct net_device *netdev = rx_ring->netdev;
@ -8786,9 +8851,9 @@ static bool igb_cleanup_headers(struct igb_ring *rx_ring,
* order to populate the hash, checksum, VLAN, timestamp, protocol, and
* other fields within the skb.
**/
static void igb_process_skb_fields(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
void igb_process_skb_fields(struct igb_ring *rx_ring,
union e1000_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
struct net_device *dev = rx_ring->netdev;
@ -8870,6 +8935,38 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
rx_buffer->page = NULL;
}
void igb_finalize_xdp(struct igb_adapter *adapter, unsigned int status)
{
int cpu = smp_processor_id();
struct netdev_queue *nq;
if (status & IGB_XDP_REDIR)
xdp_do_flush();
if (status & IGB_XDP_TX) {
struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
igb_xdp_ring_update_tail(tx_ring);
__netif_tx_unlock(nq);
}
}
void igb_update_rx_stats(struct igb_q_vector *q_vector, unsigned int packets,
unsigned int bytes)
{
struct igb_ring *ring = q_vector->rx.ring;
u64_stats_update_begin(&ring->rx_syncp);
ring->rx_stats.packets += packets;
ring->rx_stats.bytes += bytes;
u64_stats_update_end(&ring->rx_syncp);
q_vector->rx.total_packets += packets;
q_vector->rx.total_bytes += bytes;
}
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
unsigned int total_bytes = 0, total_packets = 0;
@ -8877,12 +8974,11 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
struct igb_ring *rx_ring = q_vector->rx.ring;
u16 cleaned_count = igb_desc_unused(rx_ring);
struct sk_buff *skb = rx_ring->skb;
int cpu = smp_processor_id();
unsigned int xdp_xmit = 0;
struct netdev_queue *nq;
struct xdp_buff xdp;
u32 frame_sz = 0;
int rx_buf_pgcnt;
int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@ -8940,12 +9036,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = igb_rx_frame_truesize(rx_ring, size);
#endif
skb = igb_run_xdp(adapter, rx_ring, &xdp);
xdp_res = igb_run_xdp(adapter, rx_ring, &xdp);
}
if (IS_ERR(skb)) {
unsigned int xdp_res = -PTR_ERR(skb);
if (xdp_res) {
if (xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR)) {
xdp_xmit |= xdp_res;
igb_rx_buffer_flip(rx_ring, rx_buffer, size);
@ -8964,7 +9058,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
&xdp, timestamp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
break;
@ -8978,7 +9072,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
if (igb_cleanup_headers(rx_ring, rx_desc, skb)) {
if (xdp_res || igb_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@ -9001,24 +9095,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
if (xdp_xmit & IGB_XDP_REDIR)
xdp_do_flush();
if (xdp_xmit)
igb_finalize_xdp(adapter, xdp_xmit);
if (xdp_xmit & IGB_XDP_TX) {
struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
nq = txring_txq(tx_ring);
__netif_tx_lock(nq, cpu);
igb_xdp_ring_update_tail(tx_ring);
__netif_tx_unlock(nq);
}
u64_stats_update_begin(&rx_ring->rx_syncp);
rx_ring->rx_stats.packets += total_packets;
rx_ring->rx_stats.bytes += total_bytes;
u64_stats_update_end(&rx_ring->rx_syncp);
q_vector->rx.total_packets += total_packets;
q_vector->rx.total_bytes += total_bytes;
igb_update_rx_stats(q_vector, total_packets, total_bytes);
if (cleaned_count)
igb_alloc_rx_buffers(rx_ring, cleaned_count);


@ -0,0 +1,562 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2018 Intel Corporation. */
#include <linux/bpf_trace.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>
#include "e1000_hw.h"
#include "igb.h"
static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
{
int size = pool_present ?
sizeof(*ring->rx_buffer_info_zc) * ring->count :
sizeof(*ring->rx_buffer_info) * ring->count;
void *buff_info = vmalloc(size);
if (!buff_info)
return -ENOMEM;
if (pool_present) {
vfree(ring->rx_buffer_info);
ring->rx_buffer_info = NULL;
ring->rx_buffer_info_zc = buff_info;
} else {
vfree(ring->rx_buffer_info_zc);
ring->rx_buffer_info_zc = NULL;
ring->rx_buffer_info = buff_info;
}
return 0;
}
static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
{
struct igb_ring *tx_ring = adapter->tx_ring[qid];
struct igb_ring *rx_ring = adapter->rx_ring[qid];
struct e1000_hw *hw = &adapter->hw;
set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
synchronize_net();
/* Rx/Tx share the same napi context. */
napi_disable(&rx_ring->q_vector->napi);
igb_clean_tx_ring(tx_ring);
igb_clean_rx_ring(rx_ring);
memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
}
static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
{
struct igb_ring *tx_ring = adapter->tx_ring[qid];
struct igb_ring *rx_ring = adapter->rx_ring[qid];
igb_configure_tx_ring(adapter, tx_ring);
igb_configure_rx_ring(adapter, rx_ring);
synchronize_net();
clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
/* call igb_desc_unused which always leaves
* at least 1 descriptor unused to make sure
* next_to_use != next_to_clean
*/
if (rx_ring->xsk_pool)
igb_alloc_rx_buffers_zc(rx_ring, rx_ring->xsk_pool,
igb_desc_unused(rx_ring));
else
igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
/* Rx/Tx share the same napi context. */
napi_enable(&rx_ring->q_vector->napi);
}
struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
struct igb_ring *ring)
{
int qid = ring->queue_index;
struct xsk_buff_pool *pool;
pool = xsk_get_pool_from_qid(adapter->netdev, qid);
if (!igb_xdp_is_enabled(adapter))
return NULL;
return (pool && pool->dev) ? pool : NULL;
}
static int igb_xsk_pool_enable(struct igb_adapter *adapter,
struct xsk_buff_pool *pool,
u16 qid)
{
struct net_device *netdev = adapter->netdev;
struct igb_ring *rx_ring;
bool if_running;
int err;
if (qid >= adapter->num_rx_queues)
return -EINVAL;
if (qid >= netdev->real_num_rx_queues ||
qid >= netdev->real_num_tx_queues)
return -EINVAL;
err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
if (err)
return err;
rx_ring = adapter->rx_ring[qid];
if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
if (if_running)
igb_txrx_ring_disable(adapter, qid);
if (if_running) {
err = igb_realloc_rx_buffer_info(rx_ring, true);
if (!err) {
igb_txrx_ring_enable(adapter, qid);
/* Kick start the NAPI context so that receiving will start */
err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
}
if (err) {
xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
return err;
}
}
return 0;
}
static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
{
struct xsk_buff_pool *pool;
struct igb_ring *rx_ring;
bool if_running;
int err;
pool = xsk_get_pool_from_qid(adapter->netdev, qid);
if (!pool)
return -EINVAL;
rx_ring = adapter->rx_ring[qid];
if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
if (if_running)
igb_txrx_ring_disable(adapter, qid);
xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
if (if_running) {
err = igb_realloc_rx_buffer_info(rx_ring, false);
if (err)
return err;
igb_txrx_ring_enable(adapter, qid);
}
return 0;
}
int igb_xsk_pool_setup(struct igb_adapter *adapter,
struct xsk_buff_pool *pool,
u16 qid)
{
return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
igb_xsk_pool_disable(adapter, qid);
}
static u16 igb_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp,
union e1000_adv_rx_desc *rx_desc, u16 count)
{
dma_addr_t dma;
u16 buffs;
int i;
/* nothing to do */
if (!count)
return 0;
buffs = xsk_buff_alloc_batch(pool, xdp, count);
for (i = 0; i < buffs; i++) {
dma = xsk_buff_xdp_get_dma(*xdp);
rx_desc->read.pkt_addr = cpu_to_le64(dma);
rx_desc->wb.upper.length = 0;
rx_desc++;
xdp++;
}
return buffs;
}
bool igb_alloc_rx_buffers_zc(struct igb_ring *rx_ring,
struct xsk_buff_pool *xsk_pool, u16 count)
{
u32 nb_buffs_extra = 0, nb_buffs = 0;
union e1000_adv_rx_desc *rx_desc;
u16 ntu = rx_ring->next_to_use;
u16 total_count = count;
struct xdp_buff **xdp;
rx_desc = IGB_RX_DESC(rx_ring, ntu);
xdp = &rx_ring->rx_buffer_info_zc[ntu];
if (ntu + count >= rx_ring->count) {
nb_buffs_extra = igb_fill_rx_descs(xsk_pool, xdp, rx_desc,
rx_ring->count - ntu);
if (nb_buffs_extra != rx_ring->count - ntu) {
ntu += nb_buffs_extra;
goto exit;
}
rx_desc = IGB_RX_DESC(rx_ring, 0);
xdp = rx_ring->rx_buffer_info_zc;
ntu = 0;
count -= nb_buffs_extra;
}
nb_buffs = igb_fill_rx_descs(xsk_pool, xdp, rx_desc, count);
ntu += nb_buffs;
if (ntu == rx_ring->count)
ntu = 0;
/* clear the length for the next_to_use descriptor */
rx_desc = IGB_RX_DESC(rx_ring, ntu);
rx_desc->wb.upper.length = 0;
exit:
if (rx_ring->next_to_use != ntu) {
rx_ring->next_to_use = ntu;
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
writel(ntu, rx_ring->tail);
}
return total_count == (nb_buffs + nb_buffs_extra);
}
void igb_clean_rx_ring_zc(struct igb_ring *rx_ring)
{
u16 ntc = rx_ring->next_to_clean;
u16 ntu = rx_ring->next_to_use;
while (ntc != ntu) {
struct xdp_buff *xdp = rx_ring->rx_buffer_info_zc[ntc];
xsk_buff_free(xdp);
ntc++;
if (ntc >= rx_ring->count)
ntc = 0;
}
}
static struct sk_buff *igb_construct_skb_zc(struct igb_ring *rx_ring,
struct xdp_buff *xdp,
ktime_t timestamp)
{
unsigned int totalsize = xdp->data_end - xdp->data_meta;
unsigned int metasize = xdp->data - xdp->data_meta;
struct sk_buff *skb;
net_prefetch(xdp->data_meta);
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, totalsize);
if (unlikely(!skb))
return NULL;
if (timestamp)
skb_hwtstamps(skb)->hwtstamp = timestamp;
memcpy(__skb_put(skb, totalsize), xdp->data_meta,
ALIGN(totalsize, sizeof(long)));
if (metasize) {
skb_metadata_set(skb, metasize);
__skb_pull(skb, metasize);
}
return skb;
}
static int igb_run_xdp_zc(struct igb_adapter *adapter, struct igb_ring *rx_ring,
struct xdp_buff *xdp, struct xsk_buff_pool *xsk_pool,
struct bpf_prog *xdp_prog)
{
int err, result = IGB_XDP_PASS;
u32 act;
prefetchw(xdp->data_hard_start); /* xdp_frame write */
act = bpf_prog_run_xdp(xdp_prog, xdp);
if (likely(act == XDP_REDIRECT)) {
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
if (!err)
return IGB_XDP_REDIR;
if (xsk_uses_need_wakeup(xsk_pool) &&
err == -ENOBUFS)
result = IGB_XDP_EXIT;
else
result = IGB_XDP_CONSUMED;
goto out_failure;
}
switch (act) {
case XDP_PASS:
break;
case XDP_TX:
result = igb_xdp_xmit_back(adapter, xdp);
if (result == IGB_XDP_CONSUMED)
goto out_failure;
break;
default:
bpf_warn_invalid_xdp_action(adapter->netdev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
out_failure:
trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
fallthrough;
case XDP_DROP:
result = IGB_XDP_CONSUMED;
break;
}
return result;
}
int igb_clean_rx_irq_zc(struct igb_q_vector *q_vector,
struct xsk_buff_pool *xsk_pool, const int budget)
{
struct igb_adapter *adapter = q_vector->adapter;
unsigned int total_bytes = 0, total_packets = 0;
struct igb_ring *rx_ring = q_vector->rx.ring;
u32 ntc = rx_ring->next_to_clean;
struct bpf_prog *xdp_prog;
unsigned int xdp_xmit = 0;
bool failure = false;
u16 entries_to_alloc;
struct sk_buff *skb;
/* xdp_prog cannot be NULL in the ZC path */
xdp_prog = READ_ONCE(rx_ring->xdp_prog);
while (likely(total_packets < budget)) {
union e1000_adv_rx_desc *rx_desc;
ktime_t timestamp = 0;
struct xdp_buff *xdp;
unsigned int size;
int xdp_res = 0;
rx_desc = IGB_RX_DESC(rx_ring, ntc);
size = le16_to_cpu(rx_desc->wb.upper.length);
if (!size)
break;
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
* descriptor has been written back
*/
dma_rmb();
xdp = rx_ring->rx_buffer_info_zc[ntc];
xsk_buff_set_size(xdp, size);
xsk_buff_dma_sync_for_cpu(xdp);
/* pull rx packet timestamp if available and valid */
if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
int ts_hdr_len;
ts_hdr_len = igb_ptp_rx_pktstamp(rx_ring->q_vector,
xdp->data,
&timestamp);
xdp->data += ts_hdr_len;
xdp->data_meta += ts_hdr_len;
size -= ts_hdr_len;
}
xdp_res = igb_run_xdp_zc(adapter, rx_ring, xdp, xsk_pool,
xdp_prog);
if (xdp_res) {
if (likely(xdp_res & (IGB_XDP_TX | IGB_XDP_REDIR))) {
xdp_xmit |= xdp_res;
} else if (xdp_res == IGB_XDP_EXIT) {
failure = true;
break;
} else if (xdp_res == IGB_XDP_CONSUMED) {
xsk_buff_free(xdp);
}
total_packets++;
total_bytes += size;
ntc++;
if (ntc == rx_ring->count)
ntc = 0;
continue;
}
skb = igb_construct_skb_zc(rx_ring, xdp, timestamp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_failed++;
break;
}
xsk_buff_free(xdp);
ntc++;
if (ntc == rx_ring->count)
ntc = 0;
if (eth_skb_pad(skb))
continue;
/* probably a little skewed due to removing CRC */
total_bytes += skb->len;
/* populate checksum, timestamp, VLAN, and protocol */
igb_process_skb_fields(rx_ring, rx_desc, skb);
napi_gro_receive(&q_vector->napi, skb);
/* update budget accounting */
total_packets++;
}
rx_ring->next_to_clean = ntc;
if (xdp_xmit)
igb_finalize_xdp(adapter, xdp_xmit);
igb_update_rx_stats(q_vector, total_packets, total_bytes);
entries_to_alloc = igb_desc_unused(rx_ring);
if (entries_to_alloc >= IGB_RX_BUFFER_WRITE)
failure |= !igb_alloc_rx_buffers_zc(rx_ring, xsk_pool,
entries_to_alloc);
if (xsk_uses_need_wakeup(xsk_pool)) {
if (failure || rx_ring->next_to_clean == rx_ring->next_to_use)
xsk_set_rx_need_wakeup(xsk_pool);
else
xsk_clear_rx_need_wakeup(xsk_pool);
return (int)total_packets;
}
return failure ? budget : (int)total_packets;
}
bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
{
unsigned int budget = igb_desc_unused(tx_ring);
u32 cmd_type, olinfo_status, nb_pkts, i = 0;
struct xdp_desc *descs = xsk_pool->tx_descs;
union e1000_adv_tx_desc *tx_desc = NULL;
struct igb_tx_buffer *tx_buffer_info;
unsigned int total_bytes = 0;
dma_addr_t dma;
if (!netif_carrier_ok(tx_ring->netdev))
return true;
if (test_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags))
return true;
nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget);
if (!nb_pkts)
return true;
while (nb_pkts-- > 0) {
dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
tx_buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
tx_buffer_info->bytecount = descs[i].len;
tx_buffer_info->type = IGB_TYPE_XSK;
tx_buffer_info->xdpf = NULL;
tx_buffer_info->gso_segs = 1;
tx_buffer_info->time_stamp = jiffies;
tx_desc = IGB_TX_DESC(tx_ring, tx_ring->next_to_use);
tx_desc->read.buffer_addr = cpu_to_le64(dma);
/* put descriptor type bits */
cmd_type = E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_DEXT |
E1000_ADVTXD_DCMD_IFCS;
olinfo_status = descs[i].len << E1000_ADVTXD_PAYLEN_SHIFT;
/* FIXME: This sets the Report Status (RS) bit for every
* descriptor. One nice to have optimization would be to set it
* only for the last descriptor in the whole batch. See Intel
* ice driver for an example on how to do it.
*/
cmd_type |= descs[i].len | IGB_TXD_DCMD;
tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
total_bytes += descs[i].len;
i++;
tx_ring->next_to_use++;
tx_buffer_info->next_to_watch = tx_desc;
if (tx_ring->next_to_use == tx_ring->count)
tx_ring->next_to_use = 0;
}
netdev_tx_sent_queue(txring_txq(tx_ring), total_bytes);
igb_xdp_ring_update_tail(tx_ring);
return nb_pkts < budget;
}
int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
{
struct igb_adapter *adapter = netdev_priv(dev);
struct e1000_hw *hw = &adapter->hw;
struct igb_ring *ring;
u32 eics = 0;
if (test_bit(__IGB_DOWN, &adapter->state))
return -ENETDOWN;
if (!igb_xdp_is_enabled(adapter))
return -EINVAL;
if (qid >= adapter->num_tx_queues)
return -EINVAL;
ring = adapter->tx_ring[qid];
if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
return -ENETDOWN;
if (!READ_ONCE(ring->xsk_pool))
return -EINVAL;
if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
/* Cause software interrupt */
if (adapter->flags & IGB_FLAG_HAS_MSIX) {
eics |= ring->q_vector->eims_value;
wr32(E1000_EICS, eics);
} else {
wr32(E1000_ICS, E1000_ICS_RXDMT0);
}
}
return 0;
}


@ -337,6 +337,8 @@ struct igc_adapter {
struct igc_led_classdev *leds;
};
void igc_set_queue_napi(struct igc_adapter *adapter, int q_idx,
struct napi_struct *napi);
void igc_up(struct igc_adapter *adapter);
void igc_down(struct igc_adapter *adapter);
int igc_open(struct net_device *netdev);


@ -2123,10 +2123,6 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
struct net_device *netdev = rx_ring->netdev;
@ -2515,8 +2511,7 @@ out_failure:
}
}
static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
struct xdp_buff *xdp)
static int igc_xdp_run_prog(struct igc_adapter *adapter, struct xdp_buff *xdp)
{
struct bpf_prog *prog;
int res;
@ -2530,7 +2525,7 @@ static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
res = __igc_xdp_run_prog(adapter, prog, xdp);
out:
return ERR_PTR(-res);
return res;
}
/* This function assumes __netif_tx_lock is held by the caller. */
@ -2585,6 +2580,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = igc_desc_unused(rx_ring);
int xdp_status = 0, rx_buffer_pgcnt;
int xdp_res = 0;
while (likely(total_packets < budget)) {
struct igc_xdp_buff ctx = { .rx_ts = NULL };
@ -2630,12 +2626,10 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
xdp_buff_clear_frags_flag(&ctx.xdp);
ctx.rx_desc = rx_desc;
skb = igc_xdp_run_prog(adapter, &ctx.xdp);
xdp_res = igc_xdp_run_prog(adapter, &ctx.xdp);
}
if (IS_ERR(skb)) {
unsigned int xdp_res = -PTR_ERR(skb);
if (xdp_res) {
switch (xdp_res) {
case IGC_XDP_CONSUMED:
rx_buffer->pagecnt_bias++;
@ -2657,7 +2651,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
skb = igc_construct_skb(rx_ring, rx_buffer, &ctx);
/* exit if we failed to retrieve a buffer */
if (!skb) {
if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_failed++;
rx_buffer->pagecnt_bias++;
set_bit(IGC_RING_FLAG_RX_ALLOC_FAILED, &rx_ring->flags);
@ -2672,7 +2666,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
continue;
/* verify the packet layout is correct */
if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
if (xdp_res || igc_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}
@ -4948,6 +4942,22 @@ static int igc_sw_init(struct igc_adapter *adapter)
return 0;
}
void igc_set_queue_napi(struct igc_adapter *adapter, int vector,
struct napi_struct *napi)
{
struct igc_q_vector *q_vector = adapter->q_vector[vector];
if (q_vector->rx.ring)
netif_queue_set_napi(adapter->netdev,
q_vector->rx.ring->queue_index,
NETDEV_QUEUE_TYPE_RX, napi);
if (q_vector->tx.ring)
netif_queue_set_napi(adapter->netdev,
q_vector->tx.ring->queue_index,
NETDEV_QUEUE_TYPE_TX, napi);
}
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@ -4955,6 +4965,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
void igc_up(struct igc_adapter *adapter)
{
struct igc_hw *hw = &adapter->hw;
struct napi_struct *napi;
int i = 0;
/* hardware has been reset, we need to reload some things */
@ -4962,8 +4973,11 @@ void igc_up(struct igc_adapter *adapter)
clear_bit(__IGC_DOWN, &adapter->state);
for (i = 0; i < adapter->num_q_vectors; i++)
napi_enable(&adapter->q_vector[i]->napi);
for (i = 0; i < adapter->num_q_vectors; i++) {
napi = &adapter->q_vector[i]->napi;
napi_enable(napi);
igc_set_queue_napi(adapter, i, napi);
}
if (adapter->msix_entries)
igc_configure_msix(adapter);
@ -5192,6 +5206,7 @@ void igc_down(struct igc_adapter *adapter)
for (i = 0; i < adapter->num_q_vectors; i++) {
if (adapter->q_vector[i]) {
napi_synchronize(&adapter->q_vector[i]->napi);
igc_set_queue_napi(adapter, i, NULL);
napi_disable(&adapter->q_vector[i]->napi);
}
}
@ -5576,6 +5591,9 @@ static int igc_request_msix(struct igc_adapter *adapter)
q_vector);
if (err)
goto err_free;
netif_napi_set_irq(&q_vector->napi,
adapter->msix_entries[vector].vector);
}
igc_configure_msix(adapter);
@ -6018,6 +6036,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
struct igc_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = adapter->pdev;
struct igc_hw *hw = &adapter->hw;
struct napi_struct *napi;
int err = 0;
int i = 0;
@ -6053,8 +6072,11 @@ static int __igc_open(struct net_device *netdev, bool resuming)
clear_bit(__IGC_DOWN, &adapter->state);
for (i = 0; i < adapter->num_q_vectors; i++)
napi_enable(&adapter->q_vector[i]->napi);
for (i = 0; i < adapter->num_q_vectors; i++) {
napi = &adapter->q_vector[i]->napi;
napi_enable(napi);
igc_set_queue_napi(adapter, i, napi);
}
/* Clear any pending interrupts. */
rd32(IGC_ICR);
@ -7299,7 +7321,7 @@ static void igc_deliver_wake_packet(struct net_device *netdev)
netif_rx(skb);
}
static int igc_resume(struct device *dev)
static int __igc_resume(struct device *dev, bool rpm)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct net_device *netdev = pci_get_drvdata(pdev);
@ -7342,7 +7364,11 @@ static int igc_resume(struct device *dev)
wr32(IGC_WUS, ~0);
if (netif_running(netdev)) {
if (!rpm)
rtnl_lock();
err = __igc_open(netdev, true);
if (!rpm)
rtnl_unlock();
if (!err)
netif_device_attach(netdev);
}
@ -7350,9 +7376,14 @@ static int igc_resume(struct device *dev)
return err;
}
static int igc_resume(struct device *dev)
{
return __igc_resume(dev, false);
}
static int igc_runtime_resume(struct device *dev)
{
return igc_resume(dev);
return __igc_resume(dev, true);
}
static int igc_suspend(struct device *dev)
@ -7397,14 +7428,18 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
struct net_device *netdev = pci_get_drvdata(pdev);
struct igc_adapter *adapter = netdev_priv(netdev);
rtnl_lock();
netif_device_detach(netdev);
if (state == pci_channel_io_perm_failure)
if (state == pci_channel_io_perm_failure) {
rtnl_unlock();
return PCI_ERS_RESULT_DISCONNECT;
}
if (netif_running(netdev))
igc_down(adapter);
pci_disable_device(pdev);
rtnl_unlock();
/* Request a slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
@ -7415,7 +7450,7 @@ static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
* resembles the first-half of the igc_resume routine.
* resembles the first-half of the __igc_resume routine.
**/
static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
{
@ -7454,7 +7489,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
* second-half of the igc_resume routine.
* second-half of the __igc_resume routine.
*/
static void igc_io_resume(struct pci_dev *pdev)
{


@ -13,6 +13,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
struct net_device *dev = adapter->netdev;
bool if_running = netif_running(dev);
struct bpf_prog *old_prog;
bool need_update;
if (dev->mtu > ETH_DATA_LEN) {
/* For now, the driver doesn't support XDP functionality with
@ -22,7 +23,8 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
return -EOPNOTSUPP;
}
if (if_running)
need_update = !!adapter->xdp_prog != !!prog;
if (if_running && need_update)
igc_close(dev);
old_prog = xchg(&adapter->xdp_prog, prog);
@ -34,7 +36,7 @@ int igc_xdp_set_prog(struct igc_adapter *adapter, struct bpf_prog *prog,
else
xdp_features_clear_redirect_target(dev);
if (if_running)
if (if_running && need_update)
igc_open(dev);
return 0;
@ -84,6 +86,7 @@ static int igc_xdp_enable_pool(struct igc_adapter *adapter,
napi_disable(napi);
}
igc_set_queue_napi(adapter, queue_id, NULL);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
set_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
@ -133,6 +136,7 @@ static int igc_xdp_disable_pool(struct igc_adapter *adapter, u16 queue_id)
xsk_pool_dma_unmap(pool, IGC_RX_DMA_ATTR);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &rx_ring->flags);
clear_bit(IGC_RING_FLAG_AF_XDP_ZC, &tx_ring->flags);
igc_set_queue_napi(adapter, queue_id, napi);
if (needs_reset) {
napi_enable(napi);


@ -1923,10 +1923,6 @@ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring,
{
struct net_device *netdev = rx_ring->netdev;
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
/* Verify netdev is present, and that packet does not have any
* errors that would be unacceptable to the netdev.
*/
@ -2234,9 +2230,9 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
return skb;
}
static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
struct ixgbe_ring *rx_ring,
struct xdp_buff *xdp)
{
int err, result = IXGBE_XDP_PASS;
struct bpf_prog *xdp_prog;
@ -2286,7 +2282,7 @@ out_failure:
break;
}
xdp_out:
return ERR_PTR(-result);
return result;
}
static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
@ -2344,6 +2340,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
unsigned int offset = rx_ring->rx_offset;
unsigned int xdp_xmit = 0;
struct xdp_buff xdp;
int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@ -2389,12 +2386,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
xdp_res = ixgbe_run_xdp(adapter, rx_ring, &xdp);
}
if (IS_ERR(skb)) {
unsigned int xdp_res = -PTR_ERR(skb);
if (xdp_res) {
if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
xdp_xmit |= xdp_res;
ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
@ -2414,7 +2409,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
if (!skb) {
if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@ -2428,7 +2423,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
if (ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
if (xdp_res || ixgbe_cleanup_headers(rx_ring, rx_desc, skb))
continue;
/* probably a little skewed due to removing CRC */


@ -737,10 +737,6 @@ static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring,
union ixgbe_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
/* XDP packets use error pointer so abort at this point */
if (IS_ERR(skb))
return true;
/* verify that the packet does not have any known errors */
if (unlikely(ixgbevf_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) {
@ -1049,9 +1045,9 @@ static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring,
return IXGBEVF_XDP_TX;
}
static struct sk_buff *ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *rx_ring,
struct xdp_buff *xdp)
static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
struct ixgbevf_ring *rx_ring,
struct xdp_buff *xdp)
{
int result = IXGBEVF_XDP_PASS;
struct ixgbevf_ring *xdp_ring;
@ -1085,7 +1081,7 @@ out_failure:
break;
}
xdp_out:
return ERR_PTR(-result);
return result;
}
static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
@ -1127,6 +1123,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
struct sk_buff *skb = rx_ring->skb;
bool xdp_xmit = false;
struct xdp_buff xdp;
int xdp_res = 0;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
#if (PAGE_SIZE < 8192)
@ -1170,11 +1167,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
/* At larger PAGE_SIZE, frame_sz depend on len size */
xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
#endif
skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
}
if (IS_ERR(skb)) {
if (PTR_ERR(skb) == -IXGBEVF_XDP_TX) {
if (xdp_res) {
if (xdp_res == IXGBEVF_XDP_TX) {
xdp_xmit = true;
ixgbevf_rx_buffer_flip(rx_ring, rx_buffer,
size);
@ -1194,7 +1191,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
}
/* exit if we failed to retrieve a buffer */
if (!skb) {
if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_rx_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
@ -1208,7 +1205,7 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
continue;
/* verify the packet layout is correct */
if (ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) {
skb = NULL;
continue;
}