idpf: add singleq start_xmit and napi poll

Add the start_xmit, TX and RX napi poll support for the single queue
model. Unlike split queue model, single queue uses same queue to post
buffer descriptors and completed descriptors.

Signed-off-by: Joshua Hay <joshua.a.hay@intel.com>
Co-developed-by: Alan Brady <alan.brady@intel.com>
Signed-off-by: Alan Brady <alan.brady@intel.com>
Co-developed-by: Madhu Chittim <madhu.chittim@intel.com>
Signed-off-by: Madhu Chittim <madhu.chittim@intel.com>
Co-developed-by: Phani Burra <phani.r.burra@intel.com>
Signed-off-by: Phani Burra <phani.r.burra@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Co-developed-by: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Signed-off-by: Pavan Kumar Linga <pavan.kumar.linga@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
This commit is contained in:
Joshua Hay 2023-08-07 17:34:14 -07:00 committed by Tony Nguyen
parent 3a8845af66
commit a5ab9ee0df
8 changed files with 1290 additions and 31 deletions

View File

@ -14,6 +14,7 @@ struct idpf_vport_max_q;
#include <linux/etherdevice.h>
#include <linux/pci.h>
#include <linux/bitfield.h>
#include <linux/sctp.h>
#include <net/gro.h>
#include <linux/dim.h>
@ -280,6 +281,7 @@ enum idpf_vport_flags {
* @txq_grps: Array of TX queue groups
* @txq_model: Split queue or single queue queuing model
* @txqs: Used only in hotpath to get to the right queue very fast
* @crc_enable: Enable CRC insertion offload
* @num_rxq: Number of allocated RX queues
* @num_bufq: Number of allocated buffer queues
* @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors
@ -326,6 +328,7 @@ struct idpf_vport {
struct idpf_txq_group *txq_grps;
u32 txq_model;
struct idpf_queue **txqs;
bool crc_enable;
u16 num_rxq;
u16 num_bufq;
@ -534,6 +537,9 @@ struct idpf_vport_config {
* @vc_state: Virtchnl message state
* @vc_msg: Virtchnl message buffer
* @dev_ops: See idpf_dev_ops
* @num_vfs: Number of allocated VFs through sysfs. PF does not directly talk
* to VFs but is used to initialize them
* @crc_enable: Enable CRC insertion offload
* @req_tx_splitq: TX split or single queue model to request
* @req_rx_splitq: RX split or single queue model to request
* @vport_ctrl_lock: Lock to protect the vport control flow
@ -587,6 +593,8 @@ struct idpf_adapter {
DECLARE_BITMAP(vc_state, IDPF_VC_NBITS);
char vc_msg[IDPF_CTLQ_MAX_BUF_LEN];
struct idpf_dev_ops dev_ops;
int num_vfs;
bool crc_enable;
bool req_tx_splitq;
bool req_rx_splitq;
@ -848,5 +856,7 @@ int idpf_send_create_vport_msg(struct idpf_adapter *adapter,
struct idpf_vport_max_q *max_q);
int idpf_check_supported_desc_ids(struct idpf_vport *vport);
int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map);
int idpf_send_set_sriov_vfs_msg(struct idpf_adapter *adapter, u16 num_vfs);
int idpf_sriov_configure(struct pci_dev *pdev, int num_vfs);
#endif /* !_IDPF_H_ */

View File

@ -64,6 +64,32 @@ enum idpf_rss_hash {
#define IDPF_TXD_COMPLQ_QID_S 0
#define IDPF_TXD_COMPLQ_QID_M GENMASK_ULL(9, 0)
/* For base mode TX descriptors */
#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S 23
#define IDPF_TXD_CTX_QW0_TUNN_L4T_CS_M BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_L4T_CS_S)
#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_S 19
#define IDPF_TXD_CTX_QW0_TUNN_DECTTL_M \
(0xFULL << IDPF_TXD_CTX_QW0_TUNN_DECTTL_S)
#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_S 12
#define IDPF_TXD_CTX_QW0_TUNN_NATLEN_M \
(0X7FULL << IDPF_TXD_CTX_QW0_TUNN_NATLEN_S)
#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S 11
#define IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M \
BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_S)
#define IDPF_TXD_CTX_EIP_NOINC_IPID_CONST \
IDPF_TXD_CTX_QW0_TUNN_EIP_NOINC_M
#define IDPF_TXD_CTX_QW0_TUNN_NATT_S 9
#define IDPF_TXD_CTX_QW0_TUNN_NATT_M (0x3ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
#define IDPF_TXD_CTX_UDP_TUNNELING BIT_ULL(IDPF_TXD_CTX_QW0_TUNN_NATT_S)
#define IDPF_TXD_CTX_GRE_TUNNELING (0x2ULL << IDPF_TXD_CTX_QW0_TUNN_NATT_S)
#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S 2
#define IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_M \
(0x3FULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IPLEN_S)
#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S 0
#define IDPF_TXD_CTX_QW0_TUNN_EXT_IP_M \
(0x3ULL << IDPF_TXD_CTX_QW0_TUNN_EXT_IP_S)
#define IDPF_TXD_CTX_QW1_MSS_S 50
#define IDPF_TXD_CTX_QW1_MSS_M GENMASK_ULL(63, 50)
#define IDPF_TXD_CTX_QW1_TSO_LEN_S 30
@ -112,6 +138,27 @@ enum idpf_tx_desc_dtype_value {
IDPF_TX_DESC_DTYPE_DESC_DONE = 15,
};
enum idpf_tx_ctx_desc_cmd_bits {
IDPF_TX_CTX_DESC_TSO = 0x01,
IDPF_TX_CTX_DESC_TSYN = 0x02,
IDPF_TX_CTX_DESC_IL2TAG2 = 0x04,
IDPF_TX_CTX_DESC_RSVD = 0x08,
IDPF_TX_CTX_DESC_SWTCH_NOTAG = 0x00,
IDPF_TX_CTX_DESC_SWTCH_UPLINK = 0x10,
IDPF_TX_CTX_DESC_SWTCH_LOCAL = 0x20,
IDPF_TX_CTX_DESC_SWTCH_VSI = 0x30,
IDPF_TX_CTX_DESC_FILT_AU_EN = 0x40,
IDPF_TX_CTX_DESC_FILT_AU_EVICT = 0x80,
IDPF_TX_CTX_DESC_RSVD1 = 0xF00
};
enum idpf_tx_desc_len_fields {
/* Note: These are predefined bit offsets */
IDPF_TX_DESC_LEN_MACLEN_S = 0, /* 7 BITS */
IDPF_TX_DESC_LEN_IPLEN_S = 7, /* 7 BITS */
IDPF_TX_DESC_LEN_L4_LEN_S = 14 /* 4 BITS */
};
enum idpf_tx_base_desc_cmd_bits {
IDPF_TX_DESC_CMD_EOP = BIT(0),
IDPF_TX_DESC_CMD_RS = BIT(1),
@ -148,6 +195,16 @@ struct idpf_splitq_tx_compl_desc {
u8 rsvd; /* Reserved */
}; /* writeback used with completion queues */
/* Context descriptors */
struct idpf_base_tx_ctx_desc {
struct {
__le32 tunneling_params;
__le16 l2tag2;
__le16 rsvd1;
} qw0;
__le64 qw1; /* type_cmd_tlen_mss/rt_hint */
};
/* Common cmd field defines for all desc except Flex Flow Scheduler (0x0C) */
enum idpf_tx_flex_desc_cmd_bits {
IDPF_TX_FLEX_DESC_CMD_EOP = BIT(0),

View File

@ -1565,4 +1565,5 @@ static const struct net_device_ops idpf_netdev_ops_splitq = {
static const struct net_device_ops idpf_netdev_ops_singleq = {
.ndo_open = idpf_open,
.ndo_stop = idpf_stop,
.ndo_start_xmit = idpf_tx_singleq_start,
};

View File

@ -133,6 +133,7 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
break;
case IDPF_DEV_ID_VF:
idpf_vf_dev_ops_init(adapter);
adapter->crc_enable = true;
break;
default:
err = -ENODEV;

File diff suppressed because it is too large Load Diff

View File

@ -1953,7 +1953,7 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
*
* Returns 0 if stop is not needed
*/
static int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size)
{
struct netdev_queue *nq;
@ -2017,8 +2017,8 @@ splitq_stop:
* to do a register write to update our queue status. We know this can only
* mean tail here as HW should be owning head for TX.
*/
static void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
bool xmit_more)
void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
bool xmit_more)
{
struct netdev_queue *nq;
@ -2041,11 +2041,13 @@ static void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
/**
* idpf_tx_desc_count_required - calculate number of Tx descriptors needed
* @txq: queue to send buffer on
* @skb: send buffer
*
* Returns number of data descriptors needed for this skb.
*/
static unsigned int idpf_tx_desc_count_required(struct sk_buff *skb)
unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
struct sk_buff *skb)
{
const struct skb_shared_info *shinfo;
unsigned int count = 0, i;
@ -2071,6 +2073,16 @@ static unsigned int idpf_tx_desc_count_required(struct sk_buff *skb)
count++;
}
if (idpf_chk_linearize(skb, txq->tx_max_bufs, count)) {
if (__skb_linearize(skb))
return 0;
count = idpf_size_to_txd_count(skb->len);
u64_stats_update_begin(&txq->stats_sync);
u64_stats_inc(&txq->q_stats.tx.linearize);
u64_stats_update_end(&txq->stats_sync);
}
return count;
}
@ -2081,8 +2093,8 @@ static unsigned int idpf_tx_desc_count_required(struct sk_buff *skb)
* @first: original first buffer info buffer for packet
* @idx: starting point on ring to unwind
*/
static void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
struct idpf_tx_buf *first, u16 idx)
void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
struct idpf_tx_buf *first, u16 idx)
{
u64_stats_update_begin(&txq->stats_sync);
u64_stats_inc(&txq->q_stats.tx.dma_map_errs);
@ -2327,7 +2339,7 @@ static void idpf_tx_splitq_map(struct idpf_queue *tx_q,
* Returns error (negative) if TSO was requested but cannot be applied to the
* given skb, 0 if TSO does not apply to the given skb, or 1 otherwise.
*/
static int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off)
{
const struct skb_shared_info *shinfo = skb_shinfo(skb);
union {
@ -2488,8 +2500,8 @@ static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs)
* E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO
* header, 1 for segment payload, and then 7 for the fragments.
*/
static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
unsigned int count)
bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
unsigned int count)
{
if (likely(count < max_bufs))
return false;
@ -2527,8 +2539,7 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_queue *txq)
* @tx_q: queue to send buffer on
* @skb: pointer to skb
*/
static netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q,
struct sk_buff *skb)
netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb)
{
u64_stats_update_begin(&tx_q->stats_sync);
u64_stats_inc(&tx_q->q_stats.tx.skb_drops);
@ -2556,16 +2567,9 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb,
unsigned int count;
int tso;
count = idpf_tx_desc_count_required(skb);
if (idpf_chk_linearize(skb, tx_q->tx_max_bufs, count)) {
if (__skb_linearize(skb))
return idpf_tx_drop_skb(tx_q, skb);
count = idpf_size_to_txd_count(skb->len);
u64_stats_update_begin(&tx_q->stats_sync);
u64_stats_inc(&tx_q->q_stats.tx.linearize);
u64_stats_update_end(&tx_q->stats_sync);
}
count = idpf_tx_desc_count_required(tx_q, skb);
if (unlikely(!count))
return idpf_tx_drop_skb(tx_q, skb);
tso = idpf_tso(skb, &tx_params.offload);
if (unlikely(tso < 0))
@ -2683,8 +2687,7 @@ netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
* skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
* Rx desc.
*/
static enum pkt_hash_types
idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded)
enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded)
{
if (!decoded->known)
return PKT_HASH_TYPE_NONE;
@ -2944,8 +2947,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
* It will just attach the page as a frag to the skb.
* The function will then update the page offset.
*/
static void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size)
void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size)
{
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
rx_buf->page_offset, size, rx_buf->truesize);
@ -2963,9 +2966,9 @@ static void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
* data from the current receive descriptor, taking care to set up the
* skb correctly.
*/
static struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size)
struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size)
{
unsigned int headlen;
struct sk_buff *skb;
@ -3595,7 +3598,7 @@ check_rx_itr:
* Update the net_dim() algorithm and re-enable the interrupt associated with
* this vector.
*/
static void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector)
{
u32 intval;

View File

@ -72,6 +72,12 @@ do { \
} \
} while (0)
#define IDPF_SINGLEQ_BUMP_RING_IDX(q, idx) \
do { \
if (unlikely(++(idx) == (q)->desc_count)) \
idx = 0; \
} while (0)
#define IDPF_RX_HDR_SIZE 256
#define IDPF_RX_BUF_2048 2048
#define IDPF_RX_BUF_4096 4096
@ -101,6 +107,10 @@ do { \
(&(((struct virtchnl2_splitq_rx_buf_desc *)((rxq)->desc_ring))[i]))
#define IDPF_SPLITQ_RX_BI_DESC(rxq, i) ((((rxq)->ring))[i])
#define IDPF_BASE_TX_DESC(txq, i) \
(&(((struct idpf_base_tx_desc *)((txq)->desc_ring))[i]))
#define IDPF_BASE_TX_CTX_DESC(txq, i) \
(&(((struct idpf_base_tx_ctx_desc *)((txq)->desc_ring))[i]))
#define IDPF_SPLITQ_TX_COMPLQ_DESC(txcq, i) \
(&(((struct idpf_splitq_tx_compl_desc *)((txcq)->desc_ring))[i]))
@ -136,6 +146,9 @@ do { \
#define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS)
#define IDPF_TX_FLAGS_TSO BIT(0)
#define IDPF_TX_FLAGS_IPV4 BIT(1)
#define IDPF_TX_FLAGS_IPV6 BIT(2)
#define IDPF_TX_FLAGS_TUNNEL BIT(3)
union idpf_tx_flex_desc {
struct idpf_flex_tx_desc q; /* queue based scheduling */
@ -199,6 +212,8 @@ struct idpf_buf_lifo {
/**
* struct idpf_tx_offload_params - Offload parameters for a given packet
* @tx_flags: Feature flags enabled for this packet
* @hdr_offsets: Offset parameter for single queue model
* @cd_tunneling: Type of tunneling enabled for single queue model
* @tso_len: Total length of payload to segment
* @mss: Segment size
* @tso_segs: Number of segments to be sent
@ -208,6 +223,9 @@ struct idpf_buf_lifo {
struct idpf_tx_offload_params {
u32 tx_flags;
u32 hdr_offsets;
u32 cd_tunneling;
u32 tso_len;
u16 mss;
u16 tso_segs;
@ -235,6 +253,13 @@ struct idpf_tx_splitq_params {
struct idpf_tx_offload_params offload;
};
enum idpf_tx_ctx_desc_eipt_offload {
IDPF_TX_CTX_EXT_IP_NONE = 0x0,
IDPF_TX_CTX_EXT_IP_IPV6 = 0x1,
IDPF_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
IDPF_TX_CTX_EXT_IP_IPV4 = 0x3
};
/* Checksum offload bits decoded from the receive descriptor. */
struct idpf_rx_csum_decoded {
u32 l3l4p : 1;
@ -249,6 +274,11 @@ struct idpf_rx_csum_decoded {
u32 raw_csum : 16;
};
struct idpf_rx_extracted {
unsigned int size;
u16 rx_ptype;
};
#define IDPF_TX_COMPLQ_CLEAN_BUDGET 256
#define IDPF_TX_MIN_PKT_LEN 17
#define IDPF_TX_DESCS_FOR_SKB_DATA_PTR 1
@ -832,6 +862,25 @@ static inline u32 idpf_size_to_txd_count(unsigned int size)
return DIV_ROUND_UP(size, IDPF_TX_MAX_DESC_DATA_ALIGNED);
}
/**
* idpf_tx_singleq_build_ctob - populate command tag offset and size
* @td_cmd: Command to be filled in desc
* @td_offset: Offset to be filled in desc
* @size: Size of the buffer
* @td_tag: td tag to be filled
*
* Returns the 64 bit value populated with the input parameters
*/
static inline __le64 idpf_tx_singleq_build_ctob(u64 td_cmd, u64 td_offset,
unsigned int size, u64 td_tag)
{
return cpu_to_le64(IDPF_TX_DESC_DTYPE_DATA |
(td_cmd << IDPF_TXD_QW1_CMD_S) |
(td_offset << IDPF_TXD_QW1_OFFSET_S) |
((u64)size << IDPF_TXD_QW1_TX_BUF_SZ_S) |
(td_tag << IDPF_TXD_QW1_L2TAG1_S));
}
void idpf_tx_splitq_build_ctb(union idpf_tx_flex_desc *desc,
struct idpf_tx_splitq_params *params,
u16 td_cmd, u16 size);
@ -921,17 +970,38 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport);
void idpf_vport_queues_rel(struct idpf_vport *vport);
void idpf_vport_intr_rel(struct idpf_vport *vport);
int idpf_vport_intr_alloc(struct idpf_vport *vport);
void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector);
void idpf_vport_intr_deinit(struct idpf_vport *vport);
int idpf_vport_intr_init(struct idpf_vport *vport);
enum pkt_hash_types idpf_ptype_to_htype(const struct idpf_rx_ptype_decoded *decoded);
int idpf_config_rss(struct idpf_vport *vport);
int idpf_init_rss(struct idpf_vport *vport);
void idpf_deinit_rss(struct idpf_vport *vport);
int idpf_rx_bufs_init_all(struct idpf_vport *vport);
void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int size);
struct sk_buff *idpf_rx_construct_skb(struct idpf_queue *rxq,
struct idpf_rx_buf *rx_buf,
unsigned int size);
bool idpf_init_rx_buf_hw_alloc(struct idpf_queue *rxq, struct idpf_rx_buf *buf);
void idpf_rx_buf_hw_update(struct idpf_queue *rxq, u32 val);
void idpf_tx_buf_hw_update(struct idpf_queue *tx_q, u32 val,
bool xmit_more);
unsigned int idpf_size_to_txd_count(unsigned int size);
netdev_tx_t idpf_tx_drop_skb(struct idpf_queue *tx_q, struct sk_buff *skb);
void idpf_tx_dma_map_error(struct idpf_queue *txq, struct sk_buff *skb,
struct idpf_tx_buf *first, u16 ring_idx);
unsigned int idpf_tx_desc_count_required(struct idpf_queue *txq,
struct sk_buff *skb);
bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs,
unsigned int count);
int idpf_tx_maybe_stop_common(struct idpf_queue *tx_q, unsigned int size);
netdev_tx_t idpf_tx_splitq_start(struct sk_buff *skb,
struct net_device *netdev);
netdev_tx_t idpf_tx_singleq_start(struct sk_buff *skb,
struct net_device *netdev);
bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_queue *rxq,
u16 cleaned_count);
int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
#endif /* !_IDPF_TXRX_H_ */

View File

@ -2893,6 +2893,8 @@ void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q)
idpf_vport_calc_num_q_desc(vport);
idpf_vport_calc_num_q_groups(vport);
idpf_vport_alloc_vec_indexes(vport);
vport->crc_enable = adapter->crc_enable;
}
/**