Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

This commit is contained in:
David S. Miller 2021-02-16 17:30:20 -08:00
commit d489ded1a3
56 changed files with 726 additions and 338 deletions

View File

@ -651,16 +651,15 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max
default: initial size of receive buffer used by TCP sockets.
This value overrides net.core.rmem_default used by other protocols.
Default: 87380 bytes. This value results in window of 65535 with
default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
less for default tcp_app_win. See below about these variables.
Default: 131072 bytes.
This value results in initial window of 65535.
max: maximal size of receive buffer allowed for automatically
selected receiver buffers for TCP socket. This value does not override
net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables
automatic tuning of that socket's receive buffer size, in which
case this value is ignored.
Default: between 87380B and 6MB, depending on RAM size.
Default: between 131072 and 6MB, depending on RAM size.
tcp_sack - BOOLEAN
Enable select acknowledgments (SACKS).

View File

@ -272,6 +272,22 @@ to the mailing list, e.g.::
Posting as one thread is discouraged because it confuses patchwork
(as of patchwork 2.2.2).
Can I reproduce the checks from patchwork on my local machine?
--------------------------------------------------------------
Checks in patchwork are mostly simple wrappers around existing kernel
scripts; the sources are available at:
https://github.com/kuba-moo/nipa/tree/master/tests
Running all the builds and checks locally is a pain, can I post my patches and have the patchwork bot validate them?
--------------------------------------------------------------------------------------------------------------------
No, you must ensure that your patches are ready by testing them locally
before posting to the mailing list. The patchwork build bot instance
gets overloaded very easily and netdev@vger really doesn't need more
traffic if we can help it.
Any other tips to help ensure my net/net-next patch gets OK'd?
--------------------------------------------------------------
Attention to detail. Re-read your own work as if you were the

View File

@ -3743,16 +3743,7 @@ static int __init idt77252_init(void)
struct sk_buff *skb;
printk("%s: at %p\n", __func__, idt77252_init);
if (sizeof(skb->cb) < sizeof(struct atm_skb_data) +
sizeof(struct idt77252_skb_prv)) {
printk(KERN_ERR "%s: skb->cb is too small (%lu < %lu)\n",
__func__, (unsigned long) sizeof(skb->cb),
(unsigned long) sizeof(struct atm_skb_data) +
sizeof(struct idt77252_skb_prv));
return -EIO;
}
BUILD_BUG_ON(sizeof(skb->cb) < sizeof(struct idt77252_skb_prv) + sizeof(struct atm_skb_data));
return pci_register_driver(&idt77252_driver);
}

View File

@ -789,7 +789,7 @@ struct idt77252_skb_prv {
struct scqe tbd; /* Transmit Buffer Descriptor */
dma_addr_t paddr; /* DMA handle */
u32 pool; /* sb_pool handle */
};
} __packed;
#define IDT77252_PRV_TBD(skb) \
(((struct idt77252_skb_prv *)(ATM_SKB(skb)+1))->tbd)

View File

@ -1279,10 +1279,18 @@
#define MDIO_PMA_10GBR_FECCTRL 0x00ab
#endif
#ifndef MDIO_PMA_RX_CTRL1
#define MDIO_PMA_RX_CTRL1 0x8051
#endif
#ifndef MDIO_PCS_DIG_CTRL
#define MDIO_PCS_DIG_CTRL 0x8000
#endif
#ifndef MDIO_PCS_DIGITAL_STAT
#define MDIO_PCS_DIGITAL_STAT 0x8010
#endif
#ifndef MDIO_AN_XNP
#define MDIO_AN_XNP 0x0016
#endif
@ -1358,6 +1366,8 @@
#define XGBE_KR_TRAINING_ENABLE BIT(1)
#define XGBE_PCS_CL37_BP BIT(12)
#define XGBE_PCS_PSEQ_STATE_MASK 0x1c
#define XGBE_PCS_PSEQ_STATE_POWER_GOOD 0x10
#define XGBE_AN_CL37_INT_CMPLT BIT(0)
#define XGBE_AN_CL37_INT_MASK 0x01
@ -1375,6 +1385,10 @@
#define XGBE_PMA_CDR_TRACK_EN_OFF 0x00
#define XGBE_PMA_CDR_TRACK_EN_ON 0x01
#define XGBE_PMA_RX_RST_0_MASK BIT(4)
#define XGBE_PMA_RX_RST_0_RESET_ON 0x10
#define XGBE_PMA_RX_RST_0_RESET_OFF 0x00
/* Bit setting and getting macros
* The get macro will extract the current bit field value from within
* the variable

View File

@ -1368,6 +1368,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
return;
netif_tx_stop_all_queues(netdev);
netif_carrier_off(pdata->netdev);
xgbe_stop_timers(pdata);
flush_workqueue(pdata->dev_workqueue);

View File

@ -1345,7 +1345,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
&an_restart);
if (an_restart) {
xgbe_phy_config_aneg(pdata);
return;
goto adjust_link;
}
if (pdata->phy.link) {
@ -1396,7 +1396,6 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
pdata->phy_if.phy_impl.stop(pdata);
pdata->phy.link = 0;
netif_carrier_off(pdata->netdev);
xgbe_phy_adjust_link(pdata);
}

View File

@ -922,6 +922,9 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata)
if ((phy_id & 0xfffffff0) != 0x03625d10)
return false;
/* Reset PHY - wait for self-clearing reset bit to clear */
genphy_soft_reset(phy_data->phydev);
/* Disable RGMII mode */
phy_write(phy_data->phydev, 0x18, 0x7007);
reg = phy_read(phy_data->phydev, 0x18);
@ -1953,6 +1956,27 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
xgbe_phy_put_comm_ownership(pdata);
}
/*
 * Pulse the PMA RX reset bit when the PCS power sequencer state read from
 * MDIO_PCS_DIGITAL_STAT equals XGBE_PCS_PSEQ_STATE_POWER_GOOD. In any other
 * state the function returns without touching the hardware.
 */
static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata)
{
	int pseq_state;

	/* NOTE(review): XMDIO_READ_BITS is defined outside this view; confirm
	 * whether it returns the masked field shifted down to bit 0, because
	 * the value is compared against an unshifted constant below.
	 */
	pseq_state = XMDIO_READ_BITS(pdata, MDIO_MMD_PCS,
				     MDIO_PCS_DIGITAL_STAT,
				     XGBE_PCS_PSEQ_STATE_MASK);
	if (pseq_state != XGBE_PCS_PSEQ_STATE_POWER_GOOD)
		return;

	/* Mailbox command timed out, reset of RX block is required.
	 * This can be done by asserting the reset bit and waiting for
	 * its completion.
	 */
	XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
			 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON);
	ndelay(20);

	/* Release the reset and give the block time to settle */
	XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1,
			 XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF);
	usleep_range(40, 50);

	netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n");
}
static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
unsigned int cmd, unsigned int sub_cmd)
{
@ -1960,9 +1984,11 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
unsigned int wait;
/* Log if a previous command did not complete */
if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) {
netif_dbg(pdata, link, pdata->netdev,
"firmware mailbox not ready for command\n");
xgbe_phy_rx_reset(pdata);
}
/* Construct the command */
XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd);
@ -1984,6 +2010,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata,
netif_dbg(pdata, link, pdata->netdev,
"firmware mailbox command did not complete\n");
/* Reset on error */
xgbe_phy_rx_reset(pdata);
}
static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
@ -2584,6 +2613,14 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
if (reg & MDIO_STAT1_LSTATUS)
return 1;
if (pdata->phy.autoneg == AUTONEG_ENABLE &&
phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) {
if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) {
netif_carrier_off(pdata->netdev);
*an_restart = 1;
}
}
/* No link, attempt a receiver reset cycle */
if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) {
phy_data->rrc_count = 0;

View File

@ -71,8 +71,10 @@ static int aq_ndev_open(struct net_device *ndev)
goto err_exit;
err = aq_nic_start(aq_nic);
if (err < 0)
if (err < 0) {
aq_nic_stop(aq_nic);
goto err_exit;
}
err_exit:
if (err < 0)

View File

@ -8984,9 +8984,10 @@ void bnxt_tx_disable(struct bnxt *bp)
txr->dev_state = BNXT_DEV_STATE_CLOSING;
}
}
/* Drop carrier first to prevent TX timeout */
netif_carrier_off(bp->dev);
/* Stop all TX queues */
netif_tx_disable(bp->dev);
netif_carrier_off(bp->dev);
}
void bnxt_tx_enable(struct bnxt *bp)

View File

@ -471,8 +471,8 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) {
u32 ver = nvm_cfg_ver.vu32;
sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF,
ver & 0xF);
sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf,
ver & 0xf);
rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED,
DEVLINK_INFO_VERSION_GENERIC_FW_PSID,
buf);

View File

@ -46,6 +46,9 @@
#define MAX_ULD_QSETS 16
#define MAX_ULD_NPORTS 4
/* ulp_mem_io + ulptx_idata + payload + padding */
#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8)
/* CPL message priority levels */
enum {
CPL_PRIORITY_DATA = 0, /* data messages */

View File

@ -2846,17 +2846,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
* @skb: the packet
*
* Returns true if a packet can be sent as an offload WR with immediate
* data. We currently use the same limit as for Ethernet packets.
* data.
* FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field.
* However, FW_ULPTX_WR commands have a 256 byte immediate only
* payload limit.
*/
static inline int is_ofld_imm(const struct sk_buff *skb)
{
struct work_request_hdr *req = (struct work_request_hdr *)skb->data;
unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi));
if (opcode == FW_CRYPTO_LOOKASIDE_WR)
if (unlikely(opcode == FW_ULPTX_WR))
return skb->len <= MAX_IMM_ULPTX_WR_LEN;
else if (opcode == FW_CRYPTO_LOOKASIDE_WR)
return skb->len <= SGE_MAX_WR_LEN;
else
return skb->len <= MAX_IMM_TX_PKT_LEN;
return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
}
/**

View File

@ -50,9 +50,6 @@
#define MIN_RCV_WND (24 * 1024U)
#define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000))
/* ulp_mem_io + ulptx_idata + payload + padding */
#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8)
/* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */
#define TX_HEADER_LEN \
(sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr))

View File

@ -395,10 +395,20 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv,
xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE;
err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
if (unlikely(err))
if (unlikely(err)) {
addr = dma_map_page(priv->net_dev->dev.parent,
virt_to_page(vaddr), 0,
priv->rx_buf_size, DMA_BIDIRECTIONAL);
if (unlikely(dma_mapping_error(priv->net_dev->dev.parent, addr))) {
free_pages((unsigned long)vaddr, 0);
} else {
ch->buf_count++;
dpaa2_eth_xdp_release_buf(priv, ch, addr);
}
ch->stats.xdp_drop++;
else
} else {
ch->stats.xdp_redirect++;
}
break;
}

View File

@ -27,7 +27,7 @@ config FSL_ENETC_VF
config FSL_ENETC_MDIO
tristate "ENETC MDIO driver"
depends on PCI
depends on PCI && MDIO_DEVRES && MDIO_BUS
help
This driver supports NXP ENETC Central MDIO controller as a PCIe
physical function (PF) device.

View File

@ -1157,14 +1157,15 @@ static void enetc_pf_remove(struct pci_dev *pdev)
struct enetc_ndev_priv *priv;
priv = netdev_priv(si->ndev);
enetc_phylink_destroy(priv);
enetc_mdiobus_destroy(pf);
if (pf->num_vfs)
enetc_sriov_configure(pdev, 0);
unregister_netdev(si->ndev);
enetc_phylink_destroy(priv);
enetc_mdiobus_destroy(pf);
enetc_free_msix(priv);
enetc_free_si_resources(priv);

View File

@ -247,8 +247,13 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter,
if (!ltb->buff)
return;
/* VIOS automatically unmaps the long term buffer at remote
* end for the following resets:
* FAILOVER, MOBILITY, TIMEOUT.
*/
if (adapter->reset_reason != VNIC_RESET_FAILOVER &&
adapter->reset_reason != VNIC_RESET_MOBILITY)
adapter->reset_reason != VNIC_RESET_MOBILITY &&
adapter->reset_reason != VNIC_RESET_TIMEOUT)
send_request_unmap(adapter, ltb->map_id);
dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr);
}
@ -1322,10 +1327,8 @@ static int __ibmvnic_close(struct net_device *netdev)
adapter->state = VNIC_CLOSING;
rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN);
if (rc)
return rc;
adapter->state = VNIC_CLOSED;
return 0;
return rc;
}
static int ibmvnic_close(struct net_device *netdev)
@ -1670,6 +1673,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
skb_copy_from_linear_data(skb, dst, skb->len);
}
/* post changes to long_term_buff *dst before VIOS accessing it */
dma_wmb();
tx_pool->consumer_index =
(tx_pool->consumer_index + 1) % tx_pool->num_buffers;
@ -2288,7 +2294,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
unsigned long flags;
int ret;
/* If failover is pending don't schedule any other reset.
/*
* If failover is pending don't schedule any other reset.
* Instead let the failover complete. If there is already
* a failover reset scheduled, we will detect and drop the
* duplicate reset when walking the ->rwi_list below.
@ -2308,14 +2315,11 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
goto err;
}
spin_lock_irqsave(&adapter->rwi_lock, flags);
list_for_each(entry, &adapter->rwi_list) {
tmp = list_entry(entry, struct ibmvnic_rwi, list);
if (tmp->reset_reason == reason) {
netdev_dbg(netdev, "Skipping matching reset, reason=%d\n",
reason);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
ret = EBUSY;
goto err;
}
@ -2323,8 +2327,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC);
if (!rwi) {
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
ibmvnic_close(netdev);
ret = ENOMEM;
goto err;
}
@ -2337,12 +2339,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter,
}
rwi->reset_reason = reason;
list_add_tail(&rwi->list, &adapter->rwi_list);
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason);
schedule_work(&adapter->ibmvnic_reset);
return 0;
ret = 0;
err:
/* ibmvnic_close() below can block, so drop the lock first */
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
if (ret == ENOMEM)
ibmvnic_close(netdev);
return -ret;
}
@ -2433,6 +2440,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget)
offset = be16_to_cpu(next->rx_comp.off_frame_data);
flags = next->rx_comp.flags;
skb = rx_buff->skb;
/* load long_term_buff before copying to skb */
dma_rmb();
skb_copy_to_linear_data(skb, rx_buff->data + offset,
length);
@ -5346,7 +5355,18 @@ static int ibmvnic_remove(struct vio_dev *dev)
unsigned long flags;
spin_lock_irqsave(&adapter->state_lock, flags);
/* If ibmvnic_reset() is scheduling a reset, wait for it to
* finish. Then, set the state to REMOVING to prevent it from
* scheduling any more work and to have reset functions ignore
* any resets that have already been scheduled. Drop the lock
* after setting state, so __ibmvnic_reset() which is called
* from the flush_work() below, can make progress.
*/
spin_lock_irqsave(&adapter->rwi_lock, flags);
adapter->state = VNIC_REMOVING;
spin_unlock_irqrestore(&adapter->rwi_lock, flags);
spin_unlock_irqrestore(&adapter->state_lock, flags);
flush_work(&adapter->ibmvnic_reset);

View File

@ -31,7 +31,7 @@
#define IBMVNIC_BUFFS_PER_POOL 100
#define IBMVNIC_MAX_QUEUES 16
#define IBMVNIC_MAX_QUEUE_SZ 4096
#define IBMVNIC_MAX_IND_DESCS 128
#define IBMVNIC_MAX_IND_DESCS 16
#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32)
#define IBMVNIC_TSO_BUF_SZ 65536
@ -1084,6 +1084,8 @@ struct ibmvnic_adapter {
/* Used for serialization of state field */
spinlock_t state_lock;
enum ibmvnic_reset_reason reset_reason;
/* when taking both state and rwi locks, take state lock first */
spinlock_t rwi_lock;
struct list_head rwi_list;
/* Used for serialization of rwi_list */
spinlock_t rwi_lock;

View File

@ -141,6 +141,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change,
return -EOPNOTSUPP;
}
if (mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n");
return -EOPNOTSUPP;
}
switch (action) {
case DEVLINK_RELOAD_ACTION_DRIVER_REINIT:
mlx5_unload_one(dev, false);
@ -428,6 +433,10 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id,
NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE");
return -EOPNOTSUPP;
}
if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) {
NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE");
return -EOPNOTSUPP;
}
return 0;
}

View File

@ -12,6 +12,7 @@
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
#include "lib/fs_chains.h"
@ -52,11 +53,11 @@ struct mlx5_tc_ct_priv {
struct mlx5_flow_table *ct_nat;
struct mlx5_flow_table *post_ct;
struct mutex control_lock; /* guards parallel adds/dels */
struct mutex shared_counter_lock;
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
enum mlx5_flow_namespace_type ns_type;
struct mlx5_fs_chains *chains;
spinlock_t ht_lock; /* protects ft entries */
};
struct mlx5_ct_flow {
@ -125,6 +126,10 @@ struct mlx5_ct_counter {
bool is_shared;
};
enum {
MLX5_CT_ENTRY_FLAG_VALID,
};
struct mlx5_ct_entry {
struct rhash_head node;
struct rhash_head tuple_node;
@ -135,6 +140,12 @@ struct mlx5_ct_entry {
struct mlx5_ct_tuple tuple;
struct mlx5_ct_tuple tuple_nat;
struct mlx5_ct_zone_rule zone_rules[2];
struct mlx5_tc_ct_priv *ct_priv;
struct work_struct work;
refcount_t refcnt;
unsigned long flags;
};
static const struct rhashtable_params cts_ht_params = {
@ -742,6 +753,87 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
return err;
}
/* Report whether @entry currently has MLX5_CT_ENTRY_FLAG_VALID set. */
static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
	const unsigned long *entry_flags = &entry->flags;

	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, entry_flags);
}
/*
 * Look up @tuple and take a reference on the matching entry.
 *
 * The plain tuple table is searched first; the NAT tuple table is only
 * consulted when the first lookup finds nothing.
 *
 * Return: the entry with its refcnt incremented, NULL when neither table
 * holds the tuple, or ERR_PTR(-EINVAL) when an entry was found but it is
 * not marked valid or its refcnt has already dropped to zero.
 */
static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
	struct mlx5_ct_entry *found;

	found = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
				       tuples_ht_params);
	if (!found)
		found = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       tuple, tuples_nat_ht_params);
	if (!found)
		return NULL;

	/* Validity is tested before the refcount is touched */
	if (mlx5_tc_ct_entry_valid(found) &&
	    refcount_inc_not_zero(&found->refcnt))
		return found;

	return ERR_PTR(-EINVAL);
}
/*
 * Unlink @entry from both tuple hash tables of its owning ct_priv: the NAT
 * tuple table first, then the plain tuple table. The removal results are
 * not checked.
 */
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *priv = entry->ct_priv;

	rhashtable_remove_fast(&priv->ct_tuples_nat_ht, &entry->tuple_nat_node,
			       tuples_nat_ht_params);

	rhashtable_remove_fast(&priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
}
/*
 * Tear down @entry and free it: delete its rules, unlink it from the tuple
 * tables under ht_lock, drop its counter and release the memory.
 */
static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *priv = entry->ct_priv;

	mlx5_tc_ct_entry_del_rules(priv, entry);

	/* Tuple table updates are done with ht_lock held */
	spin_lock_bh(&priv->ht_lock);
	mlx5_tc_ct_entry_remove_from_tuples(entry);
	spin_unlock_bh(&priv->ht_lock);

	mlx5_tc_ct_counter_put(priv, entry);
	kfree(entry);
}
/*
 * Drop one reference on @entry; when it was the last one, delete the entry
 * synchronously in the caller's context.
 */
static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (refcount_dec_and_test(&entry->refcnt))
		mlx5_tc_ct_entry_del(entry);
}
/* Work handler: delete the entry that embeds @work. */
static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
	mlx5_tc_ct_entry_del(container_of(work, struct mlx5_ct_entry, work));
}
/*
 * Drop one reference on @entry; when it was the last one, queue the
 * deletion on the owning device's workqueue instead of running it inline.
 */
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (refcount_dec_and_test(&entry->refcnt)) {
		struct mlx5e_priv *priv = netdev_priv(entry->ct_priv->netdev);

		INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
		queue_work(priv->wq, &entry->work);
	}
}
static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
@ -793,16 +885,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
}
/* Use the same counter as the reverse direction */
mutex_lock(&ct_priv->shared_counter_lock);
rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple,
tuples_ht_params);
if (rev_entry) {
if (refcount_inc_not_zero(&rev_entry->counter->refcount)) {
mutex_unlock(&ct_priv->shared_counter_lock);
return rev_entry->counter;
}
spin_lock_bh(&ct_priv->ht_lock);
rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);
if (IS_ERR(rev_entry)) {
spin_unlock_bh(&ct_priv->ht_lock);
goto create_counter;
}
mutex_unlock(&ct_priv->shared_counter_lock);
if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
shared_counter = rev_entry->counter;
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(rev_entry);
return shared_counter;
}
spin_unlock_bh(&ct_priv->ht_lock);
create_counter:
shared_counter = mlx5_tc_ct_counter_create(ct_priv);
if (IS_ERR(shared_counter))
@ -865,10 +967,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (!meta_action)
return -EOPNOTSUPP;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
cts_ht_params);
if (entry)
return 0;
spin_lock_bh(&ct_priv->ht_lock);
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (entry && refcount_inc_not_zero(&entry->refcnt)) {
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
return -EEXIST;
}
spin_unlock_bh(&ct_priv->ht_lock);
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
@ -877,6 +983,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
entry->tuple.zone = ft->zone;
entry->cookie = flow->cookie;
entry->restore_cookie = meta_action->ct_metadata.cookie;
refcount_set(&entry->refcnt, 2);
entry->ct_priv = ct_priv;
err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
if (err)
@ -887,35 +995,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
if (err)
goto err_set;
err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht,
&entry->tuple_node,
tuples_ht_params);
spin_lock_bh(&ct_priv->ht_lock);
err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
cts_ht_params);
if (err)
goto err_entries;
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
&entry->tuple_node,
tuples_ht_params);
if (err)
goto err_tuple;
if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node,
tuples_nat_ht_params);
if (err)
goto err_tuple_nat;
}
spin_unlock_bh(&ct_priv->ht_lock);
err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
ft->zone_restore_id);
if (err)
goto err_rules;
err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node,
cts_ht_params);
if (err)
goto err_insert;
set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
mlx5_tc_ct_entry_put(entry); /* this function reference */
return 0;
err_insert:
mlx5_tc_ct_entry_del_rules(ct_priv, entry);
err_rules:
spin_lock_bh(&ct_priv->ht_lock);
if (mlx5_tc_ct_entry_has_nat(entry))
rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
&entry->tuple_nat_node, tuples_nat_ht_params);
@ -924,47 +1037,43 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
&entry->tuple_node,
tuples_ht_params);
err_tuple:
rhashtable_remove_fast(&ft->ct_entries_ht,
&entry->node,
cts_ht_params);
err_entries:
spin_unlock_bh(&ct_priv->ht_lock);
err_set:
kfree(entry);
netdev_warn(ct_priv->netdev,
"Failed to offload ct entry, err: %d\n", err);
if (err != -EEXIST)
netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
return err;
}
/*
 * Delete the rules of @entry, unlink it from the tuple tables while holding
 * shared_counter_lock, and drop its counter. The entry itself is not freed
 * here.
 */
static void
mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv,
			struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rules(ct_priv, entry);

	mutex_lock(&ct_priv->shared_counter_lock);
	/* The NAT table is only touched for entries that have a NAT tuple */
	if (mlx5_tc_ct_entry_has_nat(entry)) {
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node,
				       tuples_nat_ht_params);
	}
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
	mutex_unlock(&ct_priv->shared_counter_lock);

	mlx5_tc_ct_counter_put(ct_priv, entry);
}
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
struct flow_cls_offload *flow)
{
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = flow->cookie;
struct mlx5_ct_entry *entry;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
cts_ht_params);
if (!entry)
spin_lock_bh(&ct_priv->ht_lock);
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (!entry) {
spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
}
mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry);
WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht,
&entry->node,
cts_ht_params));
kfree(entry);
if (!mlx5_tc_ct_entry_valid(entry)) {
spin_unlock_bh(&ct_priv->ht_lock);
return -EINVAL;
}
rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
mlx5_tc_ct_entry_remove_from_tuples(entry);
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_tc_ct_entry_put(entry);
return 0;
}
@ -973,19 +1082,30 @@ static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
struct flow_cls_offload *f)
{
struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
unsigned long cookie = f->cookie;
struct mlx5_ct_entry *entry;
u64 lastuse, packets, bytes;
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie,
cts_ht_params);
if (!entry)
spin_lock_bh(&ct_priv->ht_lock);
entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
if (!entry) {
spin_unlock_bh(&ct_priv->ht_lock);
return -ENOENT;
}
if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
spin_unlock_bh(&ct_priv->ht_lock);
return -EINVAL;
}
spin_unlock_bh(&ct_priv->ht_lock);
mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
FLOW_ACTION_HW_STATS_DELAYED);
mlx5_tc_ct_entry_put(entry);
return 0;
}
@ -1481,11 +1601,9 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
struct mlx5_tc_ct_priv *ct_priv = arg;
struct mlx5_ct_entry *entry = ptr;
mlx5_tc_ct_del_ft_entry(ct_priv, entry);
kfree(entry);
mlx5_tc_ct_entry_put(entry);
}
static void
@ -1962,6 +2080,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_mapping_labels;
}
spin_lock_init(&ct_priv->ht_lock);
ct_priv->ns_type = ns_type;
ct_priv->chains = chains;
ct_priv->netdev = priv->netdev;
@ -1996,7 +2115,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
idr_init(&ct_priv->fte_ids);
mutex_init(&ct_priv->control_lock);
mutex_init(&ct_priv->shared_counter_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
@ -2039,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
mutex_destroy(&ct_priv->shared_counter_lock);
idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
}
@ -2061,14 +2178,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
return false;
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple,
tuples_ht_params);
if (!entry)
entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
&tuple, tuples_nat_ht_params);
if (!entry)
spin_lock(&ct_priv->ht_lock);
entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
if (!entry) {
spin_unlock(&ct_priv->ht_lock);
return false;
}
if (IS_ERR(entry)) {
spin_unlock(&ct_priv->ht_lock);
return false;
}
spin_unlock(&ct_priv->ht_lock);
tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
__mlx5_tc_ct_entry_put(entry);
return true;
}

View File

@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
/* Let other device's napi(s) and XSK wakeups see our new state. */
synchronize_rcu();
synchronize_net();
}
static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)

View File

@ -111,7 +111,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params,
void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */
synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);

View File

@ -173,7 +173,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv,
#endif
#if IS_ENABLED(CONFIG_GENEVE)
if (skb->encapsulation)
if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
mlx5e_tx_tunnel_accel(skb, eseg, ihs);
#endif

View File

@ -57,6 +57,20 @@ struct mlx5e_ktls_offload_context_rx {
struct mlx5e_ktls_rx_resync_ctx resync;
};
/*
 * Drop one reference on @priv_rx (counted in resync.refcnt) and free the
 * context when it was the last one.
 *
 * Return: true if @priv_rx was freed by this call, false otherwise.
 */
static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx)
{
	bool last_ref = refcount_dec_and_test(&priv_rx->resync.refcnt);

	if (last_ref)
		kfree(priv_rx);

	return last_ref;
}
static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx)
{
refcount_inc(&priv_rx->resync.refcnt);
}
static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn)
{
int err, inlen;
@ -326,7 +340,7 @@ static void resync_handle_work(struct work_struct *work)
priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync);
if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
return;
}
@ -334,7 +348,7 @@ static void resync_handle_work(struct work_struct *work)
sq = &c->async_icosq;
if (resync_post_get_progress_params(sq, priv_rx))
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
}
static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@ -377,7 +391,11 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx
return err;
}
/* Function is called with elevated refcount, it decreases it. */
/* Function can be called with the refcount being either elevated or not.
* It decreases the refcount and may free the kTLS priv context.
* Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was
* already in flight.
*/
void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
struct mlx5e_icosq *sq)
{
@ -410,7 +428,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
priv_rx->stats->tls_resync_req_end++;
out:
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
kfree(buf);
}
@ -431,9 +449,9 @@ static bool resync_queue_get_psv(struct sock *sk)
return false;
resync = &priv_rx->resync;
refcount_inc(&resync->refcnt);
mlx5e_ktls_priv_rx_get(priv_rx);
if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
refcount_dec(&resync->refcnt);
mlx5e_ktls_priv_rx_put(priv_rx);
return true;
}
@ -625,31 +643,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
return err;
}
/* A resync refcount above 1 means GET_PSV WQEs are still outstanding and
 * will read the resync / priv_rx objects when they complete. Poll every
 * 20 msecs until the count is back to 1, so those objects are not used
 * after being freed; warn and give up once the timeout has elapsed.
 */
static void wait_for_resync(struct net_device *netdev,
			    struct mlx5e_ktls_rx_resync_ctx *resync)
{
#define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */
	unsigned long deadline = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT);
	unsigned int refcnt;

	for (;;) {
		refcnt = refcount_read(&resync->refcnt);
		if (refcnt == 1)
			return;

		msleep(20);
		if (!time_before(jiffies, deadline))
			break;
	}

	netdev_warn(netdev,
		    "Failed waiting for kTLS RX resync refcnt to be released (%u).\n",
		    refcnt);
}
void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
{
struct mlx5e_ktls_offload_context_rx *priv_rx;
@ -663,7 +656,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx);
set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags);
mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL);
synchronize_rcu(); /* Sync with NAPI */
synchronize_net(); /* Sync with NAPI */
if (!cancel_work_sync(&priv_rx->rule.work))
/* completion is needed, as the priv_rx in the add flow
* is maintained on the wqe info (wi), not on the socket.
@ -671,8 +664,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
wait_for_completion(&priv_rx->add_ctx);
resync = &priv_rx->resync;
if (cancel_work_sync(&resync->work))
refcount_dec(&resync->refcnt);
wait_for_resync(netdev, resync);
mlx5e_ktls_priv_rx_put(priv_rx);
priv_rx->stats->tls_del++;
if (priv_rx->rule.rule)
@ -680,5 +672,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx)
mlx5_core_destroy_tir(mdev, priv_rx->tirn);
mlx5_ktls_destroy_key(mdev, priv_rx->key_id);
kfree(priv_rx);
/* priv_rx should normally be freed here, but if there is an outstanding
* GET_PSV, deallocation will be delayed until the CQE for GET_PSV is
* processed.
*/
mlx5e_ktls_priv_rx_put(priv_rx);
}

View File

@ -536,7 +536,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
#define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT
static void
mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int tc;
@ -551,6 +551,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc
coal->tx_coalesce_usecs,
coal->tx_max_coalesced_frames);
}
}
}
static void
mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
{
struct mlx5_core_dev *mdev = priv->mdev;
int i;
for (i = 0; i < priv->channels.num; ++i) {
struct mlx5e_channel *c = priv->channels.c[i];
mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
coal->rx_coalesce_usecs,
@ -597,21 +608,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
tx_moder->pkts = coal->tx_max_coalesced_frames;
new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
goto out;
}
/* we are opened */
reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled;
reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled;
if (!reset_rx && !reset_tx) {
mlx5e_set_priv_channels_coalesce(priv, coal);
priv->channels.params = new_channels.params;
goto out;
}
if (reset_rx) {
u8 mode = MLX5E_GET_PFLAG(&new_channels.params,
MLX5E_PFLAG_RX_CQE_BASED_MODER);
@ -625,6 +624,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
mlx5e_reset_tx_moderation(&new_channels.params, mode);
}
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;
goto out;
}
if (!reset_rx && !reset_tx) {
if (!coal->use_adaptive_rx_coalesce)
mlx5e_set_priv_channels_rx_coalesce(priv, coal);
if (!coal->use_adaptive_tx_coalesce)
mlx5e_set_priv_channels_tx_coalesce(priv, coal);
priv->channels.params = new_channels.params;
goto out;
}
err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
out:

View File

@ -67,6 +67,7 @@
#include "en/ptp.h"
#include "qos.h"
#include "en/trap.h"
#include "fpga/ipsec.h"
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
@ -108,7 +109,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
return false;
if (MLX5_IPSEC_DEV(mdev))
if (mlx5_fpga_is_ipsec_device(mdev))
return false;
if (params->xdp_prog) {
@ -947,7 +948,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq)
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
{
clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
}
void mlx5e_close_rq(struct mlx5e_rq *rq)
@ -1401,7 +1402,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
struct mlx5_wq_cyc *wq = &sq->wq;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
mlx5e_tx_disable_queue(sq->txq);
@ -1476,7 +1477,7 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
{
clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
synchronize_rcu(); /* Sync with NAPI. */
synchronize_net(); /* Sync with NAPI. */
}
void mlx5e_close_icosq(struct mlx5e_icosq *sq)
@ -1555,7 +1556,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
struct mlx5e_channel *c = sq->channel;
clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
synchronize_rcu(); /* Sync with NAPI. */
synchronize_net(); /* Sync with NAPI. */
mlx5e_destroy_sq(c->mdev, sq->sqn);
mlx5e_free_xdpsq_descs(sq);
@ -1879,12 +1880,12 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
mlx5e_build_create_cq_param(&ccp, c);
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
&c->async_icosq.cq);
if (err)
return err;
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
&c->icosq.cq);
if (err)
goto err_close_async_icosq_cq;
@ -2122,7 +2123,12 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
u32 buf_size = 0;
int i;
if (mlx5_fpga_is_ipsec_device(mdev))
byte_count += MLX5E_METADATA_ETHER_LEN;
if (mlx5e_rx_is_linear_skb(params, xsk)) {
@ -4590,8 +4596,9 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return -EINVAL;
}
if (MLX5_IPSEC_DEV(priv->mdev)) {
netdev_warn(netdev, "can't set XDP with IPSec offload\n");
if (mlx5_fpga_is_ipsec_device(priv->mdev)) {
netdev_warn(netdev,
"XDP is not available on Innova cards with IPsec support\n");
return -EINVAL;
}

View File

@ -1783,8 +1783,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool
rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe;
if (MLX5_IPSEC_DEV(mdev)) {
netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n");
if (mlx5_fpga_is_ipsec_device(mdev)) {
netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n");
return -EINVAL;
}
if (!rq->handle_rx_cqe) {

View File

@ -4445,7 +4445,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
*/
if (rate) {
rate = (rate * BITS_PER_BYTE) + 500000;
rate_mbps = max_t(u32, do_div(rate, 1000000), 1);
rate_mbps = max_t(u64, do_div(rate, 1000000), 1);
}
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);

View File

@ -124,7 +124,7 @@ struct mlx5_fpga_ipsec {
struct ida halloc;
};
static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
{
if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga))
return false;

View File

@ -43,6 +43,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
const struct mlx5_flow_cmds *
mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
void mlx5_fpga_ipsec_build_fs_cmds(void);
bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev);
#else
static inline
const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev)
@ -55,6 +56,7 @@ mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
}
static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {};
static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; }
#endif /* CONFIG_MLX5_FPGA_IPSEC */
#endif /* __MLX5_FPGA_IPSEC_H__ */

View File

@ -190,6 +190,16 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
return true;
}
static void enter_error_state(struct mlx5_core_dev *dev, bool force)
{
if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
}
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
bool err_detected = false;
@ -208,12 +218,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
goto unlock;
}
if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_cmd_flush(dev);
}
mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
enter_error_state(dev, force);
unlock:
mutex_unlock(&dev->intf_state_mutex);
}
@ -613,7 +618,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
mlx5_enter_error_state(dev, false);
enter_error_state(dev, false);
if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
if (mlx5_health_try_recover(dev))
mlx5_core_err(dev, "health recovery failed\n");
@ -707,8 +712,9 @@ static void poll_health(struct timer_list *t)
mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
dev->priv.health.fatal_error = fatal_error;
print_health_info(dev);
dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
mlx5_trigger_health_work(dev);
goto out;
return;
}
count = ioread32be(health->health_counter);

View File

@ -1445,7 +1445,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
pci_save_state(pdev);
devlink_reload_enable(devlink);
if (!mlx5_core_is_mp_slave(dev))
devlink_reload_enable(devlink);
return 0;
err_load_one:

View File

@ -2230,6 +2230,31 @@ static void rtl_prepare_power_down(struct rtl8169_private *tp)
phy_speed_down(tp->phydev, false);
rtl_wol_enable_rx(tp);
}
switch (tp->mac_version) {
case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26:
case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30:
case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33:
case RTL_GIGA_MAC_VER_37:
case RTL_GIGA_MAC_VER_39:
case RTL_GIGA_MAC_VER_43:
case RTL_GIGA_MAC_VER_44:
case RTL_GIGA_MAC_VER_45:
case RTL_GIGA_MAC_VER_46:
case RTL_GIGA_MAC_VER_47:
case RTL_GIGA_MAC_VER_48:
case RTL_GIGA_MAC_VER_50 ... RTL_GIGA_MAC_VER_63:
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
case RTL_GIGA_MAC_VER_40:
case RTL_GIGA_MAC_VER_41:
case RTL_GIGA_MAC_VER_49:
rtl_eri_clear_bits(tp, 0x1a8, 0xfc000000);
RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
break;
default:
break;
}
}
static void rtl_init_rxcfg(struct rtl8169_private *tp)

View File

@ -1862,6 +1862,18 @@ static int axienet_probe(struct platform_device *pdev)
lp->options = XAE_OPTION_DEFAULTS;
lp->rx_bd_num = RX_BD_NUM_DEFAULT;
lp->tx_bd_num = TX_BD_NUM_DEFAULT;
lp->clk = devm_clk_get_optional(&pdev->dev, NULL);
if (IS_ERR(lp->clk)) {
ret = PTR_ERR(lp->clk);
goto free_netdev;
}
ret = clk_prepare_enable(lp->clk);
if (ret) {
dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret);
goto free_netdev;
}
/* Map device registers */
ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
@ -2046,20 +2058,6 @@ static int axienet_probe(struct platform_device *pdev)
lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (lp->phy_node) {
lp->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(lp->clk)) {
dev_warn(&pdev->dev, "Failed to get clock: %ld\n",
PTR_ERR(lp->clk));
lp->clk = NULL;
} else {
ret = clk_prepare_enable(lp->clk);
if (ret) {
dev_err(&pdev->dev, "Unable to enable clock: %d\n",
ret);
goto free_netdev;
}
}
ret = axienet_mdio_setup(lp);
if (ret)
dev_warn(&pdev->dev,

View File

@ -580,10 +580,10 @@ ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data)
return -EINVAL;
for (i = 0; i < data->resource_src_count; i++)
ipa_resource_config_src(ipa, data->resource_src);
ipa_resource_config_src(ipa, &data->resource_src[i]);
for (i = 0; i < data->resource_dst_count; i++)
ipa_resource_config_dst(ipa, data->resource_dst);
ipa_resource_config_dst(ipa, &data->resource_dst[i]);
return 0;
}

View File

@ -300,10 +300,13 @@ static int mdio_bus_phy_resume(struct device *dev)
phydev->suspended_by_mdio_bus = 0;
ret = phy_resume(phydev);
ret = phy_init_hw(phydev);
if (ret < 0)
return ret;
ret = phy_resume(phydev);
if (ret < 0)
return ret;
no_resume:
if (phydev->attached_dev && phydev->adjust_link)
phy_start_machine(phydev);
@ -311,39 +314,8 @@ static int mdio_bus_phy_resume(struct device *dev)
return 0;
}
/* Restore callback for a PHY device.
 *
 * Does nothing when the PHY has no attached net_device. Otherwise the PHY
 * is re-initialized through phy_init_hw() and, if an adjust_link handler is
 * set, the PHY state machine is started again.
 *
 * Returns 0 on success or the negative value returned by phy_init_hw().
 */
static int mdio_bus_phy_restore(struct device *dev)
{
	struct phy_device *phydev = to_phy_device(dev);
	int err;

	if (!phydev->attached_dev)
		return 0;

	err = phy_init_hw(phydev);
	if (err < 0)
		return err;

	if (phydev->attached_dev && phydev->adjust_link)
		phy_start_machine(phydev);

	return 0;
}
/* PM callbacks for PHY devices: freeze and thaw reuse the suspend and
 * resume handlers, while restore has a dedicated handler.
 */
static const struct dev_pm_ops mdio_bus_phy_pm_ops = {
	/* entering a low-power / frozen state */
	.suspend = mdio_bus_phy_suspend,
	.freeze = mdio_bus_phy_suspend,
	/* coming back from it */
	.resume = mdio_bus_phy_resume,
	.thaw = mdio_bus_phy_resume,
	.restore = mdio_bus_phy_restore,
};
#define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops)
#else
#define MDIO_BUS_PHY_PM_OPS NULL
static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend,
mdio_bus_phy_resume);
#endif /* CONFIG_PM */
/**
@ -554,7 +526,7 @@ static const struct device_type mdio_bus_phy_type = {
.name = "PHY",
.groups = phy_dev_groups,
.release = phy_device_release,
.pm = MDIO_BUS_PHY_PM_OPS,
.pm = pm_ptr(&mdio_bus_phy_pm_ops),
};
static int phy_request_driver_module(struct phy_device *dev, u32 phy_id)
@ -1144,10 +1116,19 @@ int phy_init_hw(struct phy_device *phydev)
if (ret < 0)
return ret;
if (phydev->drv->config_init)
if (phydev->drv->config_init) {
ret = phydev->drv->config_init(phydev);
if (ret < 0)
return ret;
}
return ret;
if (phydev->drv->config_intr) {
ret = phydev->drv->config_intr(phydev);
if (ret < 0)
return ret;
}
return 0;
}
EXPORT_SYMBOL(phy_init_hw);

View File

@ -854,7 +854,7 @@ static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
spin_lock_init(&sc->lmc_lock);
pci_set_master(pdev);
printk(KERN_INFO "%s: detected at %lx, irq %d\n", dev->name,
printk(KERN_INFO "hdlc: detected at %lx, irq %d\n",
dev->base_addr, dev->irq);
err = register_hdlc_device(dev);
@ -899,6 +899,8 @@ static int lmc_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
break;
default:
printk(KERN_WARNING "%s: LMC UNKNOWN CARD!\n", dev->name);
unregister_hdlc_device(dev);
return -EIO;
break;
}

View File

@ -5311,7 +5311,7 @@ static void b43_nphy_restore_cal(struct b43_wldev *dev)
for (i = 0; i < 4; i++) {
if (dev->phy.rev >= 3)
table[i] = coef[i];
coef[i] = table[i];
else
coef[i] = 0;
}

View File

@ -86,8 +86,7 @@ static int cxgbit_is_ofld_imm(const struct sk_buff *skb)
if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_ISO))
length += sizeof(struct cpl_tx_data_iso);
#define MAX_IMM_TX_PKT_LEN 256
return length <= MAX_IMM_TX_PKT_LEN;
return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
}
/*

View File

@ -207,7 +207,7 @@ struct atm_skb_data {
struct atm_vcc *vcc; /* ATM VCC */
unsigned long atm_options; /* ATM layer options */
unsigned int acct_truesize; /* truesize accounted to vcc */
};
} __packed;
#define VCC_HTABLE_SIZE 32

View File

@ -166,6 +166,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int bind,
u32 flags);
void tcf_idr_insert_many(struct tc_action *actions[]);
void tcf_idr_cleanup(struct tc_action_net *tn, u32 index);
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tc_action **a, int bind);

View File

@ -593,6 +593,7 @@ enum {
TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */
TCA_FLOWER_KEY_CT_FLAGS_INVALID = 1 << 4, /* Conntrack is invalid. */
TCA_FLOWER_KEY_CT_FLAGS_REPLY = 1 << 5, /* Packet is in the reply direction. */
__TCA_FLOWER_KEY_CT_FLAGS_MAX,
};
enum {

View File

@ -11563,7 +11563,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
bool isdiv = BPF_OP(insn->code) == BPF_DIV;
struct bpf_insn *patchlet;
struct bpf_insn chk_and_div[] = {
/* Rx div 0 -> 0 */
/* [R,W]x div 0 -> 0 */
BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
BPF_JNE | BPF_K, insn->src_reg,
0, 2, 0),
@ -11572,16 +11572,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
*insn,
};
struct bpf_insn chk_and_mod[] = {
/* Rx mod 0 -> Rx */
/* [R,W]x mod 0 -> [R,W]x */
BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
BPF_JEQ | BPF_K, insn->src_reg,
0, 1, 0),
0, 1 + (is64 ? 0 : 1), 0),
*insn,
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
};
patchlet = isdiv ? chk_and_div : chk_and_mod;
cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
ARRAY_SIZE(chk_and_mod);
ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
if (!new_prog)

View File

@ -1577,8 +1577,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
struct ddpehdr *ddp;
int size;
struct atalk_route *rt;
int size, hard_header_len;
struct atalk_route *rt, *rt_lo = NULL;
int err;
if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
@ -1641,7 +1641,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
size += dev->hard_header_len;
hard_header_len = dev->hard_header_len;
/* Leave room for loopback hardware header if necessary */
if (usat->sat_addr.s_node == ATADDR_BCAST &&
(dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) {
struct atalk_addr at_lo;
at_lo.s_node = 0;
at_lo.s_net = 0;
rt_lo = atrtr_find(&at_lo);
if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len)
hard_header_len = rt_lo->dev->hard_header_len;
}
size += hard_header_len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
lock_sock(sk);
@ -1649,7 +1664,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
skb_reserve(skb, ddp_dl->header_length);
skb_reserve(skb, dev->hard_header_len);
skb_reserve(skb, hard_header_len);
skb->dev = dev;
SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
@ -1700,18 +1715,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
struct atalk_addr at_lo;
at_lo.s_node = 0;
at_lo.s_net = 0;
rt = atrtr_find(&at_lo);
if (!rt) {
if (!rt_lo) {
kfree_skb(skb);
err = -ENETUNREACH;
goto out;
}
dev = rt->dev;
dev = rt_lo->dev;
skb->dev = dev;
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);

View File

@ -115,10 +115,7 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
else
skb->ip_summed = CHECKSUM_NONE;
if (in_interrupt())
netif_rx(skb);
else
netif_rx_ni(skb);
netif_rx_any_context(skb);
/* Update statistics. */
priv->netdev->stats.rx_packets++;

View File

@ -1056,6 +1056,9 @@ bool __skb_flow_dissect(const struct net *net,
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
__skb_flow_dissect_ipv4(skb, flow_dissector,
target_container, data, iph);
if (ip_is_fragment(iph)) {
key_control->flags |= FLOW_DIS_IS_FRAGMENT;
@ -1072,9 +1075,6 @@ bool __skb_flow_dissect(const struct net *net,
}
}
__skb_flow_dissect_ipv4(skb, flow_dissector,
target_container, data, iph);
break;
}
case htons(ETH_P_IPV6): {

View File

@ -508,8 +508,8 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
u64 snd_data_fin_enable, ack_seq;
unsigned int dss_size = 0;
u64 snd_data_fin_enable;
struct mptcp_ext *mpext;
unsigned int ack_size;
bool ret = false;
@ -541,13 +541,14 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}
ack_seq = READ_ONCE(msk->ack_seq);
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
opts->ext_copy.data_ack = ack_seq;
opts->ext_copy.ack64 = 1;
} else {
ack_size = TCPOLEN_MPTCP_DSS_ACK32;
opts->ext_copy.data_ack32 = (uint32_t)READ_ONCE(msk->ack_seq);
opts->ext_copy.data_ack32 = (uint32_t)ack_seq;
opts->ext_copy.ack64 = 0;
}
opts->ext_copy.use_ack = 1;
@ -918,8 +919,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
msk->wnd_end = new_wnd_end;
/* this assumes mptcp_incoming_options() is invoked after tcp_ack() */
if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) &&
sk_stream_memory_free(ssk))
if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)))
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {

View File

@ -363,8 +363,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
/* Look for an acknowledged DATA_FIN */
if (mptcp_pending_data_fin_ack(sk)) {
mptcp_stop_timer(sk);
WRITE_ONCE(msk->snd_data_fin_enable, 0);
switch (sk->sk_state) {
@ -458,7 +456,18 @@ static bool mptcp_subflow_cleanup_rbuf(struct sock *ssk)
static void mptcp_cleanup_rbuf(struct mptcp_sock *msk)
{
struct sock *ack_hint = READ_ONCE(msk->ack_hint);
int old_space = READ_ONCE(msk->old_wspace);
struct mptcp_subflow_context *subflow;
struct sock *sk = (struct sock *)msk;
bool cleanup;
/* this is a simple superset of what tcp_cleanup_rbuf() implements
* so that we don't have to acquire the ssk socket lock most of the time
* to do actually nothing
*/
cleanup = __mptcp_space(sk) - old_space >= max(0, old_space);
if (!cleanup)
return;
/* if the hinted ssk is still active, try to use it */
if (likely(ack_hint)) {
@ -1565,6 +1574,9 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
mptcp_set_timeout(sk, ssk);
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
mptcp_schedule_work(sk);
@ -1868,7 +1880,7 @@ static void __mptcp_splice_receive_queue(struct sock *sk)
skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
}
static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
unsigned int moved = 0;
@ -1888,13 +1900,10 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
slowpath = lock_sock_fast(ssk);
mptcp_data_lock(sk);
__mptcp_update_rmem(sk);
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
mptcp_data_unlock(sk);
if (moved && rcv) {
WRITE_ONCE(msk->rmem_pending, min(rcv, moved));
tcp_cleanup_rbuf(ssk, 1);
WRITE_ONCE(msk->rmem_pending, 0);
}
tcp_cleanup_rbuf(ssk, moved);
unlock_sock_fast(ssk, slowpath);
} while (!done);
@ -1907,6 +1916,7 @@ static bool __mptcp_move_skbs(struct mptcp_sock *msk, unsigned int rcv)
ret |= __mptcp_ofo_queue(msk);
__mptcp_splice_receive_queue(sk);
mptcp_data_unlock(sk);
mptcp_cleanup_rbuf(msk);
}
if (ret)
mptcp_check_data_fin((struct sock *)msk);
@ -1936,7 +1946,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
while (copied < len) {
int bytes_read, old_space;
int bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied);
if (unlikely(bytes_read < 0)) {
@ -1947,14 +1957,11 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
copied += bytes_read;
if (skb_queue_empty(&msk->receive_queue) &&
__mptcp_move_skbs(msk, len - copied))
continue;
/* be sure to advertise window change */
old_space = READ_ONCE(msk->old_wspace);
if ((tcp_space(sk) - old_space) >= old_space)
mptcp_cleanup_rbuf(msk);
mptcp_cleanup_rbuf(msk);
if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
continue;
/* only the master socket status is relevant here. The exit
* conditions mirror closely tcp_recvmsg()
@ -1982,7 +1989,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* race breaker: the shutdown could be after the
* previous receive queue check
*/
if (__mptcp_move_skbs(msk, len - copied))
if (__mptcp_move_skbs(msk))
continue;
break;
}
@ -2015,7 +2022,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
/* .. race-breaker: ssk might have gotten new data
* after last __mptcp_move_skbs() returned false.
*/
if (unlikely(__mptcp_move_skbs(msk, 0)))
if (unlikely(__mptcp_move_skbs(msk)))
set_bit(MPTCP_DATA_READY, &msk->flags);
} else if (unlikely(!test_bit(MPTCP_DATA_READY, &msk->flags))) {
/* data to read but mptcp_wait_data() cleared DATA_READY */
@ -2275,6 +2282,7 @@ static void mptcp_worker(struct work_struct *work)
if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
goto unlock;
__mptcp_clean_una(sk);
dfrag = mptcp_rtx_head(sk);
if (!dfrag)
goto unlock;
@ -2943,6 +2951,8 @@ static void mptcp_release_cb(struct sock *sk)
mptcp_push_pending(sk, 0);
spin_lock_bh(&sk->sk_lock.slock);
}
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
/* clear any wmem reservation and errors */
__mptcp_update_wmem(sk);
@ -3319,7 +3329,7 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
struct sock *sk = (struct sock *)msk;
if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
return 0;
return EPOLLOUT | EPOLLWRNORM;
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
@ -3352,9 +3362,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
mask |= mptcp_check_readable(msk);
mask |= mptcp_check_writeable(msk);
}
if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= EPOLLHUP;
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
if (sk->sk_err)
mask |= EPOLLERR;
return mask;
}

View File

@ -102,6 +102,7 @@
#define MPTCP_WORK_CLOSE_SUBFLOW 5
#define MPTCP_PUSH_PENDING 6
#define MPTCP_CLEAN_UNA 7
#define MPTCP_ERROR_REPORT 8
static inline bool before64(__u64 seq1, __u64 seq2)
{
@ -237,7 +238,6 @@ struct mptcp_sock {
u64 wnd_end;
unsigned long timer_ival;
u32 token;
int rmem_pending;
int rmem_released;
unsigned long flags;
bool can_ack;
@ -301,7 +301,7 @@ static inline struct mptcp_sock *mptcp_sk(const struct sock *sk)
static inline int __mptcp_space(const struct sock *sk)
{
return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_pending);
return tcp_space(sk) + READ_ONCE(mptcp_sk(sk)->rmem_released);
}
static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk)
@ -334,20 +334,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk)
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una)))
return NULL;
return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list);
}
/* Return the first entry of the msk rtx_queue.
 *
 * Yields NULL when snd_una equals snd_nxt, and also when the queue itself
 * is empty.
 */
static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk)
{
	struct mptcp_sock *msk = mptcp_sk(sk);
	struct mptcp_data_frag *head = NULL;

	if (msk->snd_una != READ_ONCE(msk->snd_nxt))
		head = list_first_entry_or_null(&msk->rtx_queue,
						struct mptcp_data_frag, list);

	return head;
}
@ -436,6 +429,7 @@ struct mptcp_subflow_context {
void (*tcp_data_ready)(struct sock *sk);
void (*tcp_state_change)(struct sock *sk);
void (*tcp_write_space)(struct sock *sk);
void (*tcp_error_report)(struct sock *sk);
struct rcu_head rcu;
};
@ -560,6 +554,7 @@ static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
sk->sk_data_ready = ctx->tcp_data_ready;
sk->sk_state_change = ctx->tcp_state_change;
sk->sk_write_space = ctx->tcp_write_space;
sk->sk_error_report = ctx->tcp_error_report;
inet_csk(sk)->icsk_af_ops = ctx->icsk_af_ops;
}
@ -587,6 +582,7 @@ bool mptcp_finish_join(struct sock *sk);
bool mptcp_schedule_work(struct sock *sk);
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
void __mptcp_data_acked(struct sock *sk);
void __mptcp_error_report(struct sock *sk);
void mptcp_subflow_eof(struct sock *sk);
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
void __mptcp_flush_join_list(struct mptcp_sock *msk);

View File

@ -100,7 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
return msk;
}
static int __subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
static void subflow_init_req(struct request_sock *req, const struct sock *sk_listener)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
@ -108,16 +108,6 @@ static int __subflow_init_req(struct request_sock *req, const struct sock *sk_li
subflow_req->mp_join = 0;
subflow_req->msk = NULL;
mptcp_token_init_request(req);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
return -EINVAL;
#endif
return 0;
}
static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
@ -130,20 +120,23 @@ static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct soc
* Returns an error code if a JOIN has failed and a TCP reset
* should be sent.
*/
static int subflow_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
static int subflow_check_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
{
struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener);
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_options_received mp_opt;
int ret;
pr_debug("subflow_req=%p, listener=%p", subflow_req, listener);
ret = __subflow_init_req(req, sk_listener);
if (ret)
return 0;
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
* TCP option space.
*/
if (rcu_access_pointer(tcp_sk(sk_listener)->md5sig_info))
return -EINVAL;
#endif
mptcp_get_options(skb, &mp_opt);
@ -236,10 +229,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
struct mptcp_options_received mp_opt;
int err;
err = __subflow_init_req(req, sk_listener);
if (err)
return err;
subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join)
@ -279,12 +269,13 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
err = subflow_init_req(req, sk, skb);
err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@ -304,12 +295,13 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
int err;
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
if (!dst)
return NULL;
err = subflow_init_req(req, sk, skb);
err = subflow_check_req(req, sk, skb);
if (err == 0)
return dst;
@ -1124,6 +1116,46 @@ static void subflow_write_space(struct sock *ssk)
mptcp_write_space(sk);
}
/* Propagate a pending subflow error to the MPTCP-level socket @sk.
 *
 * Walks the subflows and fetches each one's error via sock_error(). The
 * first error that qualifies is copied to @sk (state and sk_err) and
 * reported through sk->sk_error_report(); the walk then stops.
 */
void __mptcp_error_report(struct sock *sk)
{
	struct mptcp_subflow_context *subflow;
	struct mptcp_sock *msk = mptcp_sk(sk);

	mptcp_for_each_subflow(msk, subflow) {
		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
		int ssk_err = sock_error(ssk);
		bool propagate;

		if (ssk_err == 0)
			continue;

		/* only propagate errors on fallen-back sockets or
		 * on MPC connect
		 */
		propagate = sk->sk_state == TCP_SYN_SENT ||
			    __mptcp_check_fallback(msk);
		if (!propagate)
			continue;

		inet_sk_state_store(sk, inet_sk_state_load(ssk));
		sk->sk_err = -ssk_err;

		/* This barrier is coupled with smp_rmb() in mptcp_poll() */
		smp_wmb();
		sk->sk_error_report(sk);
		break;
	}
}
/* sk_error_report hook installed on subflow sockets.
 *
 * Under the msk data lock, either report the error on the parent socket
 * right away or, when the parent is currently owned by a user context,
 * set MPTCP_ERROR_REPORT so the report is done later when that flag is
 * tested and cleared.
 */
static void subflow_error_report(struct sock *ssk)
{
	struct sock *parent = mptcp_subflow_ctx(ssk)->conn;

	mptcp_data_lock(parent);
	if (sock_owned_by_user(parent))
		set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(parent)->flags);
	else
		__mptcp_error_report(parent);
	mptcp_data_unlock(parent);
}
static struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
@ -1470,9 +1502,11 @@ static int subflow_ulp_init(struct sock *sk)
ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
ctx->tcp_write_space = sk->sk_write_space;
ctx->tcp_error_report = sk->sk_error_report;
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
sk->sk_error_report = subflow_error_report;
out:
return err;
}
@ -1526,6 +1560,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
new_ctx->tcp_write_space = old_ctx->tcp_write_space;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;

View File

@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
static void tcf_idr_insert_many(struct tc_action *actions[])
void tcf_idr_insert_many(struct tc_action *actions[])
{
int i;

View File

@ -3060,6 +3060,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
act->type = exts->type = TCA_OLD_COMPAT;
exts->actions[0] = act;
exts->nr_actions = 1;
tcf_idr_insert_many(exts->actions);
} else if (exts->action && tb[exts->action]) {
int err;

View File

@ -30,6 +30,11 @@
#include <uapi/linux/netfilter/nf_conntrack_common.h>
/* Upper bound and bitmask for the TCA_FLOWER_KEY_CT_FLAGS_* values.
 *
 * _MAX is (__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) shifted left by one and
 * _MASK is _MAX - 1. fl_policy uses _MASK with NLA_POLICY_MASK() to
 * restrict TCA_FLOWER_KEY_CT_STATE and TCA_FLOWER_KEY_CT_STATE_MASK.
 *
 * NOTE(review): this presumably relies on __TCA_FLOWER_KEY_CT_FLAGS_MAX
 * being one past the highest single-bit flag, so that _MASK covers every
 * defined flag bit - confirm against the uapi header.
 */
#define TCA_FLOWER_KEY_CT_FLAGS_MAX \
((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1)
#define TCA_FLOWER_KEY_CT_FLAGS_MASK \
(TCA_FLOWER_KEY_CT_FLAGS_MAX - 1)
struct fl_flow_key {
struct flow_dissector_key_meta meta;
struct flow_dissector_key_control control;
@ -690,8 +695,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
[TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_STATE] =
NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
[TCA_FLOWER_KEY_CT_STATE_MASK] =
NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK),
[TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 },
@ -1394,12 +1401,33 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
return 0;
}
/* Sanity-check a combination of TCA_FLOWER_KEY_CT_FLAGS_* bits.
 *
 * @state:  flag bits to validate
 * @tb:     attribute the bits came from, attached to the extack message
 * @extack: extended ack used to report the reason for a rejection
 *
 * Returns 0 when the combination is acceptable, -EINVAL when any flag is
 * set without the "tracked" flag, or when "new" and "established" are both
 * set.
 */
static int fl_validate_ct_state(u16 state, struct nlattr *tb,
				struct netlink_ext_ack *extack)
{
	u16 is_new = state & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	u16 is_est = state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;

	/* No flags at all: nothing to validate. */
	if (state == 0)
		return 0;

	if ((state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED) == 0) {
		NL_SET_ERR_MSG_ATTR(extack, tb,
				    "no trk, so no other flag can be set");
		return -EINVAL;
	}

	if (is_new != 0 && is_est != 0) {
		NL_SET_ERR_MSG_ATTR(extack, tb,
				    "new and est are mutually exclusive");
		return -EINVAL;
	}

	return 0;
}
static int fl_set_key_ct(struct nlattr **tb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask,
struct netlink_ext_ack *extack)
{
if (tb[TCA_FLOWER_KEY_CT_STATE]) {
int err;
if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) {
NL_SET_ERR_MSG(extack, "Conntrack isn't enabled");
return -EOPNOTSUPP;
@ -1407,6 +1435,13 @@ static int fl_set_key_ct(struct nlattr **tb,
fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE,
&mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
sizeof(key->ct_state));
err = fl_validate_ct_state(mask->ct_state,
tb[TCA_FLOWER_KEY_CT_STATE_MASK],
extack);
if (err)
return err;
}
if (tb[TCA_FLOWER_KEY_CT_ZONE]) {
if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) {

View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
# Create 2 namespaces with two veth peers, and
# forward packets in-between using generic XDP
#
@ -57,12 +57,8 @@ test_xdp_redirect()
ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null
local ret1=$?
ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null
local ret2=$?
if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then
if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null &&
ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then
echo "selftests: test_xdp_redirect $xdpmode [PASS]";
else
ret=1

View File

@ -3,7 +3,7 @@
# Space-separated list of test function names; every entry must name a
# function defined in this script (e.g. match_ip_ttl_test).
ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
match_ip_tos_test match_indev_test match_mpls_label_test \
match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
match_mpls_lse_test"
NUM_NETIFS=2
@ -312,6 +312,42 @@ match_ip_tos_test()
log_test "ip_tos match ($tcflags)"
}
# Check flower matching on the IPv4 TTL (ip_ttl key) on $h2 ingress.
# Uses $h1/$h2, $h1mac/$h2mac, $tcflags and $MZ from the environment and
# records the outcome through check_err/check_fail/log_test.
match_ip_ttl_test()
{
RET=0
# pref 1 / handle 101: dst_ip 192.0.2.2 with ip_ttl 63.
tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
$tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop
# pref 2 / handle 102: dst_ip 192.0.2.2 with no TTL condition.
tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
$tcflags dst_ip 192.0.2.2 action drop
# Send two ttl=63 packets: a plain one, and one with "mf,frag=256" set
# (presumably an IP fragment - confirm against the $MZ tool's syntax).
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip "ttl=63" -q
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip "ttl=63,mf,frag=256" -q
# Both packets are expected on filter 101 (count 2), none on filter 102.
tc_check_packets "dev $h2 ingress" 102 1
check_fail $? "Matched on the wrong filter (no check on ttl)"
tc_check_packets "dev $h2 ingress" 101 2
check_err $? "Did not match on correct filter (ttl=63)"
# A ttl=255 packet must not hit the ttl=63 filter; it is expected on
# filter 102 instead (count 1), leaving filter 101 short of 3.
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip "ttl=255" -q
tc_check_packets "dev $h2 ingress" 101 3
check_fail $? "Matched on a wrong filter (ttl=63)"
tc_check_packets "dev $h2 ingress" 102 1
check_err $? "Did not match on correct filter (no check on ttl)"
# Remove both filters so later tests start from a clean ingress qdisc.
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
log_test "ip_ttl match ($tcflags)"
}
match_indev_test()
{
RET=0