Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Pull networking fixes from David Miller:

 1) Verify netlink attributes properly in nf_queue, from Eric Dumazet.

 2) Need to bump memory lock rlimit for test_sockmap bpf test, from
    Yonghong Song.

 3) Fix VLAN handling in lan78xx driver, from Dave Stevenson.

 4) Fix uninitialized read in nf_log, from Jann Horn.

 5) Fix raw command length parsing in mlx5, from Alex Vesker.

 6) Clean up loopback RDS connections upon netns deletion, from Sowmini
    Varadhan.

 7) Fix regressions in FIB rule matching during create, from Jason A.
    Donenfeld and Roopa Prabhu.

 8) Fix mpls ether type detection in nfp, from Pieter Jansen van Vuuren.

 9) More bpfilter build fixes/adjustments from Masahiro Yamada.

10) Fix XDP_{TX,REDIRECT} flushing in various drivers, from Jesper
    Dangaard Brouer.

11) fib_tests.sh file permissions were broken, from Shuah Khan.

12) Make sure BH/preemption is disabled in data path of mac80211, from
    Denis Kenzior.

13) Don't ignore nla_parse_nested() return values in nl80211, from
    Johannes Berg.

14) Properly account sock objects to kmemcg, from Shakeel Butt.

15) Adjustments to setting bpf program permissions to read-only, from
    Daniel Borkmann.

16) TCP Fast Open key endianness was broken; it always took on the host
    endianness. Whoops. Explicitly make it little endian. From Yuchung
    Cheng.

17) Fix prefix route setting for link local addresses in ipv6, from
    David Ahern.

18) Potential Spectre v1 in zatm driver, from Gustavo A. R. Silva.

19) Various bpf sockmap fixes, from John Fastabend.

20) Use after free for GRO with ESP, from Sabrina Dubroca.

21) Passing bogus flags to crypto_alloc_shash() in ipv6 SR code, from
    Eric Biggers.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net: (87 commits)
  qede: Adverstise software timestamp caps when PHC is not available.
  qed: Fix use of incorrect size in memcpy call.
  qed: Fix setting of incorrect eswitch mode.
  qed: Limit msix vectors in kdump kernel to the minimum required count.
  ipvlan: call dev_change_flags when ipvlan mode is reset
  ipv6: sr: fix passing wrong flags to crypto_alloc_shash()
  net: fix use-after-free in GRO with ESP
  tcp: prevent bogus FRTO undos with non-SACK flows
  bpf: sockhash, add release routine
  bpf: sockhash fix omitted bucket lock in sock_close
  bpf: sockmap, fix smap_list_map_remove when psock is in many maps
  bpf: sockmap, fix crash when ipv6 sock is added
  net: fib_rules: bring back rule_exists to match rule during add
  hv_netvsc: split sub-channel setup into async and sync
  net: use dev_change_tx_queue_len() for SIOCSIFTXQLEN
  atm: zatm: Fix potential Spectre v1
  s390/qeth: consistently re-enable device features
  s390/qeth: don't clobber buffer on async TX completion
  s390/qeth: avoid using is_multicast_ether_addr_64bits on (u8 *)[6]
  s390/qeth: fix race when setting MAC address
  ...
This commit is contained in:
Linus Torvalds 2018-07-02 11:18:28 -07:00
commit 4e33d7d479
114 changed files with 1248 additions and 623 deletions

View File

@ -507,11 +507,6 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC) $(KBUILD_CFLA
KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
endif endif
ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/cc-can-link.sh $(CC)), y)
CC_CAN_LINK := y
export CC_CAN_LINK
endif
# The expansion should be delayed until arch/$(SRCARCH)/Makefile is included. # The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
# Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile. # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
# CC_VERSION_TEXT is referenced from Kconfig (so it needs export), # CC_VERSION_TEXT is referenced from Kconfig (so it needs export),

View File

@ -1844,7 +1844,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
/* there are 2 passes here */ /* there are 2 passes here */
bpf_jit_dump(prog->len, image_size, 2, ctx.target); bpf_jit_dump(prog->len, image_size, 2, ctx.target);
set_memory_ro((unsigned long)header, header->pages); bpf_jit_binary_lock_ro(header);
prog->bpf_func = (void *)ctx.target; prog->bpf_func = (void *)ctx.target;
prog->jited = 1; prog->jited = 1;
prog->jited_len = image_size; prog->jited_len = image_size;

View File

@ -1286,6 +1286,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
goto free_addrs; goto free_addrs;
} }
if (bpf_jit_prog(&jit, fp)) { if (bpf_jit_prog(&jit, fp)) {
bpf_jit_binary_free(header);
fp = orig_fp; fp = orig_fp;
goto free_addrs; goto free_addrs;
} }

View File

@ -1618,7 +1618,7 @@ static int rx_init(struct atm_dev *dev)
skb_queue_head_init(&iadev->rx_dma_q); skb_queue_head_init(&iadev->rx_dma_q);
iadev->rx_free_desc_qhead = NULL; iadev->rx_free_desc_qhead = NULL;
iadev->rx_open = kcalloc(4, iadev->num_vc, GFP_KERNEL); iadev->rx_open = kcalloc(iadev->num_vc, sizeof(void *), GFP_KERNEL);
if (!iadev->rx_open) { if (!iadev->rx_open) {
printk(KERN_ERR DEV_LABEL "itf %d couldn't get free page\n", printk(KERN_ERR DEV_LABEL "itf %d couldn't get free page\n",
dev->number); dev->number);

View File

@ -1483,6 +1483,8 @@ static int zatm_ioctl(struct atm_dev *dev,unsigned int cmd,void __user *arg)
return -EFAULT; return -EFAULT;
if (pool < 0 || pool > ZATM_LAST_POOL) if (pool < 0 || pool > ZATM_LAST_POOL)
return -EINVAL; return -EINVAL;
pool = array_index_nospec(pool,
ZATM_LAST_POOL + 1);
if (copy_from_user(&info, if (copy_from_user(&info,
&((struct zatm_pool_req __user *) arg)->info, &((struct zatm_pool_req __user *) arg)->info,
sizeof(info))) return -EFAULT; sizeof(info))) return -EFAULT;

View File

@ -6113,7 +6113,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports),
MLX5_CAP_GEN(mdev, num_vhca_ports)); MLX5_CAP_GEN(mdev, num_vhca_ports));
if (MLX5_VPORT_MANAGER(mdev) && if (MLX5_ESWITCH_MANAGER(mdev) &&
mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) { mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0); dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);

View File

@ -207,29 +207,19 @@ void lirc_bpf_free(struct rc_dev *rcdev)
bpf_prog_array_free(rcdev->raw->progs); bpf_prog_array_free(rcdev->raw->progs);
} }
int lirc_prog_attach(const union bpf_attr *attr) int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{ {
struct bpf_prog *prog;
struct rc_dev *rcdev; struct rc_dev *rcdev;
int ret; int ret;
if (attr->attach_flags) if (attr->attach_flags)
return -EINVAL; return -EINVAL;
prog = bpf_prog_get_type(attr->attach_bpf_fd,
BPF_PROG_TYPE_LIRC_MODE2);
if (IS_ERR(prog))
return PTR_ERR(prog);
rcdev = rc_dev_get_from_fd(attr->target_fd); rcdev = rc_dev_get_from_fd(attr->target_fd);
if (IS_ERR(rcdev)) { if (IS_ERR(rcdev))
bpf_prog_put(prog);
return PTR_ERR(rcdev); return PTR_ERR(rcdev);
}
ret = lirc_bpf_attach(rcdev, prog); ret = lirc_bpf_attach(rcdev, prog);
if (ret)
bpf_prog_put(prog);
put_device(&rcdev->dev); put_device(&rcdev->dev);

View File

@ -1897,13 +1897,19 @@ static int alx_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev); struct pci_dev *pdev = to_pci_dev(dev);
struct alx_priv *alx = pci_get_drvdata(pdev); struct alx_priv *alx = pci_get_drvdata(pdev);
struct alx_hw *hw = &alx->hw; struct alx_hw *hw = &alx->hw;
int err;
alx_reset_phy(hw); alx_reset_phy(hw);
if (!netif_running(alx->dev)) if (!netif_running(alx->dev))
return 0; return 0;
netif_device_attach(alx->dev); netif_device_attach(alx->dev);
return __alx_open(alx, true);
rtnl_lock();
err = __alx_open(alx, true);
rtnl_unlock();
return err;
} }
static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);

View File

@ -1533,6 +1533,7 @@ struct bnx2x {
struct link_vars link_vars; struct link_vars link_vars;
u32 link_cnt; u32 link_cnt;
struct bnx2x_link_report_data last_reported_link; struct bnx2x_link_report_data last_reported_link;
bool force_link_down;
struct mdio_if_info mdio; struct mdio_if_info mdio;

View File

@ -1261,6 +1261,11 @@ void __bnx2x_link_report(struct bnx2x *bp)
{ {
struct bnx2x_link_report_data cur_data; struct bnx2x_link_report_data cur_data;
if (bp->force_link_down) {
bp->link_vars.link_up = 0;
return;
}
/* reread mf_cfg */ /* reread mf_cfg */
if (IS_PF(bp) && !CHIP_IS_E1(bp)) if (IS_PF(bp) && !CHIP_IS_E1(bp))
bnx2x_read_mf_cfg(bp); bnx2x_read_mf_cfg(bp);
@ -2817,6 +2822,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
bp->pending_max = 0; bp->pending_max = 0;
} }
bp->force_link_down = false;
if (bp->port.pmf) { if (bp->port.pmf) {
rc = bnx2x_initial_phy_init(bp, load_mode); rc = bnx2x_initial_phy_init(bp, load_mode);
if (rc) if (rc)

View File

@ -10279,6 +10279,12 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
bp->sp_rtnl_state = 0; bp->sp_rtnl_state = 0;
smp_mb(); smp_mb();
/* Immediately indicate link as down */
bp->link_vars.link_up = 0;
bp->force_link_down = true;
netif_carrier_off(bp->dev);
BNX2X_ERR("Indicating link is down due to Tx-timeout\n");
bnx2x_nic_unload(bp, UNLOAD_NORMAL, true); bnx2x_nic_unload(bp, UNLOAD_NORMAL, true);
/* When ret value shows failure of allocation failure, /* When ret value shows failure of allocation failure,
* the nic is rebooted again. If open still fails, a error * the nic is rebooted again. If open still fails, a error

View File

@ -660,7 +660,7 @@ static int cnic_init_id_tbl(struct cnic_id_tbl *id_tbl, u32 size, u32 start_id,
id_tbl->max = size; id_tbl->max = size;
id_tbl->next = next; id_tbl->next = next;
spin_lock_init(&id_tbl->lock); spin_lock_init(&id_tbl->lock);
id_tbl->table = kcalloc(DIV_ROUND_UP(size, 32), 4, GFP_KERNEL); id_tbl->table = kcalloc(BITS_TO_LONGS(size), sizeof(long), GFP_KERNEL);
if (!id_tbl->table) if (!id_tbl->table)
return -ENOMEM; return -ENOMEM;

View File

@ -3726,6 +3726,8 @@ static int at91ether_init(struct platform_device *pdev)
int err; int err;
u32 reg; u32 reg;
bp->queues[0].bp = bp;
dev->netdev_ops = &at91ether_netdev_ops; dev->netdev_ops = &at91ether_netdev_ops;
dev->ethtool_ops = &macb_ethtool_ops; dev->ethtool_ops = &macb_ethtool_ops;

View File

@ -125,6 +125,9 @@ MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
/* Default alignment for start of data in an Rx FD */ /* Default alignment for start of data in an Rx FD */
#define DPAA_FD_DATA_ALIGNMENT 16 #define DPAA_FD_DATA_ALIGNMENT 16
/* The DPAA requires 256 bytes reserved and mapped for the SGT */
#define DPAA_SGT_SIZE 256
/* Values for the L3R field of the FM Parse Results /* Values for the L3R field of the FM Parse Results
*/ */
/* L3 Type field: First IP Present IPv4 */ /* L3 Type field: First IP Present IPv4 */
@ -1617,8 +1620,8 @@ static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) { if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
nr_frags = skb_shinfo(skb)->nr_frags; nr_frags = skb_shinfo(skb)->nr_frags;
dma_unmap_single(dev, addr, qm_fd_get_offset(fd) + dma_unmap_single(dev, addr,
sizeof(struct qm_sg_entry) * (1 + nr_frags), qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
dma_dir); dma_dir);
/* The sgt buffer has been allocated with netdev_alloc_frag(), /* The sgt buffer has been allocated with netdev_alloc_frag(),
@ -1903,8 +1906,7 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
void *sgt_buf; void *sgt_buf;
/* get a page frag to store the SGTable */ /* get a page frag to store the SGTable */
sz = SKB_DATA_ALIGN(priv->tx_headroom + sz = SKB_DATA_ALIGN(priv->tx_headroom + DPAA_SGT_SIZE);
sizeof(struct qm_sg_entry) * (1 + nr_frags));
sgt_buf = netdev_alloc_frag(sz); sgt_buf = netdev_alloc_frag(sz);
if (unlikely(!sgt_buf)) { if (unlikely(!sgt_buf)) {
netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n", netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
@ -1972,9 +1974,8 @@ static int skb_to_sg_fd(struct dpaa_priv *priv,
skbh = (struct sk_buff **)buffer_start; skbh = (struct sk_buff **)buffer_start;
*skbh = skb; *skbh = skb;
addr = dma_map_single(dev, buffer_start, priv->tx_headroom + addr = dma_map_single(dev, buffer_start,
sizeof(struct qm_sg_entry) * (1 + nr_frags), priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
dma_dir);
if (unlikely(dma_mapping_error(dev, addr))) { if (unlikely(dma_mapping_error(dev, addr))) {
dev_err(dev, "DMA mapping failed"); dev_err(dev, "DMA mapping failed");
err = -EINVAL; err = -EINVAL;

View File

@ -324,6 +324,10 @@ struct fman_port_qmi_regs {
#define HWP_HXS_PHE_REPORT 0x00000800 #define HWP_HXS_PHE_REPORT 0x00000800
#define HWP_HXS_PCAC_PSTAT 0x00000100 #define HWP_HXS_PCAC_PSTAT 0x00000100
#define HWP_HXS_PCAC_PSTOP 0x00000001 #define HWP_HXS_PCAC_PSTOP 0x00000001
#define HWP_HXS_TCP_OFFSET 0xA
#define HWP_HXS_UDP_OFFSET 0xB
#define HWP_HXS_SH_PAD_REM 0x80000000
struct fman_port_hwp_regs { struct fman_port_hwp_regs {
struct { struct {
u32 ssa; /* Soft Sequence Attachment */ u32 ssa; /* Soft Sequence Attachment */
@ -728,6 +732,10 @@ static void init_hwp(struct fman_port *port)
iowrite32be(0xffffffff, &regs->pmda[i].lcv); iowrite32be(0xffffffff, &regs->pmda[i].lcv);
} }
/* Short packet padding removal from checksum calculation */
iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_TCP_OFFSET].ssa);
iowrite32be(HWP_HXS_SH_PAD_REM, &regs->pmda[HWP_HXS_UDP_OFFSET].ssa);
start_port_hwp(port); start_port_hwp(port);
} }

View File

@ -439,6 +439,7 @@ static void rx_free_irq(struct hinic_rxq *rxq)
{ {
struct hinic_rq *rq = rxq->rq; struct hinic_rq *rq = rxq->rq;
irq_set_affinity_hint(rq->irq, NULL);
free_irq(rq->irq, rxq); free_irq(rq->irq, rxq);
rx_del_napi(rxq); rx_del_napi(rxq);
} }

View File

@ -2199,9 +2199,10 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
return true; return true;
} }
#define I40E_XDP_PASS 0 #define I40E_XDP_PASS 0
#define I40E_XDP_CONSUMED 1 #define I40E_XDP_CONSUMED BIT(0)
#define I40E_XDP_TX 2 #define I40E_XDP_TX BIT(1)
#define I40E_XDP_REDIR BIT(2)
static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf, static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
struct i40e_ring *xdp_ring); struct i40e_ring *xdp_ring);
@ -2248,7 +2249,7 @@ static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
break; break;
case XDP_REDIRECT: case XDP_REDIRECT:
err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
result = !err ? I40E_XDP_TX : I40E_XDP_CONSUMED; result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
break; break;
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
@ -2311,7 +2312,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
unsigned int total_rx_bytes = 0, total_rx_packets = 0; unsigned int total_rx_bytes = 0, total_rx_packets = 0;
struct sk_buff *skb = rx_ring->skb; struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring); u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
bool failure = false, xdp_xmit = false; unsigned int xdp_xmit = 0;
bool failure = false;
struct xdp_buff xdp; struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq; xdp.rxq = &rx_ring->xdp_rxq;
@ -2372,8 +2374,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
} }
if (IS_ERR(skb)) { if (IS_ERR(skb)) {
if (PTR_ERR(skb) == -I40E_XDP_TX) { unsigned int xdp_res = -PTR_ERR(skb);
xdp_xmit = true;
if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
xdp_xmit |= xdp_res;
i40e_rx_buffer_flip(rx_ring, rx_buffer, size); i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
} else { } else {
rx_buffer->pagecnt_bias++; rx_buffer->pagecnt_bias++;
@ -2427,12 +2431,14 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
total_rx_packets++; total_rx_packets++;
} }
if (xdp_xmit) { if (xdp_xmit & I40E_XDP_REDIR)
xdp_do_flush_map();
if (xdp_xmit & I40E_XDP_TX) {
struct i40e_ring *xdp_ring = struct i40e_ring *xdp_ring =
rx_ring->vsi->xdp_rings[rx_ring->queue_index]; rx_ring->vsi->xdp_rings[rx_ring->queue_index];
i40e_xdp_ring_update_tail(xdp_ring); i40e_xdp_ring_update_tail(xdp_ring);
xdp_do_flush_map();
} }
rx_ring->skb = skb; rx_ring->skb = skb;

View File

@ -2186,9 +2186,10 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
return skb; return skb;
} }
#define IXGBE_XDP_PASS 0 #define IXGBE_XDP_PASS 0
#define IXGBE_XDP_CONSUMED 1 #define IXGBE_XDP_CONSUMED BIT(0)
#define IXGBE_XDP_TX 2 #define IXGBE_XDP_TX BIT(1)
#define IXGBE_XDP_REDIR BIT(2)
static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
struct xdp_frame *xdpf); struct xdp_frame *xdpf);
@ -2225,7 +2226,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
case XDP_REDIRECT: case XDP_REDIRECT:
err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog); err = xdp_do_redirect(adapter->netdev, xdp, xdp_prog);
if (!err) if (!err)
result = IXGBE_XDP_TX; result = IXGBE_XDP_REDIR;
else else
result = IXGBE_XDP_CONSUMED; result = IXGBE_XDP_CONSUMED;
break; break;
@ -2285,7 +2286,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
unsigned int mss = 0; unsigned int mss = 0;
#endif /* IXGBE_FCOE */ #endif /* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring); u16 cleaned_count = ixgbe_desc_unused(rx_ring);
bool xdp_xmit = false; unsigned int xdp_xmit = 0;
struct xdp_buff xdp; struct xdp_buff xdp;
xdp.rxq = &rx_ring->xdp_rxq; xdp.rxq = &rx_ring->xdp_rxq;
@ -2328,8 +2329,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
} }
if (IS_ERR(skb)) { if (IS_ERR(skb)) {
if (PTR_ERR(skb) == -IXGBE_XDP_TX) { unsigned int xdp_res = -PTR_ERR(skb);
xdp_xmit = true;
if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
xdp_xmit |= xdp_res;
ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size); ixgbe_rx_buffer_flip(rx_ring, rx_buffer, size);
} else { } else {
rx_buffer->pagecnt_bias++; rx_buffer->pagecnt_bias++;
@ -2401,7 +2404,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
total_rx_packets++; total_rx_packets++;
} }
if (xdp_xmit) { if (xdp_xmit & IXGBE_XDP_REDIR)
xdp_do_flush_map();
if (xdp_xmit & IXGBE_XDP_TX) {
struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
/* Force memory writes to complete before letting h/w /* Force memory writes to complete before letting h/w
@ -2409,8 +2415,6 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
*/ */
wmb(); wmb();
writel(ring->next_to_use, ring->tail); writel(ring->next_to_use, ring->tail);
xdp_do_flush_map();
} }
u64_stats_update_begin(&rx_ring->syncp); u64_stats_update_begin(&rx_ring->syncp);

View File

@ -807,6 +807,7 @@ static void cmd_work_handler(struct work_struct *work)
unsigned long flags; unsigned long flags;
bool poll_cmd = ent->polling; bool poll_cmd = ent->polling;
int alloc_ret; int alloc_ret;
int cmd_mode;
sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem;
down(sem); down(sem);
@ -853,6 +854,7 @@ static void cmd_work_handler(struct work_struct *work)
set_signature(ent, !cmd->checksum_disabled); set_signature(ent, !cmd->checksum_disabled);
dump_command(dev, ent, 1); dump_command(dev, ent, 1);
ent->ts1 = ktime_get_ns(); ent->ts1 = ktime_get_ns();
cmd_mode = cmd->mode;
if (ent->callback) if (ent->callback)
schedule_delayed_work(&ent->cb_timeout_work, cb_timeout); schedule_delayed_work(&ent->cb_timeout_work, cb_timeout);
@ -877,7 +879,7 @@ static void cmd_work_handler(struct work_struct *work)
iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell); iowrite32be(1 << ent->idx, &dev->iseg->cmd_dbell);
mmiowb(); mmiowb();
/* if not in polling don't use ent after this point */ /* if not in polling don't use ent after this point */
if (cmd->mode == CMD_MODE_POLLING || poll_cmd) { if (cmd_mode == CMD_MODE_POLLING || poll_cmd) {
poll_timeout(ent); poll_timeout(ent);
/* make sure we read the descriptor after ownership is SW */ /* make sure we read the descriptor after ownership is SW */
rmb(); rmb();
@ -1276,7 +1278,7 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
{ {
struct mlx5_core_dev *dev = filp->private_data; struct mlx5_core_dev *dev = filp->private_data;
struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; struct mlx5_cmd_debug *dbg = &dev->cmd.dbg;
char outlen_str[8]; char outlen_str[8] = {0};
int outlen; int outlen;
void *ptr; void *ptr;
int err; int err;
@ -1291,8 +1293,6 @@ static ssize_t outlen_write(struct file *filp, const char __user *buf,
if (copy_from_user(outlen_str, buf, count)) if (copy_from_user(outlen_str, buf, count))
return -EFAULT; return -EFAULT;
outlen_str[7] = 0;
err = sscanf(outlen_str, "%d", &outlen); err = sscanf(outlen_str, "%d", &outlen);
if (err < 0) if (err < 0)
return err; return err;

View File

@ -2846,7 +2846,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
mlx5e_activate_channels(&priv->channels); mlx5e_activate_channels(&priv->channels);
netif_tx_start_all_queues(priv->netdev); netif_tx_start_all_queues(priv->netdev);
if (MLX5_VPORT_MANAGER(priv->mdev)) if (MLX5_ESWITCH_MANAGER(priv->mdev))
mlx5e_add_sqs_fwd_rules(priv); mlx5e_add_sqs_fwd_rules(priv);
mlx5e_wait_channels_min_rx_wqes(&priv->channels); mlx5e_wait_channels_min_rx_wqes(&priv->channels);
@ -2857,7 +2857,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
{ {
mlx5e_redirect_rqts_to_drop(priv); mlx5e_redirect_rqts_to_drop(priv);
if (MLX5_VPORT_MANAGER(priv->mdev)) if (MLX5_ESWITCH_MANAGER(priv->mdev))
mlx5e_remove_sqs_fwd_rules(priv); mlx5e_remove_sqs_fwd_rules(priv);
/* FIXME: This is a W/A only for tx timeout watch dog false alarm when /* FIXME: This is a W/A only for tx timeout watch dog false alarm when
@ -4597,7 +4597,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_set_netdev_dev_addr(netdev); mlx5e_set_netdev_dev_addr(netdev);
#if IS_ENABLED(CONFIG_MLX5_ESWITCH) #if IS_ENABLED(CONFIG_MLX5_ESWITCH)
if (MLX5_VPORT_MANAGER(mdev)) if (MLX5_ESWITCH_MANAGER(mdev))
netdev->switchdev_ops = &mlx5e_switchdev_ops; netdev->switchdev_ops = &mlx5e_switchdev_ops;
#endif #endif
@ -4753,7 +4753,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
mlx5e_enable_async_events(priv); mlx5e_enable_async_events(priv);
if (MLX5_VPORT_MANAGER(priv->mdev)) if (MLX5_ESWITCH_MANAGER(priv->mdev))
mlx5e_register_vport_reps(priv); mlx5e_register_vport_reps(priv);
if (netdev->reg_state != NETREG_REGISTERED) if (netdev->reg_state != NETREG_REGISTERED)
@ -4788,7 +4788,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
queue_work(priv->wq, &priv->set_rx_mode_work); queue_work(priv->wq, &priv->set_rx_mode_work);
if (MLX5_VPORT_MANAGER(priv->mdev)) if (MLX5_ESWITCH_MANAGER(priv->mdev))
mlx5e_unregister_vport_reps(priv); mlx5e_unregister_vport_reps(priv);
mlx5e_disable_async_events(priv); mlx5e_disable_async_events(priv);
@ -4972,7 +4972,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
return NULL; return NULL;
#ifdef CONFIG_MLX5_ESWITCH #ifdef CONFIG_MLX5_ESWITCH
if (MLX5_VPORT_MANAGER(mdev)) { if (MLX5_ESWITCH_MANAGER(mdev)) {
rpriv = mlx5e_alloc_nic_rep_priv(mdev); rpriv = mlx5e_alloc_nic_rep_priv(mdev);
if (!rpriv) { if (!rpriv) {
mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n");

View File

@ -823,7 +823,7 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep; struct mlx5_eswitch_rep *rep;
if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) if (!MLX5_ESWITCH_MANAGER(priv->mdev))
return false; return false;
rep = rpriv->rep; rep = rpriv->rep;
@ -837,8 +837,12 @@ bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
{ {
struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch_rep *rep;
if (!MLX5_ESWITCH_MANAGER(priv->mdev))
return false;
rep = rpriv->rep;
if (rep && rep->vport != FDB_UPLINK_VPORT) if (rep && rep->vport != FDB_UPLINK_VPORT)
return true; return true;

View File

@ -1594,17 +1594,15 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
} }
/* Public E-Switch API */ /* Public E-Switch API */
#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) #define ESW_ALLOWED(esw) ((esw) && MLX5_ESWITCH_MANAGER((esw)->dev))
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
{ {
int err; int err;
int i, enabled_events; int i, enabled_events;
if (!ESW_ALLOWED(esw)) if (!ESW_ALLOWED(esw) ||
return 0;
if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) { !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n"); esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
return -EOPNOTSUPP; return -EOPNOTSUPP;
@ -1806,7 +1804,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
u64 node_guid; u64 node_guid;
int err = 0; int err = 0;
if (!ESW_ALLOWED(esw)) if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
return -EPERM; return -EPERM;
if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac))
return -EINVAL; return -EINVAL;
@ -1883,7 +1881,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
{ {
struct mlx5_vport *evport; struct mlx5_vport *evport;
if (!ESW_ALLOWED(esw)) if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
return -EPERM; return -EPERM;
if (!LEGAL_VPORT(esw, vport)) if (!LEGAL_VPORT(esw, vport))
return -EINVAL; return -EINVAL;

View File

@ -1079,8 +1079,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (!MLX5_CAP_GEN(dev, vport_group_manager)) if(!MLX5_ESWITCH_MANAGER(dev))
return -EOPNOTSUPP; return -EPERM;
if (dev->priv.eswitch->mode == SRIOV_NONE) if (dev->priv.eswitch->mode == SRIOV_NONE)
return -EOPNOTSUPP; return -EOPNOTSUPP;

View File

@ -32,6 +32,7 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/mlx5/driver.h> #include <linux/mlx5/driver.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include "fs_core.h" #include "fs_core.h"
@ -2652,7 +2653,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err; goto err;
} }
if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { if (MLX5_ESWITCH_MANAGER(dev)) {
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) { if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
err = init_fdb_root_ns(steering); err = init_fdb_root_ns(steering);
if (err) if (err)

View File

@ -32,6 +32,7 @@
#include <linux/mlx5/driver.h> #include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h> #include <linux/mlx5/cmd.h>
#include <linux/mlx5/eswitch.h>
#include <linux/module.h> #include <linux/module.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include "../../mlxfw/mlxfw.h" #include "../../mlxfw/mlxfw.h"
@ -159,13 +160,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
} }
if (MLX5_CAP_GEN(dev, vport_group_manager) && if (MLX5_CAP_GEN(dev, vport_group_manager) &&
MLX5_CAP_GEN(dev, eswitch_flow_table)) { MLX5_ESWITCH_MANAGER(dev)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE); err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH_FLOW_TABLE);
if (err) if (err)
return err; return err;
} }
if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { if (MLX5_ESWITCH_MANAGER(dev)) {
err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH); err = mlx5_core_get_caps(dev, MLX5_CAP_ESWITCH);
if (err) if (err)
return err; return err;

View File

@ -33,6 +33,7 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/mlx5/driver.h> #include <linux/mlx5/driver.h>
#include <linux/mlx5/mlx5_ifc.h> #include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/eswitch.h>
#include "mlx5_core.h" #include "mlx5_core.h"
#include "lib/mpfs.h" #include "lib/mpfs.h"
@ -98,7 +99,7 @@ int mlx5_mpfs_init(struct mlx5_core_dev *dev)
int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
struct mlx5_mpfs *mpfs; struct mlx5_mpfs *mpfs;
if (!MLX5_VPORT_MANAGER(dev)) if (!MLX5_ESWITCH_MANAGER(dev))
return 0; return 0;
mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL);
@ -122,7 +123,7 @@ void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev)
{ {
struct mlx5_mpfs *mpfs = dev->priv.mpfs; struct mlx5_mpfs *mpfs = dev->priv.mpfs;
if (!MLX5_VPORT_MANAGER(dev)) if (!MLX5_ESWITCH_MANAGER(dev))
return; return;
WARN_ON(!hlist_empty(mpfs->hash)); WARN_ON(!hlist_empty(mpfs->hash));
@ -137,7 +138,7 @@ int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac)
u32 index; u32 index;
int err; int err;
if (!MLX5_VPORT_MANAGER(dev)) if (!MLX5_ESWITCH_MANAGER(dev))
return 0; return 0;
mutex_lock(&mpfs->lock); mutex_lock(&mpfs->lock);
@ -179,7 +180,7 @@ int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac)
int err = 0; int err = 0;
u32 index; u32 index;
if (!MLX5_VPORT_MANAGER(dev)) if (!MLX5_ESWITCH_MANAGER(dev))
return 0; return 0;
mutex_lock(&mpfs->lock); mutex_lock(&mpfs->lock);

View File

@ -701,7 +701,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in, static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
int inlen) int inlen)
{ {
u32 out[MLX5_ST_SZ_DW(qtct_reg)]; u32 out[MLX5_ST_SZ_DW(qetc_reg)];
if (!MLX5_CAP_GEN(mdev, ets)) if (!MLX5_CAP_GEN(mdev, ets))
return -EOPNOTSUPP; return -EOPNOTSUPP;
@ -713,7 +713,7 @@ static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out,
int outlen) int outlen)
{ {
u32 in[MLX5_ST_SZ_DW(qtct_reg)]; u32 in[MLX5_ST_SZ_DW(qetc_reg)];
if (!MLX5_CAP_GEN(mdev, ets)) if (!MLX5_CAP_GEN(mdev, ets))
return -EOPNOTSUPP; return -EOPNOTSUPP;

View File

@ -88,6 +88,9 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
return -EBUSY; return -EBUSY;
} }
if (!MLX5_ESWITCH_MANAGER(dev))
goto enable_vfs_hca;
err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY);
if (err) { if (err) {
mlx5_core_warn(dev, mlx5_core_warn(dev,
@ -95,6 +98,7 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
return err; return err;
} }
enable_vfs_hca:
for (vf = 0; vf < num_vfs; vf++) { for (vf = 0; vf < num_vfs; vf++) {
err = mlx5_core_enable_hca(dev, vf + 1); err = mlx5_core_enable_hca(dev, vf + 1);
if (err) { if (err) {
@ -140,7 +144,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev)
} }
out: out:
mlx5_eswitch_disable_sriov(dev->priv.eswitch); if (MLX5_ESWITCH_MANAGER(dev))
mlx5_eswitch_disable_sriov(dev->priv.eswitch);
if (mlx5_wait_for_vf_pages(dev)) if (mlx5_wait_for_vf_pages(dev))
mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); mlx5_core_warn(dev, "timeout reclaiming VFs pages\n");

View File

@ -549,8 +549,6 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
return -EINVAL; return -EINVAL;
if (!MLX5_CAP_GEN(mdev, vport_group_manager)) if (!MLX5_CAP_GEN(mdev, vport_group_manager))
return -EACCES; return -EACCES;
if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
return -EOPNOTSUPP;
in = kvzalloc(inlen, GFP_KERNEL); in = kvzalloc(inlen, GFP_KERNEL);
if (!in) if (!in)

View File

@ -81,10 +81,10 @@ nfp_bpf_xdp_offload(struct nfp_app *app, struct nfp_net *nn,
ret = nfp_net_bpf_offload(nn, prog, running, extack); ret = nfp_net_bpf_offload(nn, prog, running, extack);
/* Stop offload if replace not possible */ /* Stop offload if replace not possible */
if (ret && prog) if (ret)
nfp_bpf_xdp_offload(app, nn, NULL, extack); return ret;
nn->dp.bpf_offload_xdp = prog && !ret; nn->dp.bpf_offload_xdp = !!prog;
return ret; return ret;
} }
@ -202,6 +202,9 @@ static int nfp_bpf_setup_tc_block(struct net_device *netdev,
if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (tcf_block_shared(f->block))
return -EOPNOTSUPP;
switch (f->command) { switch (f->command) {
case TC_BLOCK_BIND: case TC_BLOCK_BIND:
return tcf_block_cb_register(f->block, return tcf_block_cb_register(f->block,

View File

@ -123,6 +123,20 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *frame,
NFP_FLOWER_MASK_MPLS_Q; NFP_FLOWER_MASK_MPLS_Q;
frame->mpls_lse = cpu_to_be32(t_mpls); frame->mpls_lse = cpu_to_be32(t_mpls);
} else if (dissector_uses_key(flow->dissector,
FLOW_DISSECTOR_KEY_BASIC)) {
/* Check for mpls ether type and set NFP_FLOWER_MASK_MPLS_Q
* bit, which indicates an mpls ether type but without any
* mpls fields.
*/
struct flow_dissector_key_basic *key_basic;
key_basic = skb_flow_dissector_target(flow->dissector,
FLOW_DISSECTOR_KEY_BASIC,
flow->key);
if (key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_UC) ||
key_basic->n_proto == cpu_to_be16(ETH_P_MPLS_MC))
frame->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
} }
} }

View File

@ -264,6 +264,14 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
case cpu_to_be16(ETH_P_ARP): case cpu_to_be16(ETH_P_ARP):
return -EOPNOTSUPP; return -EOPNOTSUPP;
case cpu_to_be16(ETH_P_MPLS_UC):
case cpu_to_be16(ETH_P_MPLS_MC):
if (!(key_layer & NFP_FLOWER_LAYER_MAC)) {
key_layer |= NFP_FLOWER_LAYER_MAC;
key_size += sizeof(struct nfp_flower_mac_mpls);
}
break;
/* Will be included in layer 2. */ /* Will be included in layer 2. */
case cpu_to_be16(ETH_P_8021Q): case cpu_to_be16(ETH_P_8021Q):
break; break;
@ -623,6 +631,9 @@ static int nfp_flower_setup_tc_block(struct net_device *netdev,
if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
return -EOPNOTSUPP; return -EOPNOTSUPP;
if (tcf_block_shared(f->block))
return -EOPNOTSUPP;
switch (f->command) { switch (f->command) {
case TC_BLOCK_BIND: case TC_BLOCK_BIND:
return tcf_block_cb_register(f->block, return tcf_block_cb_register(f->block,

View File

@ -232,7 +232,7 @@ struct nfp_nffw_info *nfp_nffw_info_open(struct nfp_cpp *cpp)
err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res), err = nfp_cpp_read(cpp, nfp_resource_cpp_id(state->res),
nfp_resource_address(state->res), nfp_resource_address(state->res),
fwinf, sizeof(*fwinf)); fwinf, sizeof(*fwinf));
if (err < sizeof(*fwinf)) if (err < (int)sizeof(*fwinf))
goto err_release; goto err_release;
if (!nffw_res_flg_init_get(fwinf)) if (!nffw_res_flg_init_get(fwinf))

View File

@ -709,9 +709,9 @@ qed_dcbx_get_local_lldp_params(struct qed_hwfn *p_hwfn,
p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE]; p_local = &p_hwfn->p_dcbx_info->lldp_local[LLDP_NEAREST_BRIDGE];
memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id, memcpy(params->lldp_local.local_chassis_id, p_local->local_chassis_id,
ARRAY_SIZE(p_local->local_chassis_id)); sizeof(p_local->local_chassis_id));
memcpy(params->lldp_local.local_port_id, p_local->local_port_id, memcpy(params->lldp_local.local_port_id, p_local->local_port_id,
ARRAY_SIZE(p_local->local_port_id)); sizeof(p_local->local_port_id));
} }
static void static void
@ -723,9 +723,9 @@ qed_dcbx_get_remote_lldp_params(struct qed_hwfn *p_hwfn,
p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE]; p_remote = &p_hwfn->p_dcbx_info->lldp_remote[LLDP_NEAREST_BRIDGE];
memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id, memcpy(params->lldp_remote.peer_chassis_id, p_remote->peer_chassis_id,
ARRAY_SIZE(p_remote->peer_chassis_id)); sizeof(p_remote->peer_chassis_id));
memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id, memcpy(params->lldp_remote.peer_port_id, p_remote->peer_port_id,
ARRAY_SIZE(p_remote->peer_port_id)); sizeof(p_remote->peer_port_id));
} }
static int static int

View File

@ -1804,7 +1804,7 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
DP_INFO(p_hwfn, "Failed to update driver state\n"); DP_INFO(p_hwfn, "Failed to update driver state\n");
rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt, rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
QED_OV_ESWITCH_VEB); QED_OV_ESWITCH_NONE);
if (rc) if (rc)
DP_INFO(p_hwfn, "Failed to update eswitch mode\n"); DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
} }

View File

@ -789,6 +789,14 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev,
/* We want a minimum of one slowpath and one fastpath vector per hwfn */ /* We want a minimum of one slowpath and one fastpath vector per hwfn */
cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2; cdev->int_params.in.min_msix_cnt = cdev->num_hwfns * 2;
if (is_kdump_kernel()) {
DP_INFO(cdev,
"Kdump kernel: Limit the max number of requested MSI-X vectors to %hd\n",
cdev->int_params.in.min_msix_cnt);
cdev->int_params.in.num_vectors =
cdev->int_params.in.min_msix_cnt;
}
rc = qed_set_int_mode(cdev, false); rc = qed_set_int_mode(cdev, false);
if (rc) { if (rc) {
DP_ERR(cdev, "qed_slowpath_setup_int ERR\n"); DP_ERR(cdev, "qed_slowpath_setup_int ERR\n");

View File

@ -4513,6 +4513,8 @@ static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn,
static int qed_sriov_enable(struct qed_dev *cdev, int num) static int qed_sriov_enable(struct qed_dev *cdev, int num)
{ {
struct qed_iov_vf_init_params params; struct qed_iov_vf_init_params params;
struct qed_hwfn *hwfn;
struct qed_ptt *ptt;
int i, j, rc; int i, j, rc;
if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) { if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
@ -4525,8 +4527,8 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
/* Initialize HW for VF access */ /* Initialize HW for VF access */
for_each_hwfn(cdev, j) { for_each_hwfn(cdev, j) {
struct qed_hwfn *hwfn = &cdev->hwfns[j]; hwfn = &cdev->hwfns[j];
struct qed_ptt *ptt = qed_ptt_acquire(hwfn); ptt = qed_ptt_acquire(hwfn);
/* Make sure not to use more than 16 queues per VF */ /* Make sure not to use more than 16 queues per VF */
params.num_queues = min_t(int, params.num_queues = min_t(int,
@ -4562,6 +4564,19 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
goto err; goto err;
} }
hwfn = QED_LEADING_HWFN(cdev);
ptt = qed_ptt_acquire(hwfn);
if (!ptt) {
DP_ERR(hwfn, "Failed to acquire ptt\n");
rc = -EBUSY;
goto err;
}
rc = qed_mcp_ov_update_eswitch(hwfn, ptt, QED_OV_ESWITCH_VEB);
if (rc)
DP_INFO(cdev, "Failed to update eswitch mode\n");
qed_ptt_release(hwfn, ptt);
return num; return num;
err: err:

View File

@ -337,8 +337,14 @@ int qede_ptp_get_ts_info(struct qede_dev *edev, struct ethtool_ts_info *info)
{ {
struct qede_ptp *ptp = edev->ptp; struct qede_ptp *ptp = edev->ptp;
if (!ptp) if (!ptp) {
return -EIO; info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE;
info->phc_index = -1;
return 0;
}
info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE |

View File

@ -2794,6 +2794,7 @@ int efx_farch_filter_table_probe(struct efx_nic *efx)
if (!state) if (!state)
return -ENOMEM; return -ENOMEM;
efx->filter_state = state; efx->filter_state = state;
init_rwsem(&state->lock);
table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
table->id = EFX_FARCH_FILTER_TABLE_RX_IP; table->id = EFX_FARCH_FILTER_TABLE_RX_IP;

View File

@ -407,6 +407,16 @@ static void dwmac4_enable_tso(void __iomem *ioaddr, bool en, u32 chan)
} }
} }
/* Program the receive buffer size field of one DMA Rx channel.
 *
 * Performs a read-modify-write of DMA_CHAN_RX_CONTROL(chan): only the bits
 * covered by DMA_RBSZ_MASK are replaced with @bfsize (shifted into place by
 * DMA_RBSZ_SHIFT); every other bit keeps the value that was read back.
 */
static void dwmac4_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
{
	void __iomem *rx_ctrl = ioaddr + DMA_CHAN_RX_CONTROL(chan);
	u32 rbsz = (bfsize << DMA_RBSZ_SHIFT) & DMA_RBSZ_MASK;
	u32 ctrl;

	ctrl = readl(rx_ctrl);
	ctrl = (ctrl & ~DMA_RBSZ_MASK) | rbsz;
	writel(ctrl, rx_ctrl);
}
const struct stmmac_dma_ops dwmac4_dma_ops = { const struct stmmac_dma_ops dwmac4_dma_ops = {
.reset = dwmac4_dma_reset, .reset = dwmac4_dma_reset,
.init = dwmac4_dma_init, .init = dwmac4_dma_init,
@ -431,6 +441,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr, .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr, .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
.enable_tso = dwmac4_enable_tso, .enable_tso = dwmac4_enable_tso,
.set_bfsize = dwmac4_set_bfsize,
}; };
const struct stmmac_dma_ops dwmac410_dma_ops = { const struct stmmac_dma_ops dwmac410_dma_ops = {
@ -457,4 +468,5 @@ const struct stmmac_dma_ops dwmac410_dma_ops = {
.set_rx_tail_ptr = dwmac4_set_rx_tail_ptr, .set_rx_tail_ptr = dwmac4_set_rx_tail_ptr,
.set_tx_tail_ptr = dwmac4_set_tx_tail_ptr, .set_tx_tail_ptr = dwmac4_set_tx_tail_ptr,
.enable_tso = dwmac4_enable_tso, .enable_tso = dwmac4_enable_tso,
.set_bfsize = dwmac4_set_bfsize,
}; };

View File

@ -120,6 +120,8 @@
/* DMA Rx Channel X Control register defines */ /* DMA Rx Channel X Control register defines */
#define DMA_CONTROL_SR BIT(0) #define DMA_CONTROL_SR BIT(0)
/* Rx buffer size (RBSZ) field: occupies bits 14..1, value is placed with a
 * left shift of DMA_RBSZ_SHIFT and then limited by DMA_RBSZ_MASK.
 */
#define DMA_RBSZ_MASK GENMASK(14, 1)
#define DMA_RBSZ_SHIFT 1
/* Interrupt status per channel */ /* Interrupt status per channel */
#define DMA_CHAN_STATUS_REB GENMASK(21, 19) #define DMA_CHAN_STATUS_REB GENMASK(21, 19)

View File

@ -183,6 +183,7 @@ struct stmmac_dma_ops {
void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); void (*set_rx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan); void (*set_tx_tail_ptr)(void __iomem *ioaddr, u32 tail_ptr, u32 chan);
void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan); void (*enable_tso)(void __iomem *ioaddr, bool en, u32 chan);
void (*set_bfsize)(void __iomem *ioaddr, int bfsize, u32 chan);
}; };
#define stmmac_reset(__priv, __args...) \ #define stmmac_reset(__priv, __args...) \
@ -235,6 +236,8 @@ struct stmmac_dma_ops {
stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args) stmmac_do_void_callback(__priv, dma, set_tx_tail_ptr, __args)
#define stmmac_enable_tso(__priv, __args...) \ #define stmmac_enable_tso(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, enable_tso, __args) stmmac_do_void_callback(__priv, dma, enable_tso, __args)
#define stmmac_set_dma_bfsize(__priv, __args...) \
stmmac_do_void_callback(__priv, dma, set_bfsize, __args)
struct mac_device_info; struct mac_device_info;
struct net_device; struct net_device;

View File

@ -1804,6 +1804,8 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan, stmmac_dma_rx_mode(priv, priv->ioaddr, rxmode, chan,
rxfifosz, qmode); rxfifosz, qmode);
stmmac_set_dma_bfsize(priv, priv->ioaddr, priv->dma_buf_sz,
chan);
} }
for (chan = 0; chan < tx_channels_count; chan++) { for (chan = 0; chan < tx_channels_count; chan++) {

View File

@ -476,7 +476,7 @@ static struct sk_buff **geneve_gro_receive(struct sock *sk,
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
out: out:
NAPI_GRO_CB(skb)->flush |= flush; skb_gro_flush_final(skb, pp, flush);
return pp; return pp;
} }

View File

@ -210,7 +210,7 @@ int netvsc_recv_callback(struct net_device *net,
void netvsc_channel_cb(void *context); void netvsc_channel_cb(void *context);
int netvsc_poll(struct napi_struct *napi, int budget); int netvsc_poll(struct napi_struct *napi, int budget);
void rndis_set_subchannel(struct work_struct *w); int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev);
int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev);
struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,

View File

@ -65,6 +65,41 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
VM_PKT_DATA_INBAND, 0); VM_PKT_DATA_INBAND, 0);
} }
/* Deferred sub-channel setup for the initial device setup.
 * The initial hotplug event is delivered in softirq context, where it is
 * not possible to wait for the channels, so the setup is run from this
 * work item instead.
 */
static void netvsc_subchan_work(struct work_struct *w)
{
	struct netvsc_device *nvdev =
		container_of(w, struct netvsc_device, subchan_work);
	struct rndis_device *rdev;
	int chn;

	/* Device removal may already be running under RTNL; re-queue the
	 * work rather than block on the lock, to avoid a deadlock.
	 */
	if (!rtnl_trylock()) {
		schedule_work(w);
		return;
	}

	rdev = nvdev->extension;
	if (!rdev)
		goto unlock;

	if (rndis_set_subchannel(rdev->ndev, nvdev) == 0) {
		netif_device_attach(rdev->ndev);
		goto unlock;
	}

	/* fallback to only primary channel */
	for (chn = 1; chn < nvdev->num_chn; chn++)
		netif_napi_del(&nvdev->chan_table[chn].napi);

	nvdev->max_chn = 1;
	nvdev->num_chn = 1;

unlock:
	rtnl_unlock();
}
static struct netvsc_device *alloc_net_device(void) static struct netvsc_device *alloc_net_device(void)
{ {
struct netvsc_device *net_device; struct netvsc_device *net_device;
@ -81,7 +116,7 @@ static struct netvsc_device *alloc_net_device(void)
init_completion(&net_device->channel_init_wait); init_completion(&net_device->channel_init_wait);
init_waitqueue_head(&net_device->subchan_open); init_waitqueue_head(&net_device->subchan_open);
INIT_WORK(&net_device->subchan_work, rndis_set_subchannel); INIT_WORK(&net_device->subchan_work, netvsc_subchan_work);
return net_device; return net_device;
} }

View File

@ -905,8 +905,20 @@ static int netvsc_attach(struct net_device *ndev,
if (IS_ERR(nvdev)) if (IS_ERR(nvdev))
return PTR_ERR(nvdev); return PTR_ERR(nvdev);
/* Note: enable and attach happen when sub-channels setup */ if (nvdev->num_chn > 1) {
ret = rndis_set_subchannel(ndev, nvdev);
/* if unavailable, just proceed with one queue */
if (ret) {
nvdev->max_chn = 1;
nvdev->num_chn = 1;
}
}
/* In any case device is now ready */
netif_device_attach(ndev);
/* Note: enable and attach happen when sub-channels setup */
netif_carrier_off(ndev); netif_carrier_off(ndev);
if (netif_running(ndev)) { if (netif_running(ndev)) {
@ -2089,6 +2101,9 @@ static int netvsc_probe(struct hv_device *dev,
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
if (nvdev->num_chn > 1)
schedule_work(&nvdev->subchan_work);
/* hw_features computed in rndis_netdev_set_hwcaps() */ /* hw_features computed in rndis_netdev_set_hwcaps() */
net->features = net->hw_features | net->features = net->hw_features |
NETIF_F_HIGHDMA | NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_SG |

View File

@ -1062,29 +1062,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
* This breaks overlap of processing the host message for the * This breaks overlap of processing the host message for the
* new primary channel with the initialization of sub-channels. * new primary channel with the initialization of sub-channels.
*/ */
void rndis_set_subchannel(struct work_struct *w) int rndis_set_subchannel(struct net_device *ndev, struct netvsc_device *nvdev)
{ {
struct netvsc_device *nvdev
= container_of(w, struct netvsc_device, subchan_work);
struct nvsp_message *init_packet = &nvdev->channel_init_pkt; struct nvsp_message *init_packet = &nvdev->channel_init_pkt;
struct net_device_context *ndev_ctx; struct net_device_context *ndev_ctx = netdev_priv(ndev);
struct rndis_device *rdev; struct hv_device *hv_dev = ndev_ctx->device_ctx;
struct net_device *ndev; struct rndis_device *rdev = nvdev->extension;
struct hv_device *hv_dev;
int i, ret; int i, ret;
if (!rtnl_trylock()) { ASSERT_RTNL();
schedule_work(w);
return;
}
rdev = nvdev->extension;
if (!rdev)
goto unlock; /* device was removed */
ndev = rdev->ndev;
ndev_ctx = netdev_priv(ndev);
hv_dev = ndev_ctx->device_ctx;
memset(init_packet, 0, sizeof(struct nvsp_message)); memset(init_packet, 0, sizeof(struct nvsp_message));
init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL; init_packet->hdr.msg_type = NVSP_MSG5_TYPE_SUBCHANNEL;
@ -1100,13 +1086,13 @@ void rndis_set_subchannel(struct work_struct *w)
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret) { if (ret) {
netdev_err(ndev, "sub channel allocate send failed: %d\n", ret); netdev_err(ndev, "sub channel allocate send failed: %d\n", ret);
goto failed; return ret;
} }
wait_for_completion(&nvdev->channel_init_wait); wait_for_completion(&nvdev->channel_init_wait);
if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
netdev_err(ndev, "sub channel request failed\n"); netdev_err(ndev, "sub channel request failed\n");
goto failed; return -EIO;
} }
nvdev->num_chn = 1 + nvdev->num_chn = 1 +
@ -1125,21 +1111,7 @@ void rndis_set_subchannel(struct work_struct *w)
for (i = 0; i < VRSS_SEND_TAB_SIZE; i++) for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
ndev_ctx->tx_table[i] = i % nvdev->num_chn; ndev_ctx->tx_table[i] = i % nvdev->num_chn;
netif_device_attach(ndev); return 0;
rtnl_unlock();
return;
failed:
/* fallback to only primary channel */
for (i = 1; i < nvdev->num_chn; i++)
netif_napi_del(&nvdev->chan_table[i].napi);
nvdev->max_chn = 1;
nvdev->num_chn = 1;
netif_device_attach(ndev);
unlock:
rtnl_unlock();
} }
static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device, static int rndis_netdev_set_hwcaps(struct rndis_device *rndis_device,
@ -1360,21 +1332,12 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
netif_napi_add(net, &net_device->chan_table[i].napi, netif_napi_add(net, &net_device->chan_table[i].napi,
netvsc_poll, NAPI_POLL_WEIGHT); netvsc_poll, NAPI_POLL_WEIGHT);
if (net_device->num_chn > 1) return net_device;
schedule_work(&net_device->subchan_work);
out: out:
/* if unavailable, just proceed with one queue */ /* setting up multiple channels failed */
if (ret) { net_device->max_chn = 1;
net_device->max_chn = 1; net_device->num_chn = 1;
net_device->num_chn = 1;
}
/* No sub channels, device is ready */
if (net_device->num_chn == 1)
netif_device_attach(net);
return net_device;
err_dev_remv: err_dev_remv:
rndis_filter_device_remove(dev, net_device); rndis_filter_device_remove(dev, net_device);

View File

@ -75,10 +75,23 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
{ {
struct ipvl_dev *ipvlan; struct ipvl_dev *ipvlan;
struct net_device *mdev = port->dev; struct net_device *mdev = port->dev;
int err = 0; unsigned int flags;
int err;
ASSERT_RTNL(); ASSERT_RTNL();
if (port->mode != nval) { if (port->mode != nval) {
list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
flags = ipvlan->dev->flags;
if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
err = dev_change_flags(ipvlan->dev,
flags | IFF_NOARP);
} else {
err = dev_change_flags(ipvlan->dev,
flags & ~IFF_NOARP);
}
if (unlikely(err))
goto fail;
}
if (nval == IPVLAN_MODE_L3S) { if (nval == IPVLAN_MODE_L3S) {
/* New mode is L3S */ /* New mode is L3S */
err = ipvlan_register_nf_hook(read_pnet(&port->pnet)); err = ipvlan_register_nf_hook(read_pnet(&port->pnet));
@ -86,21 +99,28 @@ static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
mdev->l3mdev_ops = &ipvl_l3mdev_ops; mdev->l3mdev_ops = &ipvl_l3mdev_ops;
mdev->priv_flags |= IFF_L3MDEV_MASTER; mdev->priv_flags |= IFF_L3MDEV_MASTER;
} else } else
return err; goto fail;
} else if (port->mode == IPVLAN_MODE_L3S) { } else if (port->mode == IPVLAN_MODE_L3S) {
/* Old mode was L3S */ /* Old mode was L3S */
mdev->priv_flags &= ~IFF_L3MDEV_MASTER; mdev->priv_flags &= ~IFF_L3MDEV_MASTER;
ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); ipvlan_unregister_nf_hook(read_pnet(&port->pnet));
mdev->l3mdev_ops = NULL; mdev->l3mdev_ops = NULL;
} }
list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S)
ipvlan->dev->flags |= IFF_NOARP;
else
ipvlan->dev->flags &= ~IFF_NOARP;
}
port->mode = nval; port->mode = nval;
} }
return 0;
fail:
/* Undo the flags changes that have been done so far. */
list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
flags = ipvlan->dev->flags;
if (port->mode == IPVLAN_MODE_L3 ||
port->mode == IPVLAN_MODE_L3S)
dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
else
dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
}
return err; return err;
} }

View File

@ -222,7 +222,7 @@ static int dp83811_config_intr(struct phy_device *phydev)
if (err < 0) if (err < 0)
return err; return err;
err = phy_write(phydev, MII_DP83811_INT_STAT1, 0); err = phy_write(phydev, MII_DP83811_INT_STAT2, 0);
} }
return err; return err;

View File

@ -64,6 +64,7 @@
#define DEFAULT_RX_CSUM_ENABLE (true) #define DEFAULT_RX_CSUM_ENABLE (true)
#define DEFAULT_TSO_CSUM_ENABLE (true) #define DEFAULT_TSO_CSUM_ENABLE (true)
#define DEFAULT_VLAN_FILTER_ENABLE (true) #define DEFAULT_VLAN_FILTER_ENABLE (true)
#define DEFAULT_VLAN_RX_OFFLOAD (true)
#define TX_OVERHEAD (8) #define TX_OVERHEAD (8)
#define RXW_PADDING 2 #define RXW_PADDING 2
@ -2298,7 +2299,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
if ((ll_mtu % dev->maxpacket) == 0) if ((ll_mtu % dev->maxpacket) == 0)
return -EDOM; return -EDOM;
ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + ETH_HLEN); ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
netdev->mtu = new_mtu; netdev->mtu = new_mtu;
@ -2364,6 +2365,11 @@ static int lan78xx_set_features(struct net_device *netdev,
} }
if (features & NETIF_F_HW_VLAN_CTAG_RX) if (features & NETIF_F_HW_VLAN_CTAG_RX)
pdata->rfe_ctl |= RFE_CTL_VLAN_STRIP_;
else
pdata->rfe_ctl &= ~RFE_CTL_VLAN_STRIP_;
if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_; pdata->rfe_ctl |= RFE_CTL_VLAN_FILTER_;
else else
pdata->rfe_ctl &= ~RFE_CTL_VLAN_FILTER_; pdata->rfe_ctl &= ~RFE_CTL_VLAN_FILTER_;
@ -2587,7 +2593,8 @@ static int lan78xx_reset(struct lan78xx_net *dev)
buf |= FCT_TX_CTL_EN_; buf |= FCT_TX_CTL_EN_;
ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf); ret = lan78xx_write_reg(dev, FCT_TX_CTL, buf);
ret = lan78xx_set_rx_max_frame_length(dev, dev->net->mtu + ETH_HLEN); ret = lan78xx_set_rx_max_frame_length(dev,
dev->net->mtu + VLAN_ETH_HLEN);
ret = lan78xx_read_reg(dev, MAC_RX, &buf); ret = lan78xx_read_reg(dev, MAC_RX, &buf);
buf |= MAC_RX_RXEN_; buf |= MAC_RX_RXEN_;
@ -2975,6 +2982,12 @@ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
if (DEFAULT_TSO_CSUM_ENABLE) if (DEFAULT_TSO_CSUM_ENABLE)
dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG; dev->net->features |= NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG;
if (DEFAULT_VLAN_RX_OFFLOAD)
dev->net->features |= NETIF_F_HW_VLAN_CTAG_RX;
if (DEFAULT_VLAN_FILTER_ENABLE)
dev->net->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
dev->net->hw_features = dev->net->features; dev->net->hw_features = dev->net->features;
ret = lan78xx_setup_irq_domain(dev); ret = lan78xx_setup_irq_domain(dev);
@ -3039,8 +3052,13 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev,
struct sk_buff *skb, struct sk_buff *skb,
u32 rx_cmd_a, u32 rx_cmd_b) u32 rx_cmd_a, u32 rx_cmd_b)
{ {
/* HW Checksum offload appears to be flawed if used when not stripping
* VLAN headers. Drop back to S/W checksums under these conditions.
*/
if (!(dev->net->features & NETIF_F_RXCSUM) || if (!(dev->net->features & NETIF_F_RXCSUM) ||
unlikely(rx_cmd_a & RX_CMD_A_ICSM_)) { unlikely(rx_cmd_a & RX_CMD_A_ICSM_) ||
((rx_cmd_a & RX_CMD_A_FVTG_) &&
!(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))) {
skb->ip_summed = CHECKSUM_NONE; skb->ip_summed = CHECKSUM_NONE;
} else { } else {
skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT_)); skb->csum = ntohs((u16)(rx_cmd_b >> RX_CMD_B_CSUM_SHIFT_));
@ -3048,6 +3066,16 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev,
} }
} }
/* Record the VLAN tag reported in the Rx command words on the skb.
 *
 * Nothing is done unless NETIF_F_HW_VLAN_CTAG_RX is enabled on the netdev
 * and RX_CMD_A_FVTG_ is set in @rx_cmd_a. When both hold, the low 16 bits
 * of @rx_cmd_b are stored as an 802.1Q (ETH_P_8021Q) hardware-accelerated
 * tag on @skb.
 */
static void lan78xx_rx_vlan_offload(struct lan78xx_net *dev,
				    struct sk_buff *skb,
				    u32 rx_cmd_a, u32 rx_cmd_b)
{
	if (!(dev->net->features & NETIF_F_HW_VLAN_CTAG_RX))
		return;

	if (!(rx_cmd_a & RX_CMD_A_FVTG_))
		return;

	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rx_cmd_b & 0xffff);
}
static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb)
{ {
int status; int status;
@ -3112,6 +3140,8 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb)
if (skb->len == size) { if (skb->len == size) {
lan78xx_rx_csum_offload(dev, skb, lan78xx_rx_csum_offload(dev, skb,
rx_cmd_a, rx_cmd_b); rx_cmd_a, rx_cmd_b);
lan78xx_rx_vlan_offload(dev, skb,
rx_cmd_a, rx_cmd_b);
skb_trim(skb, skb->len - 4); /* remove fcs */ skb_trim(skb, skb->len - 4); /* remove fcs */
skb->truesize = size + sizeof(struct sk_buff); skb->truesize = size + sizeof(struct sk_buff);
@ -3130,6 +3160,7 @@ static int lan78xx_rx(struct lan78xx_net *dev, struct sk_buff *skb)
skb_set_tail_pointer(skb2, size); skb_set_tail_pointer(skb2, size);
lan78xx_rx_csum_offload(dev, skb2, rx_cmd_a, rx_cmd_b); lan78xx_rx_csum_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
lan78xx_rx_vlan_offload(dev, skb2, rx_cmd_a, rx_cmd_b);
skb_trim(skb2, skb2->len - 4); /* remove fcs */ skb_trim(skb2, skb2->len - 4); /* remove fcs */
skb2->truesize = size + sizeof(struct sk_buff); skb2->truesize = size + sizeof(struct sk_buff);

View File

@ -3962,7 +3962,8 @@ static int rtl8152_close(struct net_device *netdev)
#ifdef CONFIG_PM_SLEEP #ifdef CONFIG_PM_SLEEP
unregister_pm_notifier(&tp->pm_notifier); unregister_pm_notifier(&tp->pm_notifier);
#endif #endif
napi_disable(&tp->napi); if (!test_bit(RTL8152_UNPLUG, &tp->flags))
napi_disable(&tp->napi);
clear_bit(WORK_ENABLE, &tp->flags); clear_bit(WORK_ENABLE, &tp->flags);
usb_kill_urb(tp->intr_urb); usb_kill_urb(tp->intr_urb);
cancel_delayed_work_sync(&tp->schedule); cancel_delayed_work_sync(&tp->schedule);

View File

@ -53,6 +53,10 @@ module_param(napi_tx, bool, 0644);
/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */ /* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
#define VIRTIO_XDP_HEADROOM 256 #define VIRTIO_XDP_HEADROOM 256
/* Separating two types of XDP xmit */
#define VIRTIO_XDP_TX BIT(0)
#define VIRTIO_XDP_REDIR BIT(1)
/* RX packet size EWMA. The average packet size is used to determine the packet /* RX packet size EWMA. The average packet size is used to determine the packet
* buffer size when refilling RX rings. As the entire RX ring may be refilled * buffer size when refilling RX rings. As the entire RX ring may be refilled
* at once, the weight is chosen so that the EWMA will be insensitive to short- * at once, the weight is chosen so that the EWMA will be insensitive to short-
@ -582,7 +586,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
struct receive_queue *rq, struct receive_queue *rq,
void *buf, void *ctx, void *buf, void *ctx,
unsigned int len, unsigned int len,
bool *xdp_xmit) unsigned int *xdp_xmit)
{ {
struct sk_buff *skb; struct sk_buff *skb;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
@ -654,14 +658,14 @@ static struct sk_buff *receive_small(struct net_device *dev,
trace_xdp_exception(vi->dev, xdp_prog, act); trace_xdp_exception(vi->dev, xdp_prog, act);
goto err_xdp; goto err_xdp;
} }
*xdp_xmit = true; *xdp_xmit |= VIRTIO_XDP_TX;
rcu_read_unlock(); rcu_read_unlock();
goto xdp_xmit; goto xdp_xmit;
case XDP_REDIRECT: case XDP_REDIRECT:
err = xdp_do_redirect(dev, &xdp, xdp_prog); err = xdp_do_redirect(dev, &xdp, xdp_prog);
if (err) if (err)
goto err_xdp; goto err_xdp;
*xdp_xmit = true; *xdp_xmit |= VIRTIO_XDP_REDIR;
rcu_read_unlock(); rcu_read_unlock();
goto xdp_xmit; goto xdp_xmit;
default: default:
@ -723,7 +727,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
void *buf, void *buf,
void *ctx, void *ctx,
unsigned int len, unsigned int len,
bool *xdp_xmit) unsigned int *xdp_xmit)
{ {
struct virtio_net_hdr_mrg_rxbuf *hdr = buf; struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers); u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
@ -818,7 +822,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
put_page(xdp_page); put_page(xdp_page);
goto err_xdp; goto err_xdp;
} }
*xdp_xmit = true; *xdp_xmit |= VIRTIO_XDP_TX;
if (unlikely(xdp_page != page)) if (unlikely(xdp_page != page))
put_page(page); put_page(page);
rcu_read_unlock(); rcu_read_unlock();
@ -830,7 +834,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
put_page(xdp_page); put_page(xdp_page);
goto err_xdp; goto err_xdp;
} }
*xdp_xmit = true; *xdp_xmit |= VIRTIO_XDP_REDIR;
if (unlikely(xdp_page != page)) if (unlikely(xdp_page != page))
put_page(page); put_page(page);
rcu_read_unlock(); rcu_read_unlock();
@ -939,7 +943,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
} }
static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq, static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
void *buf, unsigned int len, void **ctx, bool *xdp_xmit) void *buf, unsigned int len, void **ctx,
unsigned int *xdp_xmit)
{ {
struct net_device *dev = vi->dev; struct net_device *dev = vi->dev;
struct sk_buff *skb; struct sk_buff *skb;
@ -1232,7 +1237,8 @@ static void refill_work(struct work_struct *work)
} }
} }
static int virtnet_receive(struct receive_queue *rq, int budget, bool *xdp_xmit) static int virtnet_receive(struct receive_queue *rq, int budget,
unsigned int *xdp_xmit)
{ {
struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_info *vi = rq->vq->vdev->priv;
unsigned int len, received = 0, bytes = 0; unsigned int len, received = 0, bytes = 0;
@ -1321,7 +1327,7 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
struct virtnet_info *vi = rq->vq->vdev->priv; struct virtnet_info *vi = rq->vq->vdev->priv;
struct send_queue *sq; struct send_queue *sq;
unsigned int received, qp; unsigned int received, qp;
bool xdp_xmit = false; unsigned int xdp_xmit = 0;
virtnet_poll_cleantx(rq); virtnet_poll_cleantx(rq);
@ -1331,12 +1337,14 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
if (received < budget) if (received < budget)
virtqueue_napi_complete(napi, rq->vq, received); virtqueue_napi_complete(napi, rq->vq, received);
if (xdp_xmit) { if (xdp_xmit & VIRTIO_XDP_REDIR)
xdp_do_flush_map();
if (xdp_xmit & VIRTIO_XDP_TX) {
qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + qp = vi->curr_queue_pairs - vi->xdp_queue_pairs +
smp_processor_id(); smp_processor_id();
sq = &vi->sq[qp]; sq = &vi->sq[qp];
virtqueue_kick(sq->vq); virtqueue_kick(sq->vq);
xdp_do_flush_map();
} }
return received; return received;

View File

@ -623,9 +623,7 @@ static struct sk_buff **vxlan_gro_receive(struct sock *sk,
flush = 0; flush = 0;
out: out:
skb_gro_remcsum_cleanup(skb, &grc); skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
skb->remcsum_offload = 0;
NAPI_GRO_CB(skb)->flush |= flush;
return pp; return pp;
} }

View File

@ -829,6 +829,17 @@ struct qeth_trap_id {
/*some helper functions*/ /*some helper functions*/
#define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "") #define QETH_CARD_IFNAME(card) (((card)->dev)? (card)->dev->name : "")
/* Wipe the leading entries of a QDIO buffer.
 *
 * Zero-fills the first @elements entries of @buf->element, then clears the
 * sflags of entries 14 and 15 unconditionally, i.e. even when @elements does
 * not reach that far.
 */
static inline void qeth_scrub_qdio_buffer(struct qdio_buffer *buf,
					  unsigned int elements)
{
	/* The entries are contiguous, so one memset covers [0, elements). */
	memset(&buf->element[0], 0,
	       elements * sizeof(struct qdio_buffer_element));

	buf->element[14].sflags = 0;
	buf->element[15].sflags = 0;
}
/** /**
* qeth_get_elements_for_range() - find number of SBALEs to cover range. * qeth_get_elements_for_range() - find number of SBALEs to cover range.
* @start: Start of the address range. * @start: Start of the address range.
@ -1029,7 +1040,7 @@ struct qeth_cmd_buffer *qeth_get_setassparms_cmd(struct qeth_card *,
__u16, __u16, __u16, __u16,
enum qeth_prot_versions); enum qeth_prot_versions);
int qeth_set_features(struct net_device *, netdev_features_t); int qeth_set_features(struct net_device *, netdev_features_t);
void qeth_recover_features(struct net_device *dev); void qeth_enable_hw_features(struct net_device *dev);
netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t); netdev_features_t qeth_fix_features(struct net_device *, netdev_features_t);
netdev_features_t qeth_features_check(struct sk_buff *skb, netdev_features_t qeth_features_check(struct sk_buff *skb,
struct net_device *dev, struct net_device *dev,

View File

@ -73,9 +73,6 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
struct qeth_qdio_out_buffer *buf, struct qeth_qdio_out_buffer *buf,
enum iucv_tx_notify notification); enum iucv_tx_notify notification);
static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf);
static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
struct qeth_qdio_out_buffer *buf,
enum qeth_qdio_buffer_states newbufstate);
static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
struct workqueue_struct *qeth_wq; struct workqueue_struct *qeth_wq;
@ -489,6 +486,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
struct qaob *aob; struct qaob *aob;
struct qeth_qdio_out_buffer *buffer; struct qeth_qdio_out_buffer *buffer;
enum iucv_tx_notify notification; enum iucv_tx_notify notification;
unsigned int i;
aob = (struct qaob *) phys_to_virt(phys_aob_addr); aob = (struct qaob *) phys_to_virt(phys_aob_addr);
QETH_CARD_TEXT(card, 5, "haob"); QETH_CARD_TEXT(card, 5, "haob");
@ -513,10 +511,18 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
qeth_notify_skbs(buffer->q, buffer, notification); qeth_notify_skbs(buffer->q, buffer, notification);
buffer->aob = NULL; buffer->aob = NULL;
qeth_clear_output_buffer(buffer->q, buffer, /* Free dangling allocations. The attached skbs are handled by
QETH_QDIO_BUF_HANDLED_DELAYED); * qeth_cleanup_handled_pending().
*/
for (i = 0;
i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
i++) {
if (aob->sba[i] && buffer->is_header[i])
kmem_cache_free(qeth_core_header_cache,
(void *) aob->sba[i]);
}
atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED);
/* from here on: do not touch buffer anymore */
qdio_release_aob(aob); qdio_release_aob(aob);
} }
@ -3759,6 +3765,10 @@ static void qeth_qdio_output_handler(struct ccw_device *ccwdev,
QETH_CARD_TEXT(queue->card, 5, "aob"); QETH_CARD_TEXT(queue->card, 5, "aob");
QETH_CARD_TEXT_(queue->card, 5, "%lx", QETH_CARD_TEXT_(queue->card, 5, "%lx",
virt_to_phys(buffer->aob)); virt_to_phys(buffer->aob));
/* prepare the queue slot for re-use: */
qeth_scrub_qdio_buffer(buffer->buffer,
QETH_MAX_BUFFER_ELEMENTS(card));
if (qeth_init_qdio_out_buf(queue, bidx)) { if (qeth_init_qdio_out_buf(queue, bidx)) {
QETH_CARD_TEXT(card, 2, "outofbuf"); QETH_CARD_TEXT(card, 2, "outofbuf");
qeth_schedule_recovery(card); qeth_schedule_recovery(card);
@ -4834,7 +4844,7 @@ int qeth_vm_request_mac(struct qeth_card *card)
goto out; goto out;
} }
ccw_device_get_id(CARD_RDEV(card), &id); ccw_device_get_id(CARD_DDEV(card), &id);
request->resp_buf_len = sizeof(*response); request->resp_buf_len = sizeof(*response);
request->resp_version = DIAG26C_VERSION2; request->resp_version = DIAG26C_VERSION2;
request->op_code = DIAG26C_GET_MAC; request->op_code = DIAG26C_GET_MAC;
@ -6459,28 +6469,27 @@ static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ #define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \
NETIF_F_IPV6_CSUM) NETIF_F_IPV6_CSUM)
/** /**
* qeth_recover_features() - Restore device features after recovery * qeth_enable_hw_features() - (Re-)Enable HW functions for device features
* @dev: the recovering net_device * @dev: a net_device
*
* Caller must hold rtnl lock.
*/ */
void qeth_recover_features(struct net_device *dev) void qeth_enable_hw_features(struct net_device *dev)
{ {
netdev_features_t features = dev->features;
struct qeth_card *card = dev->ml_priv; struct qeth_card *card = dev->ml_priv;
netdev_features_t features;
rtnl_lock();
features = dev->features;
/* force-off any feature that needs an IPA sequence. /* force-off any feature that needs an IPA sequence.
* netdev_update_features() will restart them. * netdev_update_features() will restart them.
*/ */
dev->features &= ~QETH_HW_FEATURES; dev->features &= ~QETH_HW_FEATURES;
netdev_update_features(dev); netdev_update_features(dev);
if (features != dev->features)
if (features == dev->features) dev_warn(&card->gdev->dev,
return; "Device recovery failed to restore all offload features\n");
dev_warn(&card->gdev->dev, rtnl_unlock();
"Device recovery failed to restore all offload features\n");
} }
EXPORT_SYMBOL_GPL(qeth_recover_features); EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
int qeth_set_features(struct net_device *dev, netdev_features_t features) int qeth_set_features(struct net_device *dev, netdev_features_t features)
{ {

View File

@ -140,7 +140,7 @@ static int qeth_l2_send_setmac(struct qeth_card *card, __u8 *mac)
static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac) static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
{ {
enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
IPA_CMD_SETGMAC : IPA_CMD_SETVMAC; IPA_CMD_SETGMAC : IPA_CMD_SETVMAC;
int rc; int rc;
@ -157,7 +157,7 @@ static int qeth_l2_write_mac(struct qeth_card *card, u8 *mac)
static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac) static int qeth_l2_remove_mac(struct qeth_card *card, u8 *mac)
{ {
enum qeth_ipa_cmds cmd = is_multicast_ether_addr_64bits(mac) ? enum qeth_ipa_cmds cmd = is_multicast_ether_addr(mac) ?
IPA_CMD_DELGMAC : IPA_CMD_DELVMAC; IPA_CMD_DELGMAC : IPA_CMD_DELVMAC;
int rc; int rc;
@ -501,27 +501,34 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
return -ERESTARTSYS; return -ERESTARTSYS;
} }
/* avoid racing against concurrent state change: */
if (!mutex_trylock(&card->conf_mutex))
return -EAGAIN;
if (!qeth_card_hw_is_reachable(card)) { if (!qeth_card_hw_is_reachable(card)) {
ether_addr_copy(dev->dev_addr, addr->sa_data); ether_addr_copy(dev->dev_addr, addr->sa_data);
return 0; goto out_unlock;
} }
/* don't register the same address twice */ /* don't register the same address twice */
if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) &&
(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))
return 0; goto out_unlock;
/* add the new address, switch over, drop the old */ /* add the new address, switch over, drop the old */
rc = qeth_l2_send_setmac(card, addr->sa_data); rc = qeth_l2_send_setmac(card, addr->sa_data);
if (rc) if (rc)
return rc; goto out_unlock;
ether_addr_copy(old_addr, dev->dev_addr); ether_addr_copy(old_addr, dev->dev_addr);
ether_addr_copy(dev->dev_addr, addr->sa_data); ether_addr_copy(dev->dev_addr, addr->sa_data);
if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)
qeth_l2_remove_mac(card, old_addr); qeth_l2_remove_mac(card, old_addr);
card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED;
return 0;
out_unlock:
mutex_unlock(&card->conf_mutex);
return rc;
} }
static void qeth_promisc_to_bridge(struct qeth_card *card) static void qeth_promisc_to_bridge(struct qeth_card *card)
@ -1112,6 +1119,8 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
netif_carrier_off(card->dev); netif_carrier_off(card->dev);
qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_set_allowed_threads(card, 0xffffffff, 0);
qeth_enable_hw_features(card->dev);
if (recover_flag == CARD_STATE_RECOVER) { if (recover_flag == CARD_STATE_RECOVER) {
if (recovery_mode && if (recovery_mode &&
card->info.type != QETH_CARD_TYPE_OSN) { card->info.type != QETH_CARD_TYPE_OSN) {
@ -1123,9 +1132,6 @@ static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode)
} }
/* this also sets saved unicast addresses */ /* this also sets saved unicast addresses */
qeth_l2_set_rx_mode(card->dev); qeth_l2_set_rx_mode(card->dev);
rtnl_lock();
qeth_recover_features(card->dev);
rtnl_unlock();
} }
/* let user_space know that device is online */ /* let user_space know that device is online */
kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE);

View File

@ -2662,6 +2662,8 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
netif_carrier_on(card->dev); netif_carrier_on(card->dev);
else else
netif_carrier_off(card->dev); netif_carrier_off(card->dev);
qeth_enable_hw_features(card->dev);
if (recover_flag == CARD_STATE_RECOVER) { if (recover_flag == CARD_STATE_RECOVER) {
rtnl_lock(); rtnl_lock();
if (recovery_mode) if (recovery_mode)
@ -2669,7 +2671,6 @@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode)
else else
dev_open(card->dev); dev_open(card->dev);
qeth_l3_set_rx_mode(card->dev); qeth_l3_set_rx_mode(card->dev);
qeth_recover_features(card->dev);
rtnl_unlock(); rtnl_unlock();
} }
qeth_trace_features(card); qeth_trace_features(card);

View File

@ -188,12 +188,38 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
\ \
__ret; \ __ret; \
}) })
/*
 * attr-based entry points for cgroup BPF attach/detach/query.
 * Each returns 0 or a negative errno.
 */
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
#else #else
struct bpf_prog;
struct cgroup_bpf {}; struct cgroup_bpf {};
static inline void cgroup_bpf_put(struct cgroup *cgrp) {} static inline void cgroup_bpf_put(struct cgroup *cgrp) {}
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
/*
 * Fallback stubs on the #else side of the enclosing conditional
 * (the condition itself is not visible here - presumably cgroup BPF
 * support compiled out; confirm). Every request fails with -EINVAL
 * and none of the arguments are inspected.
 */
static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype,
struct bpf_prog *prog)
{
return -EINVAL;
}
/* Stub: detach is rejected with -EINVAL. */
static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype)
{
return -EINVAL;
}
/* Stub: query is rejected with -EINVAL; @uattr is never written. */
static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return -EINVAL;
}
#define cgroup_bpf_enabled (0) #define cgroup_bpf_enabled (0)
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0) #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })

View File

@ -696,6 +696,8 @@ static inline void bpf_map_offload_map_free(struct bpf_map *map)
struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key); struct sock *__sock_hash_lookup_elem(struct bpf_map *map, void *key);
int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog);
#else #else
static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key) static inline struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
{ {
@ -714,6 +716,12 @@ static inline int sock_map_prog(struct bpf_map *map,
{ {
return -EOPNOTSUPP; return -EOPNOTSUPP;
} }
/*
 * Fallback stub on the #else side of the enclosing conditional
 * (condition not visible here): always fails with -EINVAL without
 * looking at @attr, @type or @prog.
 */
static inline int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog)
{
return -EINVAL;
}
#endif #endif
#if defined(CONFIG_XDP_SOCKETS) #if defined(CONFIG_XDP_SOCKETS)

View File

@ -5,11 +5,12 @@
#include <uapi/linux/bpf.h> #include <uapi/linux/bpf.h>
#ifdef CONFIG_BPF_LIRC_MODE2 #ifdef CONFIG_BPF_LIRC_MODE2
int lirc_prog_attach(const union bpf_attr *attr); int lirc_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int lirc_prog_detach(const union bpf_attr *attr); int lirc_prog_detach(const union bpf_attr *attr);
int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr); int lirc_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr);
#else #else
static inline int lirc_prog_attach(const union bpf_attr *attr) static inline int lirc_prog_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
{ {
return -EINVAL; return -EINVAL;
} }

View File

@ -470,9 +470,7 @@ struct sock_fprog_kern {
}; };
struct bpf_binary_header { struct bpf_binary_header {
u16 pages; u32 pages;
u16 locked:1;
/* Some arches need word alignment for their instructions */ /* Some arches need word alignment for their instructions */
u8 image[] __aligned(4); u8 image[] __aligned(4);
}; };
@ -481,7 +479,7 @@ struct bpf_prog {
u16 pages; /* Number of allocated pages */ u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */ u16 jited:1, /* Is our filter JIT'ed? */
jit_requested:1,/* archs need to JIT the prog */ jit_requested:1,/* archs need to JIT the prog */
locked:1, /* Program image locked? */ undo_set_mem:1, /* Passed set_memory_ro() checkpoint */
gpl_compatible:1, /* Is filter GPL compatible? */ gpl_compatible:1, /* Is filter GPL compatible? */
cb_access:1, /* Is control block accessed? */ cb_access:1, /* Is control block accessed? */
dst_needed:1, /* Do we need dst entry? */ dst_needed:1, /* Do we need dst entry? */
@ -677,46 +675,24 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
{ {
#ifdef CONFIG_ARCH_HAS_SET_MEMORY fp->undo_set_mem = 1;
fp->locked = 1; set_memory_ro((unsigned long)fp, fp->pages);
if (set_memory_ro((unsigned long)fp, fp->pages))
fp->locked = 0;
#endif
} }
static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
{ {
#ifdef CONFIG_ARCH_HAS_SET_MEMORY if (fp->undo_set_mem)
if (fp->locked) { set_memory_rw((unsigned long)fp, fp->pages);
WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
/* In case set_memory_rw() fails, we want to be the first
* to crash here instead of some random place later on.
*/
fp->locked = 0;
}
#endif
} }
static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
{ {
#ifdef CONFIG_ARCH_HAS_SET_MEMORY set_memory_ro((unsigned long)hdr, hdr->pages);
hdr->locked = 1;
if (set_memory_ro((unsigned long)hdr, hdr->pages))
hdr->locked = 0;
#endif
} }
static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
{ {
#ifdef CONFIG_ARCH_HAS_SET_MEMORY set_memory_rw((unsigned long)hdr, hdr->pages);
if (hdr->locked) {
WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
/* In case set_memory_rw() fails, we want to be the first
* to crash here instead of some random place later on.
*/
hdr->locked = 0;
}
#endif
} }
static inline struct bpf_binary_header * static inline struct bpf_binary_header *
@ -728,22 +704,6 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
return (void *)addr; return (void *)addr;
} }
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
{
if (!fp->locked)
return -ENOLCK;
if (fp->jited) {
const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
if (!hdr->locked)
return -ENOLCK;
}
return 0;
}
#endif
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
static inline int sk_filter(struct sock *sk, struct sk_buff *skb) static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{ {

View File

@ -8,6 +8,8 @@
#include <linux/mlx5/driver.h> #include <linux/mlx5/driver.h>
#define MLX5_ESWITCH_MANAGER(mdev) MLX5_CAP_GEN(mdev, eswitch_manager)
enum { enum {
SRIOV_NONE, SRIOV_NONE,
SRIOV_LEGACY, SRIOV_LEGACY,

View File

@ -922,7 +922,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 vnic_env_queue_counters[0x1]; u8 vnic_env_queue_counters[0x1];
u8 ets[0x1]; u8 ets[0x1];
u8 nic_flow_table[0x1]; u8 nic_flow_table[0x1];
u8 eswitch_flow_table[0x1]; u8 eswitch_manager[0x1];
u8 device_memory[0x1]; u8 device_memory[0x1];
u8 mcam_reg[0x1]; u8 mcam_reg[0x1];
u8 pcam_reg[0x1]; u8 pcam_reg[0x1];

View File

@ -2789,11 +2789,31 @@ static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp,
if (PTR_ERR(pp) != -EINPROGRESS) if (PTR_ERR(pp) != -EINPROGRESS)
NAPI_GRO_CB(skb)->flush |= flush; NAPI_GRO_CB(skb)->flush |= flush;
} }
/*
 * skb_gro_flush_final_remcsum() - finish GRO receive for a remcsum user.
 * @skb:   packet being processed
 * @pp:    result pointer from the GRO receive path (may encode an errno)
 * @flush: flush request to OR into the GRO control block
 * @grc:   remote-checksum state to clean up
 *
 * When @pp encodes -EINPROGRESS nothing is touched. Otherwise the flush
 * flag is merged, the remote-checksum state is cleaned up and the skb's
 * remcsum_offload bit is cleared.
 */
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
					       struct sk_buff **pp,
					       int flush,
					       struct gro_remcsum *grc)
{
	if (PTR_ERR(pp) == -EINPROGRESS)
		return;

	NAPI_GRO_CB(skb)->flush |= flush;
	skb_gro_remcsum_cleanup(skb, grc);
	skb->remcsum_offload = 0;
}
#else #else
static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush) static inline void skb_gro_flush_final(struct sk_buff *skb, struct sk_buff **pp, int flush)
{ {
NAPI_GRO_CB(skb)->flush |= flush; NAPI_GRO_CB(skb)->flush |= flush;
} }
/*
 * Variant on the #else side of the enclosing conditional (condition not
 * visible here): @pp is not examined, so the flush flag is merged and the
 * remote-checksum state is cleaned up unconditionally.
 */
static inline void skb_gro_flush_final_remcsum(struct sk_buff *skb,
struct sk_buff **pp,
int flush,
struct gro_remcsum *grc)
{
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_remcsum_cleanup(skb, grc);
/* the skb no longer carries a pending remote-checksum offload */
skb->remcsum_offload = 0;
}
#endif #endif
static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev,

View File

@ -128,6 +128,7 @@ struct net {
#endif #endif
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag nf_frag; struct netns_nf_frag nf_frag;
struct ctl_table_header *nf_frag_frags_hdr;
#endif #endif
struct sock *nfnl; struct sock *nfnl;
struct sock *nfnl_stash; struct sock *nfnl_stash;

View File

@ -109,7 +109,6 @@ struct netns_ipv6 {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag { struct netns_nf_frag {
struct netns_sysctl_ipv6 sysctl;
struct netns_frags frags; struct netns_frags frags;
}; };
#endif #endif

View File

@ -111,6 +111,11 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
{ {
} }
/*
 * Stub variant: reports every block as not shared; @block is ignored.
 * NOTE(review): sits next to other stubs that return NULL / do nothing,
 * presumably the classifier-disabled branch - confirm.
 */
static inline bool tcf_block_shared(struct tcf_block *block)
{
return false;
}
static inline struct Qdisc *tcf_block_q(struct tcf_block *block) static inline struct Qdisc *tcf_block_q(struct tcf_block *block)
{ {
return NULL; return NULL;

View File

@ -1857,7 +1857,8 @@ union bpf_attr {
* is resolved), the nexthop address is returned in ipv4_dst * is resolved), the nexthop address is returned in ipv4_dst
* or ipv6_dst based on family, smac is set to mac address of * or ipv6_dst based on family, smac is set to mac address of
* egress device, dmac is set to nexthop mac address, rt_metric * egress device, dmac is set to nexthop mac address, rt_metric
* is set to metric from route (IPv4/IPv6 only). * is set to metric from route (IPv4/IPv6 only), and ifindex
* is set to the device index of the nexthop from the FIB lookup.
* *
* *plen* argument is the size of the passed in struct. * *plen* argument is the size of the passed in struct.
* *flags* argument can be a combination of one or more of the * *flags* argument can be a combination of one or more of the
@ -1873,9 +1874,10 @@ union bpf_attr {
* *ctx* is either **struct xdp_md** for XDP programs or * *ctx* is either **struct xdp_md** for XDP programs or
* **struct sk_buff** tc cls_act programs. * **struct sk_buff** tc cls_act programs.
* Return * Return
* Egress device index on success, 0 if packet needs to continue * * < 0 if any input argument is invalid
* up the stack for further processing or a negative error in case * * 0 on success (packet is forwarded, nexthop neighbor exists)
* of failure. * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* * packet is not forwarded or needs assist from full stack
* *
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags) * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
* Description * Description
@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
#define BPF_FIB_LOOKUP_DIRECT BIT(0) #define BPF_FIB_LOOKUP_DIRECT BIT(0)
#define BPF_FIB_LOOKUP_OUTPUT BIT(1) #define BPF_FIB_LOOKUP_OUTPUT BIT(1)
/*
 * Result codes of a FIB lookup. Values are implicit and start at 0, so
 * BPF_FIB_LKUP_RET_SUCCESS == 0 and every other code is > 0, explaining
 * why the packet is not forwarded or needs help from the full stack.
 */
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
};
struct bpf_fib_lookup { struct bpf_fib_lookup {
/* input: network family for lookup (AF_INET, AF_INET6) /* input: network family for lookup (AF_INET, AF_INET6)
* output: network family of egress nexthop * output: network family of egress nexthop
@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
/* total length of packet from network header - used for MTU check */ /* total length of packet from network header - used for MTU check */
__u16 tot_len; __u16 tot_len;
__u32 ifindex; /* L3 device index for lookup */
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
__u32 ifindex;
union { union {
/* inputs to lookup */ /* inputs to lookup */

View File

@ -428,6 +428,60 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
return ret; return ret;
} }
/*
 * cgroup_bpf_prog_attach() - attach @prog to the cgroup named in @attr.
 * @attr:  carries target_fd (the cgroup), attach_type and attach_flags
 * @ptype: program type; not consulted here
 * @prog:  program to attach
 *
 * Return: PTR_ERR() of the cgroup lookup if target_fd cannot be resolved,
 * otherwise whatever cgroup_bpf_attach() returns. The cgroup reference
 * taken for the lookup is dropped before returning.
 */
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct cgroup *target = cgroup_get_from_fd(attr->target_fd);
	int err;

	if (IS_ERR(target))
		return PTR_ERR(target);

	err = cgroup_bpf_attach(target, prog, attr->attach_type,
				attr->attach_flags);
	cgroup_put(target);

	return err;
}
/*
 * cgroup_bpf_prog_detach() - detach a program from the cgroup in @attr.
 * @attr:  carries target_fd (the cgroup), attach_bpf_fd and attach_type
 * @ptype: program type expected behind attach_bpf_fd
 *
 * A failed lookup of attach_bpf_fd is not an error: the detach is then
 * issued with a NULL program.
 *
 * Return: PTR_ERR() of the cgroup lookup on failure, otherwise the result
 * of cgroup_bpf_detach().
 */
int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct cgroup *target = cgroup_get_from_fd(attr->target_fd);
	struct bpf_prog *victim;
	int err;

	if (IS_ERR(target))
		return PTR_ERR(target);

	victim = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	victim = IS_ERR(victim) ? NULL : victim;

	err = cgroup_bpf_detach(target, victim, attr->attach_type, 0);

	/* only a successfully looked-up program holds a reference */
	if (victim)
		bpf_prog_put(victim);
	cgroup_put(target);

	return err;
}
/*
 * cgroup_bpf_prog_query() - run a query against the cgroup in @attr.
 * @attr:  carries query.target_fd (the cgroup) and the query parameters
 * @uattr: user-space attr handed through to cgroup_bpf_query()
 *
 * Return: PTR_ERR() of the cgroup lookup on failure, otherwise the result
 * of cgroup_bpf_query(). The cgroup reference is dropped before returning.
 */
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *target = cgroup_get_from_fd(attr->query.target_fd);
	int err;

	if (IS_ERR(target))
		return PTR_ERR(target);

	err = cgroup_bpf_query(target, attr, uattr);
	cgroup_put(target);

	return err;
}
/** /**
* __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
* @sk: The socket sending or receiving traffic * @sk: The socket sending or receiving traffic

View File

@ -598,8 +598,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
bpf_fill_ill_insns(hdr, size); bpf_fill_ill_insns(hdr, size);
hdr->pages = size / PAGE_SIZE; hdr->pages = size / PAGE_SIZE;
hdr->locked = 0;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr)); PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1); start = (get_random_int() % hole) & ~(alignment - 1);
@ -1450,22 +1448,6 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
return 0; return 0;
} }
static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
{
#ifdef CONFIG_ARCH_HAS_SET_MEMORY
int i, err;
for (i = 0; i < fp->aux->func_cnt; i++) {
err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
if (err)
return err;
}
return bpf_prog_check_pages_ro_single(fp);
#endif
return 0;
}
static void bpf_prog_select_func(struct bpf_prog *fp) static void bpf_prog_select_func(struct bpf_prog *fp)
{ {
#ifndef CONFIG_BPF_JIT_ALWAYS_ON #ifndef CONFIG_BPF_JIT_ALWAYS_ON
@ -1524,17 +1506,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
* all eBPF JITs might immediately support all features. * all eBPF JITs might immediately support all features.
*/ */
*err = bpf_check_tail_call(fp); *err = bpf_check_tail_call(fp);
if (*err)
return fp;
/* Checkpoint: at this point onwards any cBPF -> eBPF or
* native eBPF program is read-only. If we failed to change
* the page attributes (e.g. allocation failure from
* splitting large pages), then reject the whole program
* in order to guarantee not ending up with any W+X pages
* from BPF side in kernel.
*/
*err = bpf_prog_check_pages_ro_locked(fp);
return fp; return fp;
} }
EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

View File

@ -72,6 +72,7 @@ struct bpf_htab {
u32 n_buckets; u32 n_buckets;
u32 elem_size; u32 elem_size;
struct bpf_sock_progs progs; struct bpf_sock_progs progs;
struct rcu_head rcu;
}; };
struct htab_elem { struct htab_elem {
@ -89,8 +90,8 @@ enum smap_psock_state {
struct smap_psock_map_entry { struct smap_psock_map_entry {
struct list_head list; struct list_head list;
struct sock **entry; struct sock **entry;
struct htab_elem *hash_link; struct htab_elem __rcu *hash_link;
struct bpf_htab *htab; struct bpf_htab __rcu *htab;
}; };
struct smap_psock { struct smap_psock {
@ -120,6 +121,7 @@ struct smap_psock {
struct bpf_prog *bpf_parse; struct bpf_prog *bpf_parse;
struct bpf_prog *bpf_verdict; struct bpf_prog *bpf_verdict;
struct list_head maps; struct list_head maps;
spinlock_t maps_lock;
/* Back reference used when sock callback trigger sockmap operations */ /* Back reference used when sock callback trigger sockmap operations */
struct sock *sock; struct sock *sock;
@ -140,6 +142,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page, static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags); int offset, size_t size, int flags);
static void bpf_tcp_close(struct sock *sk, long timeout);
static inline struct smap_psock *smap_psock_sk(const struct sock *sk) static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{ {
@ -161,7 +164,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
return !empty; return !empty;
} }
static struct proto tcp_bpf_proto; enum {
SOCKMAP_IPV4,
SOCKMAP_IPV6,
SOCKMAP_NUM_PROTS,
};
/*
 * Second index into bpf_tcp_prots[][]: which set of overridden proto
 * callbacks a socket gets. SOCKMAP_TX is chosen when the psock has a
 * bpf_tx_msg program, SOCKMAP_BASE otherwise.
 */
enum {
SOCKMAP_BASE,
SOCKMAP_TX,
SOCKMAP_NUM_CONFIGS,
};
/*
 * saved_tcpv6_prot: the sk_prot from which the SOCKMAP_IPV6 row was last
 * built; read with smp_load_acquire() and updated with smp_store_release()
 * under tcpv6_prot_lock in bpf_tcp_init().
 */
static struct proto *saved_tcpv6_prot __read_mostly;
/* Serializes rebuilding of the SOCKMAP_IPV6 row. */
static DEFINE_SPINLOCK(tcpv6_prot_lock);
/* Per-family, per-config proto tables filled in by build_protos(). */
static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
/*
 * build_protos() - derive the sockmap proto variants from @base.
 * @prot: row of SOCKMAP_NUM_CONFIGS entries to fill
 * @base: proto to copy before overriding callbacks
 *
 * SOCKMAP_BASE is @base with close, recvmsg and stream_memory_read
 * replaced; SOCKMAP_TX is SOCKMAP_BASE with sendmsg and sendpage
 * replaced as well.
 */
static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
			 struct proto *base)
{
	struct proto *base_cfg = &prot[SOCKMAP_BASE];
	struct proto *tx_cfg = &prot[SOCKMAP_TX];

	*base_cfg = *base;
	base_cfg->close = bpf_tcp_close;
	base_cfg->recvmsg = bpf_tcp_recvmsg;
	base_cfg->stream_memory_read = bpf_tcp_stream_read;

	/* TX variant starts from the finished BASE variant. */
	*tx_cfg = *base_cfg;
	tx_cfg->sendmsg = bpf_tcp_sendmsg;
	tx_cfg->sendpage = bpf_tcp_sendpage;
}
/*
 * update_sk_prot() - point @sk at the matching sockmap proto table.
 * @sk:    socket whose sk_prot is replaced
 * @psock: its psock; a non-NULL bpf_tx_msg selects the TX variant
 *
 * AF_INET6 sockets use the IPv6 row, every other family the IPv4 row.
 */
static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
{
	int fam_idx = SOCKMAP_IPV4;
	int cfg_idx = SOCKMAP_BASE;

	if (sk->sk_family == AF_INET6)
		fam_idx = SOCKMAP_IPV6;
	if (psock->bpf_tx_msg)
		cfg_idx = SOCKMAP_TX;

	sk->sk_prot = &bpf_tcp_prots[fam_idx][cfg_idx];
}
static int bpf_tcp_init(struct sock *sk) static int bpf_tcp_init(struct sock *sk)
{ {
struct smap_psock *psock; struct smap_psock *psock;
@ -181,14 +219,17 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close; psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot; psock->sk_proto = sk->sk_prot;
if (psock->bpf_tx_msg) { /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg; if (sk->sk_family == AF_INET6 &&
tcp_bpf_proto.sendpage = bpf_tcp_sendpage; unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg; spin_lock_bh(&tcpv6_prot_lock);
tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read; if (likely(sk->sk_prot != saved_tcpv6_prot)) {
build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
}
spin_unlock_bh(&tcpv6_prot_lock);
} }
update_sk_prot(sk, psock);
sk->sk_prot = &tcp_bpf_proto;
rcu_read_unlock(); rcu_read_unlock();
return 0; return 0;
} }
@ -219,16 +260,54 @@ static void bpf_tcp_release(struct sock *sk)
rcu_read_unlock(); rcu_read_unlock();
} }
/*
 * lookup_elem_raw() - find an element in one hash bucket chain.
 * @head:     bucket chain to walk
 * @hash:     full hash of the wanted key
 * @key:      key bytes to compare
 * @key_size: number of key bytes
 *
 * Walks the chain with the RCU list iterator; the stored hash is compared
 * first so memcmp() only runs on hash matches.
 *
 * Return: the first matching element, or NULL if none matches.
 */
static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
					 u32 hash, void *key, u32 key_size)
{
	struct htab_elem *cur;

	hlist_for_each_entry_rcu(cur, head, hash_node) {
		if (cur->hash != hash)
			continue;
		if (memcmp(&cur->key, key, key_size) == 0)
			return cur;
	}

	return NULL;
}
/*
 * __select_bucket() - map @hash to its bucket in @htab.
 *
 * The index is hash & (n_buckets - 1).
 * NOTE(review): this only spreads over all buckets if n_buckets is a
 * power of two - confirm at the allocation site.
 */
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
{
	u32 slot = hash & (htab->n_buckets - 1);

	return htab->buckets + slot;
}
/* select_bucket() - like __select_bucket(), but return the chain head. */
static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
{
	struct bucket *b = __select_bucket(htab, hash);

	return &b->head;
}
static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
{ {
atomic_dec(&htab->count); atomic_dec(&htab->count);
kfree_rcu(l, rcu); kfree_rcu(l, rcu);
} }
static struct smap_psock_map_entry *psock_map_pop(struct sock *sk,
struct smap_psock *psock)
{
struct smap_psock_map_entry *e;
spin_lock_bh(&psock->maps_lock);
e = list_first_entry_or_null(&psock->maps,
struct smap_psock_map_entry,
list);
if (e)
list_del(&e->list);
spin_unlock_bh(&psock->maps_lock);
return e;
}
static void bpf_tcp_close(struct sock *sk, long timeout) static void bpf_tcp_close(struct sock *sk, long timeout)
{ {
void (*close_fun)(struct sock *sk, long timeout); void (*close_fun)(struct sock *sk, long timeout);
struct smap_psock_map_entry *e, *tmp; struct smap_psock_map_entry *e;
struct sk_msg_buff *md, *mtmp; struct sk_msg_buff *md, *mtmp;
struct smap_psock *psock; struct smap_psock *psock;
struct sock *osk; struct sock *osk;
@ -247,7 +326,6 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
*/ */
close_fun = psock->save_close; close_fun = psock->save_close;
write_lock_bh(&sk->sk_callback_lock);
if (psock->cork) { if (psock->cork) {
free_start_sg(psock->sock, psock->cork); free_start_sg(psock->sock, psock->cork);
kfree(psock->cork); kfree(psock->cork);
@ -260,20 +338,38 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
kfree(md); kfree(md);
} }
list_for_each_entry_safe(e, tmp, &psock->maps, list) { e = psock_map_pop(sk, psock);
while (e) {
if (e->entry) { if (e->entry) {
osk = cmpxchg(e->entry, sk, NULL); osk = cmpxchg(e->entry, sk, NULL);
if (osk == sk) { if (osk == sk) {
list_del(&e->list);
smap_release_sock(psock, sk); smap_release_sock(psock, sk);
} }
} else { } else {
hlist_del_rcu(&e->hash_link->hash_node); struct htab_elem *link = rcu_dereference(e->hash_link);
smap_release_sock(psock, e->hash_link->sk); struct bpf_htab *htab = rcu_dereference(e->htab);
free_htab_elem(e->htab, e->hash_link); struct hlist_head *head;
struct htab_elem *l;
struct bucket *b;
b = __select_bucket(htab, link->hash);
head = &b->head;
raw_spin_lock_bh(&b->lock);
l = lookup_elem_raw(head,
link->hash, link->key,
htab->map.key_size);
/* If another thread deleted this object skip deletion.
* The refcnt on psock may or may not be zero.
*/
if (l) {
hlist_del_rcu(&link->hash_node);
smap_release_sock(psock, link->sk);
free_htab_elem(htab, link);
}
raw_spin_unlock_bh(&b->lock);
} }
e = psock_map_pop(sk, psock);
} }
write_unlock_bh(&sk->sk_callback_lock);
rcu_read_unlock(); rcu_read_unlock();
close_fun(sk, timeout); close_fun(sk, timeout);
} }
@ -1111,8 +1207,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
static int bpf_tcp_ulp_register(void) static int bpf_tcp_ulp_register(void)
{ {
tcp_bpf_proto = tcp_prot; build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
tcp_bpf_proto.close = bpf_tcp_close;
/* Once BPF TX ULP is registered it is never unregistered. It /* Once BPF TX ULP is registered it is never unregistered. It
* will be in the ULP list for the lifetime of the system. Doing * will be in the ULP list for the lifetime of the system. Doing
* duplicate registers is not a problem. * duplicate registers is not a problem.
@ -1357,7 +1452,9 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
{ {
if (refcount_dec_and_test(&psock->refcnt)) { if (refcount_dec_and_test(&psock->refcnt)) {
tcp_cleanup_ulp(sock); tcp_cleanup_ulp(sock);
write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock); smap_stop_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
clear_bit(SMAP_TX_RUNNING, &psock->state); clear_bit(SMAP_TX_RUNNING, &psock->state);
rcu_assign_sk_user_data(sock, NULL); rcu_assign_sk_user_data(sock, NULL);
call_rcu_sched(&psock->rcu, smap_destroy_psock); call_rcu_sched(&psock->rcu, smap_destroy_psock);
@ -1508,6 +1605,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock, int node)
INIT_LIST_HEAD(&psock->maps); INIT_LIST_HEAD(&psock->maps);
INIT_LIST_HEAD(&psock->ingress); INIT_LIST_HEAD(&psock->ingress);
refcount_set(&psock->refcnt, 1); refcount_set(&psock->refcnt, 1);
spin_lock_init(&psock->maps_lock);
rcu_assign_sk_user_data(sock, psock); rcu_assign_sk_user_data(sock, psock);
sock_hold(sock); sock_hold(sock);
@ -1564,18 +1662,32 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(err); return ERR_PTR(err);
} }
static void smap_list_remove(struct smap_psock *psock, static void smap_list_map_remove(struct smap_psock *psock,
struct sock **entry, struct sock **entry)
struct htab_elem *hash_link)
{ {
struct smap_psock_map_entry *e, *tmp; struct smap_psock_map_entry *e, *tmp;
spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) { list_for_each_entry_safe(e, tmp, &psock->maps, list) {
if (e->entry == entry || e->hash_link == hash_link) { if (e->entry == entry)
list_del(&e->list); list_del(&e->list);
break;
}
} }
spin_unlock_bh(&psock->maps_lock);
}
static void smap_list_hash_remove(struct smap_psock *psock,
struct htab_elem *hash_link)
{
struct smap_psock_map_entry *e, *tmp;
spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
struct htab_elem *c = rcu_dereference(e->hash_link);
if (c == hash_link)
list_del(&e->list);
}
spin_unlock_bh(&psock->maps_lock);
} }
static void sock_map_free(struct bpf_map *map) static void sock_map_free(struct bpf_map *map)
@ -1601,7 +1713,6 @@ static void sock_map_free(struct bpf_map *map)
if (!sock) if (!sock)
continue; continue;
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the /* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens * sk_callback_lock before this case but after xchg happens
@ -1609,10 +1720,9 @@ static void sock_map_free(struct bpf_map *map)
* to be null and queued for garbage collection. * to be null and queued for garbage collection.
*/ */
if (likely(psock)) { if (likely(psock)) {
smap_list_remove(psock, &stab->sock_map[i], NULL); smap_list_map_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
} }
write_unlock_bh(&sock->sk_callback_lock);
} }
rcu_read_unlock(); rcu_read_unlock();
@ -1661,17 +1771,15 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (!sock) if (!sock)
return -EINVAL; return -EINVAL;
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
if (!psock) if (!psock)
goto out; goto out;
if (psock->bpf_parse) if (psock->bpf_parse)
smap_stop_sock(psock, sock); smap_stop_sock(psock, sock);
smap_list_remove(psock, &stab->sock_map[k], NULL); smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
out: out:
write_unlock_bh(&sock->sk_callback_lock);
return 0; return 0;
} }
@ -1752,7 +1860,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
} }
} }
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
/* 2. Do not allow inheriting programs if psock exists and has /* 2. Do not allow inheriting programs if psock exists and has
@ -1809,7 +1916,9 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
if (err) if (err)
goto out_free; goto out_free;
smap_init_progs(psock, verdict, parse); smap_init_progs(psock, verdict, parse);
write_lock_bh(&sock->sk_callback_lock);
smap_start_sock(psock, sock); smap_start_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
} }
/* 4. Place psock in sockmap for use and stop any programs on /* 4. Place psock in sockmap for use and stop any programs on
@ -1819,9 +1928,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
*/ */
if (map_link) { if (map_link) {
e->entry = map_link; e->entry = map_link;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps); list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
} }
write_unlock_bh(&sock->sk_callback_lock);
return err; return err;
out_free: out_free:
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
@ -1832,7 +1942,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
} }
if (tx_msg) if (tx_msg)
bpf_prog_put(tx_msg); bpf_prog_put(tx_msg);
write_unlock_bh(&sock->sk_callback_lock);
kfree(e); kfree(e);
return err; return err;
} }
@ -1869,10 +1978,8 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
if (osock) { if (osock) {
struct smap_psock *opsock = smap_psock_sk(osock); struct smap_psock *opsock = smap_psock_sk(osock);
write_lock_bh(&osock->sk_callback_lock); smap_list_map_remove(opsock, &stab->sock_map[i]);
smap_list_remove(opsock, &stab->sock_map[i], NULL);
smap_release_sock(opsock, osock); smap_release_sock(opsock, osock);
write_unlock_bh(&osock->sk_callback_lock);
} }
out: out:
return err; return err;
@ -1915,6 +2022,24 @@ int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type)
return 0; return 0;
} }
int sockmap_get_from_fd(const union bpf_attr *attr, int type,
struct bpf_prog *prog)
{
int ufd = attr->target_fd;
struct bpf_map *map;
struct fd f;
int err;
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
err = sock_map_prog(map, prog, attr->attach_type);
fdput(f);
return err;
}
static void *sock_map_lookup(struct bpf_map *map, void *key) static void *sock_map_lookup(struct bpf_map *map, void *key)
{ {
return NULL; return NULL;
@ -2043,14 +2168,13 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
return ERR_PTR(err); return ERR_PTR(err);
} }
static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash) static void __bpf_htab_free(struct rcu_head *rcu)
{ {
return &htab->buckets[hash & (htab->n_buckets - 1)]; struct bpf_htab *htab;
}
static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash) htab = container_of(rcu, struct bpf_htab, rcu);
{ bpf_map_area_free(htab->buckets);
return &__select_bucket(htab, hash)->head; kfree(htab);
} }
static void sock_hash_free(struct bpf_map *map) static void sock_hash_free(struct bpf_map *map)
@ -2069,16 +2193,18 @@ static void sock_hash_free(struct bpf_map *map)
*/ */
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < htab->n_buckets; i++) { for (i = 0; i < htab->n_buckets; i++) {
struct hlist_head *head = select_bucket(htab, i); struct bucket *b = __select_bucket(htab, i);
struct hlist_head *head;
struct hlist_node *n; struct hlist_node *n;
struct htab_elem *l; struct htab_elem *l;
raw_spin_lock_bh(&b->lock);
head = &b->head;
hlist_for_each_entry_safe(l, n, head, hash_node) { hlist_for_each_entry_safe(l, n, head, hash_node) {
struct sock *sock = l->sk; struct sock *sock = l->sk;
struct smap_psock *psock; struct smap_psock *psock;
hlist_del_rcu(&l->hash_node); hlist_del_rcu(&l->hash_node);
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get /* This check handles a racing sock event that can get
* the sk_callback_lock before this case but after xchg * the sk_callback_lock before this case but after xchg
@ -2086,16 +2212,15 @@ static void sock_hash_free(struct bpf_map *map)
* (psock) to be null and queued for garbage collection. * (psock) to be null and queued for garbage collection.
*/ */
if (likely(psock)) { if (likely(psock)) {
smap_list_remove(psock, NULL, l); smap_list_hash_remove(psock, l);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
} }
write_unlock_bh(&sock->sk_callback_lock); free_htab_elem(htab, l);
kfree(l);
} }
raw_spin_unlock_bh(&b->lock);
} }
rcu_read_unlock(); rcu_read_unlock();
bpf_map_area_free(htab->buckets); call_rcu(&htab->rcu, __bpf_htab_free);
kfree(htab);
} }
static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab, static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
@ -2122,19 +2247,6 @@ static struct htab_elem *alloc_sock_hash_elem(struct bpf_htab *htab,
return l_new; return l_new;
} }
/* Walk one hash bucket and return the element matching @hash and @key.
 *
 * An element matches when its stored hash equals @hash and the first
 * @key_size bytes of its key compare equal to @key. Returns NULL when no
 * element in @head matches.
 */
static struct htab_elem *lookup_elem_raw(struct hlist_head *head,
					 u32 hash, void *key, u32 key_size)
{
	struct htab_elem *cur;

	hlist_for_each_entry_rcu(cur, head, hash_node) {
		/* cheap hash comparison first, byte compare only on a hit */
		if (cur->hash != hash)
			continue;
		if (memcmp(&cur->key, key, key_size) == 0)
			return cur;
	}

	return NULL;
}
static inline u32 htab_map_hash(const void *key, u32 key_len) static inline u32 htab_map_hash(const void *key, u32 key_len)
{ {
return jhash(key, key_len, 0); return jhash(key, key_len, 0);
@ -2254,9 +2366,12 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
goto bucket_err; goto bucket_err;
} }
e->hash_link = l_new; rcu_assign_pointer(e->hash_link, l_new);
e->htab = container_of(map, struct bpf_htab, map); rcu_assign_pointer(e->htab,
container_of(map, struct bpf_htab, map));
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps); list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
/* add new element to the head of the list, so that /* add new element to the head of the list, so that
* concurrent search will find it before old elem * concurrent search will find it before old elem
@ -2266,7 +2381,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
psock = smap_psock_sk(l_old->sk); psock = smap_psock_sk(l_old->sk);
hlist_del_rcu(&l_old->hash_node); hlist_del_rcu(&l_old->hash_node);
smap_list_remove(psock, NULL, l_old); smap_list_hash_remove(psock, l_old);
smap_release_sock(psock, l_old->sk); smap_release_sock(psock, l_old->sk);
free_htab_elem(htab, l_old); free_htab_elem(htab, l_old);
} }
@ -2326,7 +2441,6 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
struct smap_psock *psock; struct smap_psock *psock;
hlist_del_rcu(&l->hash_node); hlist_del_rcu(&l->hash_node);
write_lock_bh(&sock->sk_callback_lock);
psock = smap_psock_sk(sock); psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the /* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens * sk_callback_lock before this case but after xchg happens
@ -2334,10 +2448,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
* to be null and queued for garbage collection. * to be null and queued for garbage collection.
*/ */
if (likely(psock)) { if (likely(psock)) {
smap_list_remove(psock, NULL, l); smap_list_hash_remove(psock, l);
smap_release_sock(psock, sock); smap_release_sock(psock, sock);
} }
write_unlock_bh(&sock->sk_callback_lock);
free_htab_elem(htab, l); free_htab_elem(htab, l);
ret = 0; ret = 0;
} }
@ -2383,6 +2496,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_get_next_key = sock_hash_get_next_key, .map_get_next_key = sock_hash_get_next_key,
.map_update_elem = sock_hash_update_elem, .map_update_elem = sock_hash_update_elem,
.map_delete_elem = sock_hash_delete_elem, .map_delete_elem = sock_hash_delete_elem,
.map_release_uref = sock_map_release,
}; };
BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock,

View File

@ -1483,8 +1483,6 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
return err; return err;
} }
#ifdef CONFIG_CGROUP_BPF
static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
enum bpf_attach_type attach_type) enum bpf_attach_type attach_type)
{ {
@ -1499,40 +1497,6 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
#define BPF_PROG_ATTACH_LAST_FIELD attach_flags #define BPF_PROG_ATTACH_LAST_FIELD attach_flags
/* Attach or detach a program on the map referred to by attr->target_fd.
 *
 * When @attach is true the program is fetched from attr->attach_bpf_fd
 * with the expected @type; otherwise sock_map_prog() is called with a
 * NULL program. Returns 0 on success or a negative error; on a
 * sock_map_prog() failure the program reference taken here is dropped.
 */
static int sockmap_get_from_fd(const union bpf_attr *attr,
			       int type, bool attach)
{
	struct bpf_prog *prog = NULL;
	struct bpf_map *map;
	struct fd f;
	int err;

	f = fdget(attr->target_fd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (attach) {
		prog = bpf_prog_get_type(attr->attach_bpf_fd, type);
		if (IS_ERR(prog)) {
			fdput(f);
			return PTR_ERR(prog);
		}
	}

	err = sock_map_prog(map, prog, attr->attach_type);

	/* the fd is released first on every path, then the prog on error */
	fdput(f);
	if (err && prog)
		bpf_prog_put(prog);

	return err;
}
#define BPF_F_ATTACH_MASK \ #define BPF_F_ATTACH_MASK \
(BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
@ -1540,7 +1504,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
{ {
enum bpf_prog_type ptype; enum bpf_prog_type ptype;
struct bpf_prog *prog; struct bpf_prog *prog;
struct cgroup *cgrp;
int ret; int ret;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
@ -1577,12 +1540,15 @@ static int bpf_prog_attach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_DEVICE; ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break; break;
case BPF_SK_MSG_VERDICT: case BPF_SK_MSG_VERDICT:
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, true); ptype = BPF_PROG_TYPE_SK_MSG;
break;
case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT: case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, true); ptype = BPF_PROG_TYPE_SK_SKB;
break;
case BPF_LIRC_MODE2: case BPF_LIRC_MODE2:
return lirc_prog_attach(attr); ptype = BPF_PROG_TYPE_LIRC_MODE2;
break;
default: default:
return -EINVAL; return -EINVAL;
} }
@ -1596,18 +1562,20 @@ static int bpf_prog_attach(const union bpf_attr *attr)
return -EINVAL; return -EINVAL;
} }
cgrp = cgroup_get_from_fd(attr->target_fd); switch (ptype) {
if (IS_ERR(cgrp)) { case BPF_PROG_TYPE_SK_SKB:
bpf_prog_put(prog); case BPF_PROG_TYPE_SK_MSG:
return PTR_ERR(cgrp); ret = sockmap_get_from_fd(attr, ptype, prog);
break;
case BPF_PROG_TYPE_LIRC_MODE2:
ret = lirc_prog_attach(attr, prog);
break;
default:
ret = cgroup_bpf_prog_attach(attr, ptype, prog);
} }
ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
attr->attach_flags);
if (ret) if (ret)
bpf_prog_put(prog); bpf_prog_put(prog);
cgroup_put(cgrp);
return ret; return ret;
} }
@ -1616,9 +1584,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
static int bpf_prog_detach(const union bpf_attr *attr) static int bpf_prog_detach(const union bpf_attr *attr)
{ {
enum bpf_prog_type ptype; enum bpf_prog_type ptype;
struct bpf_prog *prog;
struct cgroup *cgrp;
int ret;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
@ -1651,29 +1616,17 @@ static int bpf_prog_detach(const union bpf_attr *attr)
ptype = BPF_PROG_TYPE_CGROUP_DEVICE; ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
break; break;
case BPF_SK_MSG_VERDICT: case BPF_SK_MSG_VERDICT:
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, false); return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
case BPF_SK_SKB_STREAM_PARSER: case BPF_SK_SKB_STREAM_PARSER:
case BPF_SK_SKB_STREAM_VERDICT: case BPF_SK_SKB_STREAM_VERDICT:
return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, false); return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_SKB, NULL);
case BPF_LIRC_MODE2: case BPF_LIRC_MODE2:
return lirc_prog_detach(attr); return lirc_prog_detach(attr);
default: default:
return -EINVAL; return -EINVAL;
} }
cgrp = cgroup_get_from_fd(attr->target_fd); return cgroup_bpf_prog_detach(attr, ptype);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
if (IS_ERR(prog))
prog = NULL;
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0);
if (prog)
bpf_prog_put(prog);
cgroup_put(cgrp);
return ret;
} }
#define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt #define BPF_PROG_QUERY_LAST_FIELD query.prog_cnt
@ -1681,9 +1634,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
static int bpf_prog_query(const union bpf_attr *attr, static int bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr) union bpf_attr __user *uattr)
{ {
struct cgroup *cgrp;
int ret;
if (!capable(CAP_NET_ADMIN)) if (!capable(CAP_NET_ADMIN))
return -EPERM; return -EPERM;
if (CHECK_ATTR(BPF_PROG_QUERY)) if (CHECK_ATTR(BPF_PROG_QUERY))
@ -1711,14 +1661,9 @@ static int bpf_prog_query(const union bpf_attr *attr,
default: default:
return -EINVAL; return -EINVAL;
} }
cgrp = cgroup_get_from_fd(attr->query.target_fd);
if (IS_ERR(cgrp)) return cgroup_bpf_prog_query(attr, uattr);
return PTR_ERR(cgrp);
ret = cgroup_bpf_query(cgrp, attr, uattr);
cgroup_put(cgrp);
return ret;
} }
#endif /* CONFIG_CGROUP_BPF */
#define BPF_PROG_TEST_RUN_LAST_FIELD test.duration #define BPF_PROG_TEST_RUN_LAST_FIELD test.duration
@ -2365,7 +2310,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET: case BPF_OBJ_GET:
err = bpf_obj_get(&attr); err = bpf_obj_get(&attr);
break; break;
#ifdef CONFIG_CGROUP_BPF
case BPF_PROG_ATTACH: case BPF_PROG_ATTACH:
err = bpf_prog_attach(&attr); err = bpf_prog_attach(&attr);
break; break;
@ -2375,7 +2319,6 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_PROG_QUERY: case BPF_PROG_QUERY:
err = bpf_prog_query(&attr, uattr); err = bpf_prog_query(&attr, uattr);
break; break;
#endif
case BPF_PROG_TEST_RUN: case BPF_PROG_TEST_RUN:
err = bpf_prog_test_run(&attr, uattr); err = bpf_prog_test_run(&attr, uattr);
break; break;

View File

@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = {
{ /* Mainly checking JIT here. */ { /* Mainly checking JIT here. */
"BPF_MAXINSNS: Ctx heavy transformations", "BPF_MAXINSNS: Ctx heavy transformations",
{ }, { },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC, CLASSIC,
#endif
{ }, { },
{ {
{ 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) },
{ 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }
}, },
.fill_helper = bpf_fill_maxinsns6, .fill_helper = bpf_fill_maxinsns6,
.expected_errcode = -ENOTSUPP,
}, },
{ /* Mainly checking JIT here. */ { /* Mainly checking JIT here. */
"BPF_MAXINSNS: Call heavy transformations", "BPF_MAXINSNS: Call heavy transformations",
{ }, { },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
#else
CLASSIC | FLAG_NO_DATA, CLASSIC | FLAG_NO_DATA,
#endif
{ }, { },
{ { 1, 0 }, { 10, 0 } }, { { 1, 0 }, { 10, 0 } },
.fill_helper = bpf_fill_maxinsns7, .fill_helper = bpf_fill_maxinsns7,
.expected_errcode = -ENOTSUPP,
}, },
{ /* Mainly checking JIT here. */ { /* Mainly checking JIT here. */
"BPF_MAXINSNS: Jump heavy test", "BPF_MAXINSNS: Jump heavy test",
@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = {
{ {
"BPF_MAXINSNS: exec all MSH", "BPF_MAXINSNS: exec all MSH",
{ }, { },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC, CLASSIC,
#endif
{ 0xfa, 0xfb, 0xfc, 0xfd, }, { 0xfa, 0xfb, 0xfc, 0xfd, },
{ { 4, 0xababab83 } }, { { 4, 0xababab83 } },
.fill_helper = bpf_fill_maxinsns13, .fill_helper = bpf_fill_maxinsns13,
.expected_errcode = -ENOTSUPP,
}, },
{ {
"BPF_MAXINSNS: ld_abs+get_processor_id", "BPF_MAXINSNS: ld_abs+get_processor_id",
{ }, { },
#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390)
CLASSIC | FLAG_EXPECTED_FAIL,
#else
CLASSIC, CLASSIC,
#endif
{ }, { },
{ { 1, 0xbee } }, { { 1, 0xbee } },
.fill_helper = bpf_fill_ld_abs_get_processor_id, .fill_helper = bpf_fill_ld_abs_get_processor_id,
.expected_errcode = -ENOTSUPP,
}, },
/* /*
* LD_IND / LD_ABS on fragmented SKBs * LD_IND / LD_ABS on fragmented SKBs

View File

@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
out: out:
NAPI_GRO_CB(skb)->flush |= flush; skb_gro_flush_final(skb, pp, flush);
return pp; return pp;
} }

View File

@ -20,11 +20,7 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/ obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/ obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_NET) += ipv6/ obj-$(CONFIG_NET) += ipv6/
ifneq ($(CC_CAN_LINK),y)
$(warning CC cannot link executables. Skipping bpfilter.)
else
obj-$(CONFIG_BPFILTER) += bpfilter/ obj-$(CONFIG_BPFILTER) += bpfilter/
endif
obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/ obj-$(CONFIG_BRIDGE) += bridge/

View File

@ -1,6 +1,5 @@
menuconfig BPFILTER menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)" bool "BPF based packet filtering framework (BPFILTER)"
default n
depends on NET && BPF && INET depends on NET && BPF && INET
help help
This builds experimental bpfilter framework that is aiming to This builds experimental bpfilter framework that is aiming to
@ -9,6 +8,7 @@ menuconfig BPFILTER
if BPFILTER if BPFILTER
config BPFILTER_UMH config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper" tristate "bpfilter kernel module with user mode helper"
depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
default m default m
help help
This builds bpfilter kernel module with embedded user mode helper This builds bpfilter kernel module with embedded user mode helper

View File

@ -15,20 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y)
HOSTLDFLAGS += -static HOSTLDFLAGS += -static
endif endif
# a bit of elf magic to convert bpfilter_umh binary into a binary blob $(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
# inside bpfilter_umh.o elf file referenced by
# _binary_net_bpfilter_bpfilter_umh_start symbol
# which bpfilter_kern.c passes further into umh blob loader at run-time
quiet_cmd_copy_umh = GEN $@
cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
$(OBJCOPY) -I binary \
`LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \
|awk -F' |,' '/file format/{print "-O",$$NF} \
/^architecture:/{print "-B",$$2}'` \
--rename-section .data=.init.rodata $< $@
$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
$(call cmd,copy_umh)
obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
bpfilter-objs += bpfilter_kern.o bpfilter_umh.o bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o

View File

@ -10,11 +10,8 @@
#include <linux/file.h> #include <linux/file.h>
#include "msgfmt.h" #include "msgfmt.h"
#define UMH_start _binary_net_bpfilter_bpfilter_umh_start extern char bpfilter_umh_start;
#define UMH_end _binary_net_bpfilter_bpfilter_umh_end extern char bpfilter_umh_end;
extern char UMH_start;
extern char UMH_end;
static struct umh_info info; static struct umh_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */ /* since ip_getsockopt() can run in parallel, serialize access to umh */
@ -93,7 +90,9 @@ static int __init load_umh(void)
int err; int err;
/* fork usermode process */ /* fork usermode process */
err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info); err = fork_usermode_blob(&bpfilter_umh_start,
&bpfilter_umh_end - &bpfilter_umh_start,
&info);
if (err) if (err)
return err; return err;
pr_info("Loaded bpfilter_umh pid %d\n", info.pid); pr_info("Loaded bpfilter_umh pid %d\n", info.pid);

View File

@ -0,0 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Embed the bpfilter_umh file as a byte blob bracketed by two global labels. */
/* Emit what follows into .init.rodata; the "a" flag marks it allocatable. */
.section .init.rodata, "a"
/* Global label at the first byte of the embedded file. */
.global bpfilter_umh_start
bpfilter_umh_start:
/* Copy the raw bytes of the named file into the section at this point. */
.incbin "net/bpfilter/bpfilter_umh"
/* Global label immediately after the last embedded byte. */
.global bpfilter_umh_end
bpfilter_umh_end:

View File

@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_qlen < 0) if (ifr->ifr_qlen < 0)
return -EINVAL; return -EINVAL;
if (dev->tx_queue_len ^ ifr->ifr_qlen) { if (dev->tx_queue_len ^ ifr->ifr_qlen) {
unsigned int orig_len = dev->tx_queue_len; err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
if (err)
dev->tx_queue_len = ifr->ifr_qlen;
err = call_netdevice_notifiers(
NETDEV_CHANGE_TX_QUEUE_LEN, dev);
err = notifier_to_errno(err);
if (err) {
dev->tx_queue_len = orig_len;
return err; return err;
}
} }
return 0; return 0;

View File

@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->mark && r->mark != rule->mark) if (rule->mark && r->mark != rule->mark)
continue; continue;
if (rule->suppress_ifgroup != -1 &&
r->suppress_ifgroup != rule->suppress_ifgroup)
continue;
if (rule->suppress_prefixlen != -1 &&
r->suppress_prefixlen != rule->suppress_prefixlen)
continue;
if (rule->mark_mask && r->mark_mask != rule->mark_mask) if (rule->mark_mask && r->mark_mask != rule->mark_mask)
continue; continue;
@ -436,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->ip_proto && r->ip_proto != rule->ip_proto) if (rule->ip_proto && r->ip_proto != rule->ip_proto)
continue; continue;
if (rule->proto && r->proto != rule->proto)
continue;
if (fib_rule_port_range_set(&rule->sport_range) && if (fib_rule_port_range_set(&rule->sport_range) &&
!fib_rule_port_range_compare(&r->sport_range, !fib_rule_port_range_compare(&r->sport_range,
&rule->sport_range)) &rule->sport_range))
@ -645,6 +656,73 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
return err; return err;
} }
/* Report whether @ops->rules_list already holds a rule equal to @rule.
 *
 * Every field below is compared for exact equality, and the
 * family-specific ops->compare() callback has the final say using the
 * request header @frh and attributes @tb.
 *
 * Returns 1 when a matching rule is found, 0 otherwise.
 */
static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
		       struct nlattr **tb, struct fib_rule *rule)
{
	struct fib_rule *cur;

	list_for_each_entry(cur, &ops->rules_list, list) {
		if (cur->action != rule->action ||
		    cur->table != rule->table ||
		    cur->pref != rule->pref)
			continue;

		/* interface names are compared over the whole IFNAMSIZ buffer */
		if (memcmp(cur->iifname, rule->iifname, IFNAMSIZ) ||
		    memcmp(cur->oifname, rule->oifname, IFNAMSIZ))
			continue;

		if (cur->mark != rule->mark ||
		    cur->suppress_ifgroup != rule->suppress_ifgroup ||
		    cur->suppress_prefixlen != rule->suppress_prefixlen ||
		    cur->mark_mask != rule->mark_mask ||
		    cur->tun_id != rule->tun_id ||
		    cur->fr_net != rule->fr_net ||
		    cur->l3mdev != rule->l3mdev)
			continue;

		if (!uid_eq(cur->uid_range.start, rule->uid_range.start) ||
		    !uid_eq(cur->uid_range.end, rule->uid_range.end))
			continue;

		if (cur->ip_proto != rule->ip_proto ||
		    cur->proto != rule->proto)
			continue;

		if (!fib_rule_port_range_compare(&cur->sport_range,
						 &rule->sport_range) ||
		    !fib_rule_port_range_compare(&cur->dport_range,
						 &rule->dport_range))
			continue;

		/* generic fields agree; let the address family decide */
		if (ops->compare(cur, frh, tb))
			return 1;
	}

	return 0;
}
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
@ -679,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout; goto errout;
if ((nlh->nlmsg_flags & NLM_F_EXCL) && if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
rule_find(ops, frh, tb, rule, user_priority)) { rule_exists(ops, frh, tb, rule)) {
err = -EEXIST; err = -EEXIST;
goto errout_free; goto errout_free;
} }

View File

@ -4073,8 +4073,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN); memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0; params->h_vlan_TCI = 0;
params->h_vlan_proto = 0; params->h_vlan_proto = 0;
params->ifindex = dev->ifindex;
return dev->ifindex; return 0;
} }
#endif #endif
@ -4098,7 +4099,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* verify forwarding is enabled on this interface */ /* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev); in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev))) if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
return 0; return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) { if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1; fl4.flowi4_iif = 1;
@ -4123,7 +4124,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = fib_get_table(net, tbid); tb = fib_get_table(net, tbid);
if (unlikely(!tb)) if (unlikely(!tb))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF); err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else { } else {
@ -4135,8 +4136,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF); err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
} }
if (err || res.type != RTN_UNICAST) if (err) {
return 0; /* map fib lookup errors to RTN_ type */
if (err == -EINVAL)
return BPF_FIB_LKUP_RET_BLACKHOLE;
if (err == -EHOSTUNREACH)
return BPF_FIB_LKUP_RET_UNREACHABLE;
if (err == -EACCES)
return BPF_FIB_LKUP_RET_PROHIBIT;
return BPF_FIB_LKUP_RET_NOT_FWDED;
}
if (res.type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
if (res.fi->fib_nhs > 1) if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL); fib_select_path(net, &res, &fl4, NULL);
@ -4144,19 +4157,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) { if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst); mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu) if (params->tot_len > mtu)
return 0; return BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
nh = &res.fi->fib_nh[res.nh_sel]; nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */ /* do not handle lwt encaps right now */
if (nh->nh_lwtstate) if (nh->nh_lwtstate)
return 0; return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nh->nh_dev; dev = nh->nh_dev;
if (unlikely(!dev))
return 0;
if (nh->nh_gw) if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw; params->ipv4_dst = nh->nh_gw;
@ -4166,10 +4176,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* rcu_read_lock_bh is not needed here * rcu_read_lock_bh is not needed here
*/ */
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
if (neigh) if (!neigh)
return bpf_fib_set_fwd_params(params, neigh, dev); return BPF_FIB_LKUP_RET_NO_NEIGH;
return 0; return bpf_fib_set_fwd_params(params, neigh, dev);
} }
#endif #endif
@ -4190,7 +4200,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* link local addresses are never forwarded */ /* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src)) if (rt6_need_strict(dst) || rt6_need_strict(src))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
dev = dev_get_by_index_rcu(net, params->ifindex); dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev)) if (unlikely(!dev))
@ -4198,7 +4208,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
idev = __in6_dev_get_safely(dev); idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding)) if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
return 0; return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) { if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1; fl6.flowi6_iif = 1;
@ -4225,7 +4235,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = ipv6_stub->fib6_get_table(net, tbid); tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb)) if (unlikely(!tb))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict); f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else { } else {
@ -4238,11 +4248,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
} }
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry)) if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
return 0; return BPF_FIB_LKUP_RET_NOT_FWDED;
if (unlikely(f6i->fib6_flags & RTF_REJECT || if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
f6i->fib6_type != RTN_UNICAST)) switch (f6i->fib6_type) {
return 0; case RTN_BLACKHOLE:
return BPF_FIB_LKUP_RET_BLACKHOLE;
case RTN_UNREACHABLE:
return BPF_FIB_LKUP_RET_UNREACHABLE;
case RTN_PROHIBIT:
return BPF_FIB_LKUP_RET_PROHIBIT;
default:
return BPF_FIB_LKUP_RET_NOT_FWDED;
}
}
if (f6i->fib6_type != RTN_UNICAST)
return BPF_FIB_LKUP_RET_NOT_FWDED;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0) if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6, f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@ -4252,11 +4274,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) { if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src); mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu) if (params->tot_len > mtu)
return 0; return BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
if (f6i->fib6_nh.nh_lwtstate) if (f6i->fib6_nh.nh_lwtstate)
return 0; return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_flags & RTF_GATEWAY) if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw; *dst = f6i->fib6_nh.nh_gw;
@ -4270,10 +4292,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
*/ */
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128, neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev); ndisc_hashfn, dst, dev);
if (neigh) if (!neigh)
return bpf_fib_set_fwd_params(params, neigh, dev); return BPF_FIB_LKUP_RET_NO_NEIGH;
return 0; return bpf_fib_set_fwd_params(params, neigh, dev);
} }
#endif #endif
@ -4315,7 +4337,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags) struct bpf_fib_lookup *, params, int, plen, u32, flags)
{ {
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
int index = -EAFNOSUPPORT; int rc = -EAFNOSUPPORT;
if (plen < sizeof(*params)) if (plen < sizeof(*params))
return -EINVAL; return -EINVAL;
@ -4326,25 +4348,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
switch (params->family) { switch (params->family) {
#if IS_ENABLED(CONFIG_INET) #if IS_ENABLED(CONFIG_INET)
case AF_INET: case AF_INET:
index = bpf_ipv4_fib_lookup(net, params, flags, false); rc = bpf_ipv4_fib_lookup(net, params, flags, false);
break; break;
#endif #endif
#if IS_ENABLED(CONFIG_IPV6) #if IS_ENABLED(CONFIG_IPV6)
case AF_INET6: case AF_INET6:
index = bpf_ipv6_fib_lookup(net, params, flags, false); rc = bpf_ipv6_fib_lookup(net, params, flags, false);
break; break;
#endif #endif
} }
if (index > 0) { if (!rc) {
struct net_device *dev; struct net_device *dev;
dev = dev_get_by_index_rcu(net, index); dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb)) if (!is_skb_forwardable(dev, skb))
index = 0; rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
} }
return index; return rc;
} }
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = { static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {

View File

@ -5276,8 +5276,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (npages >= 1 << order) { if (npages >= 1 << order) {
page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) | page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_COMP |
__GFP_NOWARN | __GFP_NOWARN,
__GFP_NORETRY,
order); order);
if (page) if (page)
goto fill_page; goto fill_page;

View File

@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)
rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
rsk_prot->obj_size, 0, rsk_prot->obj_size, 0,
prot->slab_flags, NULL); SLAB_ACCOUNT | prot->slab_flags,
NULL);
if (!rsk_prot->slab) { if (!rsk_prot->slab) {
pr_crit("%s: Can't create request sock SLAB cache!\n", pr_crit("%s: Can't create request sock SLAB cache!\n",
@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)
if (alloc_slab) { if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name, prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0, prot->obj_size, 0,
SLAB_HWCACHE_ALIGN | prot->slab_flags, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
prot->slab_flags,
prot->useroffset, prot->usersize, prot->useroffset, prot->usersize,
NULL); NULL);
@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(prot->twsk_prot->twsk_slab_name, kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size, prot->twsk_prot->twsk_obj_size,
0, 0,
SLAB_ACCOUNT |
prot->slab_flags, prot->slab_flags,
NULL); NULL);
if (prot->twsk_prot->twsk_slab == NULL) if (prot->twsk_prot->twsk_slab == NULL)

View File

@ -448,9 +448,7 @@ static struct sk_buff **gue_gro_receive(struct sock *sk,
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
out: out:
NAPI_GRO_CB(skb)->flush |= flush; skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
skb_gro_remcsum_cleanup(skb, &grc);
skb->remcsum_offload = 0;
return pp; return pp;
} }

View File

@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
out: out:
NAPI_GRO_CB(skb)->flush |= flush; skb_gro_flush_final(skb, pp, flush);
return pp; return pp;
} }

View File

@ -265,8 +265,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ipv4.sysctl_tcp_fastopen); ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt; struct tcp_fastopen_context *ctxt;
int ret;
u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
__le32 key[4];
int ret, i;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data) if (!tbl.data)
@ -275,11 +276,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
rcu_read_lock(); rcu_read_lock();
ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt) if (ctxt)
memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else else
memset(user_key, 0, sizeof(user_key)); memset(key, 0, sizeof(key));
rcu_read_unlock(); rcu_read_unlock();
for (i = 0; i < ARRAY_SIZE(key); i++)
user_key[i] = le32_to_cpu(key[i]);
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
user_key[0], user_key[1], user_key[2], user_key[3]); user_key[0], user_key[1], user_key[2], user_key[3]);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos); ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@ -290,13 +294,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ret = -EINVAL; ret = -EINVAL;
goto bad_key; goto bad_key;
} }
tcp_fastopen_reset_cipher(net, NULL, user_key,
for (i = 0; i < ARRAY_SIZE(user_key); i++)
key[i] = cpu_to_le32(user_key[i]);
tcp_fastopen_reset_cipher(net, NULL, key,
TCP_FASTOPEN_KEY_LENGTH); TCP_FASTOPEN_KEY_LENGTH);
} }
bad_key: bad_key:
pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
user_key[0], user_key[1], user_key[2], user_key[3], user_key[0], user_key[1], user_key[2], user_key[3],
(char *)tbl.data, ret); (char *)tbl.data, ret);
kfree(tbl.data); kfree(tbl.data);
return ret; return ret;

View File

@ -265,7 +265,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
* it is probably a retransmit. * it is probably a retransmit.
*/ */
if (tp->ecn_flags & TCP_ECN_SEEN) if (tp->ecn_flags & TCP_ECN_SEEN)
tcp_enter_quickack_mode(sk, 1); tcp_enter_quickack_mode(sk, 2);
break; break;
case INET_ECN_CE: case INET_ECN_CE:
if (tcp_ca_needs_ecn(sk)) if (tcp_ca_needs_ecn(sk))
@ -273,7 +273,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */ /* Better not delay acks, sender can have a very low cwnd */
tcp_enter_quickack_mode(sk, 1); tcp_enter_quickack_mode(sk, 2);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR; tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
} }
tp->ecn_flags |= TCP_ECN_SEEN; tp->ecn_flags |= TCP_ECN_SEEN;
@ -3181,6 +3181,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (tcp_is_reno(tp)) { if (tcp_is_reno(tp)) {
tcp_remove_reno_sacks(sk, pkts_acked); tcp_remove_reno_sacks(sk, pkts_acked);
/* If any of the cumulatively ACKed segments was
* retransmitted, non-SACK case cannot confirm that
* progress was due to original transmission due to
* lack of TCPCB_SACKED_ACKED bits even if some of
* the packets may have been never retransmitted.
*/
if (flag & FLAG_RETRANS_DATA_ACKED)
flag &= ~FLAG_ORIG_SACK_ACKED;
} else { } else {
int delta; int delta;

View File

@ -394,7 +394,7 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb,
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
out: out:
NAPI_GRO_CB(skb)->flush |= flush; skb_gro_flush_final(skb, pp, flush);
return pp; return pp;
} }
EXPORT_SYMBOL(udp_gro_receive); EXPORT_SYMBOL(udp_gro_receive);

View File

@ -4528,6 +4528,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags) unsigned long expires, u32 flags)
{ {
struct fib6_info *f6i; struct fib6_info *f6i;
u32 prio;
f6i = addrconf_get_prefix_route(&ifp->addr, f6i = addrconf_get_prefix_route(&ifp->addr,
ifp->prefix_len, ifp->prefix_len,
@ -4536,13 +4537,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
if (!f6i) if (!f6i)
return -ENOENT; return -ENOENT;
if (f6i->fib6_metric != ifp->rt_priority) { prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
if (f6i->fib6_metric != prio) {
/* delete old one */
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
/* add new one */ /* add new one */
addrconf_prefix_route(&ifp->addr, ifp->prefix_len, addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev, ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL); expires, flags, GFP_KERNEL);
/* delete old one */
ip6_del_rt(dev_net(ifp->idev->dev), f6i);
} else { } else {
if (!expires) if (!expires)
fib6_clean_expires(f6i); fib6_clean_expires(f6i);

View File

@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
if (hdr == NULL) if (hdr == NULL)
goto err_reg; goto err_reg;
net->nf_frag.sysctl.frags_hdr = hdr; net->nf_frag_frags_hdr = hdr;
return 0; return 0;
err_reg: err_reg:
@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{ {
struct ctl_table *table; struct ctl_table *table;
table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; table = net->nf_frag_frags_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); unregister_net_sysctl_table(net->nf_frag_frags_hdr);
if (!net_eq(net, &init_net)) if (!net_eq(net, &init_net))
kfree(table); kfree(table);
} }

View File

@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void)
return -ENOMEM; return -ENOMEM;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL); tfm = crypto_alloc_shash(algo->name, 0, 0);
if (IS_ERR(tfm)) if (IS_ERR(tfm))
return PTR_ERR(tfm); return PTR_ERR(tfm);
p_tfm = per_cpu_ptr(algo->tfms, cpu); p_tfm = per_cpu_ptr(algo->tfms, cpu);

View File

@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_network_header(skb); skb_reset_network_header(skb);
skb_reset_mac_header(skb); skb_reset_mac_header(skb);
local_bh_disable();
__ieee80211_subif_start_xmit(skb, skb->dev, flags); __ieee80211_subif_start_xmit(skb, skb->dev, flags);
local_bh_enable();
return 0; return 0;
} }

View File

@ -47,6 +47,8 @@ struct nf_conncount_tuple {
struct hlist_node node; struct hlist_node node;
struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone; struct nf_conntrack_zone zone;
int cpu;
u32 jiffies32;
}; };
struct nf_conncount_rb { struct nf_conncount_rb {
@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head,
return false; return false;
conn->tuple = *tuple; conn->tuple = *tuple;
conn->zone = *zone; conn->zone = *zone;
conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies;
hlist_add_head(&conn->node, head); hlist_add_head(&conn->node, head);
return true; return true;
} }
EXPORT_SYMBOL_GPL(nf_conncount_add); EXPORT_SYMBOL_GPL(nf_conncount_add);
/*
 * find_or_evict - resolve a saved connection entry against conntrack.
 * @net:  network namespace passed to the conntrack lookup
 * @conn: saved entry (tuple, zone, adding cpu and 32-bit jiffies stamp)
 *
 * Returns the result of nf_conntrack_find_get() when that lookup succeeds.
 * Otherwise:
 *   ERR_PTR(-ENOENT)  @conn was unlinked from its list and freed;
 *   ERR_PTR(-EAGAIN)  @conn was left in place and may be retried later.
 */
static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
{
	const struct nf_conntrack_tuple_hash *hash;
	int this_cpu = raw_smp_processor_id();
	unsigned long stamp, now;
	__s32 age;

	hash = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
	if (hash)
		return hash;

	/* Both values are truncated to 32 bits, so the signed difference
	 * stays meaningful across a jiffies wrap.
	 */
	stamp = conn->jiffies32;
	now = (u32)jiffies;
	age = now - stamp;

	/* A failed lookup does not prove the entry is stale: another cpu
	 * may have added it a moment ago and the conntrack may simply not
	 * be confirmed yet.  Keep the entry unless this cpu added it or it
	 * is at least two jiffies old.
	 */
	if (conn->cpu != this_cpu && age < 2)
		return ERR_PTR(-EAGAIN);

	hlist_del(&conn->node);
	kmem_cache_free(conncount_conn_cachep, conn);
	return ERR_PTR(-ENOENT);
}
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone, const struct nf_conntrack_zone *zone,
@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
{ {
const struct nf_conntrack_tuple_hash *found; const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn; struct nf_conncount_tuple *conn;
struct hlist_node *n;
struct nf_conn *found_ct; struct nf_conn *found_ct;
struct hlist_node *n;
unsigned int length = 0; unsigned int length = 0;
*addit = tuple ? true : false; *addit = tuple ? true : false;
/* check the saved connections */ /* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) { hlist_for_each_entry_safe(conn, n, head, node) {
found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); found = find_or_evict(net, conn);
if (found == NULL) { if (IS_ERR(found)) {
hlist_del(&conn->node); /* Not found, but might be about to be confirmed */
kmem_cache_free(conncount_conn_cachep, conn); if (PTR_ERR(found) == -EAGAIN) {
length++;
if (!tuple)
continue;
if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir))
*addit = false;
}
continue; continue;
} }

View File

@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me); nf_ct_iterate_destroy(unhelp, me);
/* Someone may already have obtained a pointer to the helper while
* unhelp was running above, so wait for an RCU grace period here.
*/
synchronize_rcu();
} }
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);

View File

@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
if (write) { if (write) {
struct ctl_table tmp = *table; struct ctl_table tmp = *table;
/* proc_dostring() can append to existing strings, so we need to
* initialize it as an empty string.
*/
buf[0] = '\0';
tmp.data = buf; tmp.data = buf;
r = proc_dostring(&tmp, write, buffer, lenp, ppos); r = proc_dostring(&tmp, write, buffer, lenp, ppos);
if (r) if (r)
@ -442,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
rcu_assign_pointer(net->nf.nf_loggers[tindex], logger); rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex); mutex_unlock(&nf_log_mutex);
} else { } else {
struct ctl_table tmp = *table;
tmp.data = buf;
mutex_lock(&nf_log_mutex); mutex_lock(&nf_log_mutex);
logger = nft_log_dereference(net->nf.nf_loggers[tindex]); logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
if (!logger) if (!logger)
table->data = "NONE"; strlcpy(buf, "NONE", sizeof(buf));
else else
table->data = logger->name; strlcpy(buf, logger->name, sizeof(buf));
r = proc_dostring(table, write, buffer, lenp, ppos);
mutex_unlock(&nf_log_mutex); mutex_unlock(&nf_log_mutex);
r = proc_dostring(&tmp, write, buffer, lenp, ppos);
} }
return r; return r;

View File

@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
[NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) }, [NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
[NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
[NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
[NFQA_CFG_MASK] = { .type = NLA_U32 },
[NFQA_CFG_FLAGS] = { .type = NLA_U32 },
}; };
static const struct nf_queue_handler nfqh = { static const struct nf_queue_handler nfqh = {

View File

@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
int rds_conn_init(void) int rds_conn_init(void)
{ {
int ret;
ret = rds_loop_net_init(); /* register pernet callback */
if (ret)
return ret;
rds_conn_slab = kmem_cache_create("rds_connection", rds_conn_slab = kmem_cache_create("rds_connection",
sizeof(struct rds_connection), sizeof(struct rds_connection),
0, 0, NULL); 0, 0, NULL);
if (!rds_conn_slab) if (!rds_conn_slab) {
rds_loop_net_exit();
return -ENOMEM; return -ENOMEM;
}
rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
rds_info_register_func(RDS_INFO_SEND_MESSAGES, rds_info_register_func(RDS_INFO_SEND_MESSAGES,
@ -676,6 +684,7 @@ int rds_conn_init(void)
void rds_conn_exit(void) void rds_conn_exit(void)
{ {
rds_loop_net_exit(); /* unregister pernet callback */
rds_loop_exit(); rds_loop_exit();
WARN_ON(!hlist_empty(rds_conn_hash)); WARN_ON(!hlist_empty(rds_conn_hash));

View File

@ -33,6 +33,8 @@
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/in.h> #include <linux/in.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include "rds_single_path.h" #include "rds_single_path.h"
#include "rds.h" #include "rds.h"
@ -40,6 +42,17 @@
static DEFINE_SPINLOCK(loop_conns_lock); static DEFINE_SPINLOCK(loop_conns_lock);
static LIST_HEAD(loop_conns); static LIST_HEAD(loop_conns);
static atomic_t rds_loop_unloading = ATOMIC_INIT(0);
static void rds_loop_set_unloading(void)
{
atomic_set(&rds_loop_unloading, 1);
}
static bool rds_loop_is_unloading(struct rds_connection *conn)
{
return atomic_read(&rds_loop_unloading) != 0;
}
/* /*
* This 'loopback' transport is a special case for flows that originate * This 'loopback' transport is a special case for flows that originate
@ -165,6 +178,8 @@ void rds_loop_exit(void)
struct rds_loop_connection *lc, *_lc; struct rds_loop_connection *lc, *_lc;
LIST_HEAD(tmp_list); LIST_HEAD(tmp_list);
rds_loop_set_unloading();
synchronize_rcu();
/* avoid calling conn_destroy with irqs off */ /* avoid calling conn_destroy with irqs off */
spin_lock_irq(&loop_conns_lock); spin_lock_irq(&loop_conns_lock);
list_splice(&loop_conns, &tmp_list); list_splice(&loop_conns, &tmp_list);
@ -177,6 +192,46 @@ void rds_loop_exit(void)
} }
} }
static void rds_loop_kill_conns(struct net *net)
{
struct rds_loop_connection *lc, *_lc;
LIST_HEAD(tmp_list);
spin_lock_irq(&loop_conns_lock);
list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) {
struct net *c_net = read_pnet(&lc->conn->c_net);
if (net != c_net)
continue;
list_move_tail(&lc->loop_node, &tmp_list);
}
spin_unlock_irq(&loop_conns_lock);
list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
WARN_ON(lc->conn->c_passive);
rds_conn_destroy(lc->conn);
}
}
/* Pernet exit hook: destroy the loopback connections that belong to @net. */
static void __net_exit rds_loop_exit_net(struct net *net)
{
	rds_loop_kill_conns(net);
}

/* Only an exit hook is provided; no per-namespace init hook is set. */
static struct pernet_operations rds_loop_net_ops = {
	.exit = rds_loop_exit_net,
};

/*
 * Register the loopback pernet operations.
 * Returns whatever register_pernet_device() returns.
 */
int rds_loop_net_init(void)
{
	int rc = register_pernet_device(&rds_loop_net_ops);

	return rc;
}

/* Unregister the loopback pernet operations. */
void rds_loop_net_exit(void)
{
	unregister_pernet_device(&rds_loop_net_ops);
}
/* /*
* This is missing .xmit_* because loop doesn't go through generic * This is missing .xmit_* because loop doesn't go through generic
* rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
@ -194,4 +249,5 @@ struct rds_transport rds_loop_transport = {
.inc_free = rds_loop_inc_free, .inc_free = rds_loop_inc_free,
.t_name = "loopback", .t_name = "loopback",
.t_type = RDS_TRANS_LOOP, .t_type = RDS_TRANS_LOOP,
.t_unloading = rds_loop_is_unloading,
}; };

Some files were not shown because too many files have changed in this diff Show More