Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Minor conflicts in net/mptcp/protocol.h and
tools/testing/selftests/net/Makefile.

In both cases code was added on both sides in the same place
so just keep both.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2020-10-15 12:43:21 -07:00
commit 2295cddf99
45 changed files with 1210 additions and 141 deletions

View File

@ -465,9 +465,9 @@ XPS Configuration
-----------------
XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
default for SMP). The functionality remains disabled until explicitly
configured. To enable XPS, the bitmap of CPUs/receive-queues that may
use a transmit queue is configured using the sysfs file entry:
default for SMP). If compiled in, it is driver dependent whether, and
how, XPS is configured at device init. The mapping of CPUs/receive-queues
to transmit queue can be inspected and configured using sysfs:
For selection based on CPUs map::

View File

@ -144,8 +144,6 @@ static int __maybe_unused m_can_runtime_suspend(struct device *dev)
struct net_device *ndev = dev_get_drvdata(dev);
struct m_can_classdev *mcan_class = netdev_priv(ndev);
m_can_class_suspend(dev);
clk_disable_unprepare(mcan_class->cclk);
clk_disable_unprepare(mcan_class->hclk);

View File

@ -103,14 +103,8 @@ void ksz_init_mib_timer(struct ksz_device *dev)
INIT_DELAYED_WORK(&dev->mib_read, ksz_mib_read_work);
/* Read MIB counters every 30 seconds to avoid overflow. */
dev->mib_read_interval = msecs_to_jiffies(30000);
for (i = 0; i < dev->mib_port_cnt; i++)
dev->dev_ops->port_init_cnt(dev, i);
/* Start the timer 2 seconds later. */
schedule_delayed_work(&dev->mib_read, msecs_to_jiffies(2000));
}
EXPORT_SYMBOL_GPL(ksz_init_mib_timer);
@ -143,7 +137,9 @@ void ksz_mac_link_down(struct dsa_switch *ds, int port, unsigned int mode,
/* Read all MIB counters when the link is going down. */
p->read = true;
schedule_delayed_work(&dev->mib_read, 0);
/* timer started */
if (dev->mib_read_interval)
schedule_delayed_work(&dev->mib_read, 0);
}
EXPORT_SYMBOL_GPL(ksz_mac_link_down);
@ -451,6 +447,12 @@ int ksz_switch_register(struct ksz_device *dev,
return ret;
}
/* Read MIB counters every 30 seconds to avoid overflow. */
dev->mib_read_interval = msecs_to_jiffies(30000);
/* Start the MIB timer. */
schedule_delayed_work(&dev->mib_read, 0);
return 0;
}
EXPORT_SYMBOL(ksz_switch_register);

View File

@ -60,6 +60,89 @@ static struct ch_tc_pedit_fields pedits[] = {
PEDIT_FIELDS(IP6_, DST_127_96, 4, nat_lip, 12),
};
/* Translation table from the set of NAT 4-tuple fields a filter rewrites
 * (an OR of CXGB4_ACTION_NATMODE_* bits in .flags) to the NAT mode value
 * stored in the filter specification (.natmode).
 *
 * cxgb4_action_natmode_validate() accepts an entry when the adapter's chip
 * version is at least .chip and .flags matches exactly;
 * cxgb4_action_natmode_tweak() matches on .flags alone.
 */
static const struct cxgb4_natmode_config cxgb4_natmode_config_array[] = {
/* Default supported NAT modes */
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_NONE,
.natmode = NAT_MODE_NONE,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP,
.natmode = NAT_MODE_DIP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT,
.natmode = NAT_MODE_DIP_DP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_DIP_DP_SIP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_DP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_SIP | CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_SIP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_SIP_SP,
},
{
.chip = CHELSIO_T5,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP |
CXGB4_ACTION_NATMODE_DPORT |
CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_ALL,
},
/* T6+ can ignore L4 ports when they're disabled. */
/* The entries below therefore map address-only (or partial-port)
 * flag sets onto NAT modes that also name the port fields.
 */
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_SIP_SP,
},
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SPORT,
.natmode = NAT_MODE_DIP_DP_SP,
},
{
.chip = CHELSIO_T6,
.flags = CXGB4_ACTION_NATMODE_DIP | CXGB4_ACTION_NATMODE_SIP,
.natmode = NAT_MODE_ALL,
},
};
/* Set fs->nat_mode from the NAT 4-tuple fields that are being rewritten.
 *
 * @fs: filter specification whose nat_mode is updated
 * @natmode_flags: OR of CXGB4_ACTION_NATMODE_* bits collected from the
 *                 pedit actions
 *
 * The first table entry whose flags equal @natmode_flags supplies the mode,
 * so that fields which are not being rewritten are not overwritten to 0.
 * If no entry matches, fs->nat_mode is left untouched.
 */
static void cxgb4_action_natmode_tweak(struct ch_filter_specification *fs,
				       u8 natmode_flags)
{
	u8 idx;

	for (idx = 0; idx < ARRAY_SIZE(cxgb4_natmode_config_array); idx++) {
		/* Only an exact flag-set match selects a mode. */
		if (cxgb4_natmode_config_array[idx].flags != natmode_flags)
			continue;

		fs->nat_mode = cxgb4_natmode_config_array[idx].natmode;
		break;
	}
}
static struct ch_tc_flower_entry *allocate_flower_entry(void)
{
struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
@ -289,7 +372,8 @@ static void offload_pedit(struct ch_filter_specification *fs, u32 val, u32 mask,
}
static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
u32 mask, u32 offset, u8 htype)
u32 mask, u32 offset, u8 htype,
u8 *natmode_flags)
{
switch (htype) {
case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
@ -314,67 +398,102 @@ static void process_pedit_field(struct ch_filter_specification *fs, u32 val,
switch (offset) {
case PEDIT_IP4_SRC:
offload_pedit(fs, val, mask, IP4_SRC);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP4_DST:
offload_pedit(fs, val, mask, IP4_DST);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
switch (offset) {
case PEDIT_IP6_SRC_31_0:
offload_pedit(fs, val, mask, IP6_SRC_31_0);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_63_32:
offload_pedit(fs, val, mask, IP6_SRC_63_32);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_95_64:
offload_pedit(fs, val, mask, IP6_SRC_95_64);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_SRC_127_96:
offload_pedit(fs, val, mask, IP6_SRC_127_96);
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_DST_31_0:
offload_pedit(fs, val, mask, IP6_DST_31_0);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_63_32:
offload_pedit(fs, val, mask, IP6_DST_63_32);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_95_64:
offload_pedit(fs, val, mask, IP6_DST_95_64);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
case PEDIT_IP6_DST_127_96:
offload_pedit(fs, val, mask, IP6_DST_127_96);
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
}
fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
switch (offset) {
case PEDIT_TCP_SPORT_DPORT:
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
} else {
fs->nat_lport = val >> 16;
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
}
}
fs->nat_mode = NAT_MODE_ALL;
break;
case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
switch (offset) {
case PEDIT_UDP_SPORT_DPORT:
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
if (~mask & PEDIT_TCP_UDP_SPORT_MASK) {
fs->nat_fport = val;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
} else {
fs->nat_lport = val >> 16;
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
}
}
fs->nat_mode = NAT_MODE_ALL;
break;
}
}
/* Check that the requested combination of rewritten NAT 4-tuple fields is
 * one the adapter can express as a NAT mode.
 *
 * @adap: adapter whose chip version gates the usable table entries
 * @natmode_flags: OR of CXGB4_ACTION_NATMODE_* bits collected from the
 *                 pedit actions
 * @extack: netlink extended ack that receives the error message
 *
 * Returns 0 when some table entry has exactly @natmode_flags and a chip
 * requirement no newer than the adapter's chip version; otherwise records
 * an error in @extack and returns -EOPNOTSUPP.
 */
static int cxgb4_action_natmode_validate(struct adapter *adap, u8 natmode_flags,
					 struct netlink_ext_ack *extack)
{
	const struct cxgb4_natmode_config *entry;
	u8 idx;

	for (idx = 0; idx < ARRAY_SIZE(cxgb4_natmode_config_array); idx++) {
		entry = &cxgb4_natmode_config_array[idx];

		if (entry->flags != natmode_flags)
			continue;

		/* Entry is only usable from its chip generation onwards. */
		if (CHELSIO_CHIP_VERSION(adap->params.chip) < entry->chip)
			continue;

		return 0;
	}

	NL_SET_ERR_MSG_MOD(extack, "Unsupported NAT mode 4-tuple combination");
	return -EOPNOTSUPP;
}
void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions,
struct ch_filter_specification *fs)
{
struct flow_action_entry *act;
u8 natmode_flags = 0;
int i;
flow_action_for_each(i, act, actions) {
@ -426,7 +545,8 @@ void cxgb4_process_flow_actions(struct net_device *in,
val = act->mangle.val;
offset = act->mangle.offset;
process_pedit_field(fs, val, mask, offset, htype);
process_pedit_field(fs, val, mask, offset, htype,
&natmode_flags);
}
break;
case FLOW_ACTION_QUEUE:
@ -438,6 +558,9 @@ void cxgb4_process_flow_actions(struct net_device *in,
break;
}
}
if (natmode_flags)
cxgb4_action_natmode_tweak(fs, natmode_flags);
}
static bool valid_l4_mask(u32 mask)
@ -454,7 +577,8 @@ static bool valid_l4_mask(u32 mask)
}
static bool valid_pedit_action(struct net_device *dev,
const struct flow_action_entry *act)
const struct flow_action_entry *act,
u8 *natmode_flags)
{
u32 mask, offset;
u8 htype;
@ -479,7 +603,10 @@ static bool valid_pedit_action(struct net_device *dev,
case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
switch (offset) {
case PEDIT_IP4_SRC:
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP4_DST:
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@ -493,10 +620,13 @@ static bool valid_pedit_action(struct net_device *dev,
case PEDIT_IP6_SRC_63_32:
case PEDIT_IP6_SRC_95_64:
case PEDIT_IP6_SRC_127_96:
*natmode_flags |= CXGB4_ACTION_NATMODE_SIP;
break;
case PEDIT_IP6_DST_31_0:
case PEDIT_IP6_DST_63_32:
case PEDIT_IP6_DST_95_64:
case PEDIT_IP6_DST_127_96:
*natmode_flags |= CXGB4_ACTION_NATMODE_DIP;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@ -512,6 +642,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__);
return false;
}
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@ -527,6 +661,10 @@ static bool valid_pedit_action(struct net_device *dev,
__func__);
return false;
}
if (~mask & PEDIT_TCP_UDP_SPORT_MASK)
*natmode_flags |= CXGB4_ACTION_NATMODE_SPORT;
else
*natmode_flags |= CXGB4_ACTION_NATMODE_DPORT;
break;
default:
netdev_err(dev, "%s: Unsupported pedit field\n",
@ -546,10 +684,12 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
struct netlink_ext_ack *extack,
u8 matchall_filter)
{
struct adapter *adap = netdev2adap(dev);
struct flow_action_entry *act;
bool act_redir = false;
bool act_pedit = false;
bool act_vlan = false;
u8 natmode_flags = 0;
int i;
if (!flow_action_basic_hw_stats_check(actions, extack))
@ -563,7 +703,6 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
break;
case FLOW_ACTION_MIRRED:
case FLOW_ACTION_REDIRECT: {
struct adapter *adap = netdev2adap(dev);
struct net_device *n_dev, *target_dev;
bool found = false;
unsigned int i;
@ -620,7 +759,8 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
}
break;
case FLOW_ACTION_MANGLE: {
bool pedit_valid = valid_pedit_action(dev, act);
bool pedit_valid = valid_pedit_action(dev, act,
&natmode_flags);
if (!pedit_valid)
return -EOPNOTSUPP;
@ -642,6 +782,15 @@ int cxgb4_validate_flow_actions(struct net_device *dev,
return -EINVAL;
}
if (act_pedit) {
int ret;
ret = cxgb4_action_natmode_validate(adap, natmode_flags,
extack);
if (ret)
return ret;
}
return 0;
}

View File

@ -108,6 +108,21 @@ struct ch_tc_pedit_fields {
#define PEDIT_TCP_SPORT_DPORT 0x0
#define PEDIT_UDP_SPORT_DPORT 0x0
/* Bit flags naming the NAT 4-tuple fields that pedit actions rewrite.
 * The flags are OR-ed together into a u8 and compared against the .flags
 * member of struct cxgb4_natmode_config.
 */
enum cxgb4_action_natmode_flags {
CXGB4_ACTION_NATMODE_NONE = 0,
CXGB4_ACTION_NATMODE_DIP = (1 << 0), /* destination IP address */
CXGB4_ACTION_NATMODE_SIP = (1 << 1), /* source IP address */
CXGB4_ACTION_NATMODE_DPORT = (1 << 2), /* destination L4 port */
CXGB4_ACTION_NATMODE_SPORT = (1 << 3), /* source L4 port */
};
/* TC PEDIT action to NATMODE translation entry */
struct cxgb4_natmode_config {
enum chip_type chip; /* chip version the entry is associated with */
u8 flags; /* OR of CXGB4_ACTION_NATMODE_* bits */
u8 natmode; /* NAT_MODE_* value selected for this flag set */
};
void cxgb4_process_flow_actions(struct net_device *in,
struct flow_action *actions,
struct ch_filter_specification *fs);

View File

@ -1912,6 +1912,27 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
return ret;
}
/* Trigger the PHY reset that is required after the reference clock has been
 * enabled.
 *
 * @ndev: FEC network device
 *
 * When a PHY is already attached to @ndev it is used directly. Otherwise,
 * if the MAC has an OF PHY node, the PHY device instance is looked up from
 * that node. Nothing is done when neither is available.
 */
static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	struct phy_device *phy_dev = ndev->phydev;

	if (phy_dev) {
		phy_reset_after_clk_enable(phy_dev);
		return;
	}

	if (!fep->phy_node)
		return;

	/*
	 * If the PHY still is not bound to the MAC, but there is
	 * OF PHY node and a matching PHY device instance already,
	 * use the OF PHY node to obtain the PHY device instance,
	 * and then use that PHY device instance when triggering
	 * the PHY reset.
	 */
	phy_dev = of_phy_find_device(fep->phy_node);

	/*
	 * The lookup can fail (no PHY device registered for the node yet).
	 * Bail out instead of forming &phy_dev->mdio.dev from a NULL pointer.
	 */
	if (!phy_dev)
		return;

	phy_reset_after_clk_enable(phy_dev);

	/* Drop the device reference obtained by the lookup above. */
	put_device(&phy_dev->mdio.dev);
}
static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@ -1938,7 +1959,7 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
if (ret)
goto failed_clk_ref;
phy_reset_after_clk_enable(ndev->phydev);
fec_enet_phy_reset_after_clk_enable(ndev);
} else {
clk_disable_unprepare(fep->clk_enet_out);
if (fep->clk_ptp) {
@ -2983,16 +3004,16 @@ fec_enet_open(struct net_device *ndev)
/* Init MAC prior to mii bus probe */
fec_restart(ndev);
/* Probe and connect to PHY when open the interface */
ret = fec_enet_mii_probe(ndev);
if (ret)
goto err_enet_mii_probe;
/* Call phy_reset_after_clk_enable() again if it failed during
* phy_reset_after_clk_enable() before because the PHY wasn't probed.
*/
if (reset_again)
phy_reset_after_clk_enable(ndev->phydev);
fec_enet_phy_reset_after_clk_enable(ndev);
/* Probe and connect to PHY when open the interface */
ret = fec_enet_mii_probe(ndev);
if (ret)
goto err_enet_mii_probe;
if (fep->quirks & FEC_QUIRK_ERR006687)
imx6q_cpuidle_fec_irqs_used();

View File

@ -1349,6 +1349,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
int offset = ibmveth_rxq_frame_offset(adapter);
int csum_good = ibmveth_rxq_csum_good(adapter);
int lrg_pkt = ibmveth_rxq_large_packet(adapter);
__sum16 iph_check = 0;
skb = ibmveth_rxq_get_buffer(adapter);
@ -1385,16 +1386,26 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
skb_put(skb, length);
skb->protocol = eth_type_trans(skb, netdev);
/* PHYP without PLSO support places a -1 in the ip
* checksum for large send frames.
*/
if (skb->protocol == cpu_to_be16(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)skb->data;
iph_check = iph->check;
}
if ((length > netdev->mtu + ETH_HLEN) ||
lrg_pkt || iph_check == 0xffff) {
ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
adapter->rx_large_packets++;
}
if (csum_good) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
ibmveth_rx_csum_helper(skb, adapter);
}
if (length > netdev->mtu + ETH_HLEN) {
ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
adapter->rx_large_packets++;
}
napi_gro_receive(napi, skb); /* send it up */
netdev->stats.rx_packets++;

View File

@ -1113,7 +1113,7 @@ static int korina_probe(struct platform_device *pdev)
return rc;
probe_err_register:
kfree(lp->td_ring);
kfree(KSEG0ADDR(lp->td_ring));
probe_err_td_ring:
iounmap(lp->tx_dma_regs);
probe_err_dma_tx:
@ -1133,6 +1133,7 @@ static int korina_remove(struct platform_device *pdev)
iounmap(lp->eth_regs);
iounmap(lp->rx_dma_regs);
iounmap(lp->tx_dma_regs);
kfree(KSEG0ADDR(lp->td_ring));
unregister_netdev(bif->dev);
free_netdev(bif->dev);

View File

@ -943,6 +943,9 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
bool clean_complete = true;
int done;
if (!budget)
return 0;
if (priv->tx_ring_num[TX_XDP]) {
xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
if (xdp_tx_cq->xdp_busy) {

View File

@ -350,7 +350,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
.dma = tx_info->map0_dma,
};
if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
if (!napi_mode || !mlx4_en_rx_recycle(ring->recycle_ring, &frame)) {
dma_unmap_page(priv->ddev, tx_info->map0_dma,
PAGE_SIZE, priv->dma_dir);
put_page(tx_info->page);

View File

@ -2504,7 +2504,7 @@ static void tlan_phy_power_down(struct net_device *dev)
}
/* Wait for 50 ms and powerup
* This is abitrary. It is intended to make sure the
* This is arbitrary. It is intended to make sure the
* transceiver settles.
*/
tlan_set_timer(dev, msecs_to_jiffies(50), TLAN_TIMER_PHY_PUP);

View File

@ -1432,6 +1432,9 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
void ipa_endpoint_suspend(struct ipa *ipa)
{
if (!ipa->setup_complete)
return;
if (ipa->modem_netdev)
ipa_modem_suspend(ipa->modem_netdev);
@ -1443,6 +1446,9 @@ void ipa_endpoint_suspend(struct ipa *ipa)
void ipa_endpoint_resume(struct ipa *ipa)
{
if (!ipa->setup_complete)
return;
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]);
ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_LAN_RX]);

View File

@ -108,6 +108,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb,
unsigned int logflags);
void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m,
struct sock *sk);
void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb);
void nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,

View File

@ -56,7 +56,10 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a)
{
#ifdef CONFIG_NET_CLS_ACT
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params = rtnl_dereference(t->params);
struct tcf_tunnel_key_params *params;
params = rcu_dereference_protected(t->params,
lockdep_is_held(&a->tcfa_lock));
return &params->tcft_enc_metadata->u.tun_info;
#else

View File

@ -679,10 +679,6 @@ int decrypt_skb(struct sock *sk, struct sk_buff *skb,
struct scatterlist *sgout);
struct sk_buff *tls_encrypt_skb(struct sk_buff *skb);
struct sk_buff *tls_validate_xmit_skb(struct sock *sk,
struct net_device *dev,
struct sk_buff *skb);
int tls_sw_fallback_init(struct sock *sk,
struct tls_offload_context_tx *offload_ctx,
struct tls_crypto_info *crypto_info);

View File

@ -580,6 +580,7 @@ sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
/* reserve CAN header */
skb_reserve(skb, offsetof(struct can_frame, data));
@ -1487,6 +1488,7 @@ j1939_session *j1939_session_fresh_new(struct j1939_priv *priv,
skb->dev = priv->ndev;
can_skb_reserve(skb);
can_skb_prv(skb)->ifindex = priv->ndev->ifindex;
can_skb_prv(skb)->skbcnt = 0;
skcb = j1939_skb_to_cb(skb);
memcpy(skcb, rel_skcb, sizeof(*skcb));

View File

@ -757,7 +757,6 @@ static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
sock_reset_flag(sk, SOCK_TSTAMP_NEW);
}
}
@ -994,8 +993,6 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
__sock_set_timestamps(sk, valbool, true, true);
break;
case SO_TIMESTAMPING_NEW:
sock_set_flag(sk, SOCK_TSTAMP_NEW);
fallthrough;
case SO_TIMESTAMPING_OLD:
if (val & ~SOF_TIMESTAMPING_MASK) {
ret = -EINVAL;
@ -1024,16 +1021,14 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
sk->sk_tsflags = val;
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
sock_enable_timestamp(sk,
SOCK_TIMESTAMPING_RX_SOFTWARE);
else {
if (optname == SO_TIMESTAMPING_NEW)
sock_reset_flag(sk, SOCK_TSTAMP_NEW);
else
sock_disable_timestamp(sk,
(1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
}
break;
case SO_RCVLOWAT:

View File

@ -457,6 +457,23 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
local_bh_enable();
}
/*
 * Choose the device whose routing table is used when sending an ICMP error.
 *
 * The source device (skb->dev) is preferred whenever it is set, which should
 * ensure the ICMP error can be sent back to the source host. Failing that,
 * the device of the skb's destination entry is used. If neither is set, NULL
 * is returned so the main routing table (index 0) is used.
 */
static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
{
	if (skb->dev)
		return skb->dev;

	if (skb_dst(skb))
		return skb_dst(skb)->dev;

	return NULL;
}
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@ -465,6 +482,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
int type, int code,
struct icmp_bxm *param)
{
struct net_device *route_lookup_dev;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@ -479,7 +497,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = ip_route_output_key_hash(net, fl4, skb_in);
@ -503,7 +522,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
if (inet_addr_type_dev_table(net, route_lookup_dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))

View File

@ -625,9 +625,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
/* Need space for new headers */
if (skb_cow_head(skb, dev->needed_headroom -
(tunnel->hlen + sizeof(struct iphdr))))
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
@ -748,7 +746,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
len = tunnel->tun_hlen - len;
tunnel->hlen = tunnel->hlen + len;
dev->needed_headroom = dev->needed_headroom + len;
if (dev->header_ops)
dev->hard_header_len += len;
else
dev->needed_headroom += len;
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
@ -944,6 +946,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@ -987,10 +990,14 @@ static int ipgre_tunnel_init(struct net_device *dev)
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->header_ops = &ipgre_header_ops;
dev->hard_header_len = tunnel->hlen + sizeof(*iph);
dev->needed_headroom = 0;
}
#endif
} else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
dev->hard_header_len = tunnel->hlen + sizeof(*iph);
dev->needed_headroom = 0;
}
return ip_tunnel_init(dev);

View File

@ -43,16 +43,31 @@ static void dump_arp_packet(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int nhoff)
{
const struct arphdr *ah;
struct arphdr _arph;
const struct arppayload *ap;
struct arppayload _arpp;
const struct arphdr *ah;
unsigned int logflags;
struct arphdr _arph;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
nf_log_buf_add(m, "TRUNCATED");
return;
}
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
else
logflags = NF_LOG_DEFAULT_MASK;
if (logflags & NF_LOG_MACDECODE) {
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
}
nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));

View File

@ -284,8 +284,10 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:

View File

@ -2770,10 +2770,12 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (IS_ERR(rt))
return rt;
if (flp4->flowi4_proto)
if (flp4->flowi4_proto) {
flp4->flowi4_oif = rt->dst.dev->ifindex;
rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
flowi4_to_flowi(flp4),
sk, 0);
}
return rt;
}

View File

@ -501,8 +501,11 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (__ipv6_addr_needs_scope_id(addr_type)) {
iif = icmp6_iif(skb);
} else {
dst = skb_dst(skb);
iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev);
/*
* The source device is used for looking up which routing table
* to use for sending an ICMP error.
*/
iif = l3mdev_master_ifindex(skb->dev);
}
/*

View File

@ -2622,8 +2622,10 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
iter->skip = *pos;
if (iter->tbl) {
loff_t p = 0;
ipv6_route_seq_setup_walk(iter, net);
return ipv6_route_seq_next(seq, NULL, pos);
return ipv6_route_seq_next(seq, NULL, &p);
} else {
return NULL;
}

View File

@ -468,8 +468,6 @@ int ip6_forward(struct sk_buff *skb)
* check and decrement ttl
*/
if (hdr->hop_limit <= 1) {
/* Force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

View File

@ -297,9 +297,11 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
ntohs(eth_hdr(skb)->h_proto));
nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
nf_log_dump_vlan(m, skb);
nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
break;

View File

@ -2745,7 +2745,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
if (confirm_neigh)
dst_confirm_neigh(dst, daddr);
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu < IPV6_MIN_MTU)
return;
if (mtu >= dst_mtu(dst))
return;

View File

@ -517,7 +517,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
return ret;
}
if (subflow->use_64bit_ack) {
if (READ_ONCE(msk->use_64bit_ack)) {
ack_size = TCPOLEN_MPTCP_DSS_ACK64;
opts->ext_copy.data_ack = READ_ONCE(msk->ack_seq);
opts->ext_copy.ack64 = 1;
@ -657,6 +657,12 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
if (unlikely(mptcp_check_fallback(sk)))
return false;
/* prevent adding of any MPTCP related options on reset packet
* until we support MP_TCPRST/MP_FASTCLOSE
*/
if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
return false;
if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
ret = true;
else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
@ -711,7 +717,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size,
return false;
}
static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
struct sk_buff *skb,
struct mptcp_options_received *mp_opt)
@ -728,15 +734,20 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq &&
subflow->mp_join && mp_opt->mp_join &&
READ_ONCE(msk->pm.server_side))
tcp_send_ack(sk);
tcp_send_ack(ssk);
goto fully_established;
}
/* we should process OoO packets before the first subflow is fully
* established, but not expected for MP_JOIN subflows
/* we must process OoO packets before the first subflow is fully
* established. OoO packets are instead a protocol violation
* for MP_JOIN subflows as the peer must not send any data
* before receiving the fourth ack - cfr. RFC 8684 section 3.2.
*/
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1)
if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) {
if (subflow->mp_join)
goto reset;
return subflow->mp_capable;
}
if (mp_opt->dss && mp_opt->use_ack) {
/* subflows are fully established as soon as we get any
@ -748,9 +759,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
}
/* If the first established packet does not contain MP_CAPABLE + data
* then fallback to TCP
* then fallback to TCP. Fallback scenarios require a reset for
* MP_JOIN subflows.
*/
if (!mp_opt->mp_capable) {
if (subflow->mp_join)
goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
__mptcp_do_fallback(msk);
@ -767,12 +781,16 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk,
subflow->pm_notified = 1;
if (subflow->mp_join) {
clear_3rdack_retransmission(sk);
clear_3rdack_retransmission(ssk);
mptcp_pm_subflow_established(msk, subflow);
} else {
mptcp_pm_fully_established(msk);
}
return true;
reset:
mptcp_subflow_reset(ssk);
return false;
}
static u64 expand_ack(u64 old_ack, u64 cur_ack, bool use_64bit)

View File

@ -1710,6 +1710,20 @@ static void pm_work(struct mptcp_sock *msk)
spin_unlock_bh(&msk->pm.lock);
}
/* Walk the connection's subflow list and close every subflow whose TCP
 * socket is already in the TCP_CLOSE state. The "safe" list iterator is
 * used for the walk.
 */
static void __mptcp_close_subflow(struct mptcp_sock *msk)
{
	struct sock *sk = (struct sock *)msk;
	struct mptcp_subflow_context *pos, *next;

	list_for_each_entry_safe(pos, next, &msk->conn_list, node) {
		struct sock *ssk = mptcp_subflow_tcp_sock(pos);

		/* Subflows in any other state are left alone. */
		if (inet_sk_state_load(ssk) == TCP_CLOSE)
			__mptcp_close_ssk(sk, ssk, pos, 0);
	}
}
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
@ -1727,6 +1741,9 @@ static void mptcp_worker(struct work_struct *work)
mptcp_clean_una(sk);
mptcp_check_data_fin_ack(sk);
__mptcp_flush_join_list(msk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(msk);
__mptcp_move_skbs(msk);
if (msk->pm.status)

View File

@ -90,6 +90,7 @@
#define MPTCP_WORK_RTX 2
#define MPTCP_WORK_EOF 3
#define MPTCP_FALLBACK_DONE 4
#define MPTCP_WORK_CLOSE_SUBFLOW 5
struct mptcp_options_received {
u64 sndr_key;
@ -211,6 +212,7 @@ struct mptcp_sock {
bool fully_established;
bool rcv_data_fin;
bool snd_data_fin_enable;
bool use_64bit_ack; /* Set when we received a 64-bit DSN */
spinlock_t join_list_lock;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@ -310,7 +312,6 @@ struct mptcp_subflow_context {
mpc_map : 1,
backup : 1,
rx_eof : 1,
use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
can_ack : 1; /* only after processing the remote a key */
enum mptcp_data_avail data_avail;
u32 remote_nonce;
@ -369,6 +370,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow,
long timeout);
void mptcp_subflow_reset(struct sock *ssk);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,

View File

@ -271,6 +271,19 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
return thmac == subflow->thmac;
}
void mptcp_subflow_reset(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
tcp_set_state(ssk, TCP_CLOSE);
tcp_send_active_reset(ssk, GFP_ATOMIC);
tcp_done(ssk);
if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&
schedule_work(&mptcp_sk(sk)->work))
sock_hold(sk);
}
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@ -343,8 +356,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
return;
do_reset:
tcp_send_active_reset(sk, GFP_ATOMIC);
tcp_done(sk);
mptcp_subflow_reset(sk);
}
struct request_sock_ops mptcp_subflow_request_sock_ops;
@ -770,12 +782,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
if (!mpext->dsn64) {
map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
mpext->data_seq);
subflow->use_64bit_ack = 0;
pr_debug("expanded seq=%llu", subflow->map_seq);
} else {
map_seq = mpext->data_seq;
subflow->use_64bit_ack = 1;
}
WRITE_ONCE(mptcp_sk(subflow->conn)->use_64bit_ack, !!mpext->dsn64);
if (subflow->map_valid) {
/* Allow replacing only with an identical map */

View File

@ -609,6 +609,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset_ct(skb);
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
}
return ret;
}
@ -649,6 +651,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@ -669,6 +673,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
if (skb->dev)
skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else

View File

@ -171,6 +171,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
/* Append the VLAN protocol and tag carried by @skb to the log buffer @m.
 * Nothing is added when the skb has no VLAN tag.
 */
void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
{
	if (skb_vlan_tag_present(skb)) {
		u16 tag = skb_vlan_tag_get(skb);

		nf_log_buf_add(m, "VPROTO=%04x VID=%u ",
			       ntohs(skb->vlan_proto), tag);
	}
}
EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
/* bridge and netdev logging families share this code. */
void nf_log_l2packet(struct net *net, u_int8_t pf,
__be16 protocol,

View File

@ -2138,7 +2138,8 @@ static bool nft_hook_list_equal(struct list_head *hook_list1,
}
static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
u32 flags)
u32 flags, const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
const struct nlattr * const *nla = ctx->nla;
struct nft_table *table = ctx->table;
@ -2154,9 +2155,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return -EOPNOTSUPP;
if (nla[NFTA_CHAIN_HOOK]) {
if (!nft_is_base_chain(chain))
if (!nft_is_base_chain(chain)) {
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
err = nft_chain_parse_hook(ctx->net, nla, &hook, ctx->family,
false);
if (err < 0)
@ -2165,6 +2167,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
basechain = nft_base_chain(chain);
if (basechain->type != hook.type) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
@ -2172,6 +2175,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (!nft_hook_list_equal(&basechain->hook_list,
&hook.list)) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
} else {
@ -2179,6 +2183,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (ops->hooknum != hook.num ||
ops->priority != hook.priority) {
nft_chain_release_hook(&hook);
NL_SET_BAD_ATTR(extack, attr);
return -EEXIST;
}
}
@ -2191,8 +2196,10 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
chain2 = nft_chain_lookup(ctx->net, table,
nla[NFTA_CHAIN_NAME], genmask);
if (!IS_ERR(chain2))
if (!IS_ERR(chain2)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
return -EEXIST;
}
}
if (nla[NFTA_CHAIN_COUNTERS]) {
@ -2235,6 +2242,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_chain_update(tmp) &&
nft_trans_chain_name(tmp) &&
strcmp(name, nft_trans_chain_name(tmp)) == 0) {
NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_NAME]);
kfree(name);
goto err;
}
@ -2357,7 +2365,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
return -EOPNOTSUPP;
flags |= chain->flags & NFT_CHAIN_BASE;
return nf_tables_updchain(&ctx, genmask, policy, flags);
return nf_tables_updchain(&ctx, genmask, policy, flags, attr,
extack);
}
return nf_tables_addchain(&ctx, family, genmask, policy, flags);

View File

@ -1596,7 +1596,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
return rc;
}
#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
@ -1615,7 +1615,8 @@ static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
rc = smc_ism_register_dmb(lgr, bufsize, buf_desc);
if (rc) {
kfree(buf_desc);
return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) : ERR_PTR(rc);
return (rc == -ENOMEM) ? ERR_PTR(-EAGAIN) :
ERR_PTR(-EIO);
}
buf_desc->pages = virt_to_page(buf_desc->cpu_addr);
/* CDC header stored in buf. So, pretend it was smaller */

View File

@ -233,8 +233,6 @@ static bool smc_llc_flow_start(struct smc_llc_flow *flow,
default:
flow->type = SMC_LLC_FLOW_NONE;
}
if (qentry == lgr->delayed_event)
lgr->delayed_event = NULL;
smc_llc_flow_qentry_set(flow, qentry);
spin_unlock_bh(&lgr->llc_flow_lock);
return true;
@ -1209,7 +1207,7 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
/* enqueue a local add_link req to trigger a new add_link flow */
void smc_llc_add_link_local(struct smc_link *link)
{
struct smc_llc_msg_add_link add_llc = {0};
struct smc_llc_msg_add_link add_llc = {};
add_llc.hd.length = sizeof(add_llc);
add_llc.hd.common.type = SMC_LLC_ADD_LINK;
@ -1242,7 +1240,7 @@ static void smc_llc_add_link_work(struct work_struct *work)
*/
void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
{
struct smc_llc_msg_del_link del_llc = {0};
struct smc_llc_msg_del_link del_llc = {};
del_llc.hd.length = sizeof(del_llc);
del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
@ -1314,7 +1312,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
*/
void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
{
struct smc_llc_msg_del_link delllc = {0};
struct smc_llc_msg_del_link delllc = {};
int i;
delllc.hd.common.type = SMC_LLC_DELETE_LINK;
@ -1603,13 +1601,12 @@ static void smc_llc_event_work(struct work_struct *work)
struct smc_llc_qentry *qentry;
if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
if (smc_link_usable(lgr->delayed_event->link)) {
smc_llc_event_handler(lgr->delayed_event);
} else {
qentry = lgr->delayed_event;
lgr->delayed_event = NULL;
qentry = lgr->delayed_event;
lgr->delayed_event = NULL;
if (smc_link_usable(qentry->link))
smc_llc_event_handler(qentry);
else
kfree(qentry);
}
}
again:

View File

@ -150,7 +150,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
frag = skb_unshare(frag, GFP_ATOMIC);
if (skb_cloned(frag))
frag = skb_copy(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;

View File

@ -327,8 +327,13 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
struct tipc_msg *hdr;
u16 seqno;
spin_lock_bh(&namedq->lock);
skb_queue_walk_safe(namedq, skb, tmp) {
skb_linearize(skb);
if (unlikely(skb_linearize(skb))) {
__skb_unlink(skb, namedq);
kfree_skb(skb);
continue;
}
hdr = buf_msg(skb);
seqno = msg_named_seqno(hdr);
if (msg_is_last_bulk(hdr)) {
@ -338,12 +343,14 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
if (msg_is_bulk(hdr) || msg_is_legacy(hdr)) {
__skb_unlink(skb, namedq);
spin_unlock_bh(&namedq->lock);
return skb;
}
if (*open && (*rcv_nxt == seqno)) {
(*rcv_nxt)++;
__skb_unlink(skb, namedq);
spin_unlock_bh(&namedq->lock);
return skb;
}
@ -353,6 +360,7 @@ static struct sk_buff *tipc_named_dequeue(struct sk_buff_head *namedq,
continue;
}
}
spin_unlock_bh(&namedq->lock);
return NULL;
}

View File

@ -1496,7 +1496,7 @@ static void node_lost_contact(struct tipc_node *n,
/* Clean up broadcast state */
tipc_bcast_remove_peer(n->net, n->bc_entry.link);
__skb_queue_purge(&n->bc_entry.namedq);
skb_queue_purge(&n->bc_entry.namedq);
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {

View File

@ -418,14 +418,14 @@ static int tls_push_data(struct sock *sk,
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
int tls_push_record_flags;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
int copy, rc = 0;
bool more = false;
bool done = false;
int copy, rc = 0;
long timeo;
if (flags &
@ -492,9 +492,8 @@ static int tls_push_data(struct sock *sk,
if (!size) {
last_record:
tls_push_record_flags = flags;
if (more) {
tls_ctx->pending_open_record_frags =
!!record->num_frags;
if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
more = true;
break;
}
@ -526,6 +525,8 @@ static int tls_push_data(struct sock *sk,
}
} while (!done);
tls_ctx->pending_open_record_frags = more;
if (orig_size - size > 0)
rc = orig_size - size;

View File

@ -20,6 +20,7 @@ TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any

View File

@ -0,0 +1,626 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
#
# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
#
#
# Symmetric routing topology
#
# blue red
# +----+ .253 +----+ .253 +----+
# | h1 |-------------------| r1 |-------------------| h2 |
# +----+ .1 +----+ .2 +----+
# 172.16.1/24 172.16.2/24
# 2001:db8:16:1/64 2001:db8:16:2/64
#
#
# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
# to the outgoing vrf red for the n2 network and red has a route back to n1.
# The red VRF interface has a MTU of 1400.
#
# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
# output of the command to check that a ttl expired error is received.
#
# The second test runs traceroute from h1 to h2 and parses the output to check
# for a hop on r1.
#
# The third test sends a ping with a packet size of 1450 from h1 to h2 and
# parses the output of the command to check that a fragmentation error is
# received.
#
#
# Asymmetric routing topology
#
# This topology represents a customer setup where the issue with icmp errors
# and VRF route leaking was initially reported. The MTU test isn't done here
# because of the lack of a return route in the red VRF.
#
# blue red
# .253 +----+ .253
# +----| r1 |----+
# | +----+ |
# +----+ | | +----+
# | h1 |--------------+ +--------------| h2 |
# +----+ .1 | | .2 +----+
# 172.16.1/24 | +----+ | 172.16.2/24
# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
# .254 +----+ .254
#
#
# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
# Route from h2 to h1 goes through r2.
#
# The objective is to check that the incoming vrf routing table is selected
# to send an ICMP error back to the source when the ttl of a packet reaches 1
# while it is forwarded between different vrfs.
# Runtime knobs; the -v and -p command line options change the first two.
VERBOSE=0
PAUSE_ON_FAIL=no
# Topology used when a test function is called without an argument
# (selects setup_sym; "asym" selects setup_asym).
DEFAULT_TTYPE=sym
# Network n1 (h1 side): IPv4/IPv6 prefixes and the addresses of h1, r1 and r2
H1_N1=172.16.1.0/24
H1_N1_6=2001:db8:16:1::/64
H1_N1_IP=172.16.1.1
R1_N1_IP=172.16.1.253
R2_N1_IP=172.16.1.254
H1_N1_IP6=2001:db8:16:1::1
R1_N1_IP6=2001:db8:16:1::253
R2_N1_IP6=2001:db8:16:1::254
# Network n2 (h2 side): IPv4/IPv6 prefixes and the addresses of h2, r1 and r2
H2_N2=172.16.2.0/24
H2_N2_6=2001:db8:16:2::/64
H2_N2_IP=172.16.2.2
R1_N2_IP=172.16.2.253
R2_N2_IP=172.16.2.254
H2_N2_IP6=2001:db8:16:2::2
R1_N2_IP6=2001:db8:16:2::253
R2_N2_IP6=2001:db8:16:2::254
################################################################################
# helpers
# Print a section banner: the arguments framed by two rules of '#' and
# surrounded by blank lines.
log_section()
{
	local rule="###########################################################################"

	echo
	echo "$rule"
	echo "$*"
	echo "$rule"
	echo
}
# Record the outcome of one test.
#   $1 - status returned by the test command
#   $2 - status that was expected
#   $3 - description printed in the result line
# Updates the global counters nsuccess/nfail and sets ret=1 on failure.
# With PAUSE_ON_FAIL=yes a failure waits for the user; 'q' quits the script.
log_test()
{
	local rc=$1
	local expected=$2
	local msg="$3"

	if [ "${rc}" -eq "${expected}" ]; then
		printf "TEST: %-60s [ OK ]\n" "${msg}"
		nsuccess=$((nsuccess+1))
		return
	fi

	ret=1
	nfail=$((nfail+1))
	printf "TEST: %-60s [FAIL]\n" "${msg}"

	if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
		echo
		echo "hit enter to continue, 'q' to quit"
		read -r a
		[ "$a" = "q" ] && exit 1
	fi
}
# Run the command given as arguments (through eval) with stdout and stderr
# captured, and return its exit status. In verbose mode the command line and
# any captured output are printed, followed by a blank line.
run_cmd()
{
	local cmd="$*"
	local out rc

	[ "$VERBOSE" = "1" ] && echo "COMMAND: $cmd"

	# shellcheck disable=SC2086
	out=$(eval $cmd 2>&1)
	rc=$?

	if [ "$VERBOSE" = "1" ]; then
		[ -n "$out" ] && echo "$out"
		echo
	fi

	return $rc
}
# Run a command (through eval) and check its combined stdout/stderr.
#   $1    - pattern handed to grep
#   $2... - command to run
# Returns the status of grep: 0 when the pattern was found in the output.
# In verbose mode the command line and its output are printed as well.
run_cmd_grep()
{
	local grep_pattern="$1"
	local cmd out rc

	shift
	cmd="$*"

	[ "$VERBOSE" = "1" ] && echo "COMMAND: $cmd"

	# shellcheck disable=SC2086
	out=$(eval $cmd 2>&1)
	[ "$VERBOSE" = "1" ] && [ -n "$out" ] && echo "$out"

	echo "$out" | grep -q "$grep_pattern"
	rc=$?

	[ "$VERBOSE" = "1" ] && echo

	return $rc
}
################################################################################
# setup and teardown
# Delete every namespace the setup functions may have created; errors (such
# as a namespace that does not exist) are silenced.
cleanup()
{
	local node

	for node in h1 h2 r1 r2
	do
		ip netns del "$node" 2>/dev/null
	done
}
# In namespace $1, replace the pref 0 rule with a "lookup local" rule at
# pref 32765, for IPv4 first and then for IPv6.
setup_vrf()
{
	local ns=$1
	local fam

	# the empty family selects IPv4 (no option is passed to ip)
	for fam in "" "-6"; do
		ip -netns "${ns}" ${fam} rule del pref 0
		ip -netns "${ns}" ${fam} rule add pref 32765 from all lookup local
	done
}
# Create and bring up a VRF device.
#   $1 - namespace
#   $2 - VRF device name
#   $3 - routing table id bound to the VRF
# The VRF gets an unreachable default route (metric 8192) for IPv4 and IPv6
# and the loopback addresses 127.0.0.1/8 and ::1.
create_vrf()
{
	local ns=$1
	local vrf=$2
	local table=$3
	local fam

	ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
	ip -netns "${ns}" link set "${vrf}" up

	# the empty family selects IPv4 (no option is passed to ip)
	for fam in "" "-6"; do
		ip -netns "${ns}" ${fam} route add vrf "${vrf}" unreachable default metric 8192
	done

	ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
	ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
}
# Build the symmetric topology: h1 -- r1 -- h2. On r1 the h1-facing
# interface (eth0) is put in VRF blue and the h2-facing one (eth1) in VRF
# red, and routes are leaked between the two VRFs in both directions.
setup_sym()
{
local ns
# make sure we are starting with a clean slate
cleanup
#
# create nodes as namespaces
#
for ns in h1 h2 r1; do
ip netns add $ns
ip -netns $ns link set lo up
# hosts get IPv6 forwarding disabled; the router forwards IPv4 and IPv6
case "${ns}" in
h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
;;
r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
esac
done
#
# create interconnects
#
# each veth pair is created in the host namespace, then its peer end is
# moved into r1 and renamed there
ip -netns h1 link add eth0 type veth peer name r1h1
ip -netns h1 link set r1h1 netns r1 name eth0 up
ip -netns h2 link add eth0 type veth peer name r1h2
ip -netns h2 link set r1h2 netns r1 name eth1 up
#
# h1
#
ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
ip -netns h1 link set eth0 up
# h1 to h2 via r1
ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
#
# h2
#
ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
ip -netns h2 link set eth0 up
# h2 to h1 via r1
ip -netns h2 route add default via ${R1_N2_IP} dev eth0
ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
#
# r1
#
setup_vrf r1
create_vrf r1 blue 1101
create_vrf r1 red 1102
# lower the MTU on the h2-facing link; the fragmentation tests send
# 1450-byte pings across it
ip -netns r1 link set mtu 1400 dev eth1
ip -netns r1 link set eth0 vrf blue up
ip -netns r1 link set eth1 vrf red up
ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
ip -netns r1 route add vrf blue ${H2_N2} dev red
ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
# Route leak from red to blue
ip -netns r1 route add vrf red ${H1_N1} dev blue
ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
# Wait for ip config to settle
sleep 2
}
# Build the asymmetric topology: h1 and h2 are each connected to both r1 and
# r2 through a bridge. h1 reaches h2 via r1 (VRF blue leaking into VRF red),
# while h2 uses r2 as its default gateway; r1 has no route leak from red back
# to blue.
setup_asym()
{
local ns
# make sure we are starting with a clean slate
cleanup
#
# create nodes as namespaces
#
for ns in h1 h2 r1 r2; do
ip netns add $ns
ip -netns $ns link set lo up
# hosts get IPv6 forwarding disabled; both routers forward IPv4 and IPv6
case "${ns}" in
h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
;;
r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
esac
done
#
# create interconnects
#
# each veth pair is created in the host namespace, then its peer end is
# moved into the router namespace and renamed there
ip -netns h1 link add eth0 type veth peer name r1h1
ip -netns h1 link set r1h1 netns r1 name eth0 up
ip -netns h1 link add eth1 type veth peer name r2h1
ip -netns h1 link set r2h1 netns r2 name eth0 up
ip -netns h2 link add eth0 type veth peer name r1h2
ip -netns h2 link set r1h2 netns r1 name eth1 up
ip -netns h2 link add eth1 type veth peer name r2h2
ip -netns h2 link set r2h2 netns r2 name eth1 up
#
# h1
#
# the host address lives on a bridge that joins the links to r1 and r2
ip -netns h1 link add br0 type bridge
ip -netns h1 link set br0 up
ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
ip -netns h1 link set eth0 master br0 up
ip -netns h1 link set eth1 master br0 up
# h1 to h2 via r1
ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
#
# h2
#
# same bridge layout as on h1
ip -netns h2 link add br0 type bridge
ip -netns h2 link set br0 up
ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
ip -netns h2 link set eth0 master br0 up
ip -netns h2 link set eth1 master br0 up
# h2 to h1 via r2
ip -netns h2 route add default via ${R2_N2_IP} dev br0
ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
#
# r1
#
setup_vrf r1
create_vrf r1 blue 1101
create_vrf r1 red 1102
ip -netns r1 link set mtu 1400 dev eth1
ip -netns r1 link set eth0 vrf blue up
ip -netns r1 link set eth1 vrf red up
ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
ip -netns r1 route add vrf blue ${H2_N2} dev red
ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
# No route leak from red to blue
#
# r2
#
# r2 uses no VRFs: it only gets an address on each network
ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
sleep 2
}
# Check that h1 can ping h2 over IPv4 and log the result.
# Returns the exit status of ping, so callers can bail out with
# "check_connectivity || return" when the topology is not working.
check_connectivity()
{
	local rc

	ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
	# save the status now: after log_test, $? would be log_test's own status
	rc=$?
	log_test $rc 0 "Basic IPv4 connectivity"

	return $rc
}
# Check that h1 can ping h2 over IPv6 and log the result.
# Returns the exit status of ping, so callers can bail out with
# "check_connectivity6 || return" when the topology is not working.
check_connectivity6()
{
	local rc

	ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
	# save the status now: after log_test, $? would be log_test's own status
	rc=$?
	log_test $rc 0 "Basic IPv6 connectivity"

	return $rc
}
# Succeed when an executable traceroute is found; otherwise print a SKIP
# message and return 1.
check_traceroute()
{
	[ -x "$(command -v traceroute)" ] && return 0

	echo "SKIP: Could not run IPV4 test without traceroute"
	return 1
}
# Succeed when an executable traceroute6 is found; otherwise print a SKIP
# message and return 1.
check_traceroute6()
{
	[ -x "$(command -v traceroute6)" ] && return 0

	echo "SKIP: Could not run IPV6 test without traceroute6"
	return 1
}
# IPv4 traceroute test: the traceroute output from h1 to h2 must contain
# r1's address on network n1.
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv4_traceroute()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"

	# skip without touching the network when traceroute is missing
	check_traceroute || return

	setup_"$ttype"

	check_connectivity || return

	run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
	log_test $? 0 "Traceroute reports a hop on r1"
}
# IPv4 traceroute test on the asymmetric topology.
ipv4_traceroute_asym()
{
	ipv4_traceroute "asym"
}
# IPv6 traceroute test: the traceroute6 output from h1 to h2 must contain
# r1's address on network n1.
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv6_traceroute()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"

	# skip without touching the network when traceroute6 is missing
	check_traceroute6 || return

	setup_"$ttype"

	check_connectivity6 || return

	run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
	log_test $? 0 "Traceroute6 reports a hop on r1"
}
# IPv6 traceroute test on the asymmetric topology.
ipv6_traceroute_asym()
{
	ipv6_traceroute "asym"
}
# IPv4 TTL test: a ping sent from h1 to h2 with a ttl of 1 must report
# "Time to live exceeded".
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv4_ping_ttl()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"

	setup_"$ttype"

	check_connectivity || return

	run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
	log_test $? 0 "Ping received ICMP ttl exceeded"
}
# IPv4 TTL test on the asymmetric topology.
ipv4_ping_ttl_asym()
{
	ipv4_ping_ttl "asym"
}
# IPv4 fragmentation test: a 1450-byte ping with -Mdo from h1 to h2 must
# report "Frag needed".
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv4_ping_frag()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"

	setup_"$ttype"

	check_connectivity || return

	run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
	log_test $? 0 "Ping received ICMP Frag needed"
}
# IPv4 fragmentation test on the asymmetric topology.
ipv4_ping_frag_asym()
{
	ipv4_ping_frag "asym"
}
# IPv6 hop limit test: a ping sent from h1 to h2 with -t1 must report
# "Time exceeded: Hop limit".
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv6_ping_ttl()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"

	setup_"$ttype"

	check_connectivity6 || return

	run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
	log_test $? 0 "Ping received ICMP Hop limit"
}
# IPv6 hop limit test on the asymmetric topology.
ipv6_ping_ttl_asym()
{
	ipv6_ping_ttl "asym"
}
# IPv6 fragmentation test: a 1450-byte ping with -Mdo from h1 to h2 must
# report "Packet too big".
#   $1 - topology type (sym or asym); DEFAULT_TTYPE when empty or missing
ipv6_ping_frag()
{
	local ttype="${1:-$DEFAULT_TTYPE}"

	log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"

	setup_"$ttype"

	check_connectivity6 || return

	run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
	log_test $? 0 "Ping received ICMP Packet too big"
}
# IPv6 fragmentation test on the asymmetric topology.
ipv6_ping_frag_asym()
{
	ipv6_ping_frag "asym"
}
################################################################################
# usage
# Print the command line help; the script is named by its basename.
usage()
{
	local prog="${0##*/}"

	cat <<EOF
usage: ${prog} OPTS
-4 Run IPv4 tests only
-6 Run IPv6 tests only
-t TEST Run only TEST
-p Pause on fail
-v verbose mode (show commands and output)
EOF
}
################################################################################
# main
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)

# Tests run by default for each address family.
TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"

# exit status of the script and result counters, updated by log_test
ret=0
nsuccess=0
nfail=0

while getopts :46t:pvh o
do
	case $o in
		4) TESTS=ipv4;;
		6) TESTS=ipv6;;
		t) TESTS=$OPTARG;;
		p) PAUSE_ON_FAIL=yes;;
		v) VERBOSE=1;;
		h) usage; exit 0;;
		*) usage; exit 1;;
	esac
done

#
# resolve the test selection: nothing selected runs every default test,
# "ipv4" / "ipv6" select the default list of that family
#
if [ -z "$TESTS" ]; then
	TESTS="$TESTS_IPV4 $TESTS_IPV6"
elif [ "$TESTS" = "ipv4" ]; then
	TESTS="$TESTS_IPV4"
elif [ "$TESTS" = "ipv6" ]; then
	TESTS="$TESTS_IPV6"
fi

for t in $TESTS
do
	case $t in
	# ";;&" keeps testing the following patterns, so the "ping" and
	# "traceroute" aliases run every test of that kind
	ipv4_ping_ttl|ping)			ipv4_ping_ttl;;&
	ipv4_ping_ttl_asym|ping)		ipv4_ping_ttl_asym;;&
	ipv4_traceroute|traceroute)		ipv4_traceroute;;&
	ipv4_traceroute_asym|traceroute)	ipv4_traceroute_asym;;&
	ipv4_ping_frag|ping)			ipv4_ping_frag;;&

	ipv6_ping_ttl|ping)			ipv6_ping_ttl;;&
	ipv6_ping_ttl_asym|ping)		ipv6_ping_ttl_asym;;&
	ipv6_traceroute|traceroute)		ipv6_traceroute;;&
	ipv6_traceroute_asym|traceroute)	ipv6_traceroute_asym;;&
	ipv6_ping_frag|ping)			ipv6_ping_frag;;&

	# setup namespaces and config, but do not run any tests
	setup_sym|setup)			setup_sym; exit 0;;
	setup_asym)				setup_asym; exit 0;;

	# list the available tests; $TESTS only holds "help" at this point
	help)					echo "Test names: $TESTS_IPV4 $TESTS_IPV6"; exit 0;;
	esac
done

cleanup

printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}

exit $ret

View File

@ -17,9 +17,12 @@
struct options {
bool count_packets;
bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
uint32_t verdict;
uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
nfq_build_verdict(char *buf, int id, int queue_num, int verd)
nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
if (!opts.count_packets)
return;
total = 0;
last = queue_stats[0];
@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
/* Sleep for @delay milliseconds using a single nanosleep() call; the call
 * is not restarted and its return value is ignored.
 */
static void sleep_ms(uint32_t delay)
{
	struct timespec ts;

	ts.tv_sec = delay / 1000;
	/* sub-second remainder, converted from milliseconds to nanoseconds */
	ts.tv_nsec = (delay % 1000) * 1000llu * 1000llu;

	nanosleep(&ts, NULL);
}
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
if (errno == ENOBUFS)
if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
if (opts.delay_ms)
sleep_ms(opts.delay_ms);
nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
case 'Q':
opts.verdict = atoi(optarg);
if (opts.verdict > 0xffff) {
fprintf(stderr, "Expected destination queue number\n");
exit(1);
}
opts.verdict <<= 16;
opts.verdict |= NF_QUEUE;
break;
case 'd':
opts.delay_ms = atoi(optarg);
if (opts.delay_ms == 0) {
fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
exit(1);
}
break;
case 't':
opts.timeout = atoi(optarg);
break;
case 'G':
opts.gso_enabled = false;
break;
case 'v':
opts.verbose++;
break;
}
}
if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
fprintf(stderr, "Cannot use same destination and source queue\n");
exit(1);
}
}
int main(int argc, char *argv[])
{
int ret;
opts.verdict = NF_ACCEPT;
opts.gso_enabled = true;
parse_opts(argc, argv);
ret = mainloop();

View File

@ -7,8 +7,7 @@ ksft_skip=4
sfx=$(mktemp -u "XXXXXXXX")
ns0="ns0-$sfx"
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
if ! nft --version > /dev/null 2>&1; then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
trap cleanup EXIT
currentyear=$(date +%G)
lastyear=$((currentyear-1))
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table inet filter {
counter iifcount {}
@ -33,6 +34,9 @@ table inet filter {
counter infproto4count {}
counter il4protocounter {}
counter imarkcounter {}
counter icpu0counter {}
counter ilastyearcounter {}
counter icurrentyearcounter {}
counter oifcount {}
counter oifnamecount {}
@ -54,6 +58,9 @@ table inet filter {
meta nfproto ipv4 counter name "infproto4count"
meta l4proto icmp counter name "il4protocounter"
meta mark 42 counter name "imarkcounter"
meta cpu 0 counter name "icpu0counter"
meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
}
chain output {
@ -84,11 +91,10 @@ check_one_counter()
local want="packets $2"
local verbose="$3"
cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
if [ $? -ne 0 ];then
if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
ip netns exec "$ns0" nft list counter inet filter $counter
ip netns exec "$ns0" nft list counter inet filter $cname
fi
}
@ -100,8 +106,7 @@ check_lo_counters()
for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
il4protocounter \
ol4protocounter \
il4protocounter icurrentyearcounter ol4protocounter \
; do
check_one_counter "$counter" "$want" "$verbose"
done
@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true
check_one_counter oskgidcounter "1" true
check_one_counter imarkcounter "1" true
check_one_counter omarkcounter "1" true
check_one_counter ilastyearcounter "0" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta iif/oif counters at expected values"
else
exit $ret
fi
#First CPU execution and counter
taskset -p 01 $$ > /dev/null
ip netns exec "$ns0" nft reset counters > /dev/null
ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
check_one_counter icpu0counter "2" true
if [ $ret -eq 0 ];then
echo "OK: nftables meta cpu counter at expected values"
fi
exit $ret

View File

@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
timeout=4
cleanup()
{
@ -20,6 +21,7 @@ cleanup()
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@ -42,6 +44,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@ -83,7 +87,7 @@ load_ruleset() {
local name=$1
local prio=$2
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@ -118,7 +122,7 @@ EOF
load_counter_ruleset() {
local prio=$1
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@ -175,7 +179,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
ip netns exec ${nsrouter} nft -f - <<EOF
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@ -184,10 +188,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@ -214,8 +218,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@ -250,11 +254,11 @@ test_queue()
test_tcp_forward()
{
ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@ -270,15 +274,13 @@ test_tcp_forward()
test_tcp_localhost()
{
tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
tmpfile=$(mktemp) || exit 1
dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@ -287,6 +289,47 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
wait 2>/dev/null
}
# Transfer a file over loopback TCP while matching packets are queued to
# userspace and re-queued from one nf-queue listener to another. Both
# listeners run with -c and their outputs are compared; a difference is
# reported as a failure and recorded in the global ret.
test_tcp_localhost_requeue()
{
ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
flush ruleset
table inet filter {
chain output {
type filter hook output priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
chain post {
type filter hook postrouting priority 0; policy accept;
tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
}
}
EOF
	tmpfile=$(mktemp) || exit 1
	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
	ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
	local rpid=$!

	# listener on queue 1: receives the packets re-queued by the listener below
	ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &

	# nfqueue 0 is fed by the output and postrouting rules above. This
	# listener (GSO off, 150ms delay per packet) does not accept the
	# packets but re-queues them to the nfqueue program on queue 1 (-Q 1).
	ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &

	sleep 1
	ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
	rm -f "$tmpfile"

	wait

	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
		echo "FAIL: lost packets during requeue?!" 1>&2
		# make the failure visible in the script's exit status
		ret=1
		return
	fi

	echo "PASS: tcp via loopback and re-queueing"
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@ -328,5 +371,6 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_requeue
exit $ret