forcedeth: rx data path optimization
This patch optimizes the rx data paths and cleans up the code.

Signed-off-by: Ayaz Abdulla <aabdulla@nvidia.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
commit b01867cbd1
parent 445583b89d
@@ -1317,9 +1317,9 @@ static int nv_alloc_rx(struct net_device *dev)
         np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
         wmb();
         np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
-        if (np->put_rx.orig++ == np->last_rx.orig)
+        if (unlikely(np->put_rx.orig++ == np->last_rx.orig))
             np->put_rx.orig = np->first_rx.orig;
-        if (np->put_rx_ctx++ == np->last_rx_ctx)
+        if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
             np->put_rx_ctx = np->first_rx_ctx;
     } else {
         return 1;
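The only functional change in this and the following refill hunk is the unlikely() annotation on the ring wrap-around tests: the wrap fires once per trip around the ring, so the compiler is told to keep the common refill iterations as straight-line code. As a minimal sketch, these hints match the kernel's GCC definitions in include/linux/compiler.h:

/* Minimal sketch of the branch-prediction hints; these match the
 * kernel's GCC definitions in include/linux/compiler.h. */
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

/* Example: the wrap branch is laid out as the out-of-line case, so
 * the common iterations fall straight through without a taken branch. */
static unsigned int advance(unsigned int idx, unsigned int last)
{
    if (unlikely(idx == last))   /* wraps once per ring traversal */
        return 0;
    return idx + 1;
}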
@@ -1349,9 +1349,9 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
         np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
         wmb();
         np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
-        if (np->put_rx.ex++ == np->last_rx.ex)
+        if (unlikely(np->put_rx.ex++ == np->last_rx.ex))
             np->put_rx.ex = np->first_rx.ex;
-        if (np->put_rx_ctx++ == np->last_rx_ctx)
+        if (unlikely(np->put_rx_ctx++ == np->last_rx_ctx))
             np->put_rx_ctx = np->first_rx_ctx;
     } else {
         return 1;
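Both refill paths keep the same descriptor-ring discipline: write the buffer address, issue a write barrier, and only then hand the descriptor to hardware by setting the AVAIL bit in flaglen. A condensed sketch of that ordering, with desc, dma and the AVAIL constant as invented stand-ins (__sync_synchronize() standing in for the kernel's wmb()):

/* Sketch of the publish ordering used by both refill paths. The
 * barrier guarantees the NIC never sees AVAIL set while the buffer
 * address is still stale. */
struct rx_desc { unsigned int buf; unsigned int flaglen; };

static void publish(volatile struct rx_desc *desc,
                    unsigned int dma, unsigned int len)
{
    desc->buf = dma;                    /* 1. write the buffer address   */
    __sync_synchronize();               /* 2. full barrier, as wmb()     */
    desc->flaglen = len | 0x80000000u;  /* 3. set AVAIL: hand over to HW */
}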
@@ -2046,24 +2046,17 @@ static int nv_rx_process(struct net_device *dev, int limit)
 {
     struct fe_priv *np = netdev_priv(dev);
     u32 flags;
     u32 vlanflags = 0;
-    int count;
+    u32 rx_processed_cnt = 0;
+    struct sk_buff *skb;
+    int len;
 
-    for (count = 0; count < limit; ++count) {
-        struct sk_buff *skb;
-        int len;
-
-        if (np->get_rx.orig == np->put_rx.orig)
-            break;  /* we scanned the whole ring - do not continue */
-        flags = le32_to_cpu(np->get_rx.orig->flaglen);
-        len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
+    while((np->get_rx.orig != np->put_rx.orig) &&
+          !((flags = le32_to_cpu(np->get_rx.orig->flaglen)) & NV_RX_AVAIL) &&
+          (rx_processed_cnt++ < limit)) {
 
         dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
                     dev->name, flags);
 
-        if (flags & NV_RX_AVAIL)
-            break;  /* still owned by hardware, */
-
         /*
          * the packet is for us - immediately tear down the pci mapping.
          * TODO: check if a prefetch of the first cacheline improves
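This hunk rewrites the hot loop so the three exit conditions (ring empty, descriptor still owned by hardware, budget exhausted) are tested once in the while header instead of via separate breaks inside a counted for loop, and the descriptor flags word is loaded exactly once per iteration. A self-contained sketch of the pattern, with ring, get/put and OWNED_BY_HW as invented stand-ins for the driver's state:

/* Sketch: one loop header fuses all exit conditions, and the flags
 * load doubles as the ownership test. */
#include <stdio.h>

#define OWNED_BY_HW 0x80000000u

static unsigned int ring[8] = { 1, 2, 3, OWNED_BY_HW };
static unsigned int get, put = 3;   /* consumer / producer indexes */

int main(void)
{
    unsigned int flags, processed = 0, limit = 16;

    while ((get != put) &&
           !((flags = ring[get]) & OWNED_BY_HW) &&
           (processed++ < limit)) {
        printf("processed descriptor %u, flags 0x%x\n", get, flags);
        get = (get + 1) % 8;
    }
    printf("%u packets processed\n", processed);
    return 0;
}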
@@ -2087,99 +2080,80 @@ static int nv_rx_process(struct net_device *dev, int limit)
         }
         /* look at what we actually got: */
         if (np->desc_ver == DESC_VER_1) {
-            if (!(flags & NV_RX_DESCRIPTORVALID)) {
-                dev_kfree_skb(skb);
-                goto next_pkt;
-            }
-
-            if (flags & NV_RX_ERROR) {
-                if (flags & NV_RX_MISSEDFRAME) {
-                    np->stats.rx_missed_errors++;
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & (NV_RX_ERROR1|NV_RX_ERROR2|NV_RX_ERROR3)) {
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX_CRCERR) {
-                    np->stats.rx_crc_errors++;
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX_OVERFLOW) {
-                    np->stats.rx_over_errors++;
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX_ERROR4) {
-                    len = nv_getlen(dev, skb->data, len);
-                    if (len < 0) {
-                        np->stats.rx_errors++;
-                        dev_kfree_skb(skb);
-                        goto next_pkt;
-                    }
-                }
-            }
-            /* framing errors are soft errors. */
-            if (flags & NV_RX_FRAMINGERR) {
-                if (flags & NV_RX_SUBSTRACT1) {
-                    len--;
-                }
-            }
+            if (likely(flags & NV_RX_DESCRIPTORVALID)) {
+                len = flags & LEN_MASK_V1;
+                if (unlikely(flags & NV_RX_ERROR)) {
+                    if (flags & NV_RX_ERROR4) {
+                        len = nv_getlen(dev, skb->data, len);
+                        if (len < 0) {
+                            np->stats.rx_errors++;
+                            dev_kfree_skb(skb);
+                            goto next_pkt;
+                        }
+                    }
+                    /* framing errors are soft errors */
+                    else if (flags & NV_RX_FRAMINGERR) {
+                        if (flags & NV_RX_SUBSTRACT1) {
+                            len--;
+                        }
+                    }
+                    /* the rest are hard errors */
+                    else {
+                        if (flags & NV_RX_MISSEDFRAME)
+                            np->stats.rx_missed_errors++;
+                        if (flags & NV_RX_CRCERR)
+                            np->stats.rx_crc_errors++;
+                        if (flags & NV_RX_OVERFLOW)
+                            np->stats.rx_over_errors++;
+                        np->stats.rx_errors++;
+                        dev_kfree_skb(skb);
+                        goto next_pkt;
+                    }
+                }
+            } else {
+                dev_kfree_skb(skb);
+                goto next_pkt;
+            }
         } else {
-            if (!(flags & NV_RX2_DESCRIPTORVALID)) {
-                dev_kfree_skb(skb);
-                goto next_pkt;
-            }
-
-            if (flags & NV_RX2_ERROR) {
-                if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX2_CRCERR) {
-                    np->stats.rx_crc_errors++;
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX2_OVERFLOW) {
-                    np->stats.rx_over_errors++;
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-                if (flags & NV_RX2_ERROR4) {
-                    len = nv_getlen(dev, skb->data, len);
-                    if (len < 0) {
-                        np->stats.rx_errors++;
-                        dev_kfree_skb(skb);
-                        goto next_pkt;
-                    }
-                }
-                /* framing errors are soft errors */
-                if (flags & NV_RX2_FRAMINGERR) {
-                    if (flags & NV_RX2_SUBSTRACT1) {
-                        len--;
-                    }
-                }
-            }
-            if (np->rx_csum) {
-                flags &= NV_RX2_CHECKSUMMASK;
-                if (flags == NV_RX2_CHECKSUMOK1 ||
-                    flags == NV_RX2_CHECKSUMOK2 ||
-                    flags == NV_RX2_CHECKSUMOK3) {
-                    dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-                    skb->ip_summed = CHECKSUM_UNNECESSARY;
-                } else {
-                    dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
-                }
-            }
+            if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+                len = flags & LEN_MASK_V2;
+                if (unlikely(flags & NV_RX2_ERROR)) {
+                    if (flags & NV_RX2_ERROR4) {
+                        len = nv_getlen(dev, skb->data, len);
+                        if (len < 0) {
+                            np->stats.rx_errors++;
+                            dev_kfree_skb(skb);
+                            goto next_pkt;
+                        }
+                    }
+                    /* framing errors are soft errors */
+                    else if (flags & NV_RX2_FRAMINGERR) {
+                        if (flags & NV_RX2_SUBSTRACT1) {
+                            len--;
+                        }
+                    }
+                    /* the rest are hard errors */
+                    else {
+                        if (flags & NV_RX2_CRCERR)
+                            np->stats.rx_crc_errors++;
+                        if (flags & NV_RX2_OVERFLOW)
+                            np->stats.rx_over_errors++;
+                        np->stats.rx_errors++;
+                        dev_kfree_skb(skb);
+                        goto next_pkt;
+                    }
+                }
+                if (np->rx_csum) {
+                    if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
+                        skb->ip_summed = CHECKSUM_UNNECESSARY;
+                    } else {
+                        if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
+                            (flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
+                            skb->ip_summed = CHECKSUM_UNNECESSARY;
+                        }
+                    }
+                }
+            } else {
+                dev_kfree_skb(skb);
+                goto next_pkt;
+            }
         }
         /* got a valid packet - forward it to the network core */
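The restructuring above inverts the checks: instead of testing for the invalid case and then walking up to six sequential error tests for every packet, the valid descriptor becomes a likely() fast path and all error accounting is nested under a single unlikely() test. The packet length now comes straight from the flags word rather than a helper call. A self-contained sketch of the shape, with invented flag names and the hint macros from the earlier sketch:

/* Sketch of the consolidated checks: a good packet costs two
 * predicted branches instead of one validity test plus several
 * sequential error tests. */
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define DESC_VALID 0x1u
#define DESC_ERROR 0x2u
#define LEN_MASK   0xffff0000u

static int classify(unsigned int flags)
{
    if (likely(flags & DESC_VALID)) {
        int len = (int)((flags & LEN_MASK) >> 16);
        if (unlikely(flags & DESC_ERROR))
            return -1;   /* soft vs. hard errors sorted in here */
        return len;      /* fast path: deliver the packet       */
    }
    return -1;           /* invalid descriptor: drop            */
}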
@@ -2188,29 +2162,21 @@ static int nv_rx_process(struct net_device *dev, int limit)
         dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
                     dev->name, len, skb->protocol);
 #ifdef CONFIG_FORCEDETH_NAPI
-        if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-            vlan_hwaccel_receive_skb(skb, np->vlangrp,
-                         vlanflags & NV_RX3_VLAN_TAG_MASK);
-        else
-            netif_receive_skb(skb);
+        netif_receive_skb(skb);
 #else
-        if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-            vlan_hwaccel_rx(skb, np->vlangrp,
-                    vlanflags & NV_RX3_VLAN_TAG_MASK);
-        else
-            netif_rx(skb);
+        netif_rx(skb);
 #endif
         dev->last_rx = jiffies;
         np->stats.rx_packets++;
         np->stats.rx_bytes += len;
 next_pkt:
-        if (np->get_rx.orig++ == np->last_rx.orig)
+        if (unlikely(np->get_rx.orig++ == np->last_rx.orig))
             np->get_rx.orig = np->first_rx.orig;
-        if (np->get_rx_ctx++ == np->last_rx_ctx)
+        if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
             np->get_rx_ctx = np->first_rx_ctx;
     }
 
-    return count;
+    return rx_processed_cnt;
 }
 
 static int nv_rx_process_optimized(struct net_device *dev, int limit)
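With the loop counter gone, the function now returns rx_processed_cnt, the number of packets actually handled, which is what the caller's budget accounting needs. A hedged sketch of how that return value feeds the 2.6.21-era NAPI poll contract (poll took an int *budget in this era; names abbreviated, and this is not the driver's exact callback):

/* Hypothetical poll callback consuming the new return value. */
static int nv_poll_sketch(struct net_device *dev, int *budget)
{
    int quota = min(dev->quota, *budget);
    int pkts = nv_rx_process(dev, quota);  /* packets actually handled */

    dev->quota -= pkts;
    *budget -= pkts;
    if (pkts < quota) {
        netif_rx_complete(dev);  /* ring drained: back to IRQ mode */
        return 0;                /* done polling                   */
    }
    return 1;                    /* budget exhausted: poll again   */
}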
@@ -2218,24 +2184,17 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
     struct fe_priv *np = netdev_priv(dev);
     u32 flags;
     u32 vlanflags = 0;
-    int count;
+    u32 rx_processed_cnt = 0;
+    struct sk_buff *skb;
+    int len;
 
-    for (count = 0; count < limit; ++count) {
-        struct sk_buff *skb;
-        int len;
-
-        if (np->get_rx.ex == np->put_rx.ex)
-            break;  /* we scanned the whole ring - do not continue */
-        flags = le32_to_cpu(np->get_rx.ex->flaglen);
-        len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
-        vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+    while((np->get_rx.ex != np->put_rx.ex) &&
+          !((flags = le32_to_cpu(np->get_rx.ex->flaglen)) & NV_RX2_AVAIL) &&
+          (rx_processed_cnt++ < limit)) {
 
         dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: flags 0x%x.\n",
                     dev->name, flags);
 
-        if (flags & NV_RX_AVAIL)
-            break;  /* still owned by hardware, */
-
         /*
          * the packet is for us - immediately tear down the pci mapping.
          * TODO: check if a prefetch of the first cacheline improves
@@ -2258,84 +2217,91 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit)
             dprintk("\n");
         }
         /* look at what we actually got: */
-        if (!(flags & NV_RX2_DESCRIPTORVALID)) {
-            dev_kfree_skb(skb);
-            goto next_pkt;
-        }
-
-        if (flags & NV_RX2_ERROR) {
-            if (flags & (NV_RX2_ERROR1|NV_RX2_ERROR2|NV_RX2_ERROR3)) {
-                np->stats.rx_errors++;
-                dev_kfree_skb(skb);
-                goto next_pkt;
-            }
-            if (flags & NV_RX2_CRCERR) {
-                np->stats.rx_crc_errors++;
-                np->stats.rx_errors++;
-                dev_kfree_skb(skb);
-                goto next_pkt;
-            }
-            if (flags & NV_RX2_OVERFLOW) {
-                np->stats.rx_over_errors++;
-                np->stats.rx_errors++;
-                dev_kfree_skb(skb);
-                goto next_pkt;
-            }
-            if (flags & NV_RX2_ERROR4) {
-                len = nv_getlen(dev, skb->data, len);
-                if (len < 0) {
-                    np->stats.rx_errors++;
-                    dev_kfree_skb(skb);
-                    goto next_pkt;
-                }
-            }
-            /* framing errors are soft errors */
-            if (flags & NV_RX2_FRAMINGERR) {
-                if (flags & NV_RX2_SUBSTRACT1) {
-                    len--;
-                }
-            }
-        }
-        if (np->rx_csum) {
-            flags &= NV_RX2_CHECKSUMMASK;
-            if (flags == NV_RX2_CHECKSUMOK1 ||
-                flags == NV_RX2_CHECKSUMOK2 ||
-                flags == NV_RX2_CHECKSUMOK3) {
-                dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-                skb->ip_summed = CHECKSUM_UNNECESSARY;
-            } else {
-                dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
-            }
-        }
-        /* got a valid packet - forward it to the network core */
-        skb_put(skb, len);
-        skb->protocol = eth_type_trans(skb, dev);
-        dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
-                    dev->name, len, skb->protocol);
-#ifdef CONFIG_FORCEDETH_NAPI
-        if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-            vlan_hwaccel_receive_skb(skb, np->vlangrp,
-                         vlanflags & NV_RX3_VLAN_TAG_MASK);
-        else
-            netif_receive_skb(skb);
-#else
-        if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
-            vlan_hwaccel_rx(skb, np->vlangrp,
-                    vlanflags & NV_RX3_VLAN_TAG_MASK);
-        else
-            netif_rx(skb);
-#endif
-        dev->last_rx = jiffies;
-        np->stats.rx_packets++;
-        np->stats.rx_bytes += len;
+        if (likely(flags & NV_RX2_DESCRIPTORVALID)) {
+            len = flags & LEN_MASK_V2;
+            if (unlikely(flags & NV_RX2_ERROR)) {
+                if (flags & NV_RX2_ERROR4) {
+                    len = nv_getlen(dev, skb->data, len);
+                    if (len < 0) {
+                        np->stats.rx_errors++;
+                        dev_kfree_skb(skb);
+                        goto next_pkt;
+                    }
+                }
+                /* framing errors are soft errors */
+                else if (flags & NV_RX2_FRAMINGERR) {
+                    if (flags & NV_RX2_SUBSTRACT1) {
+                        len--;
+                    }
+                }
+                /* the rest are hard errors */
+                else {
+                    if (flags & NV_RX2_CRCERR)
+                        np->stats.rx_crc_errors++;
+                    if (flags & NV_RX2_OVERFLOW)
+                        np->stats.rx_over_errors++;
+                    np->stats.rx_errors++;
+                    dev_kfree_skb(skb);
+                    goto next_pkt;
+                }
+            }
+            if (np->rx_csum) {
+                if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK2)/*ip and tcp */ {
+                    skb->ip_summed = CHECKSUM_UNNECESSARY;
+                } else {
+                    if ((flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK1 ||
+                        (flags & NV_RX2_CHECKSUMMASK) == NV_RX2_CHECKSUMOK3) {
+                        skb->ip_summed = CHECKSUM_UNNECESSARY;
+                    }
+                }
+            }
+            /* got a valid packet - forward it to the network core */
+            skb_put(skb, len);
+            skb->protocol = eth_type_trans(skb, dev);
+            prefetch(skb->data);
+
+            dprintk(KERN_DEBUG "%s: nv_rx_process_optimized: %d bytes, proto %d accepted.\n",
+                    dev->name, len, skb->protocol);
+
+            if (likely(!np->vlangrp)) {
+#ifdef CONFIG_FORCEDETH_NAPI
+                netif_receive_skb(skb);
+#else
+                netif_rx(skb);
+#endif
+            } else {
+                vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
+                if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) {
+#ifdef CONFIG_FORCEDETH_NAPI
+                    vlan_hwaccel_receive_skb(skb, np->vlangrp,
+                                 vlanflags & NV_RX3_VLAN_TAG_MASK);
+#else
+                    vlan_hwaccel_rx(skb, np->vlangrp,
+                            vlanflags & NV_RX3_VLAN_TAG_MASK);
+#endif
+                } else {
+#ifdef CONFIG_FORCEDETH_NAPI
+                    netif_receive_skb(skb);
+#else
+                    netif_rx(skb);
+#endif
+                }
+            }
+
+            dev->last_rx = jiffies;
+            np->stats.rx_packets++;
+            np->stats.rx_bytes += len;
+        } else {
+            dev_kfree_skb(skb);
+        }
 next_pkt:
-        if (np->get_rx.ex++ == np->last_rx.ex)
+        if (unlikely(np->get_rx.ex++ == np->last_rx.ex))
             np->get_rx.ex = np->first_rx.ex;
-        if (np->get_rx_ctx++ == np->last_rx_ctx)
+        if (unlikely(np->get_rx_ctx++ == np->last_rx_ctx))
             np->get_rx_ctx = np->first_rx_ctx;
     }
 
-    return count;
+    return rx_processed_cnt;
 }
 
 static void set_bufsize(struct net_device *dev)
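Beyond mirroring the error-path consolidation, this hunk hoists the VLAN check into a likely() fast path: the descriptor's buflow word (which carries the tag) is now read only on the slow path, when a VLAN group is actually registered, instead of unconditionally for every packet at the top of the loop. A condensed sketch of the delivery split, with deliver()/deliver_vlan() as invented stand-ins for the netif_rx/vlan_hwaccel_rx variants:

/* Sketch: the common no-VLAN case never touches buflow, saving one
 * descriptor load per packet. */
if (likely(!np->vlangrp)) {
    deliver(skb);                          /* fast path */
} else {
    u32 vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
    if (vlanflags & NV_RX3_VLAN_TAG_PRESENT)
        deliver_vlan(skb, vlanflags & NV_RX3_VLAN_TAG_MASK);
    else
        deliver(skb);
}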