From 308ff823ebd749a94d3b6ac26b95bc0eb114c39e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Thu, 25 Jun 2009 16:32:52 +0200 Subject: [PATCH 01/31] nf_conntrack: Use rcu_barrier() RCU barriers, rcu_barrier(), is inserted two places. In nf_conntrack_expect.c nf_conntrack_expect_fini() before the kmem_cache_destroy(). Firstly to make sure the callback to the nf_ct_expect_free_rcu() code is still around. Secondly because I'm unsure about the consequence of having in flight nf_ct_expect_free_rcu/kmem_cache_free() calls while doing a kmem_cache_destroy() slab destroy. And in nf_conntrack_extend.c nf_ct_extend_unregister(), inorder to wait for completion of callbacks to __nf_ct_ext_free_rcu(), which is invoked by __nf_ct_ext_add(). It might be more efficient to call rcu_barrier() in nf_conntrack_core.c nf_conntrack_cleanup_net(), but thats make it more difficult to read the code (as the callback code in located in nf_conntrack_extend.c). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_expect.c | 4 +++- net/netfilter/nf_conntrack_extend.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index afde8f991646..2032dfe25ca8 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -617,8 +617,10 @@ err1: void nf_conntrack_expect_fini(struct net *net) { exp_proc_remove(net); - if (net_eq(net, &init_net)) + if (net_eq(net, &init_net)) { + rcu_barrier(); /* Wait for call_rcu() before destroy */ kmem_cache_destroy(nf_ct_expect_cachep); + } nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4b2c769d555f..fef95be334bd 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -186,6 +186,6 @@ void nf_ct_extend_unregister(struct nf_ct_ext_type *type) rcu_assign_pointer(nf_ct_ext_types[type->id], NULL); update_alloc_size(type); mutex_unlock(&nf_ct_ext_type_mutex); - synchronize_rcu(); + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } EXPORT_SYMBOL_GPL(nf_ct_extend_unregister); From a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 29 Jun 2009 14:07:56 +0200 Subject: [PATCH 02/31] netfilter: tcp conntrack: fix unacknowledged data detection with NAT When NAT helpers change the TCP packet size, the highest seen sequence number needs to be corrected. This is currently only done upwards, when the packet size is reduced the sequence number is unchanged. This causes TCP conntrack to falsely detect unacknowledged data and decrease the timeout. Fix by updating the highest seen sequence number in both directions after packet mangling. Tested-by: Krzysztof Piotr Oledzki Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_conntrack.h | 4 ++-- net/ipv4/netfilter/nf_nat_helper.c | 17 +++++++++++------ net/netfilter/nf_conntrack_proto_tcp.c | 6 +++--- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index a632689b61b4..cbdd6284996d 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -258,8 +258,8 @@ static inline bool nf_ct_kill(struct nf_conn *ct) /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir); + struct nf_conn *ct, int dir, + s16 offset); /* Fake conntrack entry for untracked connections */ extern struct nf_conn nf_conntrack_untracked; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 155c008626c8..09172a65d9b6 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -191,7 +191,8 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, ct, ctinfo); /* Tell TCP window tracking about seq change */ nf_conntrack_tcp_update(skb, ip_hdrlen(skb), - ct, CTINFO2DIR(ctinfo)); + ct, CTINFO2DIR(ctinfo), + (int)rep_len - (int)match_len); nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } @@ -377,6 +378,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, struct tcphdr *tcph; int dir; __be32 newseq, newack; + s16 seqoff, ackoff; struct nf_conn_nat *nat = nfct_nat(ct); struct nf_nat_seq *this_way, *other_way; @@ -390,15 +392,18 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph = (void *)skb->data + ip_hdrlen(skb); if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); + seqoff = this_way->offset_after; else - newseq = htonl(ntohl(tcph->seq) + this_way->offset_before); + seqoff = this_way->offset_before; if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after); + ackoff = other_way->offset_after; else - newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); + ackoff = other_way->offset_before; + + newseq = htonl(ntohl(tcph->seq) + seqoff); + newack = htonl(ntohl(tcph->ack_seq) - ackoff); inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); @@ -413,7 +418,7 @@ nf_nat_seq_adjust(struct sk_buff *skb, if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff); return 1; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 33fc0a443f3d..97a82ba75376 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -720,8 +720,8 @@ static bool tcp_in_window(const struct nf_conn *ct, /* Caller must linearize skb at tcp header. */ void nf_conntrack_tcp_update(const struct sk_buff *skb, unsigned int dataoff, - struct nf_conn *ct, - int dir) + struct nf_conn *ct, int dir, + s16 offset) { const struct tcphdr *tcph = (const void *)skb->data + dataoff; const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[dir]; @@ -734,7 +734,7 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, /* * We have to worry for the ack in the reply packet only... */ - if (after(end, ct->proto.tcp.seen[dir].td_end)) + if (ct->proto.tcp.seen[dir].td_end + offset == end) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; spin_unlock_bh(&ct->lock); From 8a3af79361e85db6fec4173ef1916322471c19e3 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Mon, 29 Jun 2009 14:28:27 +0200 Subject: [PATCH 03/31] netfilter: headers_check fix: linux/netfilter/xt_osf.h fix the following 'make headers_check' warnings: usr/include/linux/netfilter/xt_osf.h:40: found __[us]{8,16,32,64} type without #include Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_osf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/netfilter/xt_osf.h b/include/linux/netfilter/xt_osf.h index fd2272e0959a..18afa495f973 100644 --- a/include/linux/netfilter/xt_osf.h +++ b/include/linux/netfilter/xt_osf.h @@ -20,6 +20,8 @@ #ifndef _XT_OSF_H #define _XT_OSF_H +#include + #define MAXGENRELEN 32 #define XT_OSF_GENRE (1<<0) From d6d3f08b0fd998b647a05540cedd11a067b72867 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 29 Jun 2009 14:31:46 +0200 Subject: [PATCH 04/31] netfilter: xtables: conntrack match revision 2 As reported by Philip, the UNTRACKED state bit does not fit within the 8-bit state_mask member. Enlarge state_mask and give status_mask a few more bits too. Reported-by: Philip Craig References: http://markmail.org/thread/b7eg6aovfh4agyz7 Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_conntrack.h | 13 +++++ net/netfilter/xt_conntrack.c | 66 +++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 6 deletions(-) diff --git a/include/linux/netfilter/xt_conntrack.h b/include/linux/netfilter/xt_conntrack.h index 3430c7751948..7ae05338e94c 100644 --- a/include/linux/netfilter/xt_conntrack.h +++ b/include/linux/netfilter/xt_conntrack.h @@ -81,4 +81,17 @@ struct xt_conntrack_mtinfo1 { __u8 state_mask, status_mask; }; +struct xt_conntrack_mtinfo2 { + union nf_inet_addr origsrc_addr, origsrc_mask; + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; + __u32 expires_min, expires_max; + __u16 l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; + __u16 match_flags, invert_flags; + __u16 state_mask, status_mask; +}; + #endif /*_XT_CONNTRACK_H*/ diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 0b7139f3dd78..fc581800698e 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -129,7 +129,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, @@ -138,7 +138,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, @@ -147,7 +147,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, @@ -156,7 +156,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, - const struct xt_conntrack_mtinfo1 *info, + const struct xt_conntrack_mtinfo2 *info, u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, @@ -164,7 +164,7 @@ conntrack_mt_repldst(const struct nf_conn *ct, } static inline bool -ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, +ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info, const struct nf_conn *ct) { const struct nf_conntrack_tuple *tuple; @@ -204,7 +204,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, static bool conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = par->matchinfo; + const struct xt_conntrack_mtinfo2 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -278,6 +278,16 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) return true; } +static bool +conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct xt_conntrack_mtinfo2 *const *info = par->matchinfo; + struct xt_match_param newpar = *par; + + newpar.matchinfo = *info; + return conntrack_mt(skb, &newpar); +} + static bool conntrack_mt_check(const struct xt_mtchk_param *par) { if (nf_ct_l3proto_try_module_get(par->family) < 0) { @@ -288,11 +298,45 @@ static bool conntrack_mt_check(const struct xt_mtchk_param *par) return true; } +static bool conntrack_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_conntrack_mtinfo1 *info = par->matchinfo; + struct xt_conntrack_mtinfo2 *up; + int ret = conntrack_mt_check(par); + + if (ret < 0) + return ret; + + up = kmalloc(sizeof(*up), GFP_KERNEL); + if (up == NULL) { + nf_ct_l3proto_module_put(par->family); + return -ENOMEM; + } + + /* + * The strategy here is to minimize the overhead of v1 matching, + * by prebuilding a v2 struct and putting the pointer into the + * v1 dataspace. + */ + memcpy(up, info, offsetof(typeof(*info), state_mask)); + up->state_mask = info->state_mask; + up->status_mask = info->status_mask; + *(void **)info = up; + return true; +} + static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { nf_ct_l3proto_module_put(par->family); } +static void conntrack_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + struct xt_conntrack_mtinfo2 **info = par->matchinfo; + kfree(*info); + conntrack_mt_destroy(par); +} + #ifdef CONFIG_COMPAT struct compat_xt_conntrack_info { @@ -363,6 +407,16 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { .revision = 1, .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), + .match = conntrack_mt_v1, + .checkentry = conntrack_mt_check_v1, + .destroy = conntrack_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "conntrack", + .revision = 2, + .family = NFPROTO_UNSPEC, + .matchsize = sizeof(struct xt_conntrack_mtinfo2), .match = conntrack_mt, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, From e0af6062aa4f89081afb8a1a4269605775d354de Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Thu, 18 Jun 2009 13:05:49 +0400 Subject: [PATCH 05/31] MAINTAINERS: ieee802154 lists are moderated for non-subscribers. Note that our mailing list is moderated for non-subscribers. Signed-off-by: Dmitry Eremin-Solenikov --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index fa2a16def17a..28c150e916a2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2886,7 +2886,7 @@ P: Dmitry Eremin-Solenikov M: dbaryshkov@gmail.com P: Sergey Lapin M: slapin@ossfans.org -L: linux-zigbee-devel@lists.sourceforge.net +L: linux-zigbee-devel@lists.sourceforge.net (moderated for non-subscribers) W: http://apps.sourceforge.net/trac/linux-zigbee T: git git://git.kernel.org/pub/scm/linux/kernel/git/lowpan/lowpan.git S: Maintained From 932c1329acebc03ef5efa3647c9c3a967b59d0c4 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:00:08 +0400 Subject: [PATCH 06/31] nl802154: fix Oops in ieee802154_nl_get_dev ieee802154_nl_get_dev() lacks check for the existance of the device that was returned by dev_get_XXX, thus resulting in Oops for non-existing devices. Fix it. Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 105ad10876af..332b947ae812 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -276,6 +276,9 @@ static struct net_device *ieee802154_nl_get_dev(struct genl_info *info) else return NULL; + if (!dev) + return NULL; + if (dev->type != ARPHRD_IEEE802154) { dev_put(dev); return NULL; From dfd06fe8246c0425f8d6850b8e2c872b0d691ec3 Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Fri, 19 Jun 2009 17:02:09 +0400 Subject: [PATCH 07/31] nl802154: add module license and description Signed-off-by: Dmitry Eremin-Solenikov --- net/ieee802154/netlink.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 332b947ae812..27eda9fdf3c2 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -524,3 +524,6 @@ static void __exit ieee802154_nl_exit(void) } module_exit(ieee802154_nl_exit); +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("ieee 802.15.4 configuration interface"); + From 8e5b9dda99cc86bdbd822935fcc37c5808e271b3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:03:30 +0000 Subject: [PATCH 08/31] tcp: Stop non-TSO packets morphing into TSO If a socket starts out on a non-TSO route, and then switches to a TSO route, then the tail on the tx queue can morph into a TSO packet, causing mischief because the rest of the stack does not expect a partially linear TSO packet. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before declaring a packet as TSO. Reported-by: Johannes Berg Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 416fc4c2e7eb..5bdf08d312d9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -725,7 +725,8 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - if (skb->len <= mss_now || !sk_can_gso(sk)) { + if (skb->len <= mss_now || !sk_can_gso(sk) || + skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal * non-TSO case. */ From 6828b92bd21acd65113dfe0541f19f5df0d9668f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Jun 2009 18:06:41 +0000 Subject: [PATCH 09/31] tcp: Do not tack on TSO data to non-TSO packet If a socket starts out on a non-TSO route, and then switches to a TSO route, then we will tack on data to the tail of the tx queue even if it started out life as non-TSO. This is suboptimal because all of it will then be copied and checksummed unnecessarily. This patch fixes this by ensuring that skb->ip_summed is set to CHECKSUM_PARTIAL before appending extra data beyond the MSS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17b89c523f9d..7870a535dac6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -903,13 +903,17 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, iov++; while (seglen > 0) { - int copy; + int copy = 0; + int max = size_goal; skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; + } - if (!tcp_send_head(sk) || - (copy = size_goal - skb->len) <= 0) { - + if (copy <= 0) { new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -930,6 +934,7 @@ new_segment: skb_entail(sk, skb); copy = size_goal; + max = size_goal; } /* Try to append data to the end of skb. */ @@ -1028,7 +1033,7 @@ new_segment: if ((seglen -= copy) == 0 && iovlen == 0) goto out; - if (skb->len < size_goal || (flags & MSG_OOB)) + if (skb->len < max || (flags & MSG_OOB)) continue; if (forced_push(tp)) { From 1802571b9865c0fc1d8d0fa39cf73275f3a75af3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 18:42:53 +0000 Subject: [PATCH 10/31] xfrm: use xfrm_addr_cmp() instead of compare addresses directly Clean up to use xfrm_addr_cmp() instead of compare addresses directly. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 57 ++++++------------------------------------- 1 file changed, 8 insertions(+), 49 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5f1f86565f16..f2f7c638083e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -668,22 +668,10 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d hlist_for_each_entry(x, entry, net->xfrm.state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -699,26 +687,11 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre hlist_for_each_entry(x, entry, net->xfrm.state_bysrc+h, bysrc) { if (x->props.family != family || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - } - xfrm_state_hold(x); return x; } @@ -1001,25 +974,11 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family != family || x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || - x->id.proto != proto) + x->id.proto != proto || + xfrm_addr_cmp(&x->id.daddr, daddr, family) || + xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - } - xfrm_state_hold(x); return x; } From d51e9b0d94336db56a13fdc65bb30751e3ea33b7 Mon Sep 17 00:00:00 2001 From: Graf Yang Date: Mon, 29 Jun 2009 09:34:20 +0000 Subject: [PATCH 11/31] net/irda: convert bfin_sir to net_device_ops Signed-off-by: Graf Yang Signed-off-by: Mike Frysinger Signed-off-by: David S. Miller --- drivers/net/irda/bfin_sir.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/net/irda/bfin_sir.c b/drivers/net/irda/bfin_sir.c index f3eed6a8fba5..911c082cee5a 100644 --- a/drivers/net/irda/bfin_sir.c +++ b/drivers/net/irda/bfin_sir.c @@ -677,6 +677,14 @@ static int bfin_sir_init_iobuf(iobuff_t *io, int size) return 0; } +static const struct net_device_ops bfin_sir_ndo = { + .ndo_open = bfin_sir_open, + .ndo_stop = bfin_sir_stop, + .ndo_start_xmit = bfin_sir_hard_xmit, + .ndo_do_ioctl = bfin_sir_ioctl, + .ndo_get_stats = bfin_sir_stats, +}; + static int __devinit bfin_sir_probe(struct platform_device *pdev) { struct net_device *dev; @@ -718,12 +726,8 @@ static int __devinit bfin_sir_probe(struct platform_device *pdev) if (err) goto err_mem_3; - dev->hard_start_xmit = bfin_sir_hard_xmit; - dev->open = bfin_sir_open; - dev->stop = bfin_sir_stop; - dev->do_ioctl = bfin_sir_ioctl; - dev->get_stats = bfin_sir_stats; - dev->irq = sir_port->irq; + dev->netdev_ops = &bfin_sir_ndo; + dev->irq = sir_port->irq; irda_init_max_qos_capabilies(&self->qos); From ff0ac74afb5b9916641723a78796d4ee7937c2ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 28 Jun 2009 22:49:37 +0000 Subject: [PATCH 12/31] sctp: xmit sctp packet always return no route error Commit 'net: skb->dst accessors'(adf30907d63893e4208dfe3f5c88ae12bc2f25d5) broken the sctp protocol stack, the sctp packet can never be sent out after Eric Dumazet's patch, which have typo in the sctp code. Signed-off-by: Wei Yongjun Acked-by: Eric Dumazet Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/output.c b/net/sctp/output.c index b76411444515..b94c21190566 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -407,7 +407,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) } dst = dst_clone(tp->dst); skb_dst_set(nskb, dst); - if (dst) + if (!dst) goto no_route; /* Build the SCTP header. */ From 01e532981460594fffbf9b992ecfc96a78369924 Mon Sep 17 00:00:00 2001 From: Naohiro Ooiwa Date: Tue, 30 Jun 2009 12:44:19 -0700 Subject: [PATCH 13/31] bnx2x: Fix the behavior of ethtool when ONBOOT=no This is the same fix as commit 7959ea254ed18faee41160b1c50b3c9664735967 ("bnx2: Fix the behavior of ethtool when ONBOOT=no"), but for bnx2x: -------------------- When configure in ifcfg-eth* is ONBOOT=no, the behavior of ethtool command is wrong. # grep ONBOOT /etc/sysconfig/network-scripts/ifcfg-eth2 ONBOOT=no # ethtool eth2 | tail -n1 Link detected: yes I think "Link detected" should be "no". -------------------- Signed-off-by: Naohiro Ooiwa Acked-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index fbf1352e9c1c..951714a7f90a 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -8637,6 +8637,14 @@ static int bnx2x_nway_reset(struct net_device *dev) return 0; } +static u32 +bnx2x_get_link(struct net_device *dev) +{ + struct bnx2x *bp = netdev_priv(dev); + + return bp->link_vars.link_up; +} + static int bnx2x_get_eeprom_len(struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); @@ -10034,7 +10042,7 @@ static struct ethtool_ops bnx2x_ethtool_ops = { .get_msglevel = bnx2x_get_msglevel, .set_msglevel = bnx2x_set_msglevel, .nway_reset = bnx2x_nway_reset, - .get_link = ethtool_op_get_link, + .get_link = bnx2x_get_link, .get_eeprom_len = bnx2x_get_eeprom_len, .get_eeprom = bnx2x_get_eeprom, .set_eeprom = bnx2x_set_eeprom, From 008440e3ad4b72f5048d1b1f6f5ed894fdc5ad08 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 30 Jun 2009 12:47:19 -0700 Subject: [PATCH 14/31] ipv4: Fix fib_trie rebalancing, part 3 Alas current delaying of freeing old tnodes by RCU in trie_rebalance is still not enough because we can free a top tnode before updating a t->trie pointer. Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 012cf5a68581..00a54b246dfe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1021,6 +1021,9 @@ static void trie_rebalance(struct trie *t, struct tnode *tn) (struct node *)tn, wasfull); tp = node_parent((struct node *) tn); + if (!tp) + rcu_assign_pointer(t->trie, (struct node *)tn); + tnode_free_flush(); if (!tp) break; From eaea43abf30c8ccb447c190e7c94b46b5f75eae6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:49:40 +0000 Subject: [PATCH 15/31] cdc_eem: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/cdc_eem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/cdc_eem.c b/drivers/net/usb/cdc_eem.c index 80e01778dd3b..cd35d50e46d4 100644 --- a/drivers/net/usb/cdc_eem.c +++ b/drivers/net/usb/cdc_eem.c @@ -319,7 +319,7 @@ static int eem_rx_fixup(struct usbnet *dev, struct sk_buff *skb) return crc == crc2; if (unlikely(crc != crc2)) { - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; dev_kfree_skb_any(skb2); } else usbnet_skb_return(dev, skb2); From 9612101cb33862cc160069cc8423926d61db51f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:50:51 +0000 Subject: [PATCH 16/31] dm9601: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/dm9601.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c index 7ae82446b93a..1d3730d6690f 100644 --- a/drivers/net/usb/dm9601.c +++ b/drivers/net/usb/dm9601.c @@ -513,11 +513,11 @@ static int dm9601_rx_fixup(struct usbnet *dev, struct sk_buff *skb) len = (skb->data[1] | (skb->data[2] << 8)) - 4; if (unlikely(status & 0xbf)) { - if (status & 0x01) dev->stats.rx_fifo_errors++; - if (status & 0x02) dev->stats.rx_crc_errors++; - if (status & 0x04) dev->stats.rx_frame_errors++; - if (status & 0x20) dev->stats.rx_missed_errors++; - if (status & 0x90) dev->stats.rx_length_errors++; + if (status & 0x01) dev->net->stats.rx_fifo_errors++; + if (status & 0x02) dev->net->stats.rx_crc_errors++; + if (status & 0x04) dev->net->stats.rx_frame_errors++; + if (status & 0x20) dev->net->stats.rx_missed_errors++; + if (status & 0x90) dev->net->stats.rx_length_errors++; return 0; } From a22d2b36a2c4ca58c5914072a88704377bbd34f8 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:51:40 +0000 Subject: [PATCH 17/31] net1080: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/net1080.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/net/usb/net1080.c b/drivers/net/usb/net1080.c index 034e8a73ca6b..aeb1ab03a9ee 100644 --- a/drivers/net/usb/net1080.c +++ b/drivers/net/usb/net1080.c @@ -433,7 +433,7 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) dbg("rx framesize %d range %d..%d mtu %d", skb->len, net->hard_header_len, dev->hard_mtu, net->mtu); #endif - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; nc_ensure_sync(dev); return 0; } @@ -442,12 +442,12 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) hdr_len = le16_to_cpup(&header->hdr_len); packet_len = le16_to_cpup(&header->packet_len); if (FRAMED_SIZE(packet_len) > NC_MAX_PACKET) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("packet too big, %d", packet_len); nc_ensure_sync(dev); return 0; } else if (hdr_len < MIN_HEADER) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("header too short, %d", hdr_len); nc_ensure_sync(dev); return 0; @@ -465,21 +465,21 @@ static int net1080_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if ((packet_len & 0x01) == 0) { if (skb->data [packet_len] != PAD_BYTE) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("bad pad"); return 0; } skb_trim(skb, skb->len - 1); } if (skb->len != packet_len) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; dbg("bad packet len %d (expected %d)", skb->len, packet_len); nc_ensure_sync(dev); return 0; } if (header->packet_id != get_unaligned(&trailer->packet_id)) { - dev->stats.rx_fifo_errors++; + dev->net->stats.rx_fifo_errors++; dbg("(2+ dropped) rx packet_id mismatch 0x%x 0x%x", le16_to_cpu(header->packet_id), le16_to_cpu(trailer->packet_id)); From 58e2e7d5913e7a2a6d87ef30d3b52e66b88e6e1d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:52:26 +0000 Subject: [PATCH 18/31] rndis_host: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/rndis_host.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 1bf243ef950e..2232232b7989 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -487,7 +487,7 @@ int rndis_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (unlikely(hdr->msg_type != RNDIS_MSG_PACKET || skb->len < msg_len || (data_offset + data_len + 8) > msg_len)) { - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; devdbg(dev, "bad rndis message %d/%d/%d/%d, len %d", le32_to_cpu(hdr->msg_type), msg_len, data_offset, data_len, skb->len); From 80667ac13a6cf2c3a3ff275a2a72809671299acb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:53:00 +0000 Subject: [PATCH 19/31] smsc95xx: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/smsc95xx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 89a91f8c22de..fe045896406b 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1108,18 +1108,18 @@ static int smsc95xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb) if (unlikely(header & RX_STS_ES_)) { if (netif_msg_rx_err(dev)) devdbg(dev, "Error header=0x%08x", header); - dev->stats.rx_errors++; - dev->stats.rx_dropped++; + dev->net->stats.rx_errors++; + dev->net->stats.rx_dropped++; if (header & RX_STS_CRC_) { - dev->stats.rx_crc_errors++; + dev->net->stats.rx_crc_errors++; } else { if (header & (RX_STS_TL_ | RX_STS_RF_)) - dev->stats.rx_frame_errors++; + dev->net->stats.rx_frame_errors++; if ((header & RX_STS_LE_) && (!(header & RX_STS_FT_))) - dev->stats.rx_length_errors++; + dev->net->stats.rx_length_errors++; } } else { /* ETH_FRAME_LEN + 4(CRC) + 2(COE) + 4(Vlan) */ From 7963837f933df8a8ada56fa8f8205ebab40f84d0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:53:28 +0000 Subject: [PATCH 20/31] usbnet: Use netdev stats structure Now that netdev has its own stats structure we should use that instead. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 22c0585a0319..edfd9e10ceba 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -234,8 +234,8 @@ void usbnet_skb_return (struct usbnet *dev, struct sk_buff *skb) int status; skb->protocol = eth_type_trans (skb, dev->net); - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->net->stats.rx_packets++; + dev->net->stats.rx_bytes += skb->len; if (netif_msg_rx_status (dev)) devdbg (dev, "< rx, len %zu, type 0x%x", @@ -397,7 +397,7 @@ static inline void rx_process (struct usbnet *dev, struct sk_buff *skb) if (netif_msg_rx_err (dev)) devdbg (dev, "drop"); error: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; skb_queue_tail (&dev->done, skb); } } @@ -420,8 +420,8 @@ static void rx_complete (struct urb *urb) case 0: if (skb->len < dev->net->hard_header_len) { entry->state = rx_cleanup; - dev->stats.rx_errors++; - dev->stats.rx_length_errors++; + dev->net->stats.rx_errors++; + dev->net->stats.rx_length_errors++; if (netif_msg_rx_err (dev)) devdbg (dev, "rx length %d", skb->len); } @@ -433,7 +433,7 @@ static void rx_complete (struct urb *urb) * storm, recovering as needed. */ case -EPIPE: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; usbnet_defer_kevent (dev, EVENT_RX_HALT); // FALLTHROUGH @@ -451,7 +451,7 @@ static void rx_complete (struct urb *urb) case -EPROTO: case -ETIME: case -EILSEQ: - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; if (!timer_pending (&dev->delay)) { mod_timer (&dev->delay, jiffies + THROTTLE_JIFFIES); if (netif_msg_link (dev)) @@ -465,12 +465,12 @@ block: /* data overrun ... flush fifo? */ case -EOVERFLOW: - dev->stats.rx_over_errors++; + dev->net->stats.rx_over_errors++; // FALLTHROUGH default: entry->state = rx_cleanup; - dev->stats.rx_errors++; + dev->net->stats.rx_errors++; if (netif_msg_rx_err (dev)) devdbg (dev, "rx status %d", urb_status); break; @@ -583,8 +583,8 @@ int usbnet_stop (struct net_device *net) if (netif_msg_ifdown (dev)) devinfo (dev, "stop stats: rx/tx %ld/%ld, errs %ld/%ld", - dev->stats.rx_packets, dev->stats.tx_packets, - dev->stats.rx_errors, dev->stats.tx_errors + net->stats.rx_packets, net->stats.tx_packets, + net->stats.rx_errors, net->stats.tx_errors ); // ensure there are no more active urbs @@ -891,10 +891,10 @@ static void tx_complete (struct urb *urb) struct usbnet *dev = entry->dev; if (urb->status == 0) { - dev->stats.tx_packets++; - dev->stats.tx_bytes += entry->length; + dev->net->stats.tx_packets++; + dev->net->stats.tx_bytes += entry->length; } else { - dev->stats.tx_errors++; + dev->net->stats.tx_errors++; switch (urb->status) { case -EPIPE: @@ -1020,7 +1020,7 @@ int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) devdbg (dev, "drop, code %d", retval); drop: retval = NET_XMIT_SUCCESS; - dev->stats.tx_dropped++; + dev->net->stats.tx_dropped++; if (skb) dev_kfree_skb_any (skb); usb_free_urb (urb); From d9d62f3f2c6fa609883714f6fd6cd710a83d307f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Jun 2009 16:54:12 +0000 Subject: [PATCH 21/31] usbnet: Remove private stats structure Now that nothing uses the private stats structure we can remove it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 5d44059f6d63..310e18a880ff 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -42,7 +42,6 @@ struct usbnet { /* protocol/interface state */ struct net_device *net; - struct net_device_stats stats; int msg_enable; unsigned long data [5]; u32 xid; From 88d2b81f4ee8f9ea3798dbe6105beb5609844317 Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Tue, 30 Jun 2009 11:43:55 +0000 Subject: [PATCH 22/31] ixgbe: Fix SFP log messages We had a wide range of log messages for the same sort of SFP failure. This patch makes them all more similar and less confusing along with converting them to dev_err. Signed-off-by: Don Skidmore Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index e756e220db32..30d8c0e41a9d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2701,7 +2701,10 @@ static int ixgbe_up_complete(struct ixgbe_adapter *adapter) */ err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "PHY not supported on this NIC %d\n", err); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); ixgbe_down(adapter); return err; } @@ -3720,10 +3723,11 @@ static void ixgbe_sfp_task(struct work_struct *work) goto reschedule; ret = hw->phy.ops.reset(hw); if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "failed to initialize because an " - "unsupported SFP+ module type was detected.\n" - "Reload the driver after installing a " - "supported module.\n"); + dev_err(&adapter->pdev->dev, "failed to initialize " + "because an unsupported SFP+ module type " + "was detected.\n" + "Reload the driver after installing a " + "supported module.\n"); unregister_netdev(adapter->netdev); } else { DPRINTK(PROBE, INFO, "detected SFP+: %d\n", @@ -4526,7 +4530,10 @@ static void ixgbe_sfp_config_module_task(struct work_struct *work) adapter->flags |= IXGBE_FLAG_IN_SFP_MOD_TASK; err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - DPRINTK(PROBE, ERR, "PHY not supported on this NIC %d\n", err); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); ixgbe_down(adapter); return; } @@ -5513,8 +5520,10 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, round_jiffies(jiffies + (2 * HZ))); err = 0; } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { - dev_err(&adapter->pdev->dev, "failed to load because an " - "unsupported SFP+ module type was detected.\n"); + dev_err(&adapter->pdev->dev, "failed to initialize because " + "an unsupported SFP+ module type was detected.\n" + "Reload the driver after installing a supported " + "module.\n"); goto err_sw_init; } else if (err) { dev_err(&adapter->pdev->dev, "HW Init failed: %d\n", err); From a380137900fca5c79e6daa9500bdb6ea5649188e Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Tue, 30 Jun 2009 11:44:16 +0000 Subject: [PATCH 23/31] ixgbe: Fix device capabilities of 82599 single speed fiber NICs. 82599 single speed fiber modules only support 10G/Full. Return proper device capabilities while querrying the adapter and error while changing device advertisement/speed/duplex capabilities. Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_ethtool.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_ethtool.c b/drivers/net/ixgbe/ixgbe_ethtool.c index 86f4f3e36f27..0f7b6a3a2e68 100644 --- a/drivers/net/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ixgbe/ixgbe_ethtool.c @@ -139,7 +139,7 @@ static int ixgbe_get_settings(struct net_device *netdev, ecmd->autoneg = AUTONEG_ENABLE; ecmd->transceiver = XCVR_EXTERNAL; if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->mac.type == ixgbe_mac_82599EB)) { + (hw->phy.multispeed_fiber)) { ecmd->supported |= (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg); @@ -217,7 +217,7 @@ static int ixgbe_set_settings(struct net_device *netdev, s32 err = 0; if ((hw->phy.media_type == ixgbe_media_type_copper) || - (hw->mac.type == ixgbe_mac_82599EB)) { + (hw->phy.multispeed_fiber)) { /* 10000/copper and 1000/copper must autoneg * this function does not support any duplex forcing, but can * limit the advertising of the adapter to only 10000 or 1000 */ @@ -245,6 +245,7 @@ static int ixgbe_set_settings(struct net_device *netdev, } else { /* in this case we currently only support 10Gb/FULL */ if ((ecmd->autoneg == AUTONEG_ENABLE) || + (ecmd->advertising != ADVERTISED_10000baseT_Full) || (ecmd->speed + ecmd->duplex != SPEED_10000 + DUPLEX_FULL)) return -EINVAL; } From a1f25324b93ecdab1cbb27d3e9c4cafecb06ceda Mon Sep 17 00:00:00 2001 From: Mallikarjuna R Chilakala Date: Tue, 30 Jun 2009 11:44:36 +0000 Subject: [PATCH 24/31] ixgbe: Fix link capabilities during adapter resets Adapter link advertisement capabilities were not persistent during adapter resets. While configuring multispeed fiber link check for phy autoneg_advertised settings before overwriting with default link capabilities Signed-off-by: Mallikarjuna R Chilakala Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index 30d8c0e41a9d..fce2ef49b3a7 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -4506,7 +4506,8 @@ static void ixgbe_multispeed_fiber_task(struct work_struct *work) u32 autoneg; adapter->flags |= IXGBE_FLAG_IN_SFP_LINK_TASK; - if (hw->mac.ops.get_link_capabilities) + autoneg = hw->phy.autoneg_advertised; + if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &hw->mac.autoneg); if (hw->mac.ops.setup_link_speed) From 4f57ca6e17edfc56ddde5c87eb893e47e0d2d343 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 30 Jun 2009 11:44:56 +0000 Subject: [PATCH 25/31] ixgbe: fix unmap length bug This patch addresses three WARN_ON statements from DMA-API debug code ixgbe is mapping more than it unmaps, reduce the length of the map call and remove the "used once" local variable. found by Joerg Roedel in 2.6.30, so is a candidate for -stable. in addition, fix missing ->dma = 0 after unmap to prevent double free with pci_unmap_single and lastly, don't unmap (half) pages that aren't mapped. Signed-off-by: Jesse Brandeburg CC: Joerg Roedel Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ixgbe/ixgbe_main.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index fce2ef49b3a7..5588ef493a3d 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -563,7 +563,6 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *bi; unsigned int i; - unsigned int bufsz = rx_ring->rx_buf_len + NET_IP_ALIGN; i = rx_ring->next_to_use; bi = &rx_ring->rx_buffer_info[i]; @@ -593,7 +592,9 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, if (!bi->skb) { struct sk_buff *skb; - skb = netdev_alloc_skb(adapter->netdev, bufsz); + skb = netdev_alloc_skb(adapter->netdev, + (rx_ring->rx_buf_len + + NET_IP_ALIGN)); if (!skb) { adapter->alloc_rx_buff_failed++; @@ -608,7 +609,8 @@ static void ixgbe_alloc_rx_buffers(struct ixgbe_adapter *adapter, skb_reserve(skb, NET_IP_ALIGN); bi->skb = skb; - bi->dma = pci_map_single(pdev, skb->data, bufsz, + bi->dma = pci_map_single(pdev, skb->data, + rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); } /* Refresh the desc even if buffer_addrs didn't change because @@ -732,6 +734,7 @@ static bool ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, pci_unmap_single(pdev, rx_buffer_info->dma, rx_ring->rx_buf_len, PCI_DMA_FROMDEVICE); + rx_buffer_info->dma = 0; skb_put(skb, len); } @@ -2815,9 +2818,11 @@ static void ixgbe_clean_rx_ring(struct ixgbe_adapter *adapter, } if (!rx_buffer_info->page) continue; - pci_unmap_page(pdev, rx_buffer_info->page_dma, PAGE_SIZE / 2, - PCI_DMA_FROMDEVICE); - rx_buffer_info->page_dma = 0; + if (rx_buffer_info->page_dma) { + pci_unmap_page(pdev, rx_buffer_info->page_dma, + PAGE_SIZE / 2, PCI_DMA_FROMDEVICE); + rx_buffer_info->page_dma = 0; + } put_page(rx_buffer_info->page); rx_buffer_info->page = NULL; rx_buffer_info->page_offset = 0; From 91615f765a2935b6cbae424b9eee1585ed681ae6 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 30 Jun 2009 12:45:15 +0000 Subject: [PATCH 26/31] igb: fix unmap length bug driver was mixing NET_IP_ALIGN count bytes in map/unmap calls unevenly. Only map the bytes that the hardware might dma into also fix unmap related bug where ->dma was not being cleared after unmap Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/igb_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index ea17319624aa..468356d124ea 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -4549,11 +4549,12 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, cleaned = true; cleaned_count++; + /* this is the fast path for the non-packet split case */ if (!adapter->rx_ps_hdr_size) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_buffer_len + - NET_IP_ALIGN, + adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; skb_put(skb, length); goto send_up; } @@ -4570,8 +4571,9 @@ static bool igb_clean_rx_irq_adv(struct igb_ring *rx_ring, if (!skb_shinfo(skb)->nr_frags) { pci_unmap_single(pdev, buffer_info->dma, - adapter->rx_ps_hdr_size + NET_IP_ALIGN, + adapter->rx_ps_hdr_size, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; skb_put(skb, hlen); } @@ -4713,7 +4715,6 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, bufsz = adapter->rx_ps_hdr_size; else bufsz = adapter->rx_buffer_len; - bufsz += NET_IP_ALIGN; while (cleaned_count--) { rx_desc = E1000_RX_DESC_ADV(*rx_ring, i); @@ -4737,7 +4738,7 @@ static void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, } if (!buffer_info->skb) { - skb = netdev_alloc_skb(netdev, bufsz); + skb = netdev_alloc_skb(netdev, bufsz + NET_IP_ALIGN); if (!skb) { adapter->alloc_rx_buff_failed++; goto no_buffers; From 679be3ba0c493eb66d22c206273729ce50925e85 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 30 Jun 2009 12:45:34 +0000 Subject: [PATCH 27/31] e1000: fix unmap bug as reported by kerneloops.org [ 121.781161] ------------[ cut here ]------------ [ 121.781171] WARNING: at lib/dma-debug.c:793 check_unmap+0x14e/0x577() [ 121.781173] Hardware name: S5520HC [ 121.781177] e1000 0000:0a:00.0: DMA-API: device driver tries to free DMA memory it has not allocated [device address=0x00000001d688b0fa] [size=1522 bytes] [ 121.781180] Modules linked in: e1000 mdio dca [last unloaded: ixgbe] [ 121.781187] Pid: 4793, comm: bash Tainted: P 2.6.30-master-06161113 #3 [ 121.781190] Call Trace: [ 121.781195] [] ? check_unmap+0x14e/0x577 [ 121.781201] [] warn_slowpath_common+0x77/0x8f [ 121.781205] [] warn_slowpath_fmt+0x9f/0xa1 [ 121.781212] [] ? _spin_lock_irqsave+0x3f/0x49 [ 121.781216] [] ? get_hash_bucket+0x28/0x33 [ 121.781220] [] check_unmap+0x14e/0x577 [ 121.781225] [] ? check_bytes_and_report+0x38/0xcb [ 121.781230] [] debug_dma_unmap_page+0x80/0x92 [ 121.781234] [] ? unmap_single+0x1a/0x4e [ 121.781239] [] ? __kfree_skb+0x74/0x78 [ 121.781250] [] pci_unmap_single+0x64/0x6d [e1000] [ 121.781259] [] e1000_clean_rx_ring+0x4c/0xbf [e1000] [ 121.781268] [] e1000_clean_all_rx_rings+0x28/0x36 [e1000] [ 121.781277] [] e1000_down+0x138/0x141 [e1000] [ 121.781286] [] __e1000_shutdown+0x6b/0x198 [e1000] [ 121.781296] [] e1000_suspend+0x17/0x50 [e1000] [ 121.781301] [] pci_legacy_suspend+0x3b/0xbe [ 121.781305] [] pci_pm_suspend+0x3e/0xf1 [ 121.781310] [] pm_op+0x57/0xde [ 121.781314] [] dpm_suspend_start+0x31e/0x470 [ 121.781319] [] suspend_devices_and_enter+0x3e/0x1a2 [ 121.781323] [] enter_state+0xd1/0x127 [ 121.781327] [] state_store+0xa7/0xc9 [ 121.781332] [] kobj_attr_store+0x17/0x19 [ 121.781336] [] sysfs_write_file+0xe5/0x121 [ 121.781341] [] vfs_write+0xab/0x105 [ 121.781344] [] sys_write+0x47/0x6d [ 121.781349] [] system_call_fastpath+0x16/0x1b [ 121.781352] ---[ end trace 97bacaaac2ed7786 ]--- Fix is to correctly zero out internal ->dma value when unmapping and make sure never to unmap unless there specifically was a mapping done. Signed-off-by: Jesse Brandeburg Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_main.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 5e3356f8eb5a..972e06d984c8 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2185,12 +2185,16 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter, /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { buffer_info = &rx_ring->buffer_info[i]; - if (buffer_info->skb) { + if (buffer_info->dma) { pci_unmap_single(pdev, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); + } + buffer_info->dma = 0; + + if (buffer_info->skb) { dev_kfree_skb(buffer_info->skb); buffer_info->skb = NULL; } @@ -4033,6 +4037,7 @@ static bool e1000_clean_rx_irq(struct e1000_adapter *adapter, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; length = le16_to_cpu(rx_desc->length); /* !EOP means multiple descriptors were used to store a single @@ -4222,6 +4227,7 @@ map_skb: pci_unmap_single(pdev, buffer_info->dma, adapter->rx_buffer_len, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; break; /* while !buffer_info->skb */ } From eab633021c26025b34f36f79f0311d3d99f40ceb Mon Sep 17 00:00:00 2001 From: Andre Detsch Date: Tue, 30 Jun 2009 12:46:13 +0000 Subject: [PATCH 28/31] e1000: return PCI_ERS_RESULT_DISCONNECT on permanent error PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This state is not checked in many of the network drivers. The patch fixes the omission in the e1000 driver. Based on Mike Mason's similar patch for e1000e. Signed-off-by: Andre Detsch CC: Mike Mason Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000/e1000_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 972e06d984c8..5b8cbdb4b520 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -4823,6 +4823,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) e1000_down(adapter); pci_disable_device(pdev); From c93b5a76d58656158d195a7df507ebc660010969 Mon Sep 17 00:00:00 2001 From: Mike Mason Date: Tue, 30 Jun 2009 12:45:53 +0000 Subject: [PATCH 29/31] e1000e: io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT on permanent failure PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This state is not checked in many of the network drivers. This patch fixes the omission in the e1000e driver. Signed-off-by: Mike Mason Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/e1000e/netdev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 679885a122b4..63415bb6f48f 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4785,6 +4785,9 @@ static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) e1000e_down(adapter); pci_disable_device(pdev); From 59ed6eecff4aa00c5c5d18ffd180acac108d596e Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 30 Jun 2009 12:46:34 +0000 Subject: [PATCH 30/31] igb: return PCI_ERS_RESULT_DISCONNECT on permanent error PCI drivers that implement the io_error_detected callback should return PCI_ERS_RESULT_DISCONNECT if the state passed in is pci_channel_io_perm_failure. This patch fixes the issue for igb. Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/igb/igb_main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 468356d124ea..be480292aba1 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -5339,6 +5339,9 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, netif_device_detach(netdev); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + if (netif_running(netdev)) igb_down(adapter); pci_disable_device(pdev); From f8a68e752bc4e39644843403168137663c984524 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 30 Jun 2009 16:27:17 +0000 Subject: [PATCH 31/31] Revert "ipv4: arp announce, arp_proxy and windows ip conflict verification" This reverts commit 73ce7b01b4496a5fbf9caf63033c874be692333f. After discovering that we don't listen to gratuitious arps in 2.6.30 I tracked the failure down to this commit. The patch makes absolutely no sense. RFC2131 RFC3927 and RFC5227. are all in agreement that an arp request with sip == 0 should be used for the probe (to prevent learning) and an arp request with sip == tip should be used for the gratitous announcement that people can learn from. It appears the author of the broken patch got those two cases confused and modified the code to drop all gratuitous arp traffic. Ouch! Cc: stable@kernel.org Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/ipv4/arp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8a3881e28aca..c29d75d8f1b1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,11 +801,8 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* - * Special case: IPv4 duplicate address detection packet (RFC2131) - * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) - */ - if (sip == 0 || tip == sip) { + /* Special case: IPv4 duplicate address detection packet (RFC2131) */ + if (sip == 0) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip))