425 lines
9.2 KiB
C
Raw Normal View History

#ifndef _IPV6_H
#define _IPV6_H
#include <linux/config.h>
#include <linux/in6.h>
#include <asm/byteorder.h>
/*
 * Minimal MTU for IPv6.  The latest drafts raised the minimal MTU
 * to 1280.
 */
#define IPV6_MIN_MTU 1280
/*
* Advanced API
* source interface/address selection, source routing, etc...
* *under construction*
*/
/* Pairs an IPv6 address with an interface index. */
struct in6_pktinfo {
	struct in6_addr	ipi6_addr;	/* IPv6 address */
	int		ipi6_ifindex;	/* interface index */
};
/* Interface request record: an address, its prefix length and an interface index. */
struct in6_ifreq {
	struct in6_addr	ifr6_addr;
	__u32		ifr6_prefixlen;
	int		ifr6_ifindex;
};
/* Source-routing constants: a per-hop strictness flag and the type-0 routing header id. */
#define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */
#define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */
/*
* routing header
*/
/* Fixed leading fields of a routing header; type-specific data follows them. */
struct ipv6_rt_hdr {
	__u8	nexthdr;
	__u8	hdrlen;
	__u8	type;
	__u8	segments_left;

	/*
	 * A variable-length, type-specific data field comes
	 * after this fixed part.
	 */
};
/* Generic two-byte option header; ipv6_optlen() derives the full length from hdrlen. */
struct ipv6_opt_hdr {
	__u8	nexthdr;
	__u8	hdrlen;

	/*
	 * The option data, TLV encoded, comes next.
	 */
};
/* Both of these names are aliases for the generic struct ipv6_opt_hdr layout. */
#define ipv6_destopt_hdr ipv6_opt_hdr
#define ipv6_hopopt_hdr ipv6_opt_hdr
#ifdef __KERNEL__
/*
 * Length of the option header that p points to: (hdrlen + 1) * 8.
 * (RFC 2460 counts Hdr Ext Len in 8-octet units, not including the first.)
 */
#define ipv6_optlen(p) (((p)->hdrlen + 1) * 8)
#endif
/*
* routing header type 0 (used in cmsghdr struct)
*/
/* Type 0 routing header: the common routing header, a reserved word, then addresses. */
struct rt0_hdr {
	struct ipv6_rt_hdr	rt_hdr;		/* common routing header fields */
	__u32			reserved;
	struct in6_addr		addr[0];	/* trailing addresses; count is not fixed here */

/* Shorthand for the type field of the embedded common header. */
#define rt0_type		rt_hdr.type
};
/* Authentication header layout. */
struct ipv6_auth_hdr {
	__u8	nexthdr;
	__u8	hdrlen;		/* Careful: counted in 32 bit units here! */
	__u16	reserved;
	__u32	spi;
	__u32	seq_no;		/* Sequence number */
	__u8	auth_data[0];	/* Variable length, at least 4. Keep 64 bit alignment in mind! */
};
/* ESP header layout: SPI, sequence number, then the variable-length data. */
struct ipv6_esp_hdr {
	__u32	spi;
	__u32	seq_no;		/* Sequence number */
	__u8	enc_data[0];	/* Variable length, at least 8. Keep 64 bit alignment in mind! */
};
/* Compression header layout: next header, flags and a 16-bit cpi value. */
struct ipv6_comp_hdr {
	__u8	nexthdr;
	__u8	flags;
	__u16	cpi;
};
/*
* IPv6 fixed header
*
* BEWARE, it is incorrect. The first 4 bits of flow_lbl
* are glued to priority now, forming "class".
*/
struct ipv6hdr {
	/*
	 * version and priority share the first byte; the declaration
	 * order depends on how the compiler lays out bit-fields.
	 */
#if defined(__LITTLE_ENDIAN_BITFIELD)
	__u8			priority:4;
	__u8			version:4;
#elif defined(__BIG_ENDIAN_BITFIELD)
	__u8			version:4;
	__u8			priority:4;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__u8			flow_lbl[3];

	__u16			payload_len;
	__u8			nexthdr;
	__u8			hop_limit;

	struct in6_addr		saddr;
	struct in6_addr		daddr;
};
/*
* This structure contains configuration options per IPv6 link.
*/
/*
 * Field order here is not the same as the DEVCONF_* index order
 * (force_mld_version sits before the privacy fields in this struct,
 * but DEVCONF_FORCE_MLD_VERSION is the last index).
 */
struct ipv6_devconf {
	__s32		forwarding;
	__s32		hop_limit;
	__s32		mtu6;
	__s32		accept_ra;
	__s32		accept_redirects;
	__s32		autoconf;
	__s32		dad_transmits;
	__s32		rtr_solicits;
	__s32		rtr_solicit_interval;
	__s32		rtr_solicit_delay;
	__s32		force_mld_version;
#ifdef CONFIG_IPV6_PRIVACY
	/* only present when CONFIG_IPV6_PRIVACY is set */
	__s32		use_tempaddr;
	__s32		temp_valid_lft;
	__s32		temp_prefered_lft;
	__s32		regen_max_retry;
	__s32		max_desync_factor;
#endif
	__s32		max_addresses;
	void		*sysctl;
};
/* index values for the variables in ipv6_devconf */
/* Values are spelled out so that each index is visible at a glance. */
enum {
	DEVCONF_FORWARDING		= 0,
	DEVCONF_HOPLIMIT		= 1,
	DEVCONF_MTU6			= 2,
	DEVCONF_ACCEPT_RA		= 3,
	DEVCONF_ACCEPT_REDIRECTS	= 4,
	DEVCONF_AUTOCONF		= 5,
	DEVCONF_DAD_TRANSMITS		= 6,
	DEVCONF_RTR_SOLICITS		= 7,
	DEVCONF_RTR_SOLICIT_INTERVAL	= 8,
	DEVCONF_RTR_SOLICIT_DELAY	= 9,
	DEVCONF_USE_TEMPADDR		= 10,
	DEVCONF_TEMP_VALID_LFT		= 11,
	DEVCONF_TEMP_PREFERED_LFT	= 12,
	DEVCONF_REGEN_MAX_RETRY		= 13,
	DEVCONF_MAX_DESYNC_FACTOR	= 14,
	DEVCONF_MAX_ADDRESSES		= 15,
	DEVCONF_FORCE_MLD_VERSION	= 16,
	DEVCONF_MAX			/* one past the last index; keep it last */
};
#ifdef __KERNEL__
#include <linux/in6.h> /* struct sockaddr_in6 */
#include <linux/icmpv6.h>
#include <net/if_inet6.h> /* struct ipv6_mc_socklist */
#include <linux/tcp.h>
#include <linux/udp.h>
/*
This structure contains results of exthdrs parsing
as offsets from skb->nh.
*/
/* Accessed through IP6CB(), which overlays this struct on skb->cb. */
struct inet6_skb_parm {
	int	iif;

	/* results of extension header parsing */
	__u16	ra;
	__u16	hop;
	__u16	dst0;
	__u16	srcrt;
	__u16	dst1;
	__u16	lastopt;
};
/* View the control buffer of skb (skb->cb) as a struct inet6_skb_parm. */
#define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb))
static inline int inet6_iif(const struct sk_buff *skb)
{
return IP6CB(skb)->iif;
}
/* IPv6-specific part of a request sock. */
struct inet6_request_sock {
	struct in6_addr	loc_addr;
	struct in6_addr	rmt_addr;
	struct sk_buff	*pktopts;
	int		iif;
};
/*
 * TCP request sock followed by its IPv6 part.  The IPv6 part is the
 * trailing member; inet6_rsk_offset() finds it by subtracting its size
 * from the object size.
 */
struct tcp6_request_sock {
	struct tcp_request_sock		tcp6rsk_tcp;
	struct inet6_request_sock	tcp6rsk_inet6;
};
/**
 * struct ipv6_pinfo - ipv6 private area
 *
 * Each socket type in the struct sock hierarchy (tcp6_sock, udp6_sock,
 * etc) _must_ keep this as its last member: inet6_sk_generic works out
 * its offset from the base struct sock by using the
 * struct proto->slab_obj_size member. -acme
 */
struct ipv6_pinfo {
	struct in6_addr		saddr;
	struct in6_addr		rcv_saddr;
	struct in6_addr		daddr;
	struct in6_addr		*daddr_cache;

	__u32			flow_label;
	__u32			frag_size;
	__s16			hop_limit;
	__s16			mcast_hops;
	int			mcast_oif;

	/* pktoption flags: single bits, or the whole set at once through 'all' */
	union {
		struct {
			__u16	srcrt:2;
			__u16	osrcrt:2;
			__u16	rxinfo:1;
			__u16	rxoinfo:1;
			__u16	rxhlim:1;
			__u16	rxohlim:1;
			__u16	hopopts:1;
			__u16	ohopopts:1;
			__u16	dstopts:1;
			__u16	odstopts:1;
			__u16	rxflow:1;
			__u16	rxtclass:1;
		} bits;
		__u16		all;
	} rxopt;

	/* sockopt flags */
	__u8			mc_loop:1;
	__u8			recverr:1;
	__u8			sndflow:1;
	__u8			pmtudisc:2;
	__u8			ipv6only:1;
	__u8			tclass;

	__u32			dst_cookie;

	struct ipv6_mc_socklist	*ipv6_mc_list;
	struct ipv6_ac_socklist	*ipv6_ac_list;
	struct ipv6_fl_socklist	*ipv6_fl_list;

	struct ipv6_txoptions	*opt;
	struct sk_buff		*pktoptions;

	/* state kept under the name 'cork' */
	struct {
		struct ipv6_txoptions	*opt;
		struct rt6_info		*rt;
		int			hop_limit;
		int			tclass;
	} cork;
};
/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
struct raw6_sock {
	/* must stay the first member of raw6_sock */
	struct inet_sock	inet;

	__u32			checksum;	/* perform checksum */
	__u32			offset;		/* checksum offset */
	struct icmp6_filter	filter;

	/* must stay the last member of raw6_sock, see inet6_sk_generic */
	struct ipv6_pinfo	inet6;
};
struct udp6_sock {
	struct udp_sock		udp;

	/* must stay the last member of udp6_sock, see inet6_sk_generic */
	struct ipv6_pinfo	inet6;
};
struct tcp6_sock {
	struct tcp_sock		tcp;

	/* must stay the last member of tcp6_sock, see inet6_sk_generic */
	struct ipv6_pinfo	inet6;
};
/*
 * Defined outside this header.
 * NOTE(review): presumably rebuilds header/route state for sk and
 * reports success or failure through the int result -- confirm against
 * the definition.
 */
extern int inet6_sk_rebuild_header(struct sock *sk);
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
{
return inet_sk(__sk)->pinet6;
}
static inline struct inet6_request_sock *
inet6_rsk(const struct request_sock *rsk)
{
return (struct inet6_request_sock *)(((u8 *)rsk) +
inet_rsk(rsk)->inet6_rsk_offset);
}
static inline u32 inet6_rsk_offset(struct request_sock *rsk)
{
return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock);
}
/*
 * Allocate a request sock through reqsk_alloc() and, on success, record
 * where its IPv6 part starts.  A NULL result from reqsk_alloc() is
 * passed straight back to the caller.
 */
static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops)
{
	struct request_sock *req = reqsk_alloc(ops);

	if (req == NULL)
		return NULL;

	inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
	return req;
}
/* Reinterpret sk as a raw6_sock; no check is made that it really is one. */
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
	struct raw6_sock *rp = (struct raw6_sock *)sk;

	return rp;
}
/*
 * Work out the size of the leading "ancestor" part of the socket --
 * struct inet_sock, plus struct ipv6_pinfo when sk_from is PF_INET6 --
 * and hand it to __inet_sk_copy_descendant() together with both sockets.
 */
static inline void inet_sk_copy_descendant(struct sock *sk_to,
					   const struct sock *sk_from)
{
	const int ancestor_size = sk_from->sk_family == PF_INET6 ?
		sizeof(struct inet_sock) + sizeof(struct ipv6_pinfo) :
		sizeof(struct inet_sock);

	__inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
}
/*
 * __ipv6_only_sock() reads the ipv6only flag from the socket's
 * ipv6_pinfo without looking at the family.  ipv6_only_sock() first
 * requires sk_family == PF_INET6; it expands sk twice, so pass an
 * expression without side effects.
 */
#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only)
#define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
#include <linux/tcp.h>
/* IPv6-specific part of a timewait sock: the two addresses it keeps. */
struct inet6_timewait_sock {
	struct in6_addr	tw_v6_daddr;
	struct in6_addr	tw_v6_rcv_saddr;
};
/*
 * TCP timewait sock followed by its IPv6 part.  The IPv6 part is the
 * trailing member; inet6_tw_offset() computes its position as
 * twsk_obj_size minus its size.
 */
struct tcp6_timewait_sock {
	struct tcp_timewait_sock	tcp6tw_tcp;
	struct inet6_timewait_sock	tcp6tw_inet6;
};
static inline u16 inet6_tw_offset(const struct proto *prot)
{
return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock);
}
static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
{
return (struct inet6_timewait_sock *)(((u8 *)sk) +
inet_twsk(sk)->tw_ipv6_offset);
}
/*
 * Return the receive source address of sk.  Sockets in TCP_TIME_WAIT
 * keep it in their inet6_timewait_sock part; all others keep it in
 * ipv6_pinfo.  The family of sk is not checked here.
 */
static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk)
{
	if (likely(sk->sk_state != TCP_TIME_WAIT))
		return &inet6_sk(sk)->rcv_saddr;

	return &inet6_twsk(sk)->tw_v6_rcv_saddr;
}
/* Like __inet6_rcv_saddr(), but yields NULL when sk is not AF_INET6. */
static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
{
	if (sk->sk_family != AF_INET6)
		return NULL;

	return __inet6_rcv_saddr(sk);
}
/*
 * Report whether sk is IPv6-only.  Sockets in TCP_TIME_WAIT carry the
 * flag as tw_ipv6only; for all others ipv6_only_sock() decides.
 */
static inline int inet_v6_ipv6only(const struct sock *sk)
{
	if (likely(sk->sk_state != TCP_TIME_WAIT))
		return ipv6_only_sock(sk);

	return inet_twsk(sk)->tw_ipv6only;
}
#else
/* Neither CONFIG_IPV6 nor CONFIG_IPV6_MODULE is set: both tests are constant 0. */
#define __ipv6_only_sock(sk) 0
#define ipv6_only_sock(sk) 0
/* Stub for builds without IPv6: the argument is ignored and NULL is returned. */
static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
{
	return NULL;
}
/* Stub for builds without IPv6: the argument is ignored and NULL is returned. */
static inline struct inet6_request_sock *
inet6_rsk(const struct request_sock *rsk)
{
	return NULL;
}
/* Stub for builds without IPv6: the argument is ignored and NULL is returned. */
static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
	return NULL;
}
/*
 * Neither CONFIG_IPV6 nor CONFIG_IPV6_MODULE is set: the address lookups
 * expand to NULL and the ipv6only tests to 0, without evaluating __sk.
 */
#define __inet6_rcv_saddr(__sk) NULL
#define inet6_rcv_saddr(__sk) NULL
#define tcp_twsk_ipv6only(__sk) 0
#define inet_v6_ipv6only(__sk) 0
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
/*
 * Commit message attached to the lines that follow
 * (dated 2005-10-03 14:13:38 -07:00):
 *
 * [INET]: speedup inet (tcp/dccp) lookups
 *
 * Arnaldo and I agreed it could be applied now, because I have other
 * pending patches depending on this one (Thank you Arnaldo).
 *
 * (The other important patch moves skc_refcnt in a separate cache line,
 * so that the SMP/NUMA performance doesn't suffer from cache line ping
 * pongs.)
 *
 * 1) First some performance data:
 * --------------------------------
 *
 * tcp_v4_rcv() wastes a *lot* of time in __inet_lookup_established().
 *
 * The most time critical code is:
 *
 *	sk_for_each(sk, node, &head->chain) {
 *		if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
 *			goto hit; // You sunk my battleship!
 *	}
 *
 * The sk_for_each() does use prefetch() hints but only the beginning of
 * "struct sock" is prefetched.
 *
 * As INET_MATCH's first comparison uses inet_sk(__sk)->daddr, which is far
 * away from the beginning of "struct sock", it has to bring a cold cache
 * line into the CPU cache. Each iteration has to use at least 2 cache
 * lines.
 *
 * This can be problematic if some chains are very long.
 *
 * 2) The goal
 * -----------
 *
 * The idea I had is to change things so that INET_MATCH() may return FALSE
 * in 99% of cases only using the data already in the CPU cache, using one
 * cache line per iteration.
 *
 * 3) Description of the patch
 * ---------------------------
 *
 * Adds a new 'unsigned int skc_hash' field in 'struct sock_common',
 * filling a 32 bits hole on 64 bits platforms.
 *
 *	struct sock_common {
 *		unsigned short		skc_family;
 *		volatile unsigned char	skc_state;
 *		unsigned char		skc_reuse;
 *		int			skc_bound_dev_if;
 *		struct hlist_node	skc_node;
 *		struct hlist_node	skc_bind_node;
 *		atomic_t		skc_refcnt;
 *	+	unsigned int		skc_hash;
 *		struct proto		*skc_prot;
 *	};
 *
 * Store in this 32 bits field the full hash, not masked by
 * (ehash_size - 1). Using this full hash as the first comparison done in
 * INET_MATCH permits us to immediately skip the element without touching
 * a second cache line in case of a miss.
 *
 * Suppress the sk_hashent/tw_hashent fields since skc_hash (aliased to
 * sk_hash and tw_hash) already contains the slot number if we mask with
 * (ehash_size - 1).
 *
 * File include/net/inet_hashtables.h
 *
 * 64 bits platforms:
 *
 *	#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 *		(((__sk)->sk_hash == (__hash)) && \
 *		((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
 *		((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
 *		(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 *
 * 32 bits platforms:
 *
 *	#define TCP_IPV4_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
 *		(((__sk)->sk_hash == (__hash)) && \
 *		(inet_sk(__sk)->daddr == (__saddr)) && \
 *		(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
 *		(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
 *
 * - Adds a prefetch(head->chain.first) in
 *   __inet_lookup_established()/__tcp_v4_check_established() and
 *   __inet6_lookup_established()/__tcp_v6_check_established() and
 *   __dccp_v4_check_established() to bring into cache the first element of
 *   the list, before the {read|write}_lock(&head->lock);
 *
 * Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
 * Acked-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
 * Signed-off-by: David S. Miller <davem@davemloft.net>
 */
/*
 * INET6_MATCH - test whether socket __sk matches an IPv6 lookup key.
 *
 * The checks run in this order and stop at the first failure:
 *   1. the stored sk_hash equals __hash;
 *   2. the 32-bit word read at inet_sk(__sk)->dport equals __ports;
 *   3. the socket family is AF_INET6;
 *   4. the socket's daddr equals __saddr;
 *   5. the socket's rcv_saddr equals __daddr;
 *   6. the socket is not bound to a device, or is bound to __dif.
 *
 * __sk is expanded several times and the other arguments may not be
 * evaluated at all, so pass expressions without side effects.
 */
#define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\
(((__sk)->sk_hash == (__hash)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* __KERNEL__ */
#endif /* _IPV6_H */