linux/net/atm/clip.c

992 lines
24 KiB
C
Raw Normal View History

/* net/atm/clip.c - RFC1577 Classical IP over ATM */
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
#define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kernel.h> /* for UINT_MAX */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/wait.h>
#include <linux/timer.h>
#include <linux/if_arp.h> /* for some manifest constants */
#include <linux/notifier.h>
#include <linux/atm.h>
#include <linux/atmdev.h>
#include <linux/atmclip.h>
#include <linux/atmarp.h>
#include <linux/capability.h>
#include <linux/ip.h> /* for net/route.h */
#include <linux/in.h> /* for struct sockaddr_in */
#include <linux/if.h> /* for IFF_UP */
#include <linux/inetdevice.h>
#include <linux/bitops.h>
#include <linux/poison.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <linux/jhash.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 08:04:11 +00:00
#include <linux/slab.h>
#include <net/route.h> /* for struct rtable and routing */
#include <net/icmp.h> /* icmp_send */
#include <linux/param.h> /* for HZ */
#include <linux/uaccess.h>
#include <asm/byteorder.h> /* for htons etc. */
#include <asm/system.h> /* save/restore_flags */
#include <linux/atomic.h>
#include "common.h"
#include "resources.h"
#include <net/atmclip.h>
static struct net_device *clip_devs;
static struct atm_vcc *atmarpd;
static struct neigh_table clip_tbl;
static struct timer_list idle_timer;
static int to_atmarpd(enum atmarp_ctrl_type type, int itf, __be32 ip)
{
struct sock *sk;
struct atmarp_ctrl *ctrl;
struct sk_buff *skb;
pr_debug("(%d)\n", type);
if (!atmarpd)
return -EUNATCH;
skb = alloc_skb(sizeof(struct atmarp_ctrl), GFP_ATOMIC);
if (!skb)
return -ENOMEM;
ctrl = (struct atmarp_ctrl *)skb_put(skb, sizeof(struct atmarp_ctrl));
ctrl->type = type;
ctrl->itf_num = itf;
ctrl->ip = ip;
atm_force_charge(atmarpd, skb->truesize);
sk = sk_atm(atmarpd);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
return 0;
}
static void link_vcc(struct clip_vcc *clip_vcc, struct atmarp_entry *entry)
{
pr_debug("%p to entry %p (neigh %p)\n", clip_vcc, entry, entry->neigh);
clip_vcc->entry = entry;
clip_vcc->xoff = 0; /* @@@ may overrun buffer by one packet */
clip_vcc->next = entry->vccs;
entry->vccs = clip_vcc;
entry->neigh->used = jiffies;
}
static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
{
struct atmarp_entry *entry = clip_vcc->entry;
struct clip_vcc **walk;
if (!entry) {
pr_crit("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
return;
}
[NET]: Add netif_tx_lock Various drivers use xmit_lock internally to synchronise with their transmission routines. They do so without setting xmit_lock_owner. This is fine as long as netpoll is not in use. With netpoll it is possible for deadlocks to occur if xmit_lock_owner isn't set. This is because if a printk occurs while xmit_lock is held and xmit_lock_owner is not set can cause netpoll to attempt to take xmit_lock recursively. While it is possible to resolve this by getting netpoll to use trylock, it is suboptimal because netpoll's sole objective is to maximise the chance of getting the printk out on the wire. So delaying or dropping the message is to be avoided as much as possible. So the only alternative is to always set xmit_lock_owner. The following patch does this by introducing the netif_tx_lock family of functions that take care of setting/unsetting xmit_lock_owner. I renamed xmit_lock to _xmit_lock to indicate that it should not be used directly. I didn't provide irq versions of the netif_tx_lock functions since xmit_lock is meant to be a BH-disabling lock. This is pretty much a straight text substitution except for a small bug fix in winbond. It currently uses netif_stop_queue/spin_unlock_wait to stop transmission. This is unsafe as an IRQ can potentially wake up the queue. So it is safer to use netif_tx_disable. The hamradio bits used spin_lock_irq but it is unnecessary as xmit_lock must never be taken in an IRQ handler. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 19:20:56 +00:00
netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
entry->neigh->used = jiffies;
for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
if (*walk == clip_vcc) {
int error;
*walk = clip_vcc->next; /* atomic */
clip_vcc->entry = NULL;
if (clip_vcc->xoff)
netif_wake_queue(entry->neigh->dev);
if (entry->vccs)
goto out;
entry->expires = jiffies - 1;
/* force resolution or expiration */
error = neigh_update(entry->neigh, NULL, NUD_NONE,
NEIGH_UPDATE_F_ADMIN);
if (error)
pr_crit("neigh_update failed with %d\n", error);
goto out;
}
pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
out:
[NET]: Add netif_tx_lock Various drivers use xmit_lock internally to synchronise with their transmission routines. They do so without setting xmit_lock_owner. This is fine as long as netpoll is not in use. With netpoll it is possible for deadlocks to occur if xmit_lock_owner isn't set. This is because if a printk occurs while xmit_lock is held and xmit_lock_owner is not set can cause netpoll to attempt to take xmit_lock recursively. While it is possible to resolve this by getting netpoll to use trylock, it is suboptimal because netpoll's sole objective is to maximise the chance of getting the printk out on the wire. So delaying or dropping the message is to be avoided as much as possible. So the only alternative is to always set xmit_lock_owner. The following patch does this by introducing the netif_tx_lock family of functions that take care of setting/unsetting xmit_lock_owner. I renamed xmit_lock to _xmit_lock to indicate that it should not be used directly. I didn't provide irq versions of the netif_tx_lock functions since xmit_lock is meant to be a BH-disabling lock. This is pretty much a straight text substitution except for a small bug fix in winbond. It currently uses netif_stop_queue/spin_unlock_wait to stop transmission. This is unsafe as an IRQ can potentially wake up the queue. So it is safer to use netif_tx_disable. The hamradio bits used spin_lock_irq but it is unnecessary as xmit_lock must never be taken in an IRQ handler. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-06-09 19:20:56 +00:00
netif_tx_unlock_bh(entry->neigh->dev);
}
/* The neighbour entry n->lock is held. */
static int neigh_check_cb(struct neighbour *n)
{
struct atmarp_entry *entry = NEIGH2ENTRY(n);
struct clip_vcc *cv;
for (cv = entry->vccs; cv; cv = cv->next) {
unsigned long exp = cv->last_use + cv->idle_timeout;
if (cv->idle_timeout && time_after(jiffies, exp)) {
pr_debug("releasing vcc %p->%p of entry %p\n",
cv, cv->vcc, entry);
vcc_release_async(cv->vcc, -ETIMEDOUT);
}
}
if (entry->vccs || time_before(jiffies, entry->expires))
return 0;
if (atomic_read(&n->refcnt) > 1) {
struct sk_buff *skb;
pr_debug("destruction postponed with ref %d\n",
atomic_read(&n->refcnt));
while ((skb = skb_dequeue(&n->arp_queue)) != NULL)
dev_kfree_skb(skb);
return 0;
}
pr_debug("expired neigh %p\n", n);
return 1;
}
static void idle_timer_check(unsigned long dummy)
{
write_lock(&clip_tbl.lock);
__neigh_for_each_release(&clip_tbl, neigh_check_cb);
mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
write_unlock(&clip_tbl.lock);
}
static int clip_arp_rcv(struct sk_buff *skb)
{
struct atm_vcc *vcc;
pr_debug("\n");
vcc = ATM_SKB(skb)->vcc;
if (!vcc || !atm_charge(vcc, skb->truesize)) {
dev_kfree_skb_any(skb);
return 0;
}
pr_debug("pushing to %p\n", vcc);
pr_debug("using %p\n", CLIP_VCC(vcc)->old_push);
CLIP_VCC(vcc)->old_push(vcc, skb);
return 0;
}
static const unsigned char llc_oui[] = {
0xaa, /* DSAP: non-ISO */
0xaa, /* SSAP: non-ISO */
0x03, /* Ctrl: Unnumbered Information Command PDU */
0x00, /* OUI: EtherType */
0x00,
0x00
};
static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
pr_debug("\n");
if (!clip_devs) {
atm_return(vcc, skb->truesize);
kfree_skb(skb);
return;
}
if (!skb) {
pr_debug("removing VCC %p\n", clip_vcc);
if (clip_vcc->entry)
unlink_clip_vcc(clip_vcc);
clip_vcc->old_push(vcc, NULL); /* pass on the bad news */
kfree(clip_vcc);
return;
}
atm_return(vcc, skb->truesize);
skb->dev = clip_vcc->entry ? clip_vcc->entry->neigh->dev : clip_devs;
/* clip_vcc->entry == NULL if we don't have an IP address yet */
if (!skb->dev) {
dev_kfree_skb_any(skb);
return;
}
ATM_SKB(skb)->vcc = vcc;
skb_reset_mac_header(skb);
if (!clip_vcc->encap ||
skb->len < RFC1483LLC_LEN ||
memcmp(skb->data, llc_oui, sizeof(llc_oui)))
skb->protocol = htons(ETH_P_IP);
else {
skb->protocol = ((__be16 *)skb->data)[3];
skb_pull(skb, RFC1483LLC_LEN);
if (skb->protocol == htons(ETH_P_ARP)) {
skb->dev->stats.rx_packets++;
skb->dev->stats.rx_bytes += skb->len;
clip_arp_rcv(skb);
return;
}
}
clip_vcc->last_use = jiffies;
skb->dev->stats.rx_packets++;
skb->dev->stats.rx_bytes += skb->len;
memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
netif_rx(skb);
}
/*
* Note: these spinlocks _must_not_ block on non-SMP. The only goal is that
* clip_pop is atomic with respect to the critical section in clip_start_xmit.
*/
static void clip_pop(struct atm_vcc *vcc, struct sk_buff *skb)
{
struct clip_vcc *clip_vcc = CLIP_VCC(vcc);
struct net_device *dev = skb->dev;
int old;
unsigned long flags;
pr_debug("(vcc %p)\n", vcc);
clip_vcc->old_pop(vcc, skb);
/* skb->dev == NULL in outbound ARP packets */
if (!dev)
return;
spin_lock_irqsave(&PRIV(dev)->xoff_lock, flags);
if (atm_may_send(vcc, 0)) {
old = xchg(&clip_vcc->xoff, 0);
if (old)
netif_wake_queue(dev);
}
spin_unlock_irqrestore(&PRIV(dev)->xoff_lock, flags);
}
static void clip_neigh_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
pr_debug("(neigh %p, skb %p)\n", neigh, skb);
to_atmarpd(act_need, PRIV(neigh->dev)->number, NEIGH2ENTRY(neigh)->ip);
}
static void clip_neigh_error(struct neighbour *neigh, struct sk_buff *skb)
{
#ifndef CONFIG_ATM_CLIP_NO_ICMP
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
#endif
kfree_skb(skb);
}
static const struct neigh_ops clip_neigh_ops = {
.family = AF_INET,
.solicit = clip_neigh_solicit,
.error_report = clip_neigh_error,
.output = neigh_direct_output,
.connected_output = neigh_direct_output,
};
static int clip_constructor(struct neighbour *neigh)
{
struct atmarp_entry *entry = NEIGH2ENTRY(neigh);
struct net_device *dev = neigh->dev;
struct in_device *in_dev;
struct neigh_parms *parms;
pr_debug("(neigh %p, entry %p)\n", neigh, entry);
neigh->type = inet_addr_type(&init_net, entry->ip);
if (neigh->type != RTN_UNICAST)
return -EINVAL;
rcu_read_lock();
in_dev = __in_dev_get_rcu(dev);
if (!in_dev) {
rcu_read_unlock();
return -EINVAL;
}
parms = in_dev->arp_parms;
__neigh_parms_put(neigh->parms);
neigh->parms = neigh_parms_clone(parms);
rcu_read_unlock();
neigh->ops = &clip_neigh_ops;
neigh->output = neigh->nud_state & NUD_VALID ?
neigh->ops->connected_output : neigh->ops->output;
entry->neigh = neigh;
entry->vccs = NULL;
entry->expires = jiffies - 1;
return 0;
}
static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
{
return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
}
static struct neigh_table clip_tbl = {
.family = AF_INET,
.entry_size = sizeof(struct neighbour)+sizeof(struct atmarp_entry),
.key_len = 4,
.hash = clip_hash,
.constructor = clip_constructor,
.id = "clip_arp_cache",
/* parameters are copied from ARP ... */
.parms = {
.tbl = &clip_tbl,
.base_reachable_time = 30 * HZ,
.retrans_time = 1 * HZ,
.gc_staletime = 60 * HZ,
.reachable_time = 30 * HZ,
.delay_probe_time = 5 * HZ,
neigh: new unresolved queue limits Le mercredi 09 novembre 2011 à 16:21 -0500, David Miller a écrit : > From: David Miller <davem@davemloft.net> > Date: Wed, 09 Nov 2011 16:16:44 -0500 (EST) > > > From: Eric Dumazet <eric.dumazet@gmail.com> > > Date: Wed, 09 Nov 2011 12:14:09 +0100 > > > >> unres_qlen is the number of frames we are able to queue per unresolved > >> neighbour. Its default value (3) was never changed and is responsible > >> for strange drops, especially if IP fragments are used, or multiple > >> sessions start in parallel. Even a single tcp flow can hit this limit. > > ... > > > > Ok, I've applied this, let's see what happens :-) > > Early answer, build fails. > > Please test build this patch with DECNET enabled and resubmit. The > decnet neigh layer still refers to the removed ->queue_len member. > > Thanks. Ouch, this was fixed on one machine yesterday, but not the other one I used this morning, sorry. [PATCH V5 net-next] neigh: new unresolved queue limits unres_qlen is the number of frames we are able to queue per unresolved neighbour. Its default value (3) was never changed and is responsible for strange drops, especially if IP fragments are used, or multiple sessions start in parallel. Even a single tcp flow can hit this limit. $ arp -d 192.168.20.108 ; ping -c 2 -s 8000 192.168.20.108 PING 192.168.20.108 (192.168.20.108) 8000(8028) bytes of data. 8008 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.322 ms Signed-off-by: David S. Miller <davem@davemloft.net>
2011-11-09 12:07:14 +00:00
.queue_len_bytes = 64 * 1024,
.ucast_probes = 3,
.mcast_probes = 3,
.anycast_delay = 1 * HZ,
.proxy_delay = (8 * HZ) / 10,
.proxy_qlen = 64,
.locktime = 1 * HZ,
},
.gc_interval = 30 * HZ,
.gc_thresh1 = 128,
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
/* @@@ copy bh locking from arp.c -- need to bh-enable atm code before */
/*
* We play with the resolve flag: 0 and 1 have the usual meaning, but -1 means
* to allocate the neighbour entry but not to ask atmarpd for resolution. Also,
* don't increment the usage count. This is used to create entries in
* clip_setentry.
*/
static int clip_encap(struct atm_vcc *vcc, int mode)
{
CLIP_VCC(vcc)->encap = mode;
return 0;
}
static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct clip_priv *clip_priv = PRIV(dev);
struct dst_entry *dst = skb_dst(skb);
struct atmarp_entry *entry;
struct neighbour *n;
struct atm_vcc *vcc;
int old;
unsigned long flags;
pr_debug("(skb %p)\n", skb);
if (!dst) {
pr_err("skb_dst(skb) == NULL\n");
dev_kfree_skb(skb);
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
n = dst_get_neighbour(dst);
if (!n) {
#if 0
n = clip_find_neighbour(skb_dst(skb), 1);
if (!n) {
dev_kfree_skb(skb); /* lost that one */
dev->stats.tx_dropped++;
return 0;
}
dst_set_neighbour(dst, n);
#endif
pr_err("NO NEIGHBOUR !\n");
dev_kfree_skb(skb);
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
entry = NEIGH2ENTRY(n);
if (!entry->vccs) {
if (time_after(jiffies, entry->expires)) {
/* should be resolved */
entry->expires = jiffies + ATMARP_RETRY_DELAY * HZ;
to_atmarpd(act_need, PRIV(dev)->number, entry->ip);
}
if (entry->neigh->arp_queue.qlen < ATMARP_MAX_UNRES_PACKETS)
skb_queue_tail(&entry->neigh->arp_queue, skb);
else {
dev_kfree_skb(skb);
dev->stats.tx_dropped++;
}
return NETDEV_TX_OK;
}
pr_debug("neigh %p, vccs %p\n", entry, entry->vccs);
ATM_SKB(skb)->vcc = vcc = entry->vccs->vcc;
pr_debug("using neighbour %p, vcc %p\n", n, vcc);
if (entry->vccs->encap) {
void *here;
here = skb_push(skb, RFC1483LLC_LEN);
memcpy(here, llc_oui, sizeof(llc_oui));
((__be16 *) here)[3] = skb->protocol;
}
atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
ATM_SKB(skb)->atm_options = vcc->atm_options;
entry->vccs->last_use = jiffies;
pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
old = xchg(&entry->vccs->xoff, 1); /* assume XOFF ... */
if (old) {
pr_warning("XOFF->XOFF transition\n");
return NETDEV_TX_OK;
}
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
vcc->send(vcc, skb);
if (atm_may_send(vcc, 0)) {
entry->vccs->xoff = 0;
return NETDEV_TX_OK;
}
spin_lock_irqsave(&clip_priv->xoff_lock, flags);
netif_stop_queue(dev); /* XOFF -> throttle immediately */
barrier();
if (!entry->vccs->xoff)
netif_start_queue(dev);
/* Oh, we just raced with clip_pop. netif_start_queue should be
good enough, because nothing should really be asleep because
of the brief netif_stop_queue. If this isn't true or if it
changes, use netif_wake_queue instead. */
spin_unlock_irqrestore(&clip_priv->xoff_lock, flags);
return NETDEV_TX_OK;
}
static int clip_mkip(struct atm_vcc *vcc, int timeout)
{
struct clip_vcc *clip_vcc;
if (!vcc->push)
return -EBADFD;
clip_vcc = kmalloc(sizeof(struct clip_vcc), GFP_KERNEL);
if (!clip_vcc)
return -ENOMEM;
pr_debug("%p vcc %p\n", clip_vcc, vcc);
clip_vcc->vcc = vcc;
vcc->user_back = clip_vcc;
set_bit(ATM_VF_IS_CLIP, &vcc->flags);
clip_vcc->entry = NULL;
clip_vcc->xoff = 0;
clip_vcc->encap = 1;
clip_vcc->last_use = jiffies;
clip_vcc->idle_timeout = timeout * HZ;
clip_vcc->old_push = vcc->push;
clip_vcc->old_pop = vcc->pop;
vcc->push = clip_push;
vcc->pop = clip_pop;
/* re-process everything received between connection setup and MKIP */
vcc_process_recv_queue(vcc);
return 0;
}
static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
{
struct neighbour *neigh;
struct atmarp_entry *entry;
int error;
struct clip_vcc *clip_vcc;
struct rtable *rt;
if (vcc->push != clip_push) {
pr_warning("non-CLIP VCC\n");
return -EBADF;
}
clip_vcc = CLIP_VCC(vcc);
if (!ip) {
if (!clip_vcc->entry) {
pr_err("hiding hidden ATMARP entry\n");
return 0;
}
pr_debug("remove\n");
unlink_clip_vcc(clip_vcc);
return 0;
}
rt = ip_route_output(&init_net, ip, 0, 1, 0);
if (IS_ERR(rt))
return PTR_ERR(rt);
neigh = __neigh_lookup(&clip_tbl, &ip, rt->dst.dev, 1);
ip_rt_put(rt);
if (!neigh)
return -ENOMEM;
entry = NEIGH2ENTRY(neigh);
if (entry != clip_vcc->entry) {
if (!clip_vcc->entry)
pr_debug("add\n");
else {
pr_debug("update\n");
unlink_clip_vcc(clip_vcc);
}
link_vcc(clip_vcc, entry);
}
error = neigh_update(neigh, llc_oui, NUD_PERMANENT,
NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN);
neigh_release(neigh);
return error;
}
static const struct net_device_ops clip_netdev_ops = {
.ndo_start_xmit = clip_start_xmit,
};
static void clip_setup(struct net_device *dev)
{
dev->netdev_ops = &clip_netdev_ops;
dev->type = ARPHRD_ATM;
dev->hard_header_len = RFC1483LLC_LEN;
dev->mtu = RFC1626_MTU;
dev->tx_queue_len = 100; /* "normal" queue (packets) */
/* When using a "real" qdisc, the qdisc determines the queue */
/* length. tx_queue_len is only used for the default case, */
/* without any more elaborate queuing. 100 is a reasonable */
/* compromise between decent burst-tolerance and protection */
/* against memory hogs. */
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
}
static int clip_create(int number)
{
struct net_device *dev;
struct clip_priv *clip_priv;
int error;
if (number != -1) {
for (dev = clip_devs; dev; dev = PRIV(dev)->next)
if (PRIV(dev)->number == number)
return -EEXIST;
} else {
number = 0;
for (dev = clip_devs; dev; dev = PRIV(dev)->next)
if (PRIV(dev)->number >= number)
number = PRIV(dev)->number + 1;
}
dev = alloc_netdev(sizeof(struct clip_priv), "", clip_setup);
if (!dev)
return -ENOMEM;
clip_priv = PRIV(dev);
sprintf(dev->name, "atm%d", number);
spin_lock_init(&clip_priv->xoff_lock);
clip_priv->number = number;
error = register_netdev(dev);
if (error) {
free_netdev(dev);
return error;
}
clip_priv->next = clip_devs;
clip_devs = dev;
pr_debug("registered (net:%s)\n", dev->name);
return number;
}
static int clip_device_event(struct notifier_block *this, unsigned long event,
void *arg)
{
struct net_device *dev = arg;
if (!net_eq(dev_net(dev), &init_net))
return NOTIFY_DONE;
if (event == NETDEV_UNREGISTER) {
neigh_ifdown(&clip_tbl, dev);
return NOTIFY_DONE;
}
/* ignore non-CLIP devices */
if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops)
return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
pr_debug("NETDEV_UP\n");
to_atmarpd(act_up, PRIV(dev)->number, 0);
break;
case NETDEV_GOING_DOWN:
pr_debug("NETDEV_DOWN\n");
to_atmarpd(act_down, PRIV(dev)->number, 0);
break;
case NETDEV_CHANGE:
case NETDEV_CHANGEMTU:
pr_debug("NETDEV_CHANGE*\n");
to_atmarpd(act_change, PRIV(dev)->number, 0);
break;
}
return NOTIFY_DONE;
}
static int clip_inet_event(struct notifier_block *this, unsigned long event,
void *ifa)
{
struct in_device *in_dev;
in_dev = ((struct in_ifaddr *)ifa)->ifa_dev;
/*
* Transitions are of the down-change-up type, so it's sufficient to
* handle the change on up.
*/
if (event != NETDEV_UP)
return NOTIFY_DONE;
return clip_device_event(this, NETDEV_CHANGE, in_dev->dev);
}
static struct notifier_block clip_dev_notifier = {
.notifier_call = clip_device_event,
};
static struct notifier_block clip_inet_notifier = {
.notifier_call = clip_inet_event,
};
static void atmarpd_close(struct atm_vcc *vcc)
{
pr_debug("\n");
rtnl_lock();
atmarpd = NULL;
skb_queue_purge(&sk_atm(vcc)->sk_receive_queue);
rtnl_unlock();
pr_debug("(done)\n");
module_put(THIS_MODULE);
}
static struct atmdev_ops atmarpd_dev_ops = {
.close = atmarpd_close
};
static struct atm_dev atmarpd_dev = {
.ops = &atmarpd_dev_ops,
.type = "arpd",
.number = 999,
.lock = __SPIN_LOCK_UNLOCKED(atmarpd_dev.lock)
};
static int atm_init_atmarp(struct atm_vcc *vcc)
{
rtnl_lock();
if (atmarpd) {
rtnl_unlock();
return -EADDRINUSE;
}
mod_timer(&idle_timer, jiffies + CLIP_CHECK_INTERVAL * HZ);
atmarpd = vcc;
set_bit(ATM_VF_META, &vcc->flags);
set_bit(ATM_VF_READY, &vcc->flags);
/* allow replies and avoid getting closed if signaling dies */
vcc->dev = &atmarpd_dev;
vcc_insert_socket(sk_atm(vcc));
vcc->push = NULL;
vcc->pop = NULL; /* crash */
vcc->push_oam = NULL; /* crash */
rtnl_unlock();
return 0;
}
static int clip_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct atm_vcc *vcc = ATM_SD(sock);
int err = 0;
switch (cmd) {
case SIOCMKCLIP:
case ATMARPD_CTRL:
case ATMARP_MKIP:
case ATMARP_SETENTRY:
case ATMARP_ENCAP:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
break;
default:
return -ENOIOCTLCMD;
}
switch (cmd) {
case SIOCMKCLIP:
err = clip_create(arg);
break;
case ATMARPD_CTRL:
err = atm_init_atmarp(vcc);
if (!err) {
sock->state = SS_CONNECTED;
__module_get(THIS_MODULE);
}
break;
case ATMARP_MKIP:
err = clip_mkip(vcc, arg);
break;
case ATMARP_SETENTRY:
err = clip_setentry(vcc, (__force __be32)arg);
break;
case ATMARP_ENCAP:
err = clip_encap(vcc, arg);
break;
}
return err;
}
static struct atm_ioctl clip_ioctl_ops = {
.owner = THIS_MODULE,
.ioctl = clip_ioctl,
};
#ifdef CONFIG_PROC_FS
static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr)
{
static int code[] = { 1, 2, 10, 6, 1, 0 };
static int e164[] = { 1, 8, 4, 6, 1, 0 };
if (*addr->sas_addr.pub) {
seq_printf(seq, "%s", addr->sas_addr.pub);
if (*addr->sas_addr.prv)
seq_putc(seq, '+');
} else if (!*addr->sas_addr.prv) {
seq_printf(seq, "%s", "(none)");
return;
}
if (*addr->sas_addr.prv) {
unsigned char *prv = addr->sas_addr.prv;
int *fields;
int i, j;
fields = *prv == ATM_AFI_E164 ? e164 : code;
for (i = 0; fields[i]; i++) {
for (j = fields[i]; j; j--)
seq_printf(seq, "%02X", *prv++);
if (fields[i + 1])
seq_putc(seq, '.');
}
}
}
/* This means the neighbour entry has no attached VCC objects. */
#define SEQ_NO_VCC_TOKEN ((void *) 2)
static void atmarp_info(struct seq_file *seq, struct net_device *dev,
struct atmarp_entry *entry, struct clip_vcc *clip_vcc)
{
unsigned long exp;
char buf[17];
int svc, llc, off;
svc = ((clip_vcc == SEQ_NO_VCC_TOKEN) ||
(sk_atm(clip_vcc->vcc)->sk_family == AF_ATMSVC));
llc = ((clip_vcc == SEQ_NO_VCC_TOKEN) || clip_vcc->encap);
if (clip_vcc == SEQ_NO_VCC_TOKEN)
exp = entry->neigh->used;
else
exp = clip_vcc->last_use;
exp = (jiffies - exp) / HZ;
seq_printf(seq, "%-6s%-4s%-4s%5ld ",
dev->name, svc ? "SVC" : "PVC", llc ? "LLC" : "NULL", exp);
off = scnprintf(buf, sizeof(buf) - 1, "%pI4",
&entry->ip);
while (off < 16)
buf[off++] = ' ';
buf[off] = '\0';
seq_printf(seq, "%s", buf);
if (clip_vcc == SEQ_NO_VCC_TOKEN) {
if (time_before(jiffies, entry->expires))
seq_printf(seq, "(resolving)\n");
else
seq_printf(seq, "(expired, ref %d)\n",
atomic_read(&entry->neigh->refcnt));
} else if (!svc) {
seq_printf(seq, "%d.%d.%d\n",
clip_vcc->vcc->dev->number,
clip_vcc->vcc->vpi, clip_vcc->vcc->vci);
} else {
svc_addr(seq, &clip_vcc->vcc->remote);
seq_putc(seq, '\n');
}
}
struct clip_seq_state {
/* This member must be first. */
struct neigh_seq_state ns;
/* Local to clip specific iteration. */
struct clip_vcc *vcc;
};
static struct clip_vcc *clip_seq_next_vcc(struct atmarp_entry *e,
struct clip_vcc *curr)
{
if (!curr) {
curr = e->vccs;
if (!curr)
return SEQ_NO_VCC_TOKEN;
return curr;
}
if (curr == SEQ_NO_VCC_TOKEN)
return NULL;
curr = curr->next;
return curr;
}
static void *clip_seq_vcc_walk(struct clip_seq_state *state,
struct atmarp_entry *e, loff_t * pos)
{
struct clip_vcc *vcc = state->vcc;
vcc = clip_seq_next_vcc(e, vcc);
if (vcc && pos != NULL) {
while (*pos) {
vcc = clip_seq_next_vcc(e, vcc);
if (!vcc)
break;
--(*pos);
}
}
state->vcc = vcc;
return vcc;
}
static void *clip_seq_sub_iter(struct neigh_seq_state *_state,
struct neighbour *n, loff_t * pos)
{
struct clip_seq_state *state = (struct clip_seq_state *)_state;
return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos);
}
static void *clip_seq_start(struct seq_file *seq, loff_t * pos)
{
struct clip_seq_state *state = seq->private;
state->ns.neigh_sub_iter = clip_seq_sub_iter;
return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY);
}
static int clip_seq_show(struct seq_file *seq, void *v)
{
static char atm_arp_banner[] =
"IPitf TypeEncp Idle IP address ATM address\n";
if (v == SEQ_START_TOKEN) {
seq_puts(seq, atm_arp_banner);
} else {
struct clip_seq_state *state = seq->private;
struct neighbour *n = v;
struct clip_vcc *vcc = state->vcc;
atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc);
}
return 0;
}
static const struct seq_operations arp_seq_ops = {
.start = clip_seq_start,
.next = neigh_seq_next,
.stop = neigh_seq_stop,
.show = clip_seq_show,
};
static int arp_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &arp_seq_ops,
sizeof(struct clip_seq_state));
[NETNS]: Modify the neighbour table code so it handles multiple network namespaces I'm actually surprised at how much was involved. At first glance it appears that the neighbour table data structures are already split by network device so all that should be needed is to modify the user interface commands to filter the set of neighbours by the network namespace of their devices. However a couple things turned up while I was reading through the code. The proxy neighbour table allows entries with no network device, and the neighbour parms are per network device (except for the defaults) so they now need a per network namespace default. So I updated the two structures (which surprised me) with their very own network namespace parameter. Updated the relevant lookup and destroy routines with a network namespace parameter and modified the code that interacts with users to filter out neighbour table entries for devices of other namespaces. I'm a little concerned that we can modify and display the global table configuration and from all network namespaces. But this appears good enough for now. I keep thinking modifying the neighbour table to have per network namespace instances of each table type would should be cleaner. The hash table is already dynamically sized so there are it is not a limiter. The default parameter would be straight forward to take care of. However when I look at the how the network table is built and used I still find some assumptions that there is only a single neighbour table for each type of table in the kernel. The netlink operations, neigh_seq_start, the non-core network users that call neigh_lookup. So while it might be doable it would require more refactoring than my current approach of just doing a little extra filtering in the code. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
2008-01-24 08:13:18 +00:00
}
static const struct file_operations arp_seq_fops = {
.open = arp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_net,
.owner = THIS_MODULE
};
#endif
static void atm_clip_exit_noproc(void);
static int __init atm_clip_init(void)
{
[NEIGH]: Fix IP-over-ATM and ARP interaction. The classical IP over ATM code maintains its own IPv4 <-> <ATM stuff> ARP table, using the standard neighbour-table code. The neigh_table_init function adds this neighbour table to a linked list of all neighbor tables which is used by the functions neigh_delete() neigh_add() and neightbl_set(), all called by the netlink code. Once the ATM neighbour table is added to the list, there are two tables with family == AF_INET there, and ARP entries sent via netlink go into the first table with matching family. This is indeterminate and often wrong. To see the bug, on a kernel with CLIP enabled, create a standard IPv4 ARP entry by pinging an unused address on a local subnet. Then attempt to complete that entry by doing ip neigh replace <ip address> lladdr <some mac address> nud reachable Looking at the ARP tables by using ip neigh show will reveal two ARP entries for the same address. One of these can be found in /proc/net/arp, and the other in /proc/net/atm/arp. This patch adds a new function, neigh_table_init_no_netlink() which does everything the neigh_table_init() does, except add the table to the netlink all-arp-tables chain. In addition neigh_table_init() has a check that all tables on the chain have a distinct address family. The init call in clip.c is changed to call neigh_table_init_no_netlink(). Since ATM ARP tables are rather more complicated than can currently be handled by the available rtattrs in the netlink protocol, no functionality is lost by this patch, and non-ATM ARP manipulation via netlink is rescued. A more complete solution would involve a rtattr for ATM ARP entries and some way for the netlink code to give neigh_add and friends more information than just address family with which to find the correct ARP table. [ I've changed the assertion checking in neigh_table_init() to not use BUG_ON() while holding neigh_tbl_lock. Instead we remember that we found an existing tbl with the same family, and after dropping the lock we'll give a diagnostic kernel log message and a stack dump. -DaveM ] Signed-off-by: Simon Kelley <simon@thekelleys.org.uk> Signed-off-by: David S. Miller <davem@davemloft.net>
2006-05-12 21:56:08 +00:00
neigh_table_init_no_netlink(&clip_tbl);
clip_tbl_hook = &clip_tbl;
register_atm_ioctl(&clip_ioctl_ops);
register_netdevice_notifier(&clip_dev_notifier);
register_inetaddr_notifier(&clip_inet_notifier);
setup_timer(&idle_timer, idle_timer_check, 0);
#ifdef CONFIG_PROC_FS
{
struct proc_dir_entry *p;
p = proc_create("arp", S_IRUGO, atm_proc_root, &arp_seq_fops);
if (!p) {
pr_err("Unable to initialize /proc/net/atm/arp\n");
atm_clip_exit_noproc();
return -ENOMEM;
}
}
#endif
return 0;
}
static void atm_clip_exit_noproc(void)
{
struct net_device *dev, *next;
unregister_inetaddr_notifier(&clip_inet_notifier);
unregister_netdevice_notifier(&clip_dev_notifier);
deregister_atm_ioctl(&clip_ioctl_ops);
/* First, stop the idle timer, so it stops banging
* on the table.
*/
del_timer_sync(&idle_timer);
/* Next, purge the table, so that the device
* unregister loop below does not hang due to
* device references remaining in the table.
*/
neigh_ifdown(&clip_tbl, NULL);
dev = clip_devs;
while (dev) {
next = PRIV(dev)->next;
unregister_netdev(dev);
free_netdev(dev);
dev = next;
}
/* Now it is safe to fully shutdown whole table. */
neigh_table_clear(&clip_tbl);
clip_tbl_hook = NULL;
}
static void __exit atm_clip_exit(void)
{
remove_proc_entry("arp", atm_proc_root);
atm_clip_exit_noproc();
}
module_init(atm_clip_init);
module_exit(atm_clip_exit);
MODULE_AUTHOR("Werner Almesberger");
MODULE_DESCRIPTION("Classical/IP over ATM interface");
MODULE_LICENSE("GPL");