linux-next/net/sunrpc/svcauth_unix.c
J.Bruce Fields e0bb89ef03 [PATCH] knfsd: nfsd: don't drop silently on upcall deferral
To avoid tying up server threads when nfsd makes an upcall (to mountd, to get
export options, to idmapd, for nfsv4 name<->id mapping, etc.), we temporarily
"drop" the request and save enough information so that we can revisit it
later.

Certain failures during the deferral process can cause us to really drop the
request and never revisit it.

This is often less than ideal, and is unacceptable in the NFSv4 case--rfc 3530
forbids the server from dropping a request without also closing the
connection.

As a first step, we modify the deferral code to return -ETIMEDOUT (which is
translated to nfserr_jukebox in the v3 and v4 cases, and remains a drop in the
v2 case).

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2006-12-13 09:05:54 -08:00

589 lines
13 KiB
C

#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <net/sock.h>
#define RPCDBG_FACILITY RPCDBG_AUTH
/*
* AUTHUNIX and AUTHNULL credentials are both handled here.
* AUTHNULL is treated just like AUTHUNIX except that the uid/gid
* are always nobody (-2). i.e. we do the same IP address checks for
* AUTHNULL as for AUTHUNIX, and that is done here.
*/
/*
 * AUTH_UNIX flavour of an auth_domain: the generic domain plus a
 * change counter used to age out address mappings.
 */
struct unix_domain {
/* Embedded generic domain; container_of() on this recovers the unix_domain. */
struct auth_domain h;
/* Incremented by auth_unix_forget_old(); auth_unix_lookup() compares it
 * with ip_map.m_add_change to decide whether a mapping is still current. */
int addr_changes;
/* other stuff later */
};
extern struct auth_ops svcauth_unix;
/*
 * unix_domain_find - look up, or create, the AUTH_UNIX domain called @name.
 *
 * First asks auth_domain_lookup() for an existing domain.  If there is
 * none, a new unix_domain is allocated and offered to auth_domain_lookup()
 * for installation.
 *
 * Returns the domain on success; callers in this file release it with
 * auth_domain_put().  Returns NULL if memory for the domain or its name
 * could not be allocated, or if the domain found under @name does not
 * use the svcauth_unix flavour.
 */
struct auth_domain *unix_domain_find(char *name)
{
	struct auth_domain *rv;
	struct unix_domain *new = NULL;

	rv = auth_domain_lookup(name, NULL);
	while (1) {
		if (rv) {
			/* A domain other than the one we offered came back:
			 * our candidate is not needed. */
			if (new && rv != &new->h)
				auth_domain_put(&new->h);

			if (rv->flavour != &svcauth_unix) {
				auth_domain_put(rv);
				return NULL;
			}
			return rv;
		}

		new = kmalloc(sizeof(*new), GFP_KERNEL);
		if (new == NULL)
			return NULL;
		kref_init(&new->h.ref);
		new->h.name = kstrdup(name, GFP_KERNEL);
		if (new->h.name == NULL) {
			/* Never offer a nameless domain for installation;
			 * it has not been published yet, so just free it. */
			kfree(new);
			return NULL;
		}
		new->h.flavour = &svcauth_unix;
		new->addr_changes = 0;
		rv = auth_domain_lookup(name, &new->h);
	}
}
/*
 * Release callback for the "unix" flavour: frees the domain's name
 * string and then the enclosing unix_domain.
 */
static void svcauth_unix_domain_release(struct auth_domain *dom)
{
	struct unix_domain *udom;

	kfree(dom->name);

	udom = container_of(dom, struct unix_domain, h);
	kfree(udom);
}
/**************************************************
* cache for IP address to unix_domain
* as needed by AUTH_UNIX
*/
#define IP_HASHBITS 8
#define IP_HASHMAX (1<<IP_HASHBITS)
#define IP_HASHMASK (IP_HASHMAX-1)
/*
 * One entry of the IP-address cache: maps a (class, IPv4 address) pair
 * to the unix_domain that address belongs to.
 */
struct ip_map {
/* Generic cache entry header (ref, flags and expiry_time are used in this file). */
struct cache_head h;
char m_class[8]; /* e.g. "nfsd" */
/* Client address; compared and hashed via m_addr.s_addr. */
struct in_addr m_addr;
/* Domain for this address.  Only dereferenced in this file when the entry
 * is CACHE_VALID and not CACHE_NEGATIVE. */
struct unix_domain *m_client;
/* Value of m_client->addr_changes recorded by ip_map_update() (plus one
 * for entries that never expire); see auth_unix_lookup(). */
int m_add_change;
};
/* Bucket array handed to ip_map_cache as its .hash_table (IP_HASHMAX slots). */
static struct cache_head *ip_table[IP_HASHMAX];
/*
 * kref release function for an ip_map.  An entry that is valid and not
 * negative holds a reference on its domain, which is dropped here
 * before the entry itself is freed.
 */
static void ip_map_put(struct kref *kref)
{
	struct cache_head *head = container_of(kref, struct cache_head, ref);
	struct ip_map *map = container_of(head, struct ip_map, h);

	if (test_bit(CACHE_VALID, &head->flags)) {
		if (!test_bit(CACHE_NEGATIVE, &head->flags))
			auth_domain_put(&map->m_client->h);
	}

	kfree(map);
}
#if IP_HASHBITS == 8
/*
 * hash_long() on a 64-bit machine is currently REALLY BAD for IP
 * addresses stored in reverse-endian order (i.e. on a little-endian
 * machine), so fold the address down to 8 bits by hand instead:
 * first XOR the two 16-bit halves, then XOR the two resulting bytes.
 */
static inline int hash_ip(__be32 ip)
{
	int folded = (__force u32)ip ^ ((__force u32)ip >> 16);

	folded ^= folded >> 8;
	return folded & 0xff;
}
#endif
/*
 * Cache .match callback: two entries are the same key when both the
 * class string and the IPv4 address are equal.  Returns 1 on a match,
 * 0 otherwise.
 */
static int ip_map_match(struct cache_head *corig, struct cache_head *cnew)
{
	struct ip_map *lhs = container_of(corig, struct ip_map, h);
	struct ip_map *rhs = container_of(cnew, struct ip_map, h);

	if (strcmp(lhs->m_class, rhs->m_class) != 0)
		return 0;

	return lhs->m_addr.s_addr == rhs->m_addr.s_addr;
}
/*
 * Cache .init callback: copy the key fields (address and class) from
 * @citem into the freshly allocated entry @cnew.
 */
static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
{
	struct ip_map *dst = container_of(cnew, struct ip_map, h);
	struct ip_map *src = container_of(citem, struct ip_map, h);

	dst->m_addr.s_addr = src->m_addr.s_addr;
	strcpy(dst->m_class, src->m_class);
}
static void update(struct cache_head *cnew, struct cache_head *citem)
{
struct ip_map *new = container_of(cnew, struct ip_map, h);
struct ip_map *item = container_of(citem, struct ip_map, h);
kref_get(&item->m_client->h.ref);
new->m_client = item->m_client;
new->m_add_change = item->m_add_change;
}
/*
 * Cache .alloc callback: allocate an ip_map and return its embedded
 * cache_head, or NULL if the allocation failed.
 */
static struct cache_head *ip_map_alloc(void)
{
	struct ip_map *map = kmalloc(sizeof(*map), GFP_KERNEL);

	return map ? &map->h : NULL;
}
/*
 * Cache .cache_request callback: append the entry's class and its
 * address in dotted-quad form to the buffer at *bpp using qword_add(),
 * then overwrite the last character written with a newline.
 */
static void ip_map_request(struct cache_detail *cd,
			   struct cache_head *h,
			   char **bpp, int *blen)
{
	struct ip_map *map = container_of(h, struct ip_map, h);
	unsigned int host = ntohl(map->m_addr.s_addr);
	char dotted[20];

	snprintf(dotted, sizeof(dotted), "%u.%u.%u.%u",
		 (host >> 24) & 0xff,
		 (host >> 16) & 0xff,
		 (host >> 8) & 0xff,
		 host & 0xff);

	qword_add(bpp, blen, map->m_class);
	qword_add(bpp, blen, dotted);
	(*bpp)[-1] = '\n';
}
static struct ip_map *ip_map_lookup(char *class, struct in_addr addr);
static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry);
/*
 * ip_map_parse - cache .cache_parse callback for the IP-address cache.
 *
 * Parses one newline-terminated message of the form
 *
 *	class ipaddress expiry [domainname]
 *
 * and stores the result in the cache.  An empty domain name records a
 * negative entry.  @mesg is modified in place (it doubles as scratch
 * space for the parsed words).
 *
 * Returns 0 on success, -EINVAL for a malformed message (missing
 * newline, missing/oversized field, bad dotted quad, zero expiry),
 * -ENOENT if unix_domain_find() cannot supply the named domain, and
 * -ENOMEM if the cache entry cannot be looked up or updated.
 */
static int ip_map_parse(struct cache_detail *cd,
			char *mesg, int mlen)
{
	/* class ipaddress [domainname] */
	/* should be safe just to use the start of the input buffer
	 * for scratch: */
	char *buf = mesg;
	int len;
	/* unsigned to match the %u conversions below and to keep the
	 * shifts that build the address well defined */
	unsigned int b1, b2, b3, b4;
	char c;
	char class[8];
	struct in_addr addr;
	int err;
	struct ip_map *ipmp;
	struct auth_domain *dom;
	struct unix_domain *udom = NULL;
	time_t expiry;

	/* mlen <= 0 would make mesg[mlen-1] read before the buffer */
	if (mlen <= 0 || mesg[mlen-1] != '\n')
		return -EINVAL;
	mesg[mlen-1] = 0;

	/* class */
	len = qword_get(&mesg, class, sizeof(class));
	if (len <= 0)
		return -EINVAL;

	/* ip address */
	len = qword_get(&mesg, buf, mlen);
	if (len <= 0)
		return -EINVAL;

	/* exactly four numbers: a trailing character (%c) means junk */
	if (sscanf(buf, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4)
		return -EINVAL;
	/* an octet above 255 would spill into its neighbour when the
	 * address is assembled, silently mapping the wrong host */
	if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
		return -EINVAL;

	expiry = get_expiry(&mesg);
	if (expiry == 0)
		return -EINVAL;

	/* domainname, or empty for NEGATIVE */
	len = qword_get(&mesg, buf, mlen);
	if (len < 0)
		return -EINVAL;

	if (len) {
		dom = unix_domain_find(buf);
		if (dom == NULL)
			return -ENOENT;
	} else
		dom = NULL;

	addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);

	ipmp = ip_map_lookup(class, addr);
	if (ipmp) {
		/* keep "no domain" as a real NULL rather than relying on
		 * container_of(NULL) happening to yield NULL */
		if (dom)
			udom = container_of(dom, struct unix_domain, h);
		err = ip_map_update(ipmp, udom, expiry);
	} else
		err = -ENOMEM;

	if (dom)
		auth_domain_put(dom);

	cache_flush();
	return err;
}
/*
 * Cache .cache_show callback.  With @h == NULL it prints the column
 * header; otherwise it prints one line with the entry's class, its
 * address in dotted-quad form and the domain name ("-no-domain-" for
 * entries that are not valid or are negative).  Always returns 0.
 */
static int ip_map_show(struct seq_file *m,
		       struct cache_detail *cd,
		       struct cache_head *h)
{
	struct ip_map *map;
	unsigned int host;
	char *domname = "-no-domain-";

	if (h == NULL) {
		seq_puts(m, "#class IP domain\n");
		return 0;
	}

	map = container_of(h, struct ip_map, h);
	host = ntohl(map->m_addr.s_addr);

	/* m_client is only meaningful for valid, positive entries */
	if (test_bit(CACHE_VALID, &h->flags)) {
		if (!test_bit(CACHE_NEGATIVE, &h->flags))
			domname = map->m_client->h.name;
	}

	/* class addr domain */
	seq_printf(m, "%s %d.%d.%d.%d %s\n",
		   map->m_class,
		   (host >> 24) & 0xff,
		   (host >> 16) & 0xff,
		   (host >> 8) & 0xff,
		   host & 0xff,
		   domname);
	return 0;
}
/*
 * Descriptor for the IP-address cache.  It ties the ip_table hash
 * buckets to the ip_map_* callbacks defined in this file.
 */
struct cache_detail ip_map_cache = {
.owner = THIS_MODULE,
.hash_size = IP_HASHMAX,
.hash_table = ip_table,
/* NOTE(review): presumably the name under which the cache is exposed to
 * user space -- confirm against the cache core. */
.name = "auth.unix.ip",
/* frees an entry once its last reference is gone */
.cache_put = ip_map_put,
/* formats "class address\n" for an entry */
.cache_request = ip_map_request,
/* parses "class address expiry [domain]\n" and updates the cache */
.cache_parse = ip_map_parse,
/* prints one entry (or the header line) to a seq_file */
.cache_show = ip_map_show,
/* key comparison: class + address */
.match = ip_map_match,
/* copies the key fields into a new entry */
.init = ip_map_init,
/* copies the content fields (domain, change stamp) into an entry */
.update = update,
/* allocates an empty ip_map */
.alloc = ip_map_alloc,
};
/*
 * ip_map_lookup - find the cache entry for (@class, @addr).
 *
 * Builds a key on the stack and hands it to sunrpc_cache_lookup() with
 * the bucket computed from the class string and the address.
 *
 * Returns the ip_map containing the cache_head that
 * sunrpc_cache_lookup() returned, or NULL if it returned NULL or if
 * @class does not fit in ip_map.m_class.
 */
static struct ip_map *ip_map_lookup(char *class, struct in_addr addr)
{
	struct ip_map ip;
	struct cache_head *ch;

	/* m_class is a small fixed-size array on our stack and @class
	 * comes from the caller: refuse anything that would overflow it
	 * rather than letting strcpy() run past the end. */
	if (strlen(class) >= sizeof(ip.m_class))
		return NULL;

	strcpy(ip.m_class, class);
	ip.m_addr = addr;
	ch = sunrpc_cache_lookup(&ip_map_cache, &ip.h,
				 hash_str(class, IP_HASHBITS) ^
				 hash_ip(addr.s_addr));

	if (ch)
		return container_of(ch, struct ip_map, h);
	else
		return NULL;
}
/*
 * ip_map_update - set the content of cache entry @ipm.
 *
 * A template entry is filled in on the stack (domain @udom, expiry
 * @expiry, or a negative entry when @udom is NULL) and passed to
 * sunrpc_cache_update() together with @ipm.  The cache_head that call
 * returns is released again with cache_put().
 *
 * Returns 0 on success, -ENOMEM if sunrpc_cache_update() returns NULL.
 */
static int ip_map_update(struct ip_map *ipm, struct unix_domain *udom, time_t expiry)
{
	struct ip_map tmpl;
	struct cache_head *res;

	tmpl.h.flags = 0;
	tmpl.m_client = udom;

	if (udom) {
		tmpl.m_add_change = udom->addr_changes;
		/* if this is from the legacy set_client system call,
		 * we need m_add_change to be one higher
		 */
		if (expiry == NEVER)
			tmpl.m_add_change++;
	} else {
		set_bit(CACHE_NEGATIVE, &tmpl.h.flags);
	}
	tmpl.h.expiry_time = expiry;

	res = sunrpc_cache_update(&ip_map_cache,
				  &tmpl.h, &ipm->h,
				  hash_str(ipm->m_class, IP_HASHBITS) ^
				  hash_ip(ipm->m_addr.s_addr));
	if (res == NULL)
		return -ENOMEM;

	cache_put(res, &ip_map_cache);
	return 0;
}
int auth_unix_add_addr(struct in_addr addr, struct auth_domain *dom)
{
struct unix_domain *udom;
struct ip_map *ipmp;
if (dom->flavour != &svcauth_unix)
return -EINVAL;
udom = container_of(dom, struct unix_domain, h);
ipmp = ip_map_lookup("nfsd", addr);
if (ipmp)
return ip_map_update(ipmp, udom, NEVER);
else
return -ENOMEM;
}
/*
 * auth_unix_forget_old - bump the change counter of @dom.
 *
 * auth_unix_lookup() treats mappings whose recorded m_add_change is
 * lower than the domain's counter as stale.
 *
 * Returns 0, or -EINVAL if @dom is not a svcauth_unix domain.
 */
int auth_unix_forget_old(struct auth_domain *dom)
{
	if (dom->flavour != &svcauth_unix)
		return -EINVAL;

	container_of(dom, struct unix_domain, h)->addr_changes++;
	return 0;
}
/*
 * auth_unix_lookup - find the domain mapped to @addr in the "nfsd" class.
 *
 * Returns the domain with an extra reference taken, or NULL when no
 * entry can be obtained, when cache_check() reports anything but 0, or
 * when the mapping is older than the domain's current addr_changes
 * count (in which case the entry is also turned into a negative one).
 */
struct auth_domain *auth_unix_lookup(struct in_addr addr)
{
	struct auth_domain *result = NULL;
	struct ip_map *map = ip_map_lookup("nfsd", addr);

	if (map == NULL)
		return NULL;

	if (cache_check(&ip_map_cache, &map->h, NULL) != 0)
		return NULL;

	if ((map->m_client->addr_changes - map->m_add_change) > 0) {
		/* Stale mapping: mark it negative and, if we were the one
		 * to flip the bit, drop the domain reference it held. */
		if (!test_and_set_bit(CACHE_NEGATIVE, &map->h.flags))
			auth_domain_put(&map->m_client->h);
	} else {
		result = &map->m_client->h;
		kref_get(&result->ref);
	}

	cache_put(&map->h, &ip_map_cache);
	return result;
}
/* Hand the whole IP-address cache to cache_purge(). */
void
svcauth_unix_purge(void)
{
	cache_purge(&ip_map_cache);
}
/*
 * Return the ip_map remembered on the request's socket, with an extra
 * reference, or NULL if none is remembered.  A remembered entry that
 * is no longer valid is forgotten and its reference dropped.
 */
static inline struct ip_map *
ip_map_cached_get(struct svc_rqst *rqstp)
{
	struct ip_map *cached = rqstp->rq_sock->sk_info_authunix;

	if (cached == NULL)
		return NULL;

	if (cache_valid(&cached->h)) {
		cache_get(&cached->h);
		return cached;
	}

	/*
	 * The entry has been invalidated since it was
	 * remembered, e.g. by a second mount from the
	 * same IP address.
	 */
	rqstp->rq_sock->sk_info_authunix = NULL;
	cache_put(&cached->h, &ip_map_cache);
	return NULL;
}
/*
 * Dispose of the caller's reference on @ipm: on a SOCK_STREAM socket
 * that has no entry remembered yet, the reference is handed over to
 * the socket; in every other case it is dropped.
 */
static inline void
ip_map_cached_put(struct svc_rqst *rqstp, struct ip_map *ipm)
{
	struct svc_sock *svsk = rqstp->rq_sock;
	int remember = svsk->sk_sock->type == SOCK_STREAM &&
		       svsk->sk_info_authunix == NULL;

	if (!remember) {
		cache_put(&ipm->h, &ip_map_cache);
		return;
	}

	/* newly cached, keep the reference */
	svsk->sk_info_authunix = ipm;
}
/*
 * Drop the reference on an ip_map that was passed around as an opaque
 * pointer (@info must point to a struct ip_map).
 */
void
svcauth_unix_info_release(void *info)
{
	struct ip_map *map = (struct ip_map *)info;

	cache_put(&map->h, &ip_map_cache);
}
/*
 * .set_client callback shared by the "null" and "unix" flavours:
 * work out which domain the request's source address belongs to.
 *
 * Procedure 0 is accepted without a client.  Otherwise the entry is
 * taken from the socket if one is remembered there, else looked up by
 * program class and source address, and then validated with
 * cache_check():
 *   0                     -> rq_client set (with a reference), SVC_OK
 *   -ENOENT               -> SVC_DENIED
 *   -EAGAIN / -ETIMEDOUT  -> SVC_DROP
 *   anything else         -> BUG(), then SVC_DROP
 */
static int
svcauth_unix_set_client(struct svc_rqst *rqstp)
{
	struct ip_map *ipm;
	int status;

	rqstp->rq_client = NULL;
	if (rqstp->rq_proc == 0)
		return SVC_OK;

	ipm = ip_map_cached_get(rqstp);
	if (ipm == NULL) {
		ipm = ip_map_lookup(rqstp->rq_server->sv_program->pg_class,
				    rqstp->rq_addr.sin_addr);
		if (ipm == NULL)
			return SVC_DENIED;
	}

	status = cache_check(&ip_map_cache, &ipm->h, &rqstp->rq_chandle);

	if (status == 0) {
		rqstp->rq_client = &ipm->m_client->h;
		kref_get(&rqstp->rq_client->ref);
		ip_map_cached_put(rqstp, ipm);
		return SVC_OK;
	}

	if (status == -ENOENT)
		return SVC_DENIED;

	/* Only the two "try again later" codes are expected here. */
	if (status != -EAGAIN && status != -ETIMEDOUT)
		BUG();

	return SVC_DROP;
}
/*
 * .accept callback for the "null" flavour.
 *
 * Requires at least three more 32-bit words in the argument buffer.
 * The first must be zero (else rpc_autherr_badcred); the next two must
 * be RPC_AUTH_NULL and zero (else rpc_autherr_badverf).  On success
 * the credential is filled with uid/gid -1 and an empty group list,
 * and a NULL verifier is written to the reply.
 *
 * Returns SVC_OK, SVC_GARBAGE (short buffer), SVC_DENIED (with *authp
 * set) or SVC_DROP (group list allocation failed).
 */
static int
svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
{
	struct kvec *arg = &rqstp->rq_arg.head[0];
	struct kvec *res = &rqstp->rq_res.head[0];
	struct svc_cred *cred = &rqstp->rq_cred;

	/* start from a clean slate so release has nothing stale to free */
	rqstp->rq_client = NULL;
	cred->cr_group_info = NULL;

	if (arg->iov_len < 3*4)
		return SVC_GARBAGE;

	if (svc_getu32(arg) != 0) {
		dprintk("svc: bad null cred\n");
		*authp = rpc_autherr_badcred;
		return SVC_DENIED;
	}

	if (svc_getu32(arg) != htonl(RPC_AUTH_NULL) || svc_getu32(arg) != 0) {
		dprintk("svc: bad null verf\n");
		*authp = rpc_autherr_badverf;
		return SVC_DENIED;
	}

	/* Signal that mapping to nobody uid/gid is required */
	cred->cr_uid = (uid_t) -1;
	cred->cr_gid = (gid_t) -1;
	cred->cr_group_info = groups_alloc(0);
	if (!cred->cr_group_info)
		return SVC_DROP; /* kmalloc failure - client must retry */

	/* Put NULL verifier */
	svc_putnl(res, RPC_AUTH_NULL);
	svc_putnl(res, 0);

	return SVC_OK;
}
static int
svcauth_null_release(struct svc_rqst *rqstp)
{
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0; /* don't drop */
}
/*
 * Operations table for the RPC_AUTH_NULL flavour.  It has its own
 * accept/release pair but shares svcauth_unix_set_client() with the
 * "unix" flavour, so the same IP-address lookup is applied.
 */
struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
.accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
/*
 * .accept callback for the "unix" flavour: decode the credential from
 * the first argument kvec into rqstp->rq_cred.
 *
 * Words are consumed in this order: length, time stamp, machine name
 * length, machine name (skipped), uid, gid, number of gids, the gids,
 * then the verifier (which must be RPC_AUTH_NULL with length 0).
 * `len` tracks how many bytes remain so that every read is known to
 * fit before it is made.
 *
 * Returns SVC_OK after writing a NULL verifier to the reply,
 * SVC_GARBAGE if fewer than three words are present, SVC_DENIED with
 * *authp set to rpc_autherr_badcred / rpc_autherr_badverf for a bad
 * credential / verifier, or SVC_DROP if the group list cannot be
 * allocated.
 */
static int
svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct svc_cred *cred = &rqstp->rq_cred;
u32 slen, i;
int len = argv->iov_len;
/* clear these first so the release callback never sees stale pointers */
cred->cr_group_info = NULL;
rqstp->rq_client = NULL;
/* need the three words read just below: length, time stamp, machname length */
if ((len -= 3*4) < 0)
return SVC_GARBAGE;
svc_getu32(argv); /* length */
svc_getu32(argv); /* time stamp */
slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
/* machname is capped at 64 words; besides it we need uid, gid and gid count (3 words) */
if (slen > 64 || (len -= (slen + 3)*4) < 0)
goto badcred;
argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
argv->iov_len -= slen*4;
cred->cr_uid = svc_getnl(argv); /* uid */
cred->cr_gid = svc_getnl(argv); /* gid */
slen = svc_getnl(argv); /* gids length */
/* at most 16 gids; besides them we need the two verifier words checked below */
if (slen > 16 || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
return SVC_DROP;
for (i = 0; i < slen; i++)
GROUP_AT(cred->cr_group_info, i) = svc_getnl(argv);
/* verifier must be RPC_AUTH_NULL followed by a zero word */
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
*authp = rpc_autherr_badverf;
return SVC_DENIED;
}
/* Put NULL verifier */
svc_putnl(resv, RPC_AUTH_NULL);
svc_putnl(resv, 0);
return SVC_OK;
badcred:
*authp = rpc_autherr_badcred;
return SVC_DENIED;
}
static int
svcauth_unix_release(struct svc_rqst *rqstp)
{
/* Verifier (such as it is) is already in place.
*/
if (rqstp->rq_client)
auth_domain_put(rqstp->rq_client);
rqstp->rq_client = NULL;
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
return 0;
}
/*
 * Operations table for the RPC_AUTH_UNIX flavour.  This is also the
 * flavour pointer stored in every unix_domain, which is why it is the
 * only table here that supplies .domain_release.
 */
struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
.accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
};