Merge branch 'flock' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/bkl

* 'flock' of git://git.kernel.org/pub/scm/linux/kernel/git/arnd/bkl:
  locks: turn lock_flocks into a spinlock
  fasync: re-organize fasync entry insertion to allow it under a spinlock
  locks/nfsd: allocate file lock outside of spinlock
  lockd: fix nlmsvc_notify_blocked locking
  lockd: push lock_flocks down
This commit is contained in:
Linus Torvalds 2010-10-27 18:13:34 -07:00
commit 7420a8c0de
10 changed files with 116 additions and 72 deletions

View File

@@ -53,7 +53,6 @@ config EXPORTFS
config FILE_LOCKING config FILE_LOCKING
bool "Enable POSIX file locking API" if EMBEDDED bool "Enable POSIX file locking API" if EMBEDDED
default y default y
select BKL # while lockd still uses it.
help help
This option enables standard file locking support, required This option enables standard file locking support, required
for filesystems like NFS and for the flock() system for filesystems like NFS and for the flock() system

View File

@@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head)
* match the state "is the filp on a fasync list". * match the state "is the filp on a fasync list".
* *
*/ */
static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{ {
struct fasync_struct *fa, **fp; struct fasync_struct *fa, **fp;
int result = 0; int result = 0;
@@ -666,21 +666,28 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
return result; return result;
} }
/* struct fasync_struct *fasync_alloc(void)
* Add a fasync entry. Return negative on error, positive if
* added, and zero if did nothing but change an existing one.
*
* NOTE! It is very important that the FASYNC flag always
* match the state "is the filp on a fasync list".
*/
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{ {
struct fasync_struct *new, *fa, **fp; return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
int result = 0; }
new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); /*
if (!new) * NOTE! This can be used only for unused fasync entries:
return -ENOMEM; * entries that actually got inserted on the fasync list
* need to be released by rcu - see fasync_remove_entry.
*/
void fasync_free(struct fasync_struct *new)
{
kmem_cache_free(fasync_cache, new);
}
/*
* Insert a new entry into the fasync list. Return the pointer to the
* old one if we didn't use the new one.
*/
struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
{
struct fasync_struct *fa, **fp;
spin_lock(&filp->f_lock); spin_lock(&filp->f_lock);
spin_lock(&fasync_lock); spin_lock(&fasync_lock);
@@ -691,8 +698,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
spin_lock_irq(&fa->fa_lock); spin_lock_irq(&fa->fa_lock);
fa->fa_fd = fd; fa->fa_fd = fd;
spin_unlock_irq(&fa->fa_lock); spin_unlock_irq(&fa->fa_lock);
kmem_cache_free(fasync_cache, new);
goto out; goto out;
} }
@@ -702,13 +707,42 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
new->fa_fd = fd; new->fa_fd = fd;
new->fa_next = *fapp; new->fa_next = *fapp;
rcu_assign_pointer(*fapp, new); rcu_assign_pointer(*fapp, new);
result = 1;
filp->f_flags |= FASYNC; filp->f_flags |= FASYNC;
out: out:
spin_unlock(&fasync_lock); spin_unlock(&fasync_lock);
spin_unlock(&filp->f_lock); spin_unlock(&filp->f_lock);
return result; return fa;
}
/*
* Add a fasync entry. Return negative on error, positive if
* added, and zero if did nothing but change an existing one.
*
* NOTE! It is very important that the FASYNC flag always
* match the state "is the filp on a fasync list".
*/
static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
{
struct fasync_struct *new;
new = fasync_alloc();
if (!new)
return -ENOMEM;
/*
* fasync_insert_entry() returns the old (update) entry if
* it existed.
*
* So free the (unused) new entry and return 0 to let the
* caller know that we didn't add any new fasync entries.
*/
if (fasync_insert_entry(fd, filp, fapp, new)) {
fasync_free(new);
return 0;
}
return 1;
} }
/* /*

View File

@@ -22,7 +22,6 @@
#include <linux/in.h> #include <linux/in.h>
#include <linux/uio.h> #include <linux/uio.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/freezer.h> #include <linux/freezer.h>
@@ -130,15 +129,6 @@ lockd(void *vrqstp)
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
/*
* FIXME: it would be nice if lockd didn't spend its entire life
* running under the BKL. At the very least, it would be good to
* have someone clarify what it's intended to protect here. I've
* seen some handwavy posts about posix locking needing to be
* done under the BKL, but it's far from clear.
*/
lock_kernel();
if (!nlm_timeout) if (!nlm_timeout)
nlm_timeout = LOCKD_DFLT_TIMEO; nlm_timeout = LOCKD_DFLT_TIMEO;
nlmsvc_timeout = nlm_timeout * HZ; nlmsvc_timeout = nlm_timeout * HZ;
@@ -195,7 +185,6 @@ lockd(void *vrqstp)
if (nlmsvc_ops) if (nlmsvc_ops)
nlmsvc_invalidate_all(); nlmsvc_invalidate_all();
nlm_shutdown_hosts(); nlm_shutdown_hosts();
unlock_kernel();
return 0; return 0;
} }

View File

@@ -700,14 +700,16 @@ nlmsvc_notify_blocked(struct file_lock *fl)
struct nlm_block *block; struct nlm_block *block;
dprintk("lockd: VFS unblock notification for block %p\n", fl); dprintk("lockd: VFS unblock notification for block %p\n", fl);
spin_lock(&nlm_blocked_lock);
list_for_each_entry(block, &nlm_blocked, b_list) { list_for_each_entry(block, &nlm_blocked, b_list) {
if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
nlmsvc_insert_block(block, 0); nlmsvc_insert_block_locked(block, 0);
spin_unlock(&nlm_blocked_lock);
svc_wake_up(block->b_daemon); svc_wake_up(block->b_daemon);
return; return;
} }
} }
spin_unlock(&nlm_blocked_lock);
printk(KERN_WARNING "lockd: notification for unknown block!\n"); printk(KERN_WARNING "lockd: notification for unknown block!\n");
} }

View File

@@ -170,6 +170,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
again: again:
file->f_locks = 0; file->f_locks = 0;
lock_flocks(); /* protects i_flock list */
for (fl = inode->i_flock; fl; fl = fl->fl_next) { for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops != &nlmsvc_lock_operations) if (fl->fl_lmops != &nlmsvc_lock_operations)
continue; continue;
@@ -181,6 +182,7 @@ again:
if (match(lockhost, host)) { if (match(lockhost, host)) {
struct file_lock lock = *fl; struct file_lock lock = *fl;
unlock_flocks();
lock.fl_type = F_UNLCK; lock.fl_type = F_UNLCK;
lock.fl_start = 0; lock.fl_start = 0;
lock.fl_end = OFFSET_MAX; lock.fl_end = OFFSET_MAX;
@@ -192,6 +194,7 @@ again:
goto again; goto again;
} }
} }
unlock_flocks();
return 0; return 0;
} }
@@ -226,10 +229,14 @@ nlm_file_inuse(struct nlm_file *file)
if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
return 1; return 1;
lock_flocks();
for (fl = inode->i_flock; fl; fl = fl->fl_next) { for (fl = inode->i_flock; fl; fl = fl->fl_next) {
if (fl->fl_lmops == &nlmsvc_lock_operations) if (fl->fl_lmops == &nlmsvc_lock_operations) {
unlock_flocks();
return 1; return 1;
} }
}
unlock_flocks();
file->f_locks = 0; file->f_locks = 0;
return 0; return 0;
} }

View File

@@ -142,6 +142,7 @@ int lease_break_time = 45;
static LIST_HEAD(file_lock_list); static LIST_HEAD(file_lock_list);
static LIST_HEAD(blocked_list); static LIST_HEAD(blocked_list);
static DEFINE_SPINLOCK(file_lock_lock);
/* /*
* Protects the two list heads above, plus the inode->i_flock list * Protects the two list heads above, plus the inode->i_flock list
@@ -149,23 +150,24 @@ static LIST_HEAD(blocked_list);
*/ */
void lock_flocks(void) void lock_flocks(void)
{ {
lock_kernel(); spin_lock(&file_lock_lock);
} }
EXPORT_SYMBOL_GPL(lock_flocks); EXPORT_SYMBOL_GPL(lock_flocks);
void unlock_flocks(void) void unlock_flocks(void)
{ {
unlock_kernel(); spin_unlock(&file_lock_lock);
} }
EXPORT_SYMBOL_GPL(unlock_flocks); EXPORT_SYMBOL_GPL(unlock_flocks);
static struct kmem_cache *filelock_cache __read_mostly; static struct kmem_cache *filelock_cache __read_mostly;
/* Allocate an empty lock structure. */ /* Allocate an empty lock structure. */
static struct file_lock *locks_alloc_lock(void) struct file_lock *locks_alloc_lock(void)
{ {
return kmem_cache_alloc(filelock_cache, GFP_KERNEL); return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
} }
EXPORT_SYMBOL_GPL(locks_alloc_lock);
void locks_release_private(struct file_lock *fl) void locks_release_private(struct file_lock *fl)
{ {
@@ -1365,7 +1367,6 @@ int fcntl_getlease(struct file *filp)
int generic_setlease(struct file *filp, long arg, struct file_lock **flp) int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
{ {
struct file_lock *fl, **before, **my_before = NULL, *lease; struct file_lock *fl, **before, **my_before = NULL, *lease;
struct file_lock *new_fl = NULL;
struct dentry *dentry = filp->f_path.dentry; struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
int error, rdlease_count = 0, wrlease_count = 0; int error, rdlease_count = 0, wrlease_count = 0;
@@ -1385,11 +1386,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
lease = *flp; lease = *flp;
if (arg != F_UNLCK) { if (arg != F_UNLCK) {
error = -ENOMEM;
new_fl = locks_alloc_lock();
if (new_fl == NULL)
goto out;
error = -EAGAIN; error = -EAGAIN;
if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
goto out; goto out;
@@ -1434,7 +1430,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
goto out; goto out;
} }
error = 0;
if (arg == F_UNLCK) if (arg == F_UNLCK)
goto out; goto out;
@@ -1442,15 +1437,11 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
if (!leases_enable) if (!leases_enable)
goto out; goto out;
locks_copy_lock(new_fl, lease); locks_insert_lock(before, lease);
locks_insert_lock(before, new_fl);
*flp = new_fl;
return 0; return 0;
out: out:
if (new_fl != NULL) locks_free_lock(lease);
locks_free_lock(new_fl);
return error; return error;
} }
EXPORT_SYMBOL(generic_setlease); EXPORT_SYMBOL(generic_setlease);
@@ -1514,26 +1505,38 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
*/ */
int fcntl_setlease(unsigned int fd, struct file *filp, long arg) int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
{ {
struct file_lock fl, *flp = &fl; struct file_lock *fl;
struct fasync_struct *new;
struct inode *inode = filp->f_path.dentry->d_inode; struct inode *inode = filp->f_path.dentry->d_inode;
int error; int error;
locks_init_lock(&fl); fl = lease_alloc(filp, arg);
error = lease_init(filp, arg, &fl); if (IS_ERR(fl))
if (error) return PTR_ERR(fl);
return error;
new = fasync_alloc();
if (!new) {
locks_free_lock(fl);
return -ENOMEM;
}
lock_flocks(); lock_flocks();
error = __vfs_setlease(filp, arg, &fl);
error = __vfs_setlease(filp, arg, &flp);
if (error || arg == F_UNLCK) if (error || arg == F_UNLCK)
goto out_unlock; goto out_unlock;
error = fasync_helper(fd, filp, 1, &flp->fl_fasync); /*
* fasync_insert_entry() returns the old entry if any.
* If there was no old entry, then it used 'new' and
* inserted it into the fasync list. Clear new so that
* we don't release it here.
*/
if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new))
new = NULL;
if (error < 0) { if (error < 0) {
/* remove lease just inserted by setlease */ /* remove lease just inserted by setlease */
flp->fl_type = F_UNLCK | F_INPROGRESS; fl->fl_type = F_UNLCK | F_INPROGRESS;
flp->fl_break_time = jiffies - 10; fl->fl_break_time = jiffies - 10;
time_out_leases(inode); time_out_leases(inode);
goto out_unlock; goto out_unlock;
} }
@@ -1541,6 +1544,8 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
out_unlock: out_unlock:
unlock_flocks(); unlock_flocks();
if (new)
fasync_free(new);
return error; return error;
} }

View File

@@ -1,7 +1,6 @@
config NFS_FS config NFS_FS
tristate "NFS client support" tristate "NFS client support"
depends on INET && FILE_LOCKING depends on INET && FILE_LOCKING
depends on BKL # fix as soon as lockd is done
select LOCKD select LOCKD
select SUNRPC select SUNRPC
select NFS_ACL_SUPPORT if NFS_V3_ACL select NFS_ACL_SUPPORT if NFS_V3_ACL

View File

@@ -2,7 +2,6 @@ config NFSD
tristate "NFS server support" tristate "NFS server support"
depends on INET depends on INET
depends on FILE_LOCKING depends on FILE_LOCKING
depends on BKL # fix as soon as lockd is done
select LOCKD select LOCKD
select SUNRPC select SUNRPC
select EXPORTFS select EXPORTFS

View File

@@ -2614,7 +2614,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
struct nfs4_delegation *dp; struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner; struct nfs4_stateowner *sop = stp->st_stateowner;
int cb_up = atomic_read(&sop->so_client->cl_cb_set); int cb_up = atomic_read(&sop->so_client->cl_cb_set);
struct file_lock fl, *flp = &fl; struct file_lock *fl;
int status, flag = 0; int status, flag = 0;
flag = NFS4_OPEN_DELEGATE_NONE; flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2648,20 +2648,24 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
flag = NFS4_OPEN_DELEGATE_NONE; flag = NFS4_OPEN_DELEGATE_NONE;
goto out; goto out;
} }
locks_init_lock(&fl); status = -ENOMEM;
fl.fl_lmops = &nfsd_lease_mng_ops; fl = locks_alloc_lock();
fl.fl_flags = FL_LEASE; if (!fl)
fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; goto out;
fl.fl_end = OFFSET_MAX; locks_init_lock(fl);
fl.fl_owner = (fl_owner_t)dp; fl->fl_lmops = &nfsd_lease_mng_ops;
fl.fl_file = find_readable_file(stp->st_file); fl->fl_flags = FL_LEASE;
BUG_ON(!fl.fl_file); fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
fl.fl_pid = current->tgid; fl->fl_end = OFFSET_MAX;
fl->fl_owner = (fl_owner_t)dp;
fl->fl_file = find_readable_file(stp->st_file);
BUG_ON(!fl->fl_file);
fl->fl_pid = current->tgid;
/* vfs_setlease checks to see if delegation should be handed out. /* vfs_setlease checks to see if delegation should be handed out.
* the lock_manager callbacks fl_mylease and fl_change are used * the lock_manager callbacks fl_mylease and fl_change are used
*/ */
if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
dprintk("NFSD: setlease failed [%d], no delegation\n", status); dprintk("NFSD: setlease failed [%d], no delegation\n", status);
unhash_delegation(dp); unhash_delegation(dp);
flag = NFS4_OPEN_DELEGATE_NONE; flag = NFS4_OPEN_DELEGATE_NONE;

View File

@@ -1122,6 +1122,7 @@ extern int fcntl_getlease(struct file *filp);
/* fs/locks.c */ /* fs/locks.c */
extern void locks_init_lock(struct file_lock *); extern void locks_init_lock(struct file_lock *);
extern struct file_lock * locks_alloc_lock(void);
extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void locks_copy_lock(struct file_lock *, struct file_lock *);
extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_posix(struct file *, fl_owner_t);
@@ -1310,6 +1311,11 @@ struct fasync_struct {
/* SMP safe fasync helpers: */ /* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **); extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
extern int fasync_remove_entry(struct file *, struct fasync_struct **);
extern struct fasync_struct *fasync_alloc(void);
extern void fasync_free(struct fasync_struct *);
/* can be called from interrupts */ /* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int); extern void kill_fasync(struct fasync_struct **, int, int);