Merge git://oss.sgi.com:8090/xfs/xfs-2.6

* git://oss.sgi.com:8090/xfs/xfs-2.6:
  [XFS] Remove KERNEL_VERSION macros from xfs_dmapi.h
  [XFS] Prevent a deadlock when xfslogd unpins inodes.
  [XFS] Clean up i_flags and i_flags_lock handling.
  [XFS] 956664: dm_read_invis() changes i_atime
  [XFS] rename uio_read() to xfs_uio_read()
  [XFS] Keep lockdep happy.
  [XFS] 956618: Linux crashes on boot with XFS-DMAPI filesystem when
This commit is contained in:
Linus Torvalds 2006-11-13 08:15:30 -08:00
commit eea2078eaf
15 changed files with 192 additions and 116 deletions

@ -21,22 +21,7 @@ EXTRA_CFLAGS += -Ifs/xfs -Ifs/xfs/linux-2.6 -funsigned-char
XFS_LINUX := linux-2.6
ifeq ($(CONFIG_XFS_DEBUG),y)
EXTRA_CFLAGS += -g -DSTATIC="" -DDEBUG
EXTRA_CFLAGS += -DXFS_BUF_LOCK_TRACKING
endif
ifeq ($(CONFIG_XFS_TRACE),y)
EXTRA_CFLAGS += -DXFS_ALLOC_TRACE
EXTRA_CFLAGS += -DXFS_ATTR_TRACE
EXTRA_CFLAGS += -DXFS_BLI_TRACE
EXTRA_CFLAGS += -DXFS_BMAP_TRACE
EXTRA_CFLAGS += -DXFS_BMBT_TRACE
EXTRA_CFLAGS += -DXFS_DIR2_TRACE
EXTRA_CFLAGS += -DXFS_DQUOT_TRACE
EXTRA_CFLAGS += -DXFS_ILOCK_TRACE
EXTRA_CFLAGS += -DXFS_LOG_TRACE
EXTRA_CFLAGS += -DXFS_RW_TRACE
EXTRA_CFLAGS += -DXFS_BUF_TRACE
EXTRA_CFLAGS += -DXFS_VNODE_TRACE
EXTRA_CFLAGS += -g
endif
obj-$(CONFIG_XFS_FS) += xfs.o

@ -15,6 +15,7 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include <linux/stddef.h>
#include <linux/errno.h>
#include <linux/slab.h>
@ -31,7 +32,6 @@
#include <linux/kthread.h>
#include <linux/migrate.h>
#include <linux/backing-dev.h>
#include "xfs_linux.h"
STATIC kmem_zone_t *xfs_buf_zone;
STATIC kmem_shaker_t xfs_buf_shake;
@ -1406,7 +1406,7 @@ xfs_alloc_bufhash(
btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
btp->bt_hash = kmem_zalloc((1 << btp->bt_hashshift) *
sizeof(xfs_bufhash_t), KM_SLEEP);
sizeof(xfs_bufhash_t), KM_SLEEP | KM_LARGE);
for (i = 0; i < (1 << btp->bt_hashshift); i++) {
spin_lock_init(&btp->bt_hash[i].bh_lock);
INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);

@ -0,0 +1,28 @@
/*
* Copyright (c) 2000-2006 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#if !defined(__XFS_DMAPI_PRIV_H__)
#define __XFS_DMAPI_PRIV_H__

/*
 * Platform-specific semaphore flag selection for DMAPI events.
 *
 * DM_SEM_FLAG_RD(ioflags) evaluates to DM_FLAGS_IMUX when IO_ISDIRECT is
 * set in ioflags, and to 0 otherwise.  DM_SEM_FLAG_WR is always the union
 * of DM_FLAGS_IALLOCSEM_WR and DM_FLAGS_IMUX.
 *
 * None of the flag constants are defined in this header; the including
 * file has to provide them before the macros are expanded.
 */
#define DM_SEM_FLAG_RD(ioflags) \
	(((ioflags) & IO_ISDIRECT) ? DM_FLAGS_IMUX : 0)
#define DM_SEM_FLAG_WR \
	(DM_FLAGS_IMUX | DM_FLAGS_IALLOCSEM_WR)

#endif /* __XFS_DMAPI_PRIV_H__ */

@ -341,8 +341,11 @@ xfs_open_by_handle(
put_unused_fd(new_fd);
return -XFS_ERROR(-PTR_ERR(filp));
}
if (inode->i_mode & S_IFREG)
if (inode->i_mode & S_IFREG) {
/* invisible operation should not change atime */
filp->f_flags |= O_NOATIME;
filp->f_op = &xfs_invis_file_operations;
}
fd_install(new_fd, filp);
return new_fd;

@ -227,9 +227,7 @@ xfs_initialize_vnode(
xfs_revalidate_inode(XFS_BHVTOM(bdp), vp, ip);
xfs_set_inodeops(inode);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_INEW;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(inode);

@ -15,11 +15,9 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <xfs.h>
#include "debug.h"
#include "spin.h"
#include <asm/page.h>
#include <linux/sched.h>
#include <linux/kernel.h>
static char message[256]; /* keep it off the stack */
static DEFINE_SPINLOCK(xfs_err_lock);

@ -22,7 +22,7 @@
* as we go.
*/
int
uio_read(caddr_t src, size_t len, struct uio *uio)
xfs_uio_read(caddr_t src, size_t len, struct uio *uio)
{
size_t count;

@ -65,6 +65,6 @@ struct uio {
typedef struct uio uio_t;
typedef struct iovec iovec_t;
extern int uio_read (caddr_t, size_t, uio_t *);
extern int xfs_uio_read (caddr_t, size_t, uio_t *);
#endif /* __XFS_SUPPORT_MOVE_H__ */

@ -17,5 +17,28 @@
*/
#ifndef __XFS_H__
#define __XFS_H__
/*
 * Debug build configuration, selected by CONFIG_XFS_DEBUG: STATIC is
 * defined empty, and DEBUG and XFS_BUF_LOCK_TRACKING are switched on.
 * QUOTADEBUG is left disabled (commented out).
 */
#ifdef CONFIG_XFS_DEBUG
#define STATIC
#define DEBUG 1
#define XFS_BUF_LOCK_TRACKING 1
/* #define QUOTADEBUG 1 */
#endif
/*
 * Tracing configuration, selected by CONFIG_XFS_TRACE: every
 * per-subsystem XFS_*_TRACE switch listed below is defined to 1.
 */
#ifdef CONFIG_XFS_TRACE
#define XFS_ALLOC_TRACE 1
#define XFS_ATTR_TRACE 1
#define XFS_BLI_TRACE 1
#define XFS_BMAP_TRACE 1
#define XFS_BMBT_TRACE 1
#define XFS_DIR2_TRACE 1
#define XFS_DQUOT_TRACE 1
#define XFS_ILOCK_TRACE 1
#define XFS_LOG_TRACE 1
#define XFS_RW_TRACE 1
#define XFS_BUF_TRACE 1
#define XFS_VNODE_TRACE 1
#endif
/*
 * NOTE(review): the switches above presumably have to be defined before
 * the platform header is pulled in so that it can see them -- confirm
 * before reordering anything in this file.
 */
#include <linux-2.6/xfs_linux.h>
#endif /* __XFS_H__ */

@ -678,7 +678,7 @@ xfs_dir2_put_dirent64_uio(
idbp->d_off = pa->cook;
idbp->d_name[namelen] = '\0';
memcpy(idbp->d_name, pa->name, namelen);
rval = uio_read((caddr_t)idbp, reclen, uio);
rval = xfs_uio_read((caddr_t)idbp, reclen, uio);
pa->done = (rval == 0);
return rval;
}

@ -157,27 +157,9 @@ typedef enum {
#define DM_FLAGS_IALLOCSEM_WR 0x020 /* thread holds i_alloc_sem wr */
/*
* Based on IO_ISDIRECT, decide which i_ flag is set.
* Pull in platform specific event flags defines
*/
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,0)
#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
DM_FLAGS_IMUX : 0)
#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
#endif
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) && \
(LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,22))
#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
DM_FLAGS_IALLOCSEM_RD : DM_FLAGS_IMUX)
#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
#endif
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,4,21)
#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
0 : DM_FLAGS_IMUX)
#define DM_SEM_FLAG_WR (DM_FLAGS_IMUX)
#endif
#include "xfs_dmapi_priv.h"
/*
* Macros to turn caller specified delay/block flags into

@ -215,7 +215,7 @@ again:
* If INEW is set this inode is being set up
* we need to pause and try again.
*/
if (ip->i_flags & XFS_INEW) {
if (xfs_iflags_test(ip, XFS_INEW)) {
read_unlock(&ih->ih_lock);
delay(1);
XFS_STATS_INC(xs_ig_frecycle);
@ -230,22 +230,50 @@ again:
* on its way out of the system,
* we need to pause and try again.
*/
if (ip->i_flags & XFS_IRECLAIM) {
if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&ih->ih_lock);
delay(1);
XFS_STATS_INC(xs_ig_frecycle);
goto again;
}
ASSERT(xfs_iflags_test(ip, XFS_IRECLAIMABLE));
/*
* If lookup is racing with unlink, then we
* should return an error immediately so we
* don't remove it from the reclaim list and
* potentially leak the inode.
*/
if ((ip->i_d.di_mode == 0) &&
!(flags & XFS_IGET_CREATE)) {
read_unlock(&ih->ih_lock);
return ENOENT;
}
/*
* There may be transactions sitting in the
* incore log buffers or being flushed to disk
* at this time. We can't clear the
* XFS_IRECLAIMABLE flag until these
* transactions have hit the disk, otherwise we
* will void the guarantee the flag provides to
* xfs_iunpin().
*/
if (xfs_ipincount(ip)) {
read_unlock(&ih->ih_lock);
xfs_log_force(mp, 0,
XFS_LOG_FORCE|XFS_LOG_SYNC);
XFS_STATS_INC(xs_ig_frecycle);
goto again;
}
vn_trace_exit(vp, "xfs_iget.alloc",
(inst_t *)__return_address);
XFS_STATS_INC(xs_ig_found);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_IRECLAIMABLE;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_clear(ip, XFS_IRECLAIMABLE);
version = ih->ih_version;
read_unlock(&ih->ih_lock);
xfs_ihash_promote(ih, ip, version);
@ -299,10 +327,7 @@ finish_inode:
if (lock_flags != 0)
xfs_ilock(ip, lock_flags);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~XFS_ISTALE;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_clear(ip, XFS_ISTALE);
vn_trace_exit(vp, "xfs_iget.found",
(inst_t *)__return_address);
goto return_ip;
@ -371,10 +396,7 @@ finish_inode:
ih->ih_next = ip;
ip->i_udquot = ip->i_gdquot = NULL;
ih->ih_version++;
spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_INEW;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_set(ip, XFS_INEW);
write_unlock(&ih->ih_lock);
/*
@ -625,7 +647,7 @@ xfs_iput_new(xfs_inode_t *ip,
vn_trace_entry(vp, "xfs_iput_new", (inst_t *)__return_address);
if ((ip->i_d.di_mode == 0)) {
ASSERT(!(ip->i_flags & XFS_IRECLAIMABLE));
ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
vn_mark_bad(vp);
}
if (inode->i_state & I_NEW)
@ -683,6 +705,7 @@ xfs_ireclaim(xfs_inode_t *ip)
/*
* Free all memory associated with the inode.
*/
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_idestroy(ip);
}

@ -2193,7 +2193,7 @@ xfs_ifree_cluster(
/* Inode not in memory or we found it already,
* nothing to do
*/
if (!ip || (ip->i_flags & XFS_ISTALE)) {
if (!ip || xfs_iflags_test(ip, XFS_ISTALE)) {
read_unlock(&ih->ih_lock);
continue;
}
@ -2215,10 +2215,7 @@ xfs_ifree_cluster(
if (ip == free_ip) {
if (xfs_iflock_nowait(ip)) {
spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_ISTALE;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_set(ip, XFS_ISTALE);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
} else {
@ -2231,9 +2228,7 @@ xfs_ifree_cluster(
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
if (xfs_iflock_nowait(ip)) {
spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_ISTALE;
spin_unlock(&ip->i_flags_lock);
xfs_iflags_set(ip, XFS_ISTALE);
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
@ -2263,9 +2258,7 @@ xfs_ifree_cluster(
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
spin_lock(&iip->ili_inode->i_flags_lock);
iip->ili_inode->i_flags |= XFS_ISTALE;
spin_unlock(&iip->ili_inode->i_flags_lock);
xfs_iflags_set(ip, XFS_ISTALE);
pre_flushed++;
}
lip = lip->li_bio_list;
@ -2748,42 +2741,39 @@ xfs_iunpin(
{
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount)) {
/*
* If the inode is currently being reclaimed, the
* linux inode _and_ the xfs vnode may have been
* freed so we cannot reference either of them safely.
* Hence we should not try to do anything to them
* if the xfs inode is currently in the reclaim
* path.
*
* However, we still need to issue the unpin wakeup
* call as the inode reclaim may be blocked waiting for
* the inode to become unpinned.
*/
struct inode *inode = NULL;
if (atomic_dec_and_lock(&ip->i_pincount, &ip->i_flags_lock)) {
spin_lock(&ip->i_flags_lock);
if (!(ip->i_flags & (XFS_IRECLAIM|XFS_IRECLAIMABLE))) {
/*
* If the inode is currently being reclaimed, the link between
* the bhv_vnode and the xfs_inode will be broken after the
* XFS_IRECLAIM* flag is set. Hence, if these flags are not
* set, then we can move forward and mark the linux inode dirty
* knowing that it is still valid as it won't freed until after
* the bhv_vnode<->xfs_inode link is broken in xfs_reclaim. The
* i_flags_lock is used to synchronise the setting of the
* XFS_IRECLAIM* flags and the breaking of the link, and so we
* can execute atomically w.r.t. reclaim by holding this lock
* here.
*
* However, we still need to issue the unpin wakeup call as the
* inode reclaim may be blocked waiting for the inode to become
* unpinned.
*/
if (!__xfs_iflags_test(ip, XFS_IRECLAIM|XFS_IRECLAIMABLE)) {
bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
struct inode *inode = NULL;
BUG_ON(vp == NULL);
inode = vn_to_inode(vp);
BUG_ON(inode->i_state & I_CLEAR);
/* make sync come back and flush this inode */
if (vp) {
inode = vn_to_inode(vp);
if (!(inode->i_state &
(I_NEW|I_FREEING|I_CLEAR))) {
inode = igrab(inode);
if (inode)
mark_inode_dirty_sync(inode);
} else
inode = NULL;
}
if (!(inode->i_state & (I_NEW|I_FREEING)))
mark_inode_dirty_sync(inode);
}
spin_unlock(&ip->i_flags_lock);
wake_up(&ip->i_ipin_wait);
if (inode)
iput(inode);
}
}

@ -305,6 +305,47 @@ typedef struct xfs_inode {
#endif
} xfs_inode_t;
/*
* i_flags helper functions
*/
/*
 * OR the bits in @flags into ip->i_flags.
 *
 * This variant takes no lock itself.
 * NOTE(review): callers are presumably expected to hold ip->i_flags_lock
 * around the call -- confirm at each call site.
 */
static inline void
__xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
{
	ip->i_flags = ip->i_flags | flags;
}
/*
 * Set the bits in @flags in ip->i_flags, holding ip->i_flags_lock for
 * the duration of the update.
 */
static inline void
xfs_iflags_set(xfs_inode_t *ip, unsigned short flags)
{
	spin_lock(&ip->i_flags_lock);
	ip->i_flags |= flags;
	spin_unlock(&ip->i_flags_lock);
}
/*
 * Clear the bits in @flags from ip->i_flags, holding ip->i_flags_lock
 * for the duration of the update.
 */
static inline void
xfs_iflags_clear(xfs_inode_t *ip, unsigned short flags)
{
	spin_lock(&ip->i_flags_lock);
	ip->i_flags = ip->i_flags & ~flags;
	spin_unlock(&ip->i_flags_lock);
}
/*
 * Return the subset of @flags currently set in ip->i_flags; the result
 * is non-zero if any requested bit is set and zero otherwise.
 *
 * This variant takes no lock itself.
 * NOTE(review): callers are presumably expected to hold ip->i_flags_lock
 * around the call -- confirm at each call site.
 */
static inline int
__xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
{
	int	masked = ip->i_flags & flags;

	return masked;
}
/*
 * Sample ip->i_flags under ip->i_flags_lock and return the subset of
 * @flags that was set at that moment (non-zero if any bit was set).
 * The lock is dropped before returning, so the answer is only a snapshot.
 */
static inline int
xfs_iflags_test(xfs_inode_t *ip, unsigned short flags)
{
	int	masked;

	spin_lock(&ip->i_flags_lock);
	masked = ip->i_flags & flags;
	spin_unlock(&ip->i_flags_lock);

	return masked;
}
#endif /* __KERNEL__ */

@ -1013,7 +1013,7 @@ xfs_readlink(
pathlen = (int)ip->i_d.di_size;
if (ip->i_df.if_flags & XFS_IFINLINE) {
error = uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
error = xfs_uio_read(ip->i_df.if_u1.if_data, pathlen, uiop);
}
else {
/*
@ -1044,7 +1044,7 @@ xfs_readlink(
byte_cnt = pathlen;
pathlen -= byte_cnt;
error = uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
error = xfs_uio_read(XFS_BUF_PTR(bp), byte_cnt, uiop);
xfs_buf_relse (bp);
}
@ -3827,11 +3827,16 @@ xfs_reclaim(
*/
xfs_synchronize_atime(ip);
/* If we have nothing to flush with this inode then complete the
* teardown now, otherwise break the link between the xfs inode
* and the linux inode and clean up the xfs inode later. This
* avoids flushing the inode to disk during the delete operation
* itself.
/*
* If we have nothing to flush with this inode then complete the
* teardown now, otherwise break the link between the xfs inode and the
* linux inode and clean up the xfs inode later. This avoids flushing
* the inode to disk during the delete operation itself.
*
* When breaking the link, we need to set the XFS_IRECLAIMABLE flag
* first to ensure that xfs_iunpin() will never see an xfs inode
* that has a linux inode being reclaimed. Synchronisation is provided
* by the i_flags_lock.
*/
if (!ip->i_update_core && (ip->i_itemp == NULL)) {
xfs_ilock(ip, XFS_ILOCK_EXCL);
@ -3840,13 +3845,13 @@ xfs_reclaim(
} else {
xfs_mount_t *mp = ip->i_mount;
/* Protect sync from us */
/* Protect sync and unpin from us */
XFS_MOUNT_ILOCK(mp);
vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
spin_lock(&ip->i_flags_lock);
ip->i_flags |= XFS_IRECLAIMABLE;
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
vn_bhv_remove(VN_BHV_HEAD(vp), XFS_ITOBHV(ip));
spin_unlock(&ip->i_flags_lock);
list_add_tail(&ip->i_reclaim, &mp->m_del_inodes);
XFS_MOUNT_IUNLOCK(mp);
}
return 0;
@ -3872,8 +3877,8 @@ xfs_finish_reclaim(
*/
write_lock(&ih->ih_lock);
spin_lock(&ip->i_flags_lock);
if ((ip->i_flags & XFS_IRECLAIM) ||
(!(ip->i_flags & XFS_IRECLAIMABLE) && vp == NULL)) {
if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
(!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) && vp == NULL)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);
if (locked) {
@ -3882,7 +3887,7 @@ xfs_finish_reclaim(
}
return 1;
}
ip->i_flags |= XFS_IRECLAIM;
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&ih->ih_lock);