linux-next/fs/ext2/file.c
Ritesh Harjani (IBM) 6e335cd789 ext2: Add direct-io trace points
This patch adds the trace point to ext2 direct-io apis
in fs/ext2/file.c

Here is how the output looks like

        a.out-467865 [006]  6758.170968: ext2_dio_write_begin: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT|WRITE aio 1 ret 0
        a.out-467865 [006]  6758.171061: ext2_dio_write_end:   dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 0 flags DIRECT|WRITE aio 1 ret -529
kworker/3:153-444162 [003]  6758.171252: ext2_dio_write_endio: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT|WRITE aio 1 ret 0
        a.out-468222 [001]  6761.628924: ext2_dio_read_begin:  dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 1 ret 0
        a.out-468222 [001]  6761.629063: ext2_dio_read_end:    dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 0 flags DIRECT aio 1 ret -529
        a.out-468428 [005]  6763.937454: ext2_dio_write_begin: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0
        a.out-468428 [005]  6763.937829: ext2_dio_write_endio: dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0
        a.out-468428 [005]  6763.937847: ext2_dio_write_end:   dev 7:12 ino 0xe isize 0x1000 pos 0x1000 len 0 flags DIRECT aio 0 ret 4096
        a.out-468609 [000]  6765.702878: ext2_dio_read_begin:  dev 7:12 ino 0xe isize 0x1000 pos 0x0 len 4096 flags DIRECT aio 0 ret 0
        a.out-468609 [000]  6765.703243: ext2_dio_read_end:    dev 7:12 ino 0xe isize 0x1000 pos 0x1000 len 0 flags DIRECT aio 0 ret 4096

Reported-and-tested-by: Disha Goel <disgoel@linux.ibm.com>
[Need to add CFLAGS_trace for fixing unable to find trace file problem]
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Message-Id: <b8b0897fa2b273a448d7b4ba7317357ac73c08bc.1682069716.git.ritesh.list@gmail.com>
2023-05-16 11:32:42 +02:00

333 lines
8.6 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/ext2/file.c
*
* Copyright (C) 1992, 1993, 1994, 1995
* Remy Card (card@masi.ibp.fr)
* Laboratoire MASI - Institut Blaise Pascal
* Universite Pierre et Marie Curie (Paris VI)
*
* from
*
* linux/fs/minix/file.c
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
* ext2 fs regular file handling primitives
*
* 64-bit file support on 64-bit platforms by Jakub Jelinek
* (jj@sunsite.ms.mff.cuni.cz)
*/
#include <linux/time.h>
#include <linux/pagemap.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/iomap.h>
#include <linux/uio.h>
#include <linux/buffer_head.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
#include "trace.h"
#ifdef CONFIG_FS_DAX
static ssize_t ext2_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
ssize_t ret;
if (!iov_iter_count(to))
return 0; /* skip atime */
inode_lock_shared(inode);
ret = dax_iomap_rw(iocb, to, &ext2_iomap_ops);
inode_unlock_shared(inode);
file_accessed(iocb->ki_filp);
return ret;
}
static ssize_t ext2_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
ssize_t ret;
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out_unlock;
ret = file_remove_privs(file);
if (ret)
goto out_unlock;
ret = file_update_time(file);
if (ret)
goto out_unlock;
ret = dax_iomap_rw(iocb, from, &ext2_iomap_ops);
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
i_size_write(inode, iocb->ki_pos);
mark_inode_dirty(inode);
}
out_unlock:
inode_unlock(inode);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
return ret;
}
/*
* The lock ordering for ext2 DAX fault paths is:
*
* mmap_lock (MM)
* sb_start_pagefault (vfs, freeze)
* address_space->invalidate_lock
* address_space->i_mmap_rwsem or page_lock (mutually exclusive in DAX)
* ext2_inode_info->truncate_mutex
*
* The default page_lock and i_size verification done by non-DAX fault paths
* is sufficient because ext2 doesn't support hole punching.
*/
static vm_fault_t ext2_dax_fault(struct vm_fault *vmf)
{
struct inode *inode = file_inode(vmf->vma->vm_file);
vm_fault_t ret;
bool write = (vmf->flags & FAULT_FLAG_WRITE) &&
(vmf->vma->vm_flags & VM_SHARED);
if (write) {
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
}
filemap_invalidate_lock_shared(inode->i_mapping);
ret = dax_iomap_fault(vmf, PE_SIZE_PTE, NULL, NULL, &ext2_iomap_ops);
filemap_invalidate_unlock_shared(inode->i_mapping);
if (write)
sb_end_pagefault(inode->i_sb);
return ret;
}
static const struct vm_operations_struct ext2_dax_vm_ops = {
.fault = ext2_dax_fault,
/*
* .huge_fault is not supported for DAX because allocation in ext2
* cannot be reliably aligned to huge page sizes and so pmd faults
* will always fail and fail back to regular faults.
*/
.page_mkwrite = ext2_dax_fault,
.pfn_mkwrite = ext2_dax_fault,
};
static int ext2_file_mmap(struct file *file, struct vm_area_struct *vma)
{
if (!IS_DAX(file_inode(file)))
return generic_file_mmap(file, vma);
file_accessed(file);
vma->vm_ops = &ext2_dax_vm_ops;
return 0;
}
#else
#define ext2_file_mmap generic_file_mmap
#endif
/*
* Called when filp is released. This happens when all file descriptors
* for a single struct file are closed. Note that different open() calls
* for the same file yield different struct file structures.
*/
static int ext2_release_file (struct inode * inode, struct file * filp)
{
if (filp->f_mode & FMODE_WRITE) {
mutex_lock(&EXT2_I(inode)->truncate_mutex);
ext2_discard_reservation(inode);
mutex_unlock(&EXT2_I(inode)->truncate_mutex);
}
return 0;
}
int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
struct super_block *sb = file->f_mapping->host->i_sb;
ret = generic_buffers_fsync(file, start, end, datasync);
if (ret == -EIO)
/* We don't really know where the IO error happened... */
ext2_error(sb, __func__,
"detected IO error when writing metadata buffers");
return ret;
}
static ssize_t ext2_dio_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
ssize_t ret;
trace_ext2_dio_read_begin(iocb, to, 0);
inode_lock_shared(inode);
ret = iomap_dio_rw(iocb, to, &ext2_iomap_ops, NULL, 0, NULL, 0);
inode_unlock_shared(inode);
trace_ext2_dio_read_end(iocb, to, ret);
return ret;
}
static int ext2_dio_write_end_io(struct kiocb *iocb, ssize_t size,
int error, unsigned int flags)
{
loff_t pos = iocb->ki_pos;
struct inode *inode = file_inode(iocb->ki_filp);
if (error)
goto out;
/*
* If we are extending the file, we have to update i_size here before
* page cache gets invalidated in iomap_dio_rw(). This prevents racing
* buffered reads from zeroing out too much from page cache pages.
* Note that all extending writes always happens synchronously with
* inode lock held by ext2_dio_write_iter(). So it is safe to update
* inode size here for extending file writes.
*/
pos += size;
if (pos > i_size_read(inode)) {
i_size_write(inode, pos);
mark_inode_dirty(inode);
}
out:
trace_ext2_dio_write_endio(iocb, size, error);
return error;
}
static const struct iomap_dio_ops ext2_dio_write_ops = {
.end_io = ext2_dio_write_end_io,
};
static ssize_t ext2_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
ssize_t ret;
unsigned int flags = 0;
unsigned long blocksize = inode->i_sb->s_blocksize;
loff_t offset = iocb->ki_pos;
loff_t count = iov_iter_count(from);
ssize_t status = 0;
trace_ext2_dio_write_begin(iocb, from, 0);
inode_lock(inode);
ret = generic_write_checks(iocb, from);
if (ret <= 0)
goto out_unlock;
ret = kiocb_modified(iocb);
if (ret)
goto out_unlock;
/* use IOMAP_DIO_FORCE_WAIT for unaligned or extending writes */
if (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode) ||
(!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(from), blocksize)))
flags |= IOMAP_DIO_FORCE_WAIT;
ret = iomap_dio_rw(iocb, from, &ext2_iomap_ops, &ext2_dio_write_ops,
flags, NULL, 0);
/* ENOTBLK is magic return value for fallback to buffered-io */
if (ret == -ENOTBLK)
ret = 0;
if (ret < 0 && ret != -EIOCBQUEUED)
ext2_write_failed(inode->i_mapping, offset + count);
/* handle case for partial write and for fallback to buffered write */
if (ret >= 0 && iov_iter_count(from)) {
loff_t pos, endbyte;
int ret2;
iocb->ki_flags &= ~IOCB_DIRECT;
pos = iocb->ki_pos;
status = generic_perform_write(iocb, from);
if (unlikely(status < 0)) {
ret = status;
goto out_unlock;
}
iocb->ki_pos += status;
ret += status;
endbyte = pos + status - 1;
ret2 = filemap_write_and_wait_range(inode->i_mapping, pos,
endbyte);
if (!ret2)
invalidate_mapping_pages(inode->i_mapping,
pos >> PAGE_SHIFT,
endbyte >> PAGE_SHIFT);
if (ret > 0)
generic_write_sync(iocb, ret);
}
out_unlock:
inode_unlock(inode);
if (status)
trace_ext2_dio_write_buff_end(iocb, from, status);
trace_ext2_dio_write_end(iocb, from, ret);
return ret;
}
static ssize_t ext2_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
#ifdef CONFIG_FS_DAX
if (IS_DAX(iocb->ki_filp->f_mapping->host))
return ext2_dax_read_iter(iocb, to);
#endif
if (iocb->ki_flags & IOCB_DIRECT)
return ext2_dio_read_iter(iocb, to);
return generic_file_read_iter(iocb, to);
}
static ssize_t ext2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
#ifdef CONFIG_FS_DAX
if (IS_DAX(iocb->ki_filp->f_mapping->host))
return ext2_dax_write_iter(iocb, from);
#endif
if (iocb->ki_flags & IOCB_DIRECT)
return ext2_dio_write_iter(iocb, from);
return generic_file_write_iter(iocb, from);
}
const struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
.read_iter = ext2_file_read_iter,
.write_iter = ext2_file_write_iter,
.unlocked_ioctl = ext2_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext2_compat_ioctl,
#endif
.mmap = ext2_file_mmap,
.open = dquot_file_open,
.release = ext2_release_file,
.fsync = ext2_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
};
const struct inode_operations ext2_file_inode_operations = {
.listxattr = ext2_listxattr,
.getattr = ext2_getattr,
.setattr = ext2_setattr,
.get_inode_acl = ext2_get_acl,
.set_acl = ext2_set_acl,
.fiemap = ext2_fiemap,
.fileattr_get = ext2_fileattr_get,
.fileattr_set = ext2_fileattr_set,
};