linux-next/fs/hfsplus/wrapper.c
Linus Torvalds 70e7730c2a vfs-6.13.misc
-----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCZzcToAAKCRCRxhvAZXjc
 osL9AP948FFumJRC28gDJ4xp+X4eohNOfkgoEG8FTbF2zU6ulwD+O0pr26FqpFli
 pqlG+38UdATImpfqqWjPbb72sBYcfQg=
 =wLUh
 -----END PGP SIGNATURE-----

Merge tag 'vfs-6.13.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull misc vfs updates from Christian Brauner:
 "Features:

   - Fixup and improve NLM and kNFSD file lock callbacks

     Last year both GFS2 and OCFS2 had some work done to make their
     locking more robust when exported over NFS. Unfortunately, part of
     that work caused both NLM (for NFS v3 exports) and kNFSD (for
     NFSv4.1+ exports) to no longer send lock notifications to clients

     This in itself is not a huge problem because most NFS clients will
     still poll the server in order to acquire a conflicted lock

     It's important for NLM and kNFSD that they do not block their
     kernel threads inside filesystem's file_lock implementations
     because that can produce deadlocks. We used to make sure of this by
     only trusting that posix_lock_file() can correctly handle blocking
     lock calls asynchronously, so the lock managers would only setup
     their file_lock requests for async callbacks if the filesystem did
     not define its own lock() file operation

     However, when GFS2 and OCFS2 grew the capability to correctly
     handle blocking lock requests asynchronously, they started
     signalling this behavior with EXPORT_OP_ASYNC_LOCK, and the check
     for also trusting posix_lock_file() was inadvertently dropped, so
     now most filesystems no longer produce lock notifications when
     exported over NFS

     Fix this by using an fop_flag which greatly simplifies the problem
     and grooms the way for future uses by both filesystems and lock
     managers alike

   - Add a sysctl to delete the dentry when a file is removed instead of
     making it a negative dentry

     Commit 681ce86235 ("vfs: Delete the associated dentry when
     deleting a file") introduced an unconditional deletion of the
     associated dentry when a file is removed. However, this led to
     performance regressions in specific benchmarks, such as
     ilebench.sum_operations/s, prompting a revert in commit
     4a4be1ad3a ("Revert "vfs: Delete the associated dentry when
     deleting a file""). This reintroduces the concept conditionally
     through a sysctl

   - Expand the statmount() system call:

       * Report the filesystem subtype in a new fs_subtype field to
         e.g., report fuse filesystem subtypes

       * Report the superblock source in a new sb_source field

       * Add a new way to return filesystem specific mount options in an
         option array that returns filesystem specific mount options
         separated by zero bytes and unescaped. This allows caller's to
         retrieve filesystem specific mount options and immediately pass
         them to e.g., fsconfig() without having to unescape or split
         them

       * Report security (LSM) specific mount options in a separate
         security option array. We don't lump them together with
         filesystem specific mount options as security mount options are
         generic and most users aren't interested in them

         The format is the same as for the filesystem specific mount
         option array

   - Support relative paths in fsconfig()'s FSCONFIG_SET_STRING command

   - Optimize acl_permission_check() to avoid costly {g,u}id ownership
     checks if possible

   - Use smp_mb__after_spinlock() to avoid full smp_mb() in evict()

   - Add synchronous wakeup support for ep_poll_callback.

     Currently, epoll only uses wake_up() to wake up task. But sometimes
     there are epoll users which want to use the synchronous wakeup flag
     to give a hint to the scheduler, e.g., the Android binder driver.
     So add a wake_up_sync() define, and use wake_up_sync() when sync is
     true in ep_poll_callback()

  Fixes:

   - Fix kernel documentation for inode_insert5() and iget5_locked()

   - Annotate racy epoll check on file->f_ep

   - Make F_DUPFD_QUERY associative

   - Avoid filename buffer overrun in initramfs

   - Don't let statmount() return empty strings

   - Add a cond_resched() to dump_user_range() to avoid hogging the CPU

   - Don't query the device logical blocksize multiple times for hfsplus

   - Make filemap_read() check that the offset is positive or zero

  Cleanups:

   - Various typo fixes

   - Cleanup wbc_attach_fdatawrite_inode()

   - Add __releases annotation to wbc_attach_and_unlock_inode()

   - Add hugetlbfs tracepoints

   - Fix various vfs kernel doc parameters

   - Remove obsolete TODO comment from io_cancel()

   - Convert wbc_account_cgroup_owner() to take a folio

   - Fix comments for BANDWITH_INTERVAL and wb_domain_writeout_add()

   - Reorder struct posix_acl to save 8 bytes

   - Annotate struct posix_acl with __counted_by()

   - Replace one-element array with flexible array member in freevxfs

   - Use idiomatic atomic64_inc_return() in alloc_mnt_ns()"

* tag 'vfs-6.13.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (35 commits)
  statmount: retrieve security mount options
  vfs: make evict() use smp_mb__after_spinlock instead of smp_mb
  statmount: add flag to retrieve unescaped options
  fs: add the ability for statmount() to report the sb_source
  writeback: wbc_attach_fdatawrite_inode out of line
  writeback: add a __releases annoation to wbc_attach_and_unlock_inode
  fs: add the ability for statmount() to report the fs_subtype
  fs: don't let statmount return empty strings
  fs:aio: Remove TODO comment suggesting hash or array usage in io_cancel()
  hfsplus: don't query the device logical block size multiple times
  freevxfs: Replace one-element array with flexible array member
  fs: optimize acl_permission_check()
  initramfs: avoid filename buffer overrun
  fs/writeback: convert wbc_account_cgroup_owner to take a folio
  acl: Annotate struct posix_acl with __counted_by()
  acl: Realign struct posix_acl to save 8 bytes
  epoll: Add synchronous wakeup support for ep_poll_callback
  coredump: add cond_resched() to dump_user_range
  mm/page-writeback.c: Fix comment of wb_domain_writeout_add()
  mm/page-writeback.c: Update comment for BANDWIDTH_INTERVAL
  ...
2024-11-18 09:35:30 -08:00

269 lines
7.0 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* linux/fs/hfsplus/wrapper.c
*
* Copyright (C) 2001
* Brad Boyer (flar@allandria.com)
* (C) 2003 Ardis Technologies <roman@ardistech.com>
*
* Handling of HFS wrappers around HFS+ volumes
*/
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/cdrom.h>
#include <linux/unaligned.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
struct hfsplus_wd {
u32 ablk_size;
u16 ablk_start;
u16 embed_start;
u16 embed_count;
};
/**
* hfsplus_submit_bio - Perform block I/O
* @sb: super block of volume for I/O
* @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
* @buf: buffer for I/O
* @data: output pointer for location of requested data
* @opf: I/O operation type and flags
*
* The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
* HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
* @data will return a pointer to the start of the requested sector,
* which may not be the same location as @buf.
*
* If @sector is not aligned to the bdev logical block size it will
* be rounded down. For writes this means that @buf should contain data
* that starts at the rounded-down address. As long as the data was
* read using hfsplus_submit_bio() and the same buffer is used things
* will work correctly.
*
* Returns: %0 on success else -errno code
*/
int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
void *buf, void **data, blk_opf_t opf)
{
const enum req_op op = opf & REQ_OP_MASK;
struct bio *bio;
int ret = 0;
u64 io_size;
loff_t start;
int offset;
/*
* Align sector to hardware sector size and find offset. We
* assume that io_size is a power of two, which _should_
* be true.
*/
io_size = hfsplus_min_io_size(sb);
start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
offset = start & (io_size - 1);
sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
bio = bio_alloc(sb->s_bdev, 1, opf, GFP_NOIO);
bio->bi_iter.bi_sector = sector;
if (op != REQ_OP_WRITE && data)
*data = (u8 *)buf + offset;
while (io_size > 0) {
unsigned int page_offset = offset_in_page(buf);
unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
io_size);
ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
if (ret != len) {
ret = -EIO;
goto out;
}
io_size -= len;
buf = (u8 *)buf + len;
}
ret = submit_bio_wait(bio);
out:
bio_put(bio);
return ret < 0 ? ret : 0;
}
static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
{
u32 extent;
u16 attrib;
__be16 sig;
sig = *(__be16 *)(bufptr + HFSP_WRAPOFF_EMBEDSIG);
if (sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIG) &&
sig != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX))
return 0;
attrib = be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ATTRIB));
if (!(attrib & HFSP_WRAP_ATTRIB_SLOCK) ||
!(attrib & HFSP_WRAP_ATTRIB_SPARED))
return 0;
wd->ablk_size =
be32_to_cpu(*(__be32 *)(bufptr + HFSP_WRAPOFF_ABLKSIZE));
if (wd->ablk_size < HFSPLUS_SECTOR_SIZE)
return 0;
if (wd->ablk_size % HFSPLUS_SECTOR_SIZE)
return 0;
wd->ablk_start =
be16_to_cpu(*(__be16 *)(bufptr + HFSP_WRAPOFF_ABLKSTART));
extent = get_unaligned_be32(bufptr + HFSP_WRAPOFF_EMBEDEXT);
wd->embed_start = (extent >> 16) & 0xFFFF;
wd->embed_count = extent & 0xFFFF;
return 1;
}
static int hfsplus_get_last_session(struct super_block *sb,
sector_t *start, sector_t *size)
{
struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk);
/* default values */
*start = 0;
*size = bdev_nr_sectors(sb->s_bdev);
if (HFSPLUS_SB(sb)->session >= 0) {
struct cdrom_tocentry te;
if (!cdi)
return -EINVAL;
te.cdte_track = HFSPLUS_SB(sb)->session;
te.cdte_format = CDROM_LBA;
if (cdrom_read_tocentry(cdi, &te) ||
(te.cdte_ctrl & CDROM_DATA_TRACK) != 4) {
pr_err("invalid session number or type of track\n");
return -EINVAL;
}
*start = (sector_t)te.cdte_addr.lba << 2;
} else if (cdi) {
struct cdrom_multisession ms_info;
ms_info.addr_format = CDROM_LBA;
if (cdrom_multisession(cdi, &ms_info) == 0 && ms_info.xa_flag)
*start = (sector_t)ms_info.addr.lba << 2;
}
return 0;
}
/* Find the volume header and fill in some minimum bits in superblock */
/* Takes in super block, returns true if good data read */
int hfsplus_read_wrapper(struct super_block *sb)
{
struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
struct hfsplus_wd wd;
sector_t part_start, part_size;
u32 blocksize;
int error = 0;
error = -EINVAL;
blocksize = sb_min_blocksize(sb, HFSPLUS_SECTOR_SIZE);
if (!blocksize)
goto out;
sbi->min_io_size = blocksize;
if (hfsplus_get_last_session(sb, &part_start, &part_size))
goto out;
error = -ENOMEM;
sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
if (!sbi->s_vhdr_buf)
goto out;
sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
if (!sbi->s_backup_vhdr_buf)
goto out_free_vhdr;
reread:
error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
REQ_OP_READ);
if (error)
goto out_free_backup_vhdr;
error = -EINVAL;
switch (sbi->s_vhdr->signature) {
case cpu_to_be16(HFSPLUS_VOLHEAD_SIGX):
set_bit(HFSPLUS_SB_HFSX, &sbi->flags);
fallthrough;
case cpu_to_be16(HFSPLUS_VOLHEAD_SIG):
break;
case cpu_to_be16(HFSP_WRAP_MAGIC):
if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
goto out_free_backup_vhdr;
wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
part_start += (sector_t)wd.ablk_start +
(sector_t)wd.embed_start * wd.ablk_size;
part_size = (sector_t)wd.embed_count * wd.ablk_size;
goto reread;
default:
/*
* Check for a partition block.
*
* (should do this only for cdrom/loop though)
*/
if (hfs_part_find(sb, &part_start, &part_size))
goto out_free_backup_vhdr;
goto reread;
}
error = hfsplus_submit_bio(sb, part_start + part_size - 2,
sbi->s_backup_vhdr_buf,
(void **)&sbi->s_backup_vhdr, REQ_OP_READ);
if (error)
goto out_free_backup_vhdr;
error = -EINVAL;
if (sbi->s_backup_vhdr->signature != sbi->s_vhdr->signature) {
pr_warn("invalid secondary volume header\n");
goto out_free_backup_vhdr;
}
blocksize = be32_to_cpu(sbi->s_vhdr->blocksize);
/*
* Block size must be at least as large as a sector and a multiple of 2.
*/
if (blocksize < HFSPLUS_SECTOR_SIZE || ((blocksize - 1) & blocksize))
goto out_free_backup_vhdr;
sbi->alloc_blksz = blocksize;
sbi->alloc_blksz_shift = ilog2(blocksize);
blocksize = min_t(u32, sbi->alloc_blksz, PAGE_SIZE);
/*
* Align block size to block offset.
*/
while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1))
blocksize >>= 1;
if (sb_set_blocksize(sb, blocksize) != blocksize) {
pr_err("unable to set blocksize to %u!\n", blocksize);
goto out_free_backup_vhdr;
}
sbi->blockoffset =
part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT);
sbi->part_start = part_start;
sbi->sect_count = part_size;
sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits;
return 0;
out_free_backup_vhdr:
kfree(sbi->s_backup_vhdr_buf);
out_free_vhdr:
kfree(sbi->s_vhdr_buf);
out:
return error;
}