mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-28 00:32:00 +00:00
Merge branch 'for_next' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs.git
This commit is contained in:
commit
8dad5129f0
@ -1257,7 +1257,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
|
||||
}
|
||||
reloc_func_desc = interp_load_addr;
|
||||
|
||||
allow_write_access(interpreter);
|
||||
exe_file_allow_write_access(interpreter);
|
||||
fput(interpreter);
|
||||
|
||||
kfree(interp_elf_ex);
|
||||
@ -1354,7 +1354,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
|
||||
kfree(interp_elf_ex);
|
||||
kfree(interp_elf_phdata);
|
||||
out_free_file:
|
||||
allow_write_access(interpreter);
|
||||
exe_file_allow_write_access(interpreter);
|
||||
if (interpreter)
|
||||
fput(interpreter);
|
||||
out_free_ph:
|
||||
|
@ -394,7 +394,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
|
||||
goto error;
|
||||
}
|
||||
|
||||
allow_write_access(interpreter);
|
||||
exe_file_allow_write_access(interpreter);
|
||||
fput(interpreter);
|
||||
interpreter = NULL;
|
||||
}
|
||||
@ -467,7 +467,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm)
|
||||
|
||||
error:
|
||||
if (interpreter) {
|
||||
allow_write_access(interpreter);
|
||||
exe_file_allow_write_access(interpreter);
|
||||
fput(interpreter);
|
||||
}
|
||||
kfree(interpreter_name);
|
||||
|
@ -2544,6 +2544,15 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't allow defrag on pre-content watched files, as it could
|
||||
* populate the page cache with 0's via readahead.
|
||||
*/
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (argp) {
|
||||
if (copy_from_user(&range, argp, sizeof(range))) {
|
||||
ret = -EFAULT;
|
||||
|
@ -961,7 +961,7 @@ static int btrfs_fill_super(struct super_block *sb,
|
||||
#endif
|
||||
sb->s_xattr = btrfs_xattr_handlers;
|
||||
sb->s_time_gran = 1;
|
||||
sb->s_iflags |= SB_I_CGROUPWB;
|
||||
sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
|
||||
|
||||
err = super_setup_bdi(sb);
|
||||
if (err) {
|
||||
|
@ -912,7 +912,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
|
||||
path_noexec(&file->f_path))
|
||||
return ERR_PTR(-EACCES);
|
||||
|
||||
err = deny_write_access(file);
|
||||
err = exe_file_deny_write_access(file);
|
||||
if (err)
|
||||
return ERR_PTR(err);
|
||||
|
||||
@ -927,7 +927,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
|
||||
* Returns ERR_PTR on failure or allocated struct file on success.
|
||||
*
|
||||
* As this is a wrapper for the internal do_open_execat(), callers
|
||||
* must call allow_write_access() before fput() on release. Also see
|
||||
* must call exe_file_allow_write_access() before fput() on release. Also see
|
||||
* do_close_execat().
|
||||
*/
|
||||
struct file *open_exec(const char *name)
|
||||
@ -1471,7 +1471,7 @@ static void do_close_execat(struct file *file)
|
||||
{
|
||||
if (!file)
|
||||
return;
|
||||
allow_write_access(file);
|
||||
exe_file_allow_write_access(file);
|
||||
fput(file);
|
||||
}
|
||||
|
||||
@ -1797,7 +1797,7 @@ static int exec_binprm(struct linux_binprm *bprm)
|
||||
bprm->file = bprm->interpreter;
|
||||
bprm->interpreter = NULL;
|
||||
|
||||
allow_write_access(exec);
|
||||
exe_file_allow_write_access(exec);
|
||||
if (unlikely(bprm->have_execfd)) {
|
||||
if (bprm->executable) {
|
||||
fput(exec);
|
||||
|
@ -756,6 +756,9 @@ static vm_fault_t ext4_dax_huge_fault(struct vm_fault *vmf, unsigned int order)
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
} else {
|
||||
result = filemap_fsnotify_fault(vmf);
|
||||
if (unlikely(result))
|
||||
return result;
|
||||
filemap_invalidate_lock_shared(mapping);
|
||||
}
|
||||
result = dax_iomap_fault(vmf, order, &pfn, &error, &ext4_iomap_ops);
|
||||
|
@ -5312,6 +5312,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
|
||||
/* i_version is always enabled now */
|
||||
sb->s_flags |= SB_I_VERSION;
|
||||
|
||||
/* HSM events are allowed by default. */
|
||||
sb->s_iflags |= SB_I_ALLOW_HSM;
|
||||
|
||||
err = ext4_check_feature_compatibility(sb, es, silent);
|
||||
if (err)
|
||||
goto failed_mount;
|
||||
|
@ -1158,10 +1158,10 @@ static int __init fcntl_init(void)
|
||||
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
|
||||
* is defined as O_NONBLOCK on some platforms and not on others.
|
||||
*/
|
||||
BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
|
||||
BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ !=
|
||||
HWEIGHT32(
|
||||
(VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
|
||||
__FMODE_EXEC | __FMODE_NONOTIFY));
|
||||
__FMODE_EXEC));
|
||||
|
||||
fasync_cache = kmem_cache_create("fasync_cache",
|
||||
sizeof(struct fasync_struct), 0,
|
||||
|
@ -301,7 +301,6 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
|
||||
*/
|
||||
static int zisofs_read_folio(struct file *file, struct folio *folio)
|
||||
{
|
||||
struct page *page = &folio->page;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
int err;
|
||||
@ -311,16 +310,15 @@ static int zisofs_read_folio(struct file *file, struct folio *folio)
|
||||
PAGE_SHIFT <= zisofs_block_shift ?
|
||||
(1 << (zisofs_block_shift - PAGE_SHIFT)) : 0;
|
||||
struct page **pages;
|
||||
pgoff_t index = page->index, end_index;
|
||||
pgoff_t index = folio->index, end_index;
|
||||
|
||||
end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
/*
|
||||
* If this page is wholly outside i_size we just return zero;
|
||||
* If this folio is wholly outside i_size we just return zero;
|
||||
* do_generic_file_read() will handle this for us
|
||||
*/
|
||||
if (index >= end_index) {
|
||||
SetPageUptodate(page);
|
||||
unlock_page(page);
|
||||
folio_end_read(folio, true);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -338,10 +336,10 @@ static int zisofs_read_folio(struct file *file, struct folio *folio)
|
||||
pages = kcalloc(max_t(unsigned int, zisofs_pages_per_cblock, 1),
|
||||
sizeof(*pages), GFP_KERNEL);
|
||||
if (!pages) {
|
||||
unlock_page(page);
|
||||
folio_unlock(folio);
|
||||
return -ENOMEM;
|
||||
}
|
||||
pages[full_page] = page;
|
||||
pages[full_page] = &folio->page;
|
||||
|
||||
for (i = 0; i < pcount; i++, index++) {
|
||||
if (i != full_page)
|
||||
|
@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
|
||||
struct fanotify_perm_event *event,
|
||||
struct fsnotify_iter_info *iter_info)
|
||||
{
|
||||
int ret;
|
||||
int ret, errno;
|
||||
|
||||
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
|
||||
|
||||
@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group,
|
||||
ret = 0;
|
||||
break;
|
||||
case FAN_DENY:
|
||||
/* Check custom errno from pre-content events */
|
||||
errno = fanotify_get_response_errno(event->response);
|
||||
if (errno) {
|
||||
ret = -errno;
|
||||
break;
|
||||
}
|
||||
fallthrough;
|
||||
default:
|
||||
ret = -EPERM;
|
||||
}
|
||||
|
||||
/* Check if the response should be audited */
|
||||
if (event->response & FAN_AUDIT)
|
||||
audit_fanotify(event->response & ~FAN_AUDIT,
|
||||
&event->audit_rule);
|
||||
if (event->response & FAN_AUDIT) {
|
||||
u32 response = event->response &
|
||||
(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS);
|
||||
audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule);
|
||||
}
|
||||
|
||||
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
|
||||
group, event, ret);
|
||||
@ -548,9 +557,13 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path,
|
||||
return &pevent->fae;
|
||||
}
|
||||
|
||||
static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
|
||||
static struct fanotify_event *fanotify_alloc_perm_event(const void *data,
|
||||
int data_type,
|
||||
gfp_t gfp)
|
||||
{
|
||||
const struct path *path = fsnotify_data_path(data, data_type);
|
||||
const struct file_range *range =
|
||||
fsnotify_data_file_range(data, data_type);
|
||||
struct fanotify_perm_event *pevent;
|
||||
|
||||
pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp);
|
||||
@ -564,6 +577,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path,
|
||||
pevent->hdr.len = 0;
|
||||
pevent->state = FAN_EVENT_INIT;
|
||||
pevent->path = *path;
|
||||
/* NULL ppos means no range info */
|
||||
pevent->ppos = range ? &range->pos : NULL;
|
||||
pevent->count = range ? range->count : 0;
|
||||
path_get(path);
|
||||
|
||||
return &pevent->fae;
|
||||
@ -801,7 +817,7 @@ static struct fanotify_event *fanotify_alloc_event(
|
||||
old_memcg = set_active_memcg(group->memcg);
|
||||
|
||||
if (fanotify_is_perm_event(mask)) {
|
||||
event = fanotify_alloc_perm_event(path, gfp);
|
||||
event = fanotify_alloc_perm_event(data, data_type, gfp);
|
||||
} else if (fanotify_is_error_event(mask)) {
|
||||
event = fanotify_alloc_error_event(group, fsid, data,
|
||||
data_type, &hash);
|
||||
@ -909,8 +925,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
|
||||
BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
|
||||
BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR);
|
||||
BUILD_BUG_ON(FAN_RENAME != FS_RENAME);
|
||||
BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS);
|
||||
|
||||
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21);
|
||||
BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22);
|
||||
|
||||
mask = fanotify_group_event_mask(group, iter_info, &match_mask,
|
||||
mask, data, data_type, dir);
|
||||
|
@ -425,6 +425,8 @@ FANOTIFY_PE(struct fanotify_event *event)
|
||||
struct fanotify_perm_event {
|
||||
struct fanotify_event fae;
|
||||
struct path path;
|
||||
const loff_t *ppos; /* optional file range info */
|
||||
size_t count;
|
||||
u32 response; /* userspace answer to the event */
|
||||
unsigned short state; /* state of the event */
|
||||
int fd; /* fd we passed to userspace for this event */
|
||||
@ -446,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask)
|
||||
mask & FANOTIFY_PERM_EVENTS;
|
||||
}
|
||||
|
||||
static inline bool fanotify_event_has_access_range(struct fanotify_event *event)
|
||||
{
|
||||
if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS))
|
||||
return false;
|
||||
|
||||
return FANOTIFY_PERM(event)->ppos;
|
||||
}
|
||||
|
||||
static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
|
||||
{
|
||||
return container_of(fse, struct fanotify_event, fse);
|
||||
@ -518,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
|
||||
|
||||
return mflags;
|
||||
}
|
||||
|
||||
static inline u32 fanotify_get_response_errno(int res)
|
||||
{
|
||||
return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK;
|
||||
}
|
||||
|
@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void)
|
||||
*
|
||||
* Internal and external open flags are stored together in field f_flags of
|
||||
* struct file. Only external open flags shall be allowed in event_f_flags.
|
||||
* Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be
|
||||
* excluded.
|
||||
* Internal flags like FMODE_EXEC shall be excluded.
|
||||
*/
|
||||
#define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \
|
||||
O_ACCMODE | O_APPEND | O_NONBLOCK | \
|
||||
@ -118,10 +117,12 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init;
|
||||
#define FANOTIFY_EVENT_ALIGN 4
|
||||
#define FANOTIFY_FID_INFO_HDR_LEN \
|
||||
(sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
|
||||
#define FANOTIFY_PIDFD_INFO_HDR_LEN \
|
||||
#define FANOTIFY_PIDFD_INFO_LEN \
|
||||
sizeof(struct fanotify_event_info_pidfd)
|
||||
#define FANOTIFY_ERROR_INFO_LEN \
|
||||
(sizeof(struct fanotify_event_info_error))
|
||||
#define FANOTIFY_RANGE_INFO_LEN \
|
||||
(sizeof(struct fanotify_event_info_range))
|
||||
|
||||
static int fanotify_fid_info_len(int fh_len, int name_len)
|
||||
{
|
||||
@ -159,9 +160,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
|
||||
int fh_len;
|
||||
int dot_len = 0;
|
||||
|
||||
if (!info_mode)
|
||||
return event_len;
|
||||
|
||||
if (fanotify_is_error_event(event->mask))
|
||||
event_len += FANOTIFY_ERROR_INFO_LEN;
|
||||
|
||||
@ -176,14 +174,17 @@ static size_t fanotify_event_len(unsigned int info_mode,
|
||||
dot_len = 1;
|
||||
}
|
||||
|
||||
if (info_mode & FAN_REPORT_PIDFD)
|
||||
event_len += FANOTIFY_PIDFD_INFO_HDR_LEN;
|
||||
|
||||
if (fanotify_event_has_object_fh(event)) {
|
||||
fh_len = fanotify_event_object_fh_len(event);
|
||||
event_len += fanotify_fid_info_len(fh_len, dot_len);
|
||||
}
|
||||
|
||||
if (info_mode & FAN_REPORT_PIDFD)
|
||||
event_len += FANOTIFY_PIDFD_INFO_LEN;
|
||||
|
||||
if (fanotify_event_has_access_range(event))
|
||||
event_len += FANOTIFY_RANGE_INFO_LEN;
|
||||
|
||||
return event_len;
|
||||
}
|
||||
|
||||
@ -258,12 +259,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path,
|
||||
return client_fd;
|
||||
|
||||
/*
|
||||
* we need a new file handle for the userspace program so it can read even if it was
|
||||
* originally opened O_WRONLY.
|
||||
* We provide an fd for the userspace program, so it could access the
|
||||
* file without generating fanotify events itself.
|
||||
*/
|
||||
new_file = dentry_open(path,
|
||||
group->fanotify_data.f_flags | __FMODE_NONOTIFY,
|
||||
current_cred());
|
||||
new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags,
|
||||
current_cred());
|
||||
if (IS_ERR(new_file)) {
|
||||
put_unused_fd(client_fd);
|
||||
client_fd = PTR_ERR(new_file);
|
||||
@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group,
|
||||
struct fanotify_perm_event *event;
|
||||
int fd = response_struct->fd;
|
||||
u32 response = response_struct->response;
|
||||
int errno = fanotify_get_response_errno(response);
|
||||
int ret = info_len;
|
||||
struct fanotify_response_info_audit_rule friar;
|
||||
|
||||
pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__,
|
||||
group, fd, response, info, info_len);
|
||||
pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n",
|
||||
__func__, group, fd, response, errno, info, info_len);
|
||||
/*
|
||||
* make sure the response is valid, if invalid we do nothing and either
|
||||
* userspace can send a valid response or we will clean it up after the
|
||||
@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group,
|
||||
|
||||
switch (response & FANOTIFY_RESPONSE_ACCESS) {
|
||||
case FAN_ALLOW:
|
||||
if (errno)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case FAN_DENY:
|
||||
/* Custom errno is supported only for pre-content groups */
|
||||
if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Limit errno to values expected on open(2)/read(2)/write(2)
|
||||
* of regular files.
|
||||
*/
|
||||
switch (errno) {
|
||||
case 0:
|
||||
case EIO:
|
||||
case EPERM:
|
||||
case EBUSY:
|
||||
case ETXTBSY:
|
||||
case EAGAIN:
|
||||
case ENOSPC:
|
||||
case EDQUOT:
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
@ -506,7 +531,7 @@ static int copy_pidfd_info_to_user(int pidfd,
|
||||
size_t count)
|
||||
{
|
||||
struct fanotify_event_info_pidfd info = { };
|
||||
size_t info_len = FANOTIFY_PIDFD_INFO_HDR_LEN;
|
||||
size_t info_len = FANOTIFY_PIDFD_INFO_LEN;
|
||||
|
||||
if (WARN_ON_ONCE(info_len > count))
|
||||
return -EFAULT;
|
||||
@ -521,6 +546,30 @@ static int copy_pidfd_info_to_user(int pidfd,
|
||||
return info_len;
|
||||
}
|
||||
|
||||
static size_t copy_range_info_to_user(struct fanotify_event *event,
|
||||
char __user *buf, int count)
|
||||
{
|
||||
struct fanotify_perm_event *pevent = FANOTIFY_PERM(event);
|
||||
struct fanotify_event_info_range info = { };
|
||||
size_t info_len = FANOTIFY_RANGE_INFO_LEN;
|
||||
|
||||
if (WARN_ON_ONCE(info_len > count))
|
||||
return -EFAULT;
|
||||
|
||||
if (WARN_ON_ONCE(!pevent->ppos))
|
||||
return -EINVAL;
|
||||
|
||||
info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE;
|
||||
info.hdr.len = info_len;
|
||||
info.offset = *(pevent->ppos);
|
||||
info.count = pevent->count;
|
||||
|
||||
if (copy_to_user(buf, &info, info_len))
|
||||
return -EFAULT;
|
||||
|
||||
return info_len;
|
||||
}
|
||||
|
||||
static int copy_info_records_to_user(struct fanotify_event *event,
|
||||
struct fanotify_info *info,
|
||||
unsigned int info_mode, int pidfd,
|
||||
@ -642,6 +691,15 @@ static int copy_info_records_to_user(struct fanotify_event *event,
|
||||
total_bytes += ret;
|
||||
}
|
||||
|
||||
if (fanotify_event_has_access_range(event)) {
|
||||
ret = copy_range_info_to_user(event, buf, count);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
buf += ret;
|
||||
count -= ret;
|
||||
total_bytes += ret;
|
||||
}
|
||||
|
||||
return total_bytes;
|
||||
}
|
||||
|
||||
@ -756,12 +814,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
|
||||
buf += FAN_EVENT_METADATA_LEN;
|
||||
count -= FAN_EVENT_METADATA_LEN;
|
||||
|
||||
if (info_mode) {
|
||||
ret = copy_info_records_to_user(event, info, info_mode, pidfd,
|
||||
buf, count);
|
||||
if (ret < 0)
|
||||
goto out_close_fd;
|
||||
}
|
||||
ret = copy_info_records_to_user(event, info, info_mode, pidfd,
|
||||
buf, count);
|
||||
if (ret < 0)
|
||||
goto out_close_fd;
|
||||
|
||||
if (f)
|
||||
fd_install(fd, f);
|
||||
@ -1294,7 +1350,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group)
|
||||
}
|
||||
|
||||
static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
|
||||
unsigned int fan_flags)
|
||||
__u32 mask, unsigned int fan_flags)
|
||||
{
|
||||
/*
|
||||
* Non evictable mark cannot be downgraded to evictable mark.
|
||||
@ -1321,6 +1377,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
|
||||
fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)
|
||||
return -EEXIST;
|
||||
|
||||
/* For now pre-content events are not generated for directories */
|
||||
mask |= fsn_mark->mask;
|
||||
if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
|
||||
return -EEXIST;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1347,7 +1408,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
|
||||
/*
|
||||
* Check if requested mark flags conflict with an existing mark flags.
|
||||
*/
|
||||
ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags);
|
||||
ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1409,6 +1470,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
|
||||
unsigned int fid_mode = flags & FANOTIFY_FID_BITS;
|
||||
unsigned int class = flags & FANOTIFY_CLASS_BITS;
|
||||
unsigned int internal_flags = 0;
|
||||
struct file *file;
|
||||
|
||||
pr_debug("%s: flags=%x event_f_flags=%x\n",
|
||||
__func__, flags, event_f_flags);
|
||||
@ -1477,7 +1539,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
|
||||
(!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID)))
|
||||
return -EINVAL;
|
||||
|
||||
f_flags = O_RDWR | __FMODE_NONOTIFY;
|
||||
f_flags = O_RDWR;
|
||||
if (flags & FAN_CLOEXEC)
|
||||
f_flags |= O_CLOEXEC;
|
||||
if (flags & FAN_NONBLOCK)
|
||||
@ -1555,10 +1617,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
|
||||
goto out_destroy_group;
|
||||
}
|
||||
|
||||
fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
|
||||
fd = get_unused_fd_flags(f_flags);
|
||||
if (fd < 0)
|
||||
goto out_destroy_group;
|
||||
|
||||
file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group,
|
||||
f_flags, FMODE_NONOTIFY);
|
||||
if (IS_ERR(file)) {
	/*
	 * Release the reserved descriptor before reusing 'fd' to carry the
	 * error code: put_unused_fd() must be given the descriptor number
	 * returned by get_unused_fd_flags(), not PTR_ERR(file).
	 */
	put_unused_fd(fd);
	fd = PTR_ERR(file);
	goto out_destroy_group;
}
|
||||
fd_install(fd, file);
|
||||
return fd;
|
||||
|
||||
out_destroy_group:
|
||||
@ -1638,11 +1708,23 @@ static int fanotify_events_supported(struct fsnotify_group *group,
|
||||
unsigned int flags)
|
||||
{
|
||||
unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
|
||||
bool is_dir = d_is_dir(path->dentry);
|
||||
/* Strict validation of events in non-dir inode mask with v5.17+ APIs */
|
||||
bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
|
||||
(mask & FAN_RENAME) ||
|
||||
(flags & FAN_MARK_IGNORE);
|
||||
|
||||
/*
|
||||
* Filesystems need to opt-into pre-content events (a.k.a HSM)
|
||||
* and they are only supported on regular files and directories.
|
||||
*/
|
||||
if (mask & FANOTIFY_PRE_CONTENT_EVENTS) {
|
||||
if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM))
|
||||
return -EOPNOTSUPP;
|
||||
if (!is_dir && !d_is_reg(path->dentry))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some filesystems such as 'proc' acquire unusual locks when opening
|
||||
* files. For them fanotify permission events have high chances of
|
||||
@ -1675,7 +1757,7 @@ static int fanotify_events_supported(struct fsnotify_group *group,
|
||||
* but because we always allowed it, error only when using new APIs.
|
||||
*/
|
||||
if (strict_dir_events && mark_type == FAN_MARK_INODE &&
|
||||
!d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
|
||||
!is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
|
||||
return -ENOTDIR;
|
||||
|
||||
return 0;
|
||||
@ -1776,10 +1858,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
|
||||
return -EPERM;
|
||||
|
||||
/*
|
||||
* Permission events require minimum priority FAN_CLASS_CONTENT.
|
||||
* Permission events are not allowed for FAN_CLASS_NOTIF.
|
||||
* Pre-content permission events are not allowed for FAN_CLASS_CONTENT.
|
||||
*/
|
||||
if (mask & FANOTIFY_PERM_EVENTS &&
|
||||
group->priority < FSNOTIFY_PRIO_CONTENT)
|
||||
group->priority == FSNOTIFY_PRIO_NORMAL)
|
||||
return -EINVAL;
|
||||
else if (mask & FANOTIFY_PRE_CONTENT_EVENTS &&
|
||||
group->priority == FSNOTIFY_PRIO_CONTENT)
|
||||
return -EINVAL;
|
||||
|
||||
if (mask & FAN_FS_ERROR &&
|
||||
@ -1814,6 +1900,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
|
||||
if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME))
|
||||
return -EINVAL;
|
||||
|
||||
/* Pre-content events are not currently generated for directories. */
|
||||
if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR)
|
||||
return -EINVAL;
|
||||
|
||||
if (mark_cmd == FAN_MARK_FLUSH) {
|
||||
if (mark_type == FAN_MARK_MOUNT)
|
||||
fsnotify_clear_vfsmount_marks_by_group(group);
|
||||
|
@ -193,7 +193,7 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask,
|
||||
return mask & marks_mask;
|
||||
}
|
||||
|
||||
/* Are there any inode/mount/sb objects that are interested in this event? */
|
||||
/* Are there any inode/mount/sb objects that watch for these events? */
|
||||
static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
|
||||
__u32 mask)
|
||||
{
|
||||
@ -203,6 +203,24 @@ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask,
|
||||
return mask & marks_mask & ALL_FSNOTIFY_EVENTS;
|
||||
}
|
||||
|
||||
/* Report pre-content event with optional range info */
|
||||
int fsnotify_pre_content(const struct path *path, const loff_t *ppos,
|
||||
size_t count)
|
||||
{
|
||||
struct file_range range;
|
||||
|
||||
/* Report page aligned range only when pos is known */
|
||||
if (!ppos)
|
||||
return fsnotify_path(path, FS_PRE_ACCESS);
|
||||
|
||||
range.path = path;
|
||||
range.pos = PAGE_ALIGN_DOWN(*ppos);
|
||||
range.count = PAGE_ALIGN(*ppos + count) - range.pos;
|
||||
|
||||
return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range,
|
||||
FSNOTIFY_EVENT_FILE_RANGE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Notify this dentry's parent about a child's events with child name info
|
||||
* if parent is watching or if inode/sb/mount are interested in events with
|
||||
@ -623,11 +641,72 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(fsnotify);
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
/*
|
||||
* At open time we check fsnotify_sb_has_priority_watchers() and set the
|
||||
* FMODE_NONOTIFY_ mode bits accordignly.
|
||||
* Later, fsnotify permission hooks do not check if there are permission event
|
||||
* watches, but that there were permission event watches at open time.
|
||||
*/
|
||||
void file_set_fsnotify_mode(struct file *file)
|
||||
{
|
||||
struct dentry *dentry = file->f_path.dentry, *parent;
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
__u32 mnt_mask, p_mask;
|
||||
|
||||
/* Is it a file opened by fanotify? */
|
||||
if (FMODE_FSNOTIFY_NONE(file->f_mode))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Permission events is a super set of pre-content events, so if there
|
||||
* are no permission event watchers, there are also no pre-content event
|
||||
* watchers and this is implied from the single FMODE_NONOTIFY_PERM bit.
|
||||
*/
|
||||
if (likely(!fsnotify_sb_has_priority_watchers(sb,
|
||||
FSNOTIFY_PRIO_CONTENT))) {
|
||||
file->f_mode |= FMODE_NONOTIFY_PERM;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there are permission event watchers but no pre-content event
|
||||
* watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that.
|
||||
*/
|
||||
if ((!d_is_dir(dentry) && !d_is_reg(dentry)) ||
|
||||
likely(!fsnotify_sb_has_priority_watchers(sb,
|
||||
FSNOTIFY_PRIO_PRE_CONTENT))) {
|
||||
file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* OK, there are some pre-content watchers. Check if anybody is
|
||||
* watching for pre-content events on *this* file.
|
||||
*/
|
||||
mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask);
|
||||
if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask,
|
||||
FSNOTIFY_PRE_CONTENT_EVENTS)))
|
||||
return;
|
||||
|
||||
/* Is parent watching for pre-content events on this file? */
|
||||
if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) {
|
||||
parent = dget_parent(dentry);
|
||||
p_mask = fsnotify_inode_watches_children(d_inode(parent));
|
||||
dput(parent);
|
||||
if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS)
|
||||
return;
|
||||
}
|
||||
/* Nobody watching for pre-content events from this file */
|
||||
file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __init int fsnotify_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23);
|
||||
BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24);
|
||||
|
||||
ret = init_srcu_struct(&fsnotify_mark_srcu);
|
||||
if (ret)
|
||||
|
@ -121,7 +121,7 @@ int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
|
||||
event->sync_cookie = cookie;
|
||||
event->name_len = len;
|
||||
if (len)
|
||||
strcpy(event->name, name->name);
|
||||
strscpy(event->name, name->name, event->name_len + 1);
|
||||
|
||||
ret = fsnotify_add_event(group, fsn_event, inotify_merge);
|
||||
if (ret) {
|
||||
|
62
fs/open.c
62
fs/open.c
@ -81,14 +81,18 @@ long vfs_truncate(const struct path *path, loff_t length)
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return -EINVAL;
|
||||
|
||||
error = mnt_want_write(path->mnt);
|
||||
if (error)
|
||||
goto out;
|
||||
|
||||
idmap = mnt_idmap(path->mnt);
|
||||
error = inode_permission(idmap, inode, MAY_WRITE);
|
||||
if (error)
|
||||
goto mnt_drop_write_and_out;
|
||||
return error;
|
||||
|
||||
error = fsnotify_truncate_perm(path, length);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = mnt_want_write(path->mnt);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = -EPERM;
|
||||
if (IS_APPEND(inode))
|
||||
@ -114,7 +118,7 @@ long vfs_truncate(const struct path *path, loff_t length)
|
||||
put_write_access(inode);
|
||||
mnt_drop_write_and_out:
|
||||
mnt_drop_write(path->mnt);
|
||||
out:
|
||||
|
||||
return error;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(vfs_truncate);
|
||||
@ -175,11 +179,18 @@ long do_ftruncate(struct file *file, loff_t length, int small)
|
||||
/* Check IS_APPEND on real upper inode */
|
||||
if (IS_APPEND(file_inode(file)))
|
||||
return -EPERM;
|
||||
sb_start_write(inode->i_sb);
|
||||
|
||||
error = security_file_truncate(file);
|
||||
if (!error)
|
||||
error = do_truncate(file_mnt_idmap(file), dentry, length,
|
||||
ATTR_MTIME | ATTR_CTIME, file);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = fsnotify_truncate_perm(&file->f_path, length);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
sb_start_write(inode->i_sb);
|
||||
error = do_truncate(file_mnt_idmap(file), dentry, length,
|
||||
ATTR_MTIME | ATTR_CTIME, file);
|
||||
sb_end_write(inode->i_sb);
|
||||
|
||||
return error;
|
||||
@ -901,7 +912,7 @@ static int do_dentry_open(struct file *f,
|
||||
f->f_sb_err = file_sample_sb_err(f);
|
||||
|
||||
if (unlikely(f->f_flags & O_PATH)) {
|
||||
f->f_mode = FMODE_PATH | FMODE_OPENED;
|
||||
f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY;
|
||||
f->f_op = &empty_fops;
|
||||
return 0;
|
||||
}
|
||||
@ -929,6 +940,12 @@ static int do_dentry_open(struct file *f,
|
||||
if (error)
|
||||
goto cleanup_all;
|
||||
|
||||
/*
|
||||
* Set FMODE_NONOTIFY_* bits according to existing permission watches.
|
||||
* If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't
|
||||
* change anything.
|
||||
*/
|
||||
file_set_fsnotify_mode(f);
|
||||
error = fsnotify_open_perm(f);
|
||||
if (error)
|
||||
goto cleanup_all;
|
||||
@ -1105,6 +1122,23 @@ struct file *dentry_open(const struct path *path, int flags,
|
||||
}
|
||||
EXPORT_SYMBOL(dentry_open);
|
||||
|
||||
struct file *dentry_open_nonotify(const struct path *path, int flags,
|
||||
const struct cred *cred)
|
||||
{
|
||||
struct file *f = alloc_empty_file(flags, cred);
|
||||
if (!IS_ERR(f)) {
|
||||
int error;
|
||||
|
||||
f->f_mode |= FMODE_NONOTIFY;
|
||||
error = vfs_open(path, f);
|
||||
if (error) {
|
||||
fput(f);
|
||||
f = ERR_PTR(error);
|
||||
}
|
||||
}
|
||||
return f;
|
||||
}
|
||||
|
||||
/**
|
||||
* dentry_create - Create and open a file
|
||||
* @path: path to create
|
||||
@ -1202,7 +1236,7 @@ inline struct open_how build_open_how(int flags, umode_t mode)
|
||||
inline int build_open_flags(const struct open_how *how, struct open_flags *op)
|
||||
{
|
||||
u64 flags = how->flags;
|
||||
u64 strip = __FMODE_NONOTIFY | O_CLOEXEC;
|
||||
u64 strip = O_CLOEXEC;
|
||||
int lookup_flags = 0;
|
||||
int acc_mode = ACC_MODE(flags);
|
||||
|
||||
@ -1210,9 +1244,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
|
||||
"struct open_flags doesn't yet handle flags > 32 bits");
|
||||
|
||||
/*
|
||||
* Strip flags that either shouldn't be set by userspace like
|
||||
* FMODE_NONOTIFY or that aren't relevant in determining struct
|
||||
* open_flags like O_CLOEXEC.
|
||||
* Strip flags that aren't relevant in determining struct open_flags.
|
||||
*/
|
||||
flags &= ~strip;
|
||||
|
||||
|
@ -1451,6 +1451,9 @@ xfs_dax_read_fault(
|
||||
|
||||
trace_xfs_read_fault(ip, order);
|
||||
|
||||
ret = filemap_fsnotify_fault(vmf);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
|
||||
ret = xfs_dax_fault_locked(vmf, order, false);
|
||||
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
|
||||
@ -1479,6 +1482,16 @@ xfs_write_fault(
|
||||
vm_fault_t ret;
|
||||
|
||||
trace_xfs_write_fault(ip, order);
|
||||
/*
|
||||
* Usually we get here from ->page_mkwrite callback but in case of DAX
|
||||
* we will get here also for ordinary write fault. Handle HSM
|
||||
* notifications for that case.
|
||||
*/
|
||||
if (IS_DAX(inode)) {
|
||||
ret = filemap_fsnotify_fault(vmf);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
}
|
||||
|
||||
sb_start_pagefault(inode->i_sb);
|
||||
file_update_time(vmf->vma->vm_file);
|
||||
|
@ -1730,7 +1730,7 @@ xfs_fs_fill_super(
|
||||
sb->s_time_max = XFS_LEGACY_TIME_MAX;
|
||||
}
|
||||
trace_xfs_inode_timestamp_range(mp, sb->s_time_min, sb->s_time_max);
|
||||
sb->s_iflags |= SB_I_CGROUPWB;
|
||||
sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM;
|
||||
|
||||
set_posix_acl_flag(sb);
|
||||
|
||||
|
@ -89,6 +89,16 @@
|
||||
#define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \
|
||||
FAN_RENAME)
|
||||
|
||||
/* Content events can be used to inspect file content */
|
||||
#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \
|
||||
FAN_ACCESS_PERM)
|
||||
/* Pre-content events can be used to fill file content */
|
||||
#define FANOTIFY_PRE_CONTENT_EVENTS (FAN_PRE_ACCESS)
|
||||
|
||||
/* Events that require a permission response from user */
|
||||
#define FANOTIFY_PERM_EVENTS (FANOTIFY_CONTENT_PERM_EVENTS | \
|
||||
FANOTIFY_PRE_CONTENT_EVENTS)
|
||||
|
||||
/* Events that can be reported with event->fd */
|
||||
#define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS)
|
||||
|
||||
@ -104,10 +114,6 @@
|
||||
FANOTIFY_INODE_EVENTS | \
|
||||
FANOTIFY_ERROR_EVENTS)
|
||||
|
||||
/* Events that require a permission response from user */
|
||||
#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
|
||||
FAN_OPEN_EXEC_PERM)
|
||||
|
||||
/* Extra flags that may be reported with event or control handling of events */
|
||||
#define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR)
|
||||
|
||||
@ -126,7 +132,9 @@
|
||||
/* These masks check for invalid bits in permission responses. */
|
||||
#define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY)
|
||||
#define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO)
|
||||
#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS)
|
||||
#define FANOTIFY_RESPONSE_VALID_MASK \
|
||||
(FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \
|
||||
(FAN_ERRNO_MASK << FAN_ERRNO_SHIFT))
|
||||
|
||||
/* Do not use these old uapi constants internally */
|
||||
#undef FAN_ALL_CLASS_BITS
|
||||
|
@ -173,13 +173,20 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
|
||||
#define FMODE_NOREUSE ((__force fmode_t)(1 << 23))
|
||||
|
||||
/* FMODE_* bit 24 */
|
||||
|
||||
/* File is embedded in backing_file object */
|
||||
#define FMODE_BACKING ((__force fmode_t)(1 << 25))
|
||||
#define FMODE_BACKING ((__force fmode_t)(1 << 24))
|
||||
|
||||
/* File was opened by fanotify and shouldn't generate fanotify events */
|
||||
#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26))
|
||||
/*
|
||||
* Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be
|
||||
* generated (see below)
|
||||
*/
|
||||
#define FMODE_NONOTIFY ((__force fmode_t)(1 << 25))
|
||||
|
||||
/*
|
||||
* Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be
|
||||
* generated (see below)
|
||||
*/
|
||||
#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26))
|
||||
|
||||
/* File is capable of returning -EAGAIN if I/O will block */
|
||||
#define FMODE_NOWAIT ((__force fmode_t)(1 << 27))
|
||||
@ -190,6 +197,32 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
|
||||
/* File does not contribute to nr_files count */
|
||||
#define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29))
|
||||
|
||||
/*
 * The two FMODE_NONOTIFY* define which fsnotify events should not be generated
 * for a file. These are the possible values of (f->f_mode &
 * FMODE_FSNOTIFY_MASK) and their meaning:
 *
 * FMODE_NONOTIFY - suppress all (incl. non-permission) events.
 * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events.
 * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events.
 *
 * The helper macros below take an fmode_t value.  The argument is
 * parenthesized so that an expression argument (e.g. "a | b") is masked as a
 * whole rather than being split by operator precedence.
 */
#define FMODE_FSNOTIFY_MASK \
	(FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)

/* True when no fsnotify event at all should be generated for the file. */
#define FMODE_FSNOTIFY_NONE(mode) \
	(((mode) & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
/* True when permission events may be generated for the file. */
#define FMODE_FSNOTIFY_PERM(mode) \
	(((mode) & FMODE_FSNOTIFY_MASK) == 0 || \
	 ((mode) & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM))
/* True when pre-content (HSM) events may be generated for the file. */
#define FMODE_FSNOTIFY_HSM(mode) \
	(((mode) & FMODE_FSNOTIFY_MASK) == 0)
#else
/* Without permission-event support these checks are constant false. */
#define FMODE_FSNOTIFY_PERM(mode)	0
#define FMODE_FSNOTIFY_HSM(mode)	0
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Attribute flags. These should be or-ed together to figure out what
|
||||
* has been changed!
|
||||
@ -1232,6 +1265,7 @@ extern int send_sigurg(struct file *file);
|
||||
#define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */
|
||||
#define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */
|
||||
#define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */
|
||||
#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */
|
||||
|
||||
/* Possible states of 'frozen' field */
|
||||
enum {
|
||||
@ -2751,6 +2785,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt,
|
||||
}
|
||||
struct file *dentry_open(const struct path *path, int flags,
|
||||
const struct cred *creds);
|
||||
struct file *dentry_open_nonotify(const struct path *path, int flags,
|
||||
const struct cred *cred);
|
||||
struct file *dentry_create(const struct path *path, int flags, umode_t mode,
|
||||
const struct cred *cred);
|
||||
struct path *backing_file_user_path(struct file *f);
|
||||
@ -3059,6 +3095,28 @@ static inline void allow_write_access(struct file *file)
|
||||
if (file)
|
||||
atomic_inc(&file_inode(file)->i_writecount);
|
||||
}
|
||||
|
||||
/*
|
||||
* Do not prevent write to executable file when watched by pre-content events.
|
||||
*
|
||||
* Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at
|
||||
* the time of file open and remains constant for entire lifetime of the file,
|
||||
* so if pre-content watches are added post execution or removed before the end
|
||||
* of the execution, it will not cause i_writecount reference leak.
|
||||
*/
|
||||
static inline int exe_file_deny_write_access(struct file *exe_file)
|
||||
{
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode)))
|
||||
return 0;
|
||||
return deny_write_access(exe_file);
|
||||
}
|
||||
static inline void exe_file_allow_write_access(struct file *exe_file)
|
||||
{
|
||||
if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode)))
|
||||
return;
|
||||
allow_write_access(exe_file);
|
||||
}
|
||||
|
||||
static inline bool inode_is_open_for_write(const struct inode *inode)
|
||||
{
|
||||
return atomic_read(&inode->i_writecount) > 0;
|
||||
@ -3707,11 +3765,9 @@ struct ctl_table;
|
||||
int __init list_bdev_fs_names(char *buf, size_t size);
|
||||
|
||||
#define __FMODE_EXEC ((__force int) FMODE_EXEC)
|
||||
#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
|
||||
|
||||
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
|
||||
#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
|
||||
(flag & __FMODE_NONOTIFY)))
|
||||
#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE))
|
||||
|
||||
static inline bool is_sxid(umode_t mode)
|
||||
{
|
||||
|
@ -108,38 +108,35 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
|
||||
fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
|
||||
}
|
||||
|
||||
static inline int fsnotify_path(const struct path *path, __u32 mask)
|
||||
{
|
||||
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
|
||||
}
|
||||
|
||||
static inline int fsnotify_file(struct file *file, __u32 mask)
|
||||
{
|
||||
const struct path *path;
|
||||
|
||||
/*
|
||||
* FMODE_NONOTIFY are fds generated by fanotify itself which should not
|
||||
* generate new events. We also don't want to generate events for
|
||||
* FMODE_PATH fds (involves open & close events) as they are just
|
||||
* handle creation / destruction events and not "real" file events.
|
||||
*/
|
||||
if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
|
||||
if (FMODE_FSNOTIFY_NONE(file->f_mode))
|
||||
return 0;
|
||||
|
||||
path = &file->f_path;
|
||||
/* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */
|
||||
if (mask & ALL_FSNOTIFY_PERM_EVENTS &&
|
||||
!fsnotify_sb_has_priority_watchers(path->dentry->d_sb,
|
||||
FSNOTIFY_PRIO_CONTENT))
|
||||
return 0;
|
||||
|
||||
return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
|
||||
return fsnotify_path(&file->f_path, mask);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
|
||||
|
||||
void file_set_fsnotify_mode(struct file *file);
|
||||
|
||||
/*
|
||||
* fsnotify_file_area_perm - permission hook before access to file range
|
||||
*/
|
||||
static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
|
||||
const loff_t *ppos, size_t count)
|
||||
{
|
||||
__u32 fsnotify_mask = FS_ACCESS_PERM;
|
||||
|
||||
/*
|
||||
* filesystem may be modified in the context of permission events
|
||||
* (e.g. by HSM filling a file on access), so sb freeze protection
|
||||
@ -147,14 +144,49 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
|
||||
*/
|
||||
lockdep_assert_once(file_write_not_started(file));
|
||||
|
||||
if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS)))
|
||||
return 0;
|
||||
|
||||
if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode)))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* read()/write() and other types of access generate pre-content events.
|
||||
*/
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
|
||||
int ret = fsnotify_pre_content(&file->f_path, ppos, count);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!(perm_mask & MAY_READ))
|
||||
return 0;
|
||||
|
||||
return fsnotify_file(file, fsnotify_mask);
|
||||
/*
|
||||
* read() also generates the legacy FS_ACCESS_PERM event, so content
|
||||
* scanners can inspect the content filled by pre-content event.
|
||||
*/
|
||||
return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
|
||||
}
|
||||
|
||||
/*
|
||||
* fsnotify_file_perm - permission hook before file access
|
||||
* fsnotify_truncate_perm - permission hook before file truncate
|
||||
*/
|
||||
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
	struct super_block *sb = d_inode(path->dentry)->i_sb;

	/* Nothing to report unless the superblock opted in to HSM events... */
	if (!(sb->s_iflags & SB_I_ALLOW_HSM))
		return 0;

	/* ...and it has watchers at pre-content priority. */
	if (!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT))
		return 0;

	/* The new length is passed as the position, with a zero count. */
	return fsnotify_pre_content(path, &length, 0);
}
|
||||
|
||||
/*
|
||||
* fsnotify_file_perm - permission hook before file access (unknown range)
|
||||
*/
|
||||
static inline int fsnotify_file_perm(struct file *file, int perm_mask)
|
||||
{
|
||||
@ -168,22 +200,34 @@ static inline int fsnotify_open_perm(struct file *file)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode)))
|
||||
return 0;
|
||||
|
||||
if (file->f_flags & __FMODE_EXEC) {
|
||||
ret = fsnotify_file(file, FS_OPEN_EXEC_PERM);
|
||||
ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return fsnotify_file(file, FS_OPEN_PERM);
|
||||
return fsnotify_path(&file->f_path, FS_OPEN_PERM);
|
||||
}
|
||||
|
||||
#else
|
||||
static inline void file_set_fsnotify_mode(struct file *file)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
|
||||
const loff_t *ppos, size_t count)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int fsnotify_file_perm(struct file *file, int perm_mask)
|
||||
{
|
||||
return 0;
|
||||
|
@ -55,6 +55,9 @@
|
||||
#define FS_OPEN_PERM 0x00010000 /* open event in a permission hook */
|
||||
#define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */
|
||||
#define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */
|
||||
/* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
|
||||
|
||||
#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */
|
||||
|
||||
/*
|
||||
* Set on inode mark that cares about things that happen to its children.
|
||||
@ -77,8 +80,14 @@
|
||||
*/
|
||||
#define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME)
|
||||
|
||||
#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \
|
||||
FS_OPEN_EXEC_PERM)
|
||||
/* Content events can be used to inspect file content */
|
||||
#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \
|
||||
FS_ACCESS_PERM)
|
||||
/* Pre-content events can be used to fill file content */
|
||||
#define FSNOTIFY_PRE_CONTENT_EVENTS (FS_PRE_ACCESS)
|
||||
|
||||
#define ALL_FSNOTIFY_PERM_EVENTS (FSNOTIFY_CONTENT_PERM_EVENTS | \
|
||||
FSNOTIFY_PRE_CONTENT_EVENTS)
|
||||
|
||||
/*
|
||||
* This is a list of all events that may get sent to a parent that is watching
|
||||
@ -285,6 +294,7 @@ static inline void fsnotify_group_assert_locked(struct fsnotify_group *group)
|
||||
/* When calling fsnotify tell it if the data is a path or inode */
|
||||
enum fsnotify_data_type {
|
||||
FSNOTIFY_EVENT_NONE,
|
||||
FSNOTIFY_EVENT_FILE_RANGE,
|
||||
FSNOTIFY_EVENT_PATH,
|
||||
FSNOTIFY_EVENT_INODE,
|
||||
FSNOTIFY_EVENT_DENTRY,
|
||||
@ -297,6 +307,17 @@ struct fs_error_report {
|
||||
struct super_block *sb;
|
||||
};
|
||||
|
||||
struct file_range {
|
||||
const struct path *path;
|
||||
loff_t pos;
|
||||
size_t count;
|
||||
};
|
||||
|
||||
static inline const struct path *file_range_path(const struct file_range *range)
|
||||
{
|
||||
return range->path;
|
||||
}
|
||||
|
||||
static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
|
||||
{
|
||||
switch (data_type) {
|
||||
@ -306,6 +327,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type)
|
||||
return d_inode(data);
|
||||
case FSNOTIFY_EVENT_PATH:
|
||||
return d_inode(((const struct path *)data)->dentry);
|
||||
case FSNOTIFY_EVENT_FILE_RANGE:
|
||||
return d_inode(file_range_path(data)->dentry);
|
||||
case FSNOTIFY_EVENT_ERROR:
|
||||
return ((struct fs_error_report *)data)->inode;
|
||||
default:
|
||||
@ -321,6 +344,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ
|
||||
return (struct dentry *)data;
|
||||
case FSNOTIFY_EVENT_PATH:
|
||||
return ((const struct path *)data)->dentry;
|
||||
case FSNOTIFY_EVENT_FILE_RANGE:
|
||||
return file_range_path(data)->dentry;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@ -332,6 +357,8 @@ static inline const struct path *fsnotify_data_path(const void *data,
|
||||
switch (data_type) {
|
||||
case FSNOTIFY_EVENT_PATH:
|
||||
return data;
|
||||
case FSNOTIFY_EVENT_FILE_RANGE:
|
||||
return file_range_path(data);
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
@ -347,6 +374,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data,
|
||||
return ((struct dentry *)data)->d_sb;
|
||||
case FSNOTIFY_EVENT_PATH:
|
||||
return ((const struct path *)data)->dentry->d_sb;
|
||||
case FSNOTIFY_EVENT_FILE_RANGE:
|
||||
return file_range_path(data)->dentry->d_sb;
|
||||
case FSNOTIFY_EVENT_ERROR:
|
||||
return ((struct fs_error_report *) data)->sb;
|
||||
default:
|
||||
@ -366,6 +395,18 @@ static inline struct fs_error_report *fsnotify_data_error_report(
|
||||
}
|
||||
}
|
||||
|
||||
static inline const struct file_range *fsnotify_data_file_range(
|
||||
const void *data,
|
||||
int data_type)
|
||||
{
|
||||
switch (data_type) {
|
||||
case FSNOTIFY_EVENT_FILE_RANGE:
|
||||
return (struct file_range *)data;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Index to merged marks iterator array that correlates to a type of watch.
|
||||
* The type of watched object can be deduced from the iterator type, but not
|
||||
@ -854,9 +895,17 @@ static inline void fsnotify_init_event(struct fsnotify_event *event)
|
||||
{
|
||||
INIT_LIST_HEAD(&event->list);
|
||||
}
|
||||
int fsnotify_pre_content(const struct path *path, const loff_t *ppos,
|
||||
size_t count);
|
||||
|
||||
#else
|
||||
|
||||
static inline int fsnotify_pre_content(const struct path *path,
|
||||
const loff_t *ppos, size_t count)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int fsnotify(__u32 mask, const void *data, int data_type,
|
||||
struct inode *dir, const struct qstr *name,
|
||||
struct inode *inode, u32 cookie)
|
||||
|
@ -3420,6 +3420,7 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf);
|
||||
extern vm_fault_t filemap_map_pages(struct vm_fault *vmf,
|
||||
pgoff_t start_pgoff, pgoff_t end_pgoff);
|
||||
extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
|
||||
extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf);
|
||||
|
||||
extern unsigned long stack_guard_gap;
|
||||
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
|
||||
|
@ -6,7 +6,6 @@
|
||||
|
||||
/*
|
||||
* FMODE_EXEC is 0x20
|
||||
* FMODE_NONOTIFY is 0x4000000
|
||||
* These cannot be used by userspace O_* until internal and external open
|
||||
* flags are split.
|
||||
* -Eric Paris
|
||||
|
@ -25,6 +25,9 @@
|
||||
#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
|
||||
#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
|
||||
#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */
|
||||
/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */
|
||||
|
||||
#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */
|
||||
|
||||
#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */
|
||||
|
||||
@ -143,6 +146,7 @@ struct fanotify_event_metadata {
|
||||
#define FAN_EVENT_INFO_TYPE_DFID 3
|
||||
#define FAN_EVENT_INFO_TYPE_PIDFD 4
|
||||
#define FAN_EVENT_INFO_TYPE_ERROR 5
|
||||
#define FAN_EVENT_INFO_TYPE_RANGE 6
|
||||
|
||||
/* Special info types for FAN_RENAME */
|
||||
#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10
|
||||
@ -189,6 +193,13 @@ struct fanotify_event_info_error {
|
||||
__u32 error_count;
|
||||
};
|
||||
|
||||
struct fanotify_event_info_range {
|
||||
struct fanotify_event_info_header hdr;
|
||||
__u32 pad;
|
||||
__u64 offset;
|
||||
__u64 count;
|
||||
};
|
||||
|
||||
/*
|
||||
* User space may need to record additional information about its decision.
|
||||
* The extra information type records what kind of information is included.
|
||||
@ -224,6 +235,13 @@ struct fanotify_response_info_audit_rule {
|
||||
/* Legit userspace responses to a _PERM event */
#define FAN_ALLOW	0x01
#define FAN_DENY	0x02
/*
 * An errno other than EPERM can be specified in the upper byte of a deny
 * response.
 *
 * FAN_ERRNO_MASK is unsigned so that (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT),
 * which places the mask in bits 24-31, does not shift into the sign bit of a
 * signed int.
 */
#define FAN_ERRNO_BITS	8
#define FAN_ERRNO_SHIFT	(32 - FAN_ERRNO_BITS)
#define FAN_ERRNO_MASK	((1U << FAN_ERRNO_BITS) - 1)
/* Build a deny response carrying @err (truncated to FAN_ERRNO_BITS bits). */
#define FAN_DENY_ERRNO(err) \
	(FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT))
|
||||
|
||||
#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */
|
||||
#define FAN_INFO 0x20 /* Bitmask to indicate additional information */
|
||||
|
||||
|
@ -625,8 +625,8 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
|
||||
* We depend on the oldmm having properly denied write access to the
|
||||
* exe_file already.
|
||||
*/
|
||||
if (exe_file && deny_write_access(exe_file))
|
||||
pr_warn_once("deny_write_access() failed in %s\n", __func__);
|
||||
if (exe_file && exe_file_deny_write_access(exe_file))
|
||||
pr_warn_once("exe_file_deny_write_access() failed in %s\n", __func__);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
@ -1424,13 +1424,13 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
|
||||
* We expect the caller (i.e., sys_execve) to have already denied
|
||||
* write access, so this is unlikely to fail.
|
||||
*/
|
||||
if (unlikely(deny_write_access(new_exe_file)))
|
||||
if (unlikely(exe_file_deny_write_access(new_exe_file)))
|
||||
return -EACCES;
|
||||
get_file(new_exe_file);
|
||||
}
|
||||
rcu_assign_pointer(mm->exe_file, new_exe_file);
|
||||
if (old_exe_file) {
|
||||
allow_write_access(old_exe_file);
|
||||
exe_file_allow_write_access(old_exe_file);
|
||||
fput(old_exe_file);
|
||||
}
|
||||
return 0;
|
||||
@ -1471,7 +1471,7 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = deny_write_access(new_exe_file);
|
||||
ret = exe_file_deny_write_access(new_exe_file);
|
||||
if (ret)
|
||||
return -EACCES;
|
||||
get_file(new_exe_file);
|
||||
@ -1483,7 +1483,7 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
|
||||
mmap_write_unlock(mm);
|
||||
|
||||
if (old_exe_file) {
|
||||
allow_write_access(old_exe_file);
|
||||
exe_file_allow_write_access(old_exe_file);
|
||||
fput(old_exe_file);
|
||||
}
|
||||
return 0;
|
||||
|
86
mm/filemap.c
86
mm/filemap.c
@ -47,6 +47,7 @@
|
||||
#include <linux/splice.h>
|
||||
#include <linux/rcupdate_wait.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/fsnotify.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include "internal.h"
|
||||
@ -3150,6 +3151,14 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf)
|
||||
unsigned long vm_flags = vmf->vma->vm_flags;
|
||||
unsigned int mmap_miss;
|
||||
|
||||
/*
|
||||
* If we have pre-content watches we need to disable readahead to make
|
||||
* sure that we don't populate our mapping with 0 filled pages that we
|
||||
* never emitted an event for.
|
||||
*/
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
|
||||
return fpin;
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/* Use the readahead code, even if readahead is disabled */
|
||||
if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {
|
||||
@ -3218,6 +3227,10 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
|
||||
struct file *fpin = NULL;
|
||||
unsigned int mmap_miss;
|
||||
|
||||
/* See comment in do_sync_mmap_readahead. */
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode)))
|
||||
return fpin;
|
||||
|
||||
/* If we don't want any read-ahead, don't bother */
|
||||
if (vmf->vma->vm_flags & VM_RAND_READ || !ra->ra_pages)
|
||||
return fpin;
|
||||
@ -3276,6 +3289,48 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* filemap_fsnotify_fault - maybe emit a pre-content event.
|
||||
* @vmf: struct vm_fault containing details of the fault.
|
||||
*
|
||||
* If we have a pre-content watch on this file we will emit an event for this
|
||||
* range. If we return anything the fault caller should return immediately, we
|
||||
* will return VM_FAULT_RETRY if we had to emit an event, which will trigger the
|
||||
* fault again and then the fault handler will run the second time through.
|
||||
*
|
||||
* Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened.
|
||||
*/
|
||||
vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct file *fpin = NULL;
|
||||
int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS;
|
||||
loff_t pos = vmf->pgoff >> PAGE_SHIFT;
|
||||
size_t count = PAGE_SIZE;
|
||||
int err;
|
||||
|
||||
/*
|
||||
* We already did this and now we're retrying with everything locked,
|
||||
* don't emit the event and continue.
|
||||
*/
|
||||
if (vmf->flags & FAULT_FLAG_TRIED)
|
||||
return 0;
|
||||
|
||||
/* No watches, we're done. */
|
||||
if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode)))
|
||||
return 0;
|
||||
|
||||
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
|
||||
if (!fpin)
|
||||
return VM_FAULT_SIGBUS;
|
||||
|
||||
err = fsnotify_file_area_perm(fpin, mask, &pos, count);
|
||||
fput(fpin);
|
||||
if (err)
|
||||
return VM_FAULT_SIGBUS;
|
||||
return VM_FAULT_RETRY;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
|
||||
|
||||
/**
|
||||
* filemap_fault - read in file data for page fault handling
|
||||
* @vmf: struct vm_fault containing details of the fault
|
||||
@ -3379,6 +3434,37 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
|
||||
* or because readahead was otherwise unable to retrieve it.
|
||||
*/
|
||||
if (unlikely(!folio_test_uptodate(folio))) {
|
||||
/*
|
||||
* If this is a precontent file we can now emit an event to
|
||||
* try and populate the folio.
|
||||
*/
|
||||
if (!(vmf->flags & FAULT_FLAG_TRIED) &&
|
||||
unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
|
||||
loff_t pos = folio_pos(folio);
|
||||
size_t count = folio_size(folio);
|
||||
|
||||
/* We're NOWAIT, we have to retry. */
|
||||
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) {
|
||||
folio_unlock(folio);
|
||||
goto out_retry;
|
||||
}
|
||||
|
||||
if (mapping_locked)
|
||||
filemap_invalidate_unlock_shared(mapping);
|
||||
mapping_locked = false;
|
||||
|
||||
folio_unlock(folio);
|
||||
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
|
||||
if (!fpin)
|
||||
goto out_retry;
|
||||
|
||||
error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos,
|
||||
count);
|
||||
if (error)
|
||||
ret = VM_FAULT_SIGBUS;
|
||||
goto out_retry;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the invalidate lock is not held, the folio was in cache
|
||||
* and uptodate and now it is not. Strange but possible since we
|
||||
|
19
mm/memory.c
19
mm/memory.c
@ -76,6 +76,7 @@
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/sched/sysctl.h>
|
||||
#include <linux/fsnotify.h>
|
||||
|
||||
#include <trace/events/kmem.h>
|
||||
|
||||
@ -5662,8 +5663,17 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
|
||||
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
|
||||
if (vma_is_anonymous(vma))
|
||||
return do_huge_pmd_anonymous_page(vmf);
|
||||
/*
|
||||
* Currently we just emit PAGE_SIZE for our fault events, so don't allow
|
||||
* a huge fault if we have a pre content watch on this file. This would
|
||||
* be trivial to support, but there would need to be tests to ensure
|
||||
* this works properly and those don't exist currently.
|
||||
*/
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
|
||||
return VM_FAULT_FALLBACK;
|
||||
if (vma->vm_ops->huge_fault)
|
||||
return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
|
||||
return VM_FAULT_FALLBACK;
|
||||
@ -5687,6 +5697,9 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
|
||||
}
|
||||
|
||||
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
|
||||
/* See comment in create_huge_pmd. */
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
|
||||
goto split;
|
||||
if (vma->vm_ops->huge_fault) {
|
||||
ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
|
||||
if (!(ret & VM_FAULT_FALLBACK))
|
||||
@ -5709,6 +5722,9 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
|
||||
/* No support for anonymous transparent PUD pages yet */
|
||||
if (vma_is_anonymous(vma))
|
||||
return VM_FAULT_FALLBACK;
|
||||
/* See comment in create_huge_pmd. */
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
|
||||
return VM_FAULT_FALLBACK;
|
||||
if (vma->vm_ops->huge_fault)
|
||||
return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
@ -5726,6 +5742,9 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
|
||||
if (vma_is_anonymous(vma))
|
||||
goto split;
|
||||
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
|
||||
/* See comment in create_huge_pmd. */
|
||||
if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
|
||||
goto split;
|
||||
if (vma->vm_ops->huge_fault) {
|
||||
ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
|
||||
if (!(ret & VM_FAULT_FALLBACK))
|
||||
|
@ -1613,6 +1613,13 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
|
||||
}
|
||||
EXPORT_SYMBOL(remap_vmalloc_range);
|
||||
|
||||
vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf)
|
||||
{
|
||||
BUG();
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(filemap_fsnotify_fault);
|
||||
|
||||
vm_fault_t filemap_fault(struct vm_fault *vmf)
|
||||
{
|
||||
BUG();
|
||||
|
@ -128,6 +128,7 @@
|
||||
#include <linux/blk-cgroup.h>
|
||||
#include <linux/fadvise.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/fsnotify.h>
|
||||
|
||||
#include "internal.h"
|
||||
|
||||
@ -548,6 +549,15 @@ void page_cache_sync_ra(struct readahead_control *ractl,
|
||||
unsigned long max_pages, contig_count;
|
||||
pgoff_t prev_index, miss;
|
||||
|
||||
/*
|
||||
* If we have pre-content watches we need to disable readahead to make
|
||||
* sure that we don't find 0 filled pages in cache that we never emitted
|
||||
* events for. Filesystems supporting HSM must make sure to not call
|
||||
* this function with ractl->file unset for files handled by HSM.
|
||||
*/
|
||||
if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Even if readahead is disabled, issue this request as readahead
|
||||
* as we'll need it to satisfy the requested range. The forced
|
||||
@ -626,6 +636,10 @@ void page_cache_async_ra(struct readahead_control *ractl,
|
||||
if (!ra->ra_pages)
|
||||
return;
|
||||
|
||||
/* See the comment in page_cache_sync_ra. */
|
||||
if (ractl->file && unlikely(FMODE_FSNOTIFY_HSM(ractl->file->f_mode)))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Same bit is used for PG_readahead and PG_reclaim.
|
||||
*/
|
||||
|
@ -3404,7 +3404,8 @@ static int selinux_path_notify(const struct path *path, u64 mask,
|
||||
perm |= FILE__WATCH_WITH_PERM;
|
||||
|
||||
/* watches on read-like events need the file:watch_reads permission */
|
||||
if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_CLOSE_NOWRITE))
|
||||
if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_PRE_ACCESS |
|
||||
FS_CLOSE_NOWRITE))
|
||||
perm |= FILE__WATCH_READS;
|
||||
|
||||
return path_has_perm(current_cred(), path, perm);
|
||||
|
Loading…
Reference in New Issue
Block a user