AFS fixes

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEqG5UsNXhtOCrfGQP+7dXa6fLC2sFAl+ayiUACgkQ+7dXa6fL
 C2uZAg//cVeuhu1cUMzNZwE9VotL0a3GXGl+5S1pyJ4lEiKOylJYyxJxsGEG6YiE
 GxDt9wx78P679Y3VJchDjo7voBXbPqRYFxnbXyq5X/xhNfExRqXhkauao8jWMaku
 77UzretUtav7JmgxkGtQ8eMpYkrua7YqcdvMEVjSJ/TqQi68lcU+rMBTO7UnkURb
 YD43XyFI7D7XXfXpywTc0PYRQi9pEvXryb2OlEvLHLiS0hV9Zj32i6WWmn8GfnhQ
 Q9107kHYZFU2B+O+IbbImkKtlpC9X0yCAGGi2vDd0RirqKK/gfkMlK0XzwjnvzR4
 PoqnMs2yjwcanxTrDD/3gr6MfZ2KRnmrLO6cdRmI3ldSsbkFOSeoQ0DYr4JDdal9
 27OixazIcqmZfIssHwOH5pGZvO9bu5+2hlwdZZV7uISORJnqHZZVZ04Bdy+0chZx
 JTVeyYH2+FDRUM55heVnuI1r6xCbHRyj3On4GF1n8uKrEinkVaEMZCWWcOHlNYnG
 C3DC6MGxS1DRox/bNcBql9Jk6RkzPI/gzliQA92yAngMtOzyn+uZjqftASVve17R
 K9/nSQ/43E9LMc+DIEJ+8KSOkSN1zb6dAJ24Z8g7s+VbVb78WwHxojNjb8J9EfW3
 lo/eTprYtZvidE8PJTisdzyaJooUifMAMhy8eFwPaXdqwRc7Sjc=
 =v0Gd
 -----END PGP SIGNATURE-----

Merge tag 'afs-fixes-20201029' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull AFS fixes from David Howells:

 - Fix copy_file_range() to an afs file now returning EINVAL if the
   splice_write file op isn't supplied.

 - Fix a deref-before-check in afs_unuse_cell().

 - Fix a use-after-free in afs_xattr_get_acl().

 - Fix afs to not try to clear PG_writeback when laundering a page.

 - Fix afs to take a ref on a page that it sets PG_private on and to
   drop that ref when clearing PG_private. This is done through recently
   added helpers.

 - Fix a page leak if write_begin() fails.

 - Fix afs_write_begin() to not alter the dirty region info stored in
   page->private, but rather do this in afs_write_end() instead when we
   know what we actually changed.

 - Fix afs_invalidatepage() to alter the dirty region info on a page
   when partial page invalidation occurs so that we don't inadvertantly
   include a span of zeros that will get written back if a page gets
   laundered due to a remote 3rd-party induced invalidation.

   We mustn't, however, reduce the dirty region if the page has been
   seen to be mapped (ie. we got called through the page_mkwrite vector)
   as the page might still be mapped and we might lose data if the file
   is extended again.

 - Fix the dirty region info to have a lower resolution if the size of
   the page is too large for this to be encoded (e.g. powerpc32 with 64K
   pages).

   Note that this might not be the ideal way to handle this, since it
   may allow some leakage of undirtied zero bytes to the server's copy
   in the case of a 3rd-party conflict.

To aid the last two fixes, two additional changes:

 - Wrap the manipulations of the dirty region info stored in
   page->private into helper functions.

 - Alter the encoding of the dirty region so that the region bounds can
   be stored with one fewer bit, making a bit available for the
   indication of mappedness.

* tag 'afs-fixes-20201029' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  afs: Fix dirty-region encoding on ppc32 with 64K pages
  afs: Fix afs_invalidatepage to adjust the dirty region
  afs: Alter dirty range encoding in page->private
  afs: Wrap page->private manipulations in inline functions
  afs: Fix where page->private is set during write
  afs: Fix page leak on afs_write_begin() failure
  afs: Fix to take ref on page when PG_private is set
  afs: Fix afs_launder_page to not clear PG_writeback
  afs: Fix a use after free in afs_xattr_get_acl()
  afs: Fix tracing deref-before-check
  afs: Fix copy_file_range()
This commit is contained in:
Linus Torvalds 2020-10-29 10:13:09 -07:00
commit 598a597636
8 changed files with 188 additions and 95 deletions

View File

@ -589,7 +589,7 @@ struct afs_cell *afs_use_cell(struct afs_cell *cell, enum afs_cell_trace reason)
*/
void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_trace reason)
{
unsigned int debug_id = cell->debug_id;
unsigned int debug_id;
time64_t now, expire_delay;
int u, a;
@ -604,6 +604,7 @@ void afs_unuse_cell(struct afs_net *net, struct afs_cell *cell, enum afs_cell_tr
if (cell->vl_servers->nr_servers)
expire_delay = afs_cell_gc_delay;
debug_id = cell->debug_id;
u = atomic_read(&cell->ref);
a = atomic_dec_return(&cell->active);
trace_afs_cell(debug_id, u, a, reason);

View File

@ -281,8 +281,7 @@ retry:
if (ret < 0)
goto error;
set_page_private(req->pages[i], 1);
SetPagePrivate(req->pages[i]);
attach_page_private(req->pages[i], (void *)1);
unlock_page(req->pages[i]);
i++;
} else {
@ -1975,8 +1974,7 @@ static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags)
_enter("{{%llx:%llu}[%lu]}", dvnode->fid.vid, dvnode->fid.vnode, page->index);
set_page_private(page, 0);
ClearPagePrivate(page);
detach_page_private(page);
/* The directory will need reloading. */
if (test_and_clear_bit(AFS_VNODE_DIR_VALID, &dvnode->flags))
@ -2003,8 +2001,6 @@ static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
afs_stat_v(dvnode, n_inval);
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == PAGE_SIZE) {
set_page_private(page, 0);
ClearPagePrivate(page);
}
if (offset == 0 && length == PAGE_SIZE)
detach_page_private(page);
}

View File

@ -243,10 +243,8 @@ void afs_edit_dir_add(struct afs_vnode *vnode,
index, gfp);
if (!page)
goto error;
if (!PagePrivate(page)) {
set_page_private(page, 1);
SetPagePrivate(page);
}
if (!PagePrivate(page))
attach_page_private(page, (void *)1);
dir_page = kmap(page);
}

View File

@ -33,6 +33,7 @@ const struct file_operations afs_file_operations = {
.write_iter = afs_file_write,
.mmap = afs_file_mmap,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
.fsync = afs_fsync,
.lock = afs_lock,
.flock = afs_flock,
@ -600,6 +601,63 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
return ret;
}
/*
* Adjust the dirty region of the page on truncation or full invalidation,
* getting rid of the markers altogether if the region is entirely invalidated.
*/
static void afs_invalidate_dirty(struct page *page, unsigned int offset,
unsigned int length)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
unsigned long priv;
unsigned int f, t, end = offset + length;
priv = page_private(page);
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == thp_size(page))
goto full_invalidate;
/* If the page was dirtied by page_mkwrite(), the PTE stays writable
* and we don't get another notification to tell us to expand it
* again.
*/
if (afs_is_page_dirty_mmapped(priv))
return;
/* We may need to shorten the dirty region */
f = afs_page_dirty_from(priv);
t = afs_page_dirty_to(priv);
if (t <= offset || f >= end)
return; /* Doesn't overlap */
if (f < offset && t > end)
return; /* Splits the dirty region - just absorb it */
if (f >= offset && t <= end)
goto undirty;
if (f < offset)
t = offset;
else
f = end;
if (f == t)
goto undirty;
priv = afs_page_dirty(f, t);
set_page_private(page, priv);
trace_afs_page_dirty(vnode, tracepoint_string("trunc"), page->index, priv);
return;
undirty:
trace_afs_page_dirty(vnode, tracepoint_string("undirty"), page->index, priv);
clear_page_dirty_for_io(page);
full_invalidate:
priv = (unsigned long)detach_page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("inval"), page->index, priv);
}
/*
* invalidate part or all of a page
* - release a page and clean up its private data if offset is 0 (indicating
@ -608,31 +666,23 @@ static int afs_readpages(struct file *file, struct address_space *mapping,
static void afs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
unsigned long priv;
_enter("{%lu},%u,%u", page->index, offset, length);
BUG_ON(!PageLocked(page));
#ifdef CONFIG_AFS_FSCACHE
/* we clean up only if the entire page is being invalidated */
if (offset == 0 && length == PAGE_SIZE) {
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
fscache_wait_on_page_write(vnode->cache, page);
fscache_uncache_page(vnode->cache, page);
}
}
#endif
if (PagePrivate(page)) {
priv = page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("inval"),
page->index, priv);
set_page_private(page, 0);
ClearPagePrivate(page);
}
}
if (PagePrivate(page))
afs_invalidate_dirty(page, offset, length);
_leave("");
}
@ -660,11 +710,9 @@ static int afs_releasepage(struct page *page, gfp_t gfp_flags)
#endif
if (PagePrivate(page)) {
priv = page_private(page);
priv = (unsigned long)detach_page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("rel"),
page->index, priv);
set_page_private(page, 0);
ClearPagePrivate(page);
}
/* indicate that the page can be released */

View File

@ -812,6 +812,7 @@ struct afs_operation {
pgoff_t last; /* last page in mapping to deal with */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@ -857,6 +858,62 @@ struct afs_vnode_cache_aux {
u64 data_version;
} __packed;
/*
* We use page->private to hold the amount of the page that we've written to,
* splitting the field into two parts. However, we need to represent a range
* 0...PAGE_SIZE, so we reduce the resolution if the size of the page
* exceeds what we can encode.
*/
#ifdef CONFIG_64BIT
#define __AFS_PAGE_PRIV_MASK 0x7fffffffUL
#define __AFS_PAGE_PRIV_SHIFT 32
#define __AFS_PAGE_PRIV_MMAPPED 0x80000000UL
#else
#define __AFS_PAGE_PRIV_MASK 0x7fffUL
#define __AFS_PAGE_PRIV_SHIFT 16
#define __AFS_PAGE_PRIV_MMAPPED 0x8000UL
#endif
static inline unsigned int afs_page_dirty_resolution(void)
{
int shift = PAGE_SHIFT - (__AFS_PAGE_PRIV_SHIFT - 1);
return (shift > 0) ? shift : 0;
}
static inline size_t afs_page_dirty_from(unsigned long priv)
{
unsigned long x = priv & __AFS_PAGE_PRIV_MASK;
/* The lower bound is inclusive */
return x << afs_page_dirty_resolution();
}
static inline size_t afs_page_dirty_to(unsigned long priv)
{
unsigned long x = (priv >> __AFS_PAGE_PRIV_SHIFT) & __AFS_PAGE_PRIV_MASK;
/* The upper bound is immediately beyond the region */
return (x + 1) << afs_page_dirty_resolution();
}
static inline unsigned long afs_page_dirty(size_t from, size_t to)
{
unsigned int res = afs_page_dirty_resolution();
from >>= res;
to = (to - 1) >> res;
return (to << __AFS_PAGE_PRIV_SHIFT) | from;
}
static inline unsigned long afs_page_dirty_mmapped(unsigned long priv)
{
return priv | __AFS_PAGE_PRIV_MMAPPED;
}
static inline bool afs_is_page_dirty_mmapped(unsigned long priv)
{
return priv & __AFS_PAGE_PRIV_MMAPPED;
}
#include <trace/events/afs.h>
/*****************************************************************************/

View File

@ -76,7 +76,7 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
*/
int afs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
struct page **_page, void **fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
@ -90,11 +90,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
_enter("{%llx:%llu},{%lx},%u,%u",
vnode->fid.vid, vnode->fid.vnode, index, from, to);
/* We want to store information about how much of a page is altered in
* page->private.
*/
BUILD_BUG_ON(PAGE_SIZE > 32768 && sizeof(page->private) < 8);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
@ -110,9 +105,6 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
/* page won't leak in error case: it eventually gets cleaned off LRU */
*pagep = page;
try_again:
/* See if this page is already partially written in a way that we can
* merge the new write with.
@ -120,8 +112,8 @@ try_again:
t = f = 0;
if (PagePrivate(page)) {
priv = page_private(page);
f = priv & AFS_PRIV_MAX;
t = priv >> AFS_PRIV_SHIFT;
f = afs_page_dirty_from(priv);
t = afs_page_dirty_to(priv);
ASSERTCMP(f, <=, t);
}
@ -138,21 +130,9 @@ try_again:
if (!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags) &&
(to < f || from > t))
goto flush_conflicting_write;
if (from < f)
f = from;
if (to > t)
t = to;
} else {
f = from;
t = to;
}
priv = (unsigned long)t << AFS_PRIV_SHIFT;
priv |= f;
trace_afs_page_dirty(vnode, tracepoint_string("begin"),
page->index, priv);
SetPagePrivate(page);
set_page_private(page, priv);
*_page = page;
_leave(" = 0");
return 0;
@ -162,17 +142,18 @@ try_again:
flush_conflicting_write:
_debug("flush conflict");
ret = write_one_page(page);
if (ret < 0) {
_leave(" = %d", ret);
return ret;
}
if (ret < 0)
goto error;
ret = lock_page_killable(page);
if (ret < 0) {
_leave(" = %d", ret);
return ret;
}
if (ret < 0)
goto error;
goto try_again;
error:
put_page(page);
_leave(" = %d", ret);
return ret;
}
/*
@ -184,6 +165,9 @@ int afs_write_end(struct file *file, struct address_space *mapping,
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct key *key = afs_file_key(file);
unsigned long priv;
unsigned int f, from = pos & (PAGE_SIZE - 1);
unsigned int t, to = from + copied;
loff_t i_size, maybe_i_size;
int ret;
@ -215,6 +199,25 @@ int afs_write_end(struct file *file, struct address_space *mapping,
SetPageUptodate(page);
}
if (PagePrivate(page)) {
priv = page_private(page);
f = afs_page_dirty_from(priv);
t = afs_page_dirty_to(priv);
if (from < f)
f = from;
if (to > t)
t = to;
priv = afs_page_dirty(f, t);
set_page_private(page, priv);
trace_afs_page_dirty(vnode, tracepoint_string("dirty+"),
page->index, priv);
} else {
priv = afs_page_dirty(from, to);
attach_page_private(page, (void *)priv);
trace_afs_page_dirty(vnode, tracepoint_string("dirty"),
page->index, priv);
}
set_page_dirty(page);
if (PageDirty(page))
_debug("dirtied");
@ -334,10 +337,9 @@ static void afs_pages_written_back(struct afs_vnode *vnode,
ASSERTCMP(pv.nr, ==, count);
for (loop = 0; loop < count; loop++) {
priv = page_private(pv.pages[loop]);
priv = (unsigned long)detach_page_private(pv.pages[loop]);
trace_afs_page_dirty(vnode, tracepoint_string("clear"),
pv.pages[loop]->index, priv);
set_page_private(pv.pages[loop], 0);
end_page_writeback(pv.pages[loop]);
}
first += count;
@ -396,7 +398,8 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (op->error == 0) {
afs_pages_written_back(vnode, op->store.first, op->store.last);
if (!op->store.laundering)
afs_pages_written_back(vnode, op->store.first, op->store.last);
afs_stat_v(vnode, n_stores);
atomic_long_add((op->store.last * PAGE_SIZE + op->store.last_to) -
(op->store.first * PAGE_SIZE + op->store.first_offset),
@ -415,7 +418,7 @@ static const struct afs_operation_ops afs_store_data_operation = {
*/
static int afs_store_data(struct address_space *mapping,
pgoff_t first, pgoff_t last,
unsigned offset, unsigned to)
unsigned offset, unsigned to, bool laundering)
{
struct afs_vnode *vnode = AFS_FS_I(mapping->host);
struct afs_operation *op;
@ -448,6 +451,7 @@ static int afs_store_data(struct address_space *mapping,
op->store.last = last;
op->store.first_offset = offset;
op->store.last_to = to;
op->store.laundering = laundering;
op->mtime = vnode->vfs_inode.i_mtime;
op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
@ -509,8 +513,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
*/
start = primary_page->index;
priv = page_private(primary_page);
offset = priv & AFS_PRIV_MAX;
to = priv >> AFS_PRIV_SHIFT;
offset = afs_page_dirty_from(priv);
to = afs_page_dirty_to(priv);
trace_afs_page_dirty(vnode, tracepoint_string("store"),
primary_page->index, priv);
@ -555,8 +559,8 @@ static int afs_write_back_from_locked_page(struct address_space *mapping,
}
priv = page_private(page);
f = priv & AFS_PRIV_MAX;
t = priv >> AFS_PRIV_SHIFT;
f = afs_page_dirty_from(priv);
t = afs_page_dirty_to(priv);
if (f != 0 &&
!test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags)) {
unlock_page(page);
@ -601,7 +605,7 @@ no_more:
if (end > i_size)
to = i_size & ~PAGE_MASK;
ret = afs_store_data(mapping, first, last, offset, to);
ret = afs_store_data(mapping, first, last, offset, to, false);
switch (ret) {
case 0:
ret = count;
@ -857,12 +861,14 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
*/
wait_on_page_writeback(vmf->page);
priv = (unsigned long)PAGE_SIZE << AFS_PRIV_SHIFT; /* To */
priv |= 0; /* From */
priv = afs_page_dirty(0, PAGE_SIZE);
priv = afs_page_dirty_mmapped(priv);
trace_afs_page_dirty(vnode, tracepoint_string("mkwrite"),
vmf->page->index, priv);
SetPagePrivate(vmf->page);
set_page_private(vmf->page, priv);
if (PagePrivate(vmf->page))
set_page_private(vmf->page, priv);
else
attach_page_private(vmf->page, (void *)priv);
file_update_time(file);
sb_end_pagefault(inode->i_sb);
@ -915,19 +921,18 @@ int afs_launder_page(struct page *page)
f = 0;
t = PAGE_SIZE;
if (PagePrivate(page)) {
f = priv & AFS_PRIV_MAX;
t = priv >> AFS_PRIV_SHIFT;
f = afs_page_dirty_from(priv);
t = afs_page_dirty_to(priv);
}
trace_afs_page_dirty(vnode, tracepoint_string("launder"),
page->index, priv);
ret = afs_store_data(mapping, page->index, page->index, t, f);
ret = afs_store_data(mapping, page->index, page->index, t, f, true);
}
priv = (unsigned long)detach_page_private(page);
trace_afs_page_dirty(vnode, tracepoint_string("laundered"),
page->index, priv);
set_page_private(page, 0);
ClearPagePrivate(page);
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {

View File

@ -85,7 +85,7 @@ static int afs_xattr_get_acl(const struct xattr_handler *handler,
if (acl->size <= size)
memcpy(buffer, acl->data, acl->size);
else
op->error = -ERANGE;
ret = -ERANGE;
}
}

View File

@ -966,19 +966,6 @@ TRACE_EVENT(afs_dir_check_failed,
__entry->vnode, __entry->off, __entry->i_size)
);
/*
* We use page->private to hold the amount of the page that we've written to,
* splitting the field into two parts. However, we need to represent a range
* 0...PAGE_SIZE inclusive, so we can't support 64K pages on a 32-bit system.
*/
#if PAGE_SIZE > 32768
#define AFS_PRIV_MAX 0xffffffff
#define AFS_PRIV_SHIFT 32
#else
#define AFS_PRIV_MAX 0xffff
#define AFS_PRIV_SHIFT 16
#endif
TRACE_EVENT(afs_page_dirty,
TP_PROTO(struct afs_vnode *vnode, const char *where,
pgoff_t page, unsigned long priv),
@ -999,10 +986,11 @@ TRACE_EVENT(afs_page_dirty,
__entry->priv = priv;
),
TP_printk("vn=%p %lx %s %lu-%lu",
TP_printk("vn=%p %lx %s %zx-%zx%s",
__entry->vnode, __entry->page, __entry->where,
__entry->priv & AFS_PRIV_MAX,
__entry->priv >> AFS_PRIV_SHIFT)
afs_page_dirty_from(__entry->priv),
afs_page_dirty_to(__entry->priv),
afs_is_page_dirty_mmapped(__entry->priv) ? " M" : "")
);
TRACE_EVENT(afs_call_state,