NFS: Convert readdir page cache to use a cookie based index

Instead of using a linear index to address the pages, use the cookie of
the first entry, since that is what we use to match the page anyway.

This allows us to avoid re-reading the entire cache on a seekdir() type
of operation. The latter is very common when re-exporting NFS, and is a
major performance drain.

The change does affect our duplicate cookie detection, since we can no
longer rely on the page index as a linear offset for detecting whether
we looped backwards. However, since we no longer do a linear search
through all the pages on each call to nfs_readdir(), this is less of a
concern than it was previously.
The other downside is that invalidate_mapping_pages() can no longer use
the page index to avoid clearing pages that have been read. A subsequent
patch will restore the functionality this provides to the 'ls -l'
heuristic.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
This commit is contained in:
Trond Myklebust 2022-02-23 11:31:51 -05:00
parent 9332cf14e2
commit f648022faa
3 changed files with 69 additions and 86 deletions

View File

@ -4,6 +4,10 @@ config NFS_FS
depends on INET && FILE_LOCKING && MULTIUSER depends on INET && FILE_LOCKING && MULTIUSER
select LOCKD select LOCKD
select SUNRPC select SUNRPC
select CRYPTO
select CRYPTO_HASH
select XXHASH
select CRYPTO_XXHASH
select NFS_ACL_SUPPORT if NFS_V3_ACL select NFS_ACL_SUPPORT if NFS_V3_ACL
help help
Choose Y here if you want to access files residing on other Choose Y here if you want to access files residing on other

View File

@ -39,6 +39,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/kmemleak.h> #include <linux/kmemleak.h>
#include <linux/xattr.h> #include <linux/xattr.h>
#include <linux/xxhash.h>
#include "delegation.h" #include "delegation.h"
#include "iostat.h" #include "iostat.h"
@ -159,9 +160,7 @@ struct nfs_readdir_descriptor {
pgoff_t page_index_max; pgoff_t page_index_max;
u64 dir_cookie; u64 dir_cookie;
u64 last_cookie; u64 last_cookie;
u64 dup_cookie;
loff_t current_index; loff_t current_index;
loff_t prev_index;
__be32 verf[NFS_DIR_VERIFIER_SIZE]; __be32 verf[NFS_DIR_VERIFIER_SIZE];
unsigned long dir_verifier; unsigned long dir_verifier;
@ -171,7 +170,6 @@ struct nfs_readdir_descriptor {
unsigned int cache_entry_index; unsigned int cache_entry_index;
unsigned int buffer_fills; unsigned int buffer_fills;
unsigned int dtsize; unsigned int dtsize;
signed char duped;
bool plus; bool plus;
bool eob; bool eob;
bool eof; bool eof;
@ -331,6 +329,28 @@ out:
return ret; return ret;
} }
#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
/*
* Hash algorithm allowing content addressable access to sequences
* of directory cookies. Content is addressed by the value of the
* cookie index of the first readdir entry in a page.
*
* The xxhash algorithm is chosen because it is fast, and is supposed
* to result in a decent flat distribution of hashes.
*
* We then select only the low 18 bits of the hash to avoid issues with
* excessive memory use for the page cache XArray. 18 bits should allow the
* caching of 262144 pages of sequences of readdir entries. Since each page
* holds 127 readdir entries for a typical 64-bit system, that works out to
* a cache of ~ 33 million entries per directory.
*/
static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
{
	/* A zero cookie always maps to page index 0. */
	pgoff_t index = 0;

	/*
	 * Any other cookie is hashed with xxhash (seed 0) over its raw
	 * bytes, and the result is reduced by NFS_READDIR_COOKIE_MASK to
	 * produce the page cache index.
	 */
	if (cookie != 0)
		index = xxhash(&cookie, sizeof(cookie), 0) &
			NFS_READDIR_COOKIE_MASK;

	return index;
}
static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie, static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
u64 change_attr) u64 change_attr)
{ {
@ -352,15 +372,15 @@ static void nfs_readdir_page_unlock_and_put(struct page *page)
} }
static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
pgoff_t index, u64 last_cookie) u64 last_cookie,
u64 change_attr)
{ {
pgoff_t index = nfs_readdir_page_cookie_hash(last_cookie);
struct page *page; struct page *page;
u64 change_attr;
page = grab_cache_page(mapping, index); page = grab_cache_page(mapping, index);
if (!page) if (!page)
return NULL; return NULL;
change_attr = inode_peek_iversion_raw(mapping->host);
if (PageUptodate(page)) { if (PageUptodate(page)) {
if (nfs_readdir_page_validate(page, last_cookie, change_attr)) if (nfs_readdir_page_validate(page, last_cookie, change_attr))
return page; return page;
@ -371,11 +391,6 @@ static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
return page; return page;
} }
static loff_t nfs_readdir_page_offset(struct page *page)
{
return (loff_t)page->index * (loff_t)nfs_readdir_array_maxentries();
}
static u64 nfs_readdir_page_last_cookie(struct page *page) static u64 nfs_readdir_page_last_cookie(struct page *page)
{ {
struct nfs_cache_array *array; struct nfs_cache_array *array;
@ -408,11 +423,11 @@ static void nfs_readdir_page_set_eof(struct page *page)
} }
static struct page *nfs_readdir_page_get_next(struct address_space *mapping, static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
pgoff_t index, u64 cookie) u64 cookie, u64 change_attr)
{ {
struct page *page; struct page *page;
page = nfs_readdir_page_get_locked(mapping, index, cookie); page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
if (page) { if (page) {
if (nfs_readdir_page_last_cookie(page) == cookie) if (nfs_readdir_page_last_cookie(page) == cookie)
return page; return page;
@ -452,6 +467,13 @@ static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
desc->last_cookie = array->array[0].cookie; desc->last_cookie = array->array[0].cookie;
} }
/*
 * Reset the search state held in the descriptor: the page index, the
 * last cookie and the current index are all set back to zero.
 */
static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
{
	desc->page_index = 0;
	desc->last_cookie = 0;
	desc->current_index = 0;
}
static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc) struct nfs_readdir_descriptor *desc)
{ {
@ -492,8 +514,7 @@ static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
struct nfs_readdir_descriptor *desc) struct nfs_readdir_descriptor *desc)
{ {
int i; unsigned int i;
loff_t new_pos;
int status = -EAGAIN; int status = -EAGAIN;
if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie)) if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
@ -501,32 +522,10 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
for (i = 0; i < array->size; i++) { for (i = 0; i < array->size; i++) {
if (array->array[i].cookie == desc->dir_cookie) { if (array->array[i].cookie == desc->dir_cookie) {
struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
new_pos = nfs_readdir_page_offset(desc->page) + i;
if (desc->attr_gencount != nfsi->attr_gencount) {
desc->duped = 0;
desc->attr_gencount = nfsi->attr_gencount;
} else if (new_pos < desc->prev_index) {
if (desc->duped > 0
&& desc->dup_cookie == desc->dir_cookie) {
if (printk_ratelimit()) {
pr_notice("NFS: directory %pD2 contains a readdir loop."
"Please contact your server vendor. "
"The file: %s has duplicate cookie %llu\n",
desc->file, array->array[i].name, desc->dir_cookie);
}
status = -ELOOP;
goto out;
}
desc->dup_cookie = desc->dir_cookie;
desc->duped = -1;
}
if (nfs_readdir_use_cookie(desc->file)) if (nfs_readdir_use_cookie(desc->file))
desc->ctx->pos = desc->dir_cookie; desc->ctx->pos = desc->dir_cookie;
else else
desc->ctx->pos = new_pos; desc->ctx->pos = desc->current_index + i;
desc->prev_index = new_pos;
desc->cache_entry_index = i; desc->cache_entry_index = i;
return 0; return 0;
} }
@ -538,7 +537,6 @@ check_eof:
desc->eof = true; desc->eof = true;
} else } else
nfs_readdir_seek_next_array(array, desc); nfs_readdir_seek_next_array(array, desc);
out:
return status; return status;
} }
@ -785,10 +783,9 @@ out:
/* Perform conversion from xdr to cache array */ /* Perform conversion from xdr to cache array */
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
struct nfs_entry *entry, struct nfs_entry *entry,
struct page **xdr_pages, struct page **xdr_pages, unsigned int buflen,
unsigned int buflen, struct page **arrays, size_t narrays,
struct page **arrays, u64 change_attr)
size_t narrays)
{ {
struct address_space *mapping = desc->file->f_mapping; struct address_space *mapping = desc->file->f_mapping;
struct xdr_stream stream; struct xdr_stream stream;
@ -828,18 +825,16 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
break; break;
arrays++; arrays++;
*arrays = page = new; *arrays = page = new;
desc->page_index_max++;
} else { } else {
new = nfs_readdir_page_get_next(mapping, new = nfs_readdir_page_get_next(
page->index + 1, mapping, entry->prev_cookie, change_attr);
entry->prev_cookie);
if (!new) if (!new)
break; break;
if (page != *arrays) if (page != *arrays)
nfs_readdir_page_unlock_and_put(page); nfs_readdir_page_unlock_and_put(page);
page = new; page = new;
desc->page_index_max = new->index;
} }
desc->page_index_max++;
status = nfs_readdir_add_to_array(entry, page); status = nfs_readdir_add_to_array(entry, page);
} while (!status && !entry->eof); } while (!status && !entry->eof);
@ -899,6 +894,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
__be32 *verf_arg, __be32 *verf_res, __be32 *verf_arg, __be32 *verf_res,
struct page **arrays, size_t narrays) struct page **arrays, size_t narrays)
{ {
u64 change_attr;
struct page **pages; struct page **pages;
struct page *page = *arrays; struct page *page = *arrays;
struct nfs_entry *entry; struct nfs_entry *entry;
@ -923,6 +919,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
if (!pages) if (!pages)
goto out; goto out;
change_attr = inode_peek_iversion_raw(inode);
status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages, status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
dtsize, verf_res); dtsize, verf_res);
if (status < 0) if (status < 0)
@ -931,7 +928,7 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
pglen = status; pglen = status;
if (pglen != 0) if (pglen != 0)
status = nfs_readdir_page_filler(desc, entry, pages, pglen, status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays); arrays, narrays, change_attr);
else else
nfs_readdir_page_set_eof(page); nfs_readdir_page_set_eof(page);
desc->buffer_fills++; desc->buffer_fills++;
@ -961,9 +958,11 @@ nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
static struct page * static struct page *
nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
{ {
return nfs_readdir_page_get_locked(desc->file->f_mapping, struct address_space *mapping = desc->file->f_mapping;
desc->page_index, u64 change_attr = inode_peek_iversion_raw(mapping->host);
desc->last_cookie);
return nfs_readdir_page_get_locked(mapping, desc->last_cookie,
change_attr);
} }
/* /*
@ -995,7 +994,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
trace_nfs_readdir_cache_fill_done(inode, res); trace_nfs_readdir_cache_fill_done(inode, res);
if (res == -EBADCOOKIE || res == -ENOTSYNC) { if (res == -EBADCOOKIE || res == -ENOTSYNC) {
invalidate_inode_pages2(desc->file->f_mapping); invalidate_inode_pages2(desc->file->f_mapping);
desc->page_index = 0; nfs_readdir_rewind_search(desc);
trace_nfs_readdir_invalidate_cache_range( trace_nfs_readdir_invalidate_cache_range(
inode, 0, MAX_LFS_FILESIZE); inode, 0, MAX_LFS_FILESIZE);
return -EAGAIN; return -EAGAIN;
@ -1009,12 +1008,10 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) { memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
memcpy(nfsi->cookieverf, verf, memcpy(nfsi->cookieverf, verf,
sizeof(nfsi->cookieverf)); sizeof(nfsi->cookieverf));
invalidate_inode_pages2_range(desc->file->f_mapping, invalidate_inode_pages2_range(desc->file->f_mapping, 1,
desc->page_index_max + 1,
-1); -1);
trace_nfs_readdir_invalidate_cache_range( trace_nfs_readdir_invalidate_cache_range(
inode, desc->page_index_max + 1, inode, 1, MAX_LFS_FILESIZE);
MAX_LFS_FILESIZE);
} }
} }
res = nfs_readdir_search_array(desc); res = nfs_readdir_search_array(desc);
@ -1030,11 +1027,6 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
int res; int res;
do { do {
if (desc->page_index == 0) {
desc->current_index = 0;
desc->prev_index = 0;
desc->last_cookie = 0;
}
res = find_and_lock_cache_page(desc); res = find_and_lock_cache_page(desc);
} while (res == -EAGAIN); } while (res == -EAGAIN);
return res; return res;
@ -1072,8 +1064,6 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
desc->ctx->pos = desc->dir_cookie; desc->ctx->pos = desc->dir_cookie;
else else
desc->ctx->pos++; desc->ctx->pos++;
if (desc->duped != 0)
desc->duped = 1;
} }
if (array->page_is_eof) if (array->page_is_eof)
desc->eof = !desc->eob; desc->eof = !desc->eob;
@ -1115,7 +1105,6 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
desc->page_index = 0; desc->page_index = 0;
desc->cache_entry_index = 0; desc->cache_entry_index = 0;
desc->last_cookie = desc->dir_cookie; desc->last_cookie = desc->dir_cookie;
desc->duped = 0;
desc->page_index_max = 0; desc->page_index_max = 0;
trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie, trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
@ -1148,6 +1137,8 @@ out_free:
for (i = 0; i < sz && arrays[i]; i++) for (i = 0; i < sz && arrays[i]; i++)
nfs_readdir_page_array_free(arrays[i]); nfs_readdir_page_array_free(arrays[i]);
out: out:
if (!nfs_readdir_use_cookie(desc->file))
nfs_readdir_rewind_search(desc);
desc->page_index_max = -1; desc->page_index_max = -1;
kfree(arrays); kfree(arrays);
dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
@ -1158,17 +1149,14 @@ out:
static void nfs_readdir_handle_cache_misses(struct inode *inode, static void nfs_readdir_handle_cache_misses(struct inode *inode,
struct nfs_readdir_descriptor *desc, struct nfs_readdir_descriptor *desc,
pgoff_t page_index,
unsigned int cache_misses) unsigned int cache_misses)
{ {
if (desc->ctx->pos == 0 || if (desc->ctx->pos == 0 ||
cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD) cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD)
return; return;
if (invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1) == 0) if (invalidate_mapping_pages(inode->i_mapping, 0, -1) == 0)
return; return;
trace_nfs_readdir_invalidate_cache_range( trace_nfs_readdir_invalidate_cache_range(inode, 0, MAX_LFS_FILESIZE);
inode, (loff_t)(page_index + 1) << PAGE_SHIFT,
MAX_LFS_FILESIZE);
} }
/* The file offset position represents the dirent entry number. A /* The file offset position represents the dirent entry number. A
@ -1183,7 +1171,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
struct nfs_open_dir_context *dir_ctx = file->private_data; struct nfs_open_dir_context *dir_ctx = file->private_data;
struct nfs_readdir_descriptor *desc; struct nfs_readdir_descriptor *desc;
unsigned int cache_hits, cache_misses; unsigned int cache_hits, cache_misses;
pgoff_t page_index;
int res; int res;
dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
@ -1208,10 +1195,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
spin_lock(&file->f_lock); spin_lock(&file->f_lock);
desc->dir_cookie = dir_ctx->dir_cookie; desc->dir_cookie = dir_ctx->dir_cookie;
desc->dup_cookie = dir_ctx->dup_cookie; desc->page_index = dir_ctx->page_index;
desc->duped = dir_ctx->duped;
page_index = dir_ctx->page_index;
desc->page_index = page_index;
desc->last_cookie = dir_ctx->last_cookie; desc->last_cookie = dir_ctx->last_cookie;
desc->attr_gencount = dir_ctx->attr_gencount; desc->attr_gencount = dir_ctx->attr_gencount;
desc->eof = dir_ctx->eof; desc->eof = dir_ctx->eof;
@ -1227,7 +1211,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
} }
desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses); desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
nfs_readdir_handle_cache_misses(inode, desc, page_index, cache_misses); nfs_readdir_handle_cache_misses(inode, desc, cache_misses);
do { do {
res = readdir_search_pagecache(desc); res = readdir_search_pagecache(desc);
@ -1247,7 +1231,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
} }
if (res == -ETOOSMALL && desc->plus) { if (res == -ETOOSMALL && desc->plus) {
nfs_zap_caches(inode); nfs_zap_caches(inode);
desc->page_index = 0;
desc->plus = false; desc->plus = false;
desc->eof = false; desc->eof = false;
continue; continue;
@ -1261,9 +1244,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
spin_lock(&file->f_lock); spin_lock(&file->f_lock);
dir_ctx->dir_cookie = desc->dir_cookie; dir_ctx->dir_cookie = desc->dir_cookie;
dir_ctx->dup_cookie = desc->dup_cookie;
dir_ctx->last_cookie = desc->last_cookie; dir_ctx->last_cookie = desc->last_cookie;
dir_ctx->duped = desc->duped;
dir_ctx->attr_gencount = desc->attr_gencount; dir_ctx->attr_gencount = desc->attr_gencount;
dir_ctx->page_index = desc->page_index; dir_ctx->page_index = desc->page_index;
dir_ctx->eof = desc->eof; dir_ctx->eof = desc->eof;
@ -1306,13 +1287,13 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
if (offset != filp->f_pos) { if (offset != filp->f_pos) {
filp->f_pos = offset; filp->f_pos = offset;
dir_ctx->page_index = 0; dir_ctx->page_index = 0;
if (!nfs_readdir_use_cookie(filp)) if (!nfs_readdir_use_cookie(filp)) {
dir_ctx->dir_cookie = 0; dir_ctx->dir_cookie = 0;
else dir_ctx->last_cookie = 0;
} else {
dir_ctx->dir_cookie = offset; dir_ctx->dir_cookie = offset;
if (offset == 0) dir_ctx->last_cookie = offset;
memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf)); }
dir_ctx->duped = 0;
dir_ctx->eof = false; dir_ctx->eof = false;
} }
spin_unlock(&filp->f_lock); spin_unlock(&filp->f_lock);

View File

@ -106,11 +106,9 @@ struct nfs_open_dir_context {
unsigned long attr_gencount; unsigned long attr_gencount;
__be32 verf[NFS_DIR_VERIFIER_SIZE]; __be32 verf[NFS_DIR_VERIFIER_SIZE];
__u64 dir_cookie; __u64 dir_cookie;
__u64 dup_cookie;
__u64 last_cookie; __u64 last_cookie;
pgoff_t page_index; pgoff_t page_index;
unsigned int dtsize; unsigned int dtsize;
signed char duped;
bool eof; bool eof;
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };