Notable items here are a series to take advantage of David Howells'
netfs helper library from Jeff, three new filesystem client metrics
from Xiubo, ceph.dir.rsnaps vxattr from Yanhu and two auth-related
fixes from myself, marked for stable.  Interspersed is a smattering
of assorted fixes and cleanups across the filesystem.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAmCT8IITHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzizgqCACYbyY4Yr/2C8fZsn+P9rd97zRTbcC6
 eufTZwnlECLnc89BxJQRk9a2UpDJfC8RMM3/9tmiulc8G4M+ggVbdFQTCzsZox3c
 vLAunGeVyfKIY+16Bv2RNuoO3KeeZm5aB3jXJ5QcUPcXmd4XnHKI1FU2ebC56UJb
 pxxfHpE6fb59r6Ek1e5uUFyta4KDMrvwXozghuAPEgT1GpKeA9zMIGI0CkQbBHlW
 PWHpcahTiT6GWa/d9ud0CnfssiBxVydWyKTz9xppYC6LNdsZUf9tBmYYGRklcjoA
 yAwPSuqxNmg+7uWubEawc0+a/3fXORgp2SF7Rbp1XYE+HpfnMF1J+nIn
 =IO5c
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-5.13-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "Notable items here are

   - a series to take advantage of David Howells' netfs helper library
     from Jeff

   - three new filesystem client metrics from Xiubo

   - ceph.dir.rsnaps vxattr from Yanhu

   - two auth-related fixes from myself, marked for stable.

  Interspersed is a smattering of assorted fixes and cleanups across the
  filesystem"

* tag 'ceph-for-5.13-rc1' of git://github.com/ceph/ceph-client: (24 commits)
  libceph: allow addrvecs with a single NONE/blank address
  libceph: don't set global_id until we get an auth ticket
  libceph: bump CephXAuthenticate encoding version
  ceph: don't allow access to MDS-private inodes
  ceph: fix up some bare fetches of i_size
  ceph: convert some PAGE_SIZE invocations to thp_size()
  ceph: support getting ceph.dir.rsnaps vxattr
  ceph: drop pinned_page parameter from ceph_get_caps
  ceph: fix inode leak on getattr error in __fh_to_dentry
  ceph: only check pool permissions for regular files
  ceph: send opened files/pinned caps/opened inodes metrics to MDS daemon
  ceph: avoid counting the same request twice or more
  ceph: rename the metric helpers
  ceph: fix kerneldoc copypasta over ceph_start_io_direct
  ceph: use attach/detach_page_private for tracking snap context
  ceph: don't use d_add in ceph_handle_snapdir
  ceph: don't clobber i_snap_caps on non-I_NEW inode
  ceph: fix fall-through warnings for Clang
  ceph: convert ceph_readpages to ceph_readahead
  ceph: convert ceph_write_begin to netfs_write_begin
  ...
This commit is contained in:
Linus Torvalds 2021-05-06 10:27:02 -07:00
commit 7ac86b3dca
21 changed files with 582 additions and 724 deletions

View File

@ -6,6 +6,7 @@ config CEPH_FS
select LIBCRC32C
select CRYPTO_AES
select CRYPTO
select NETFS_SUPPORT
default n
help
Choose Y or M here to include support for mounting the

View File

@ -12,6 +12,7 @@
#include <linux/signal.h>
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/netfs.h>
#include "super.h"
#include "mds_client.h"
@ -61,6 +62,9 @@
(CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
struct page *page, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
if (PagePrivate(page))
@ -124,8 +128,7 @@ static int ceph_set_page_dirty(struct page *page)
* PagePrivate so that we get invalidatepage callback.
*/
BUG_ON(PagePrivate(page));
page->private = (unsigned long)snapc;
SetPagePrivate(page);
attach_page_private(page, snapc);
ret = __set_page_dirty_nobuffers(page);
WARN_ON(!PageLocked(page));
@ -144,19 +147,19 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
{
struct inode *inode;
struct ceph_inode_info *ci;
struct ceph_snap_context *snapc = page_snap_context(page);
struct ceph_snap_context *snapc;
wait_on_page_fscache(page);
inode = page->mapping->host;
ci = ceph_inode(inode);
if (offset != 0 || length != PAGE_SIZE) {
if (offset != 0 || length != thp_size(page)) {
dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
inode, page, page->index, offset, length);
return;
}
ceph_invalidate_fscache_page(inode, page);
WARN_ON(!PageLocked(page));
if (!PagePrivate(page))
return;
@ -164,333 +167,222 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
dout("%p invalidatepage %p idx %lu full dirty page\n",
inode, page, page->index);
snapc = detach_page_private(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
page->private = 0;
ClearPagePrivate(page);
}
static int ceph_releasepage(struct page *page, gfp_t g)
static int ceph_releasepage(struct page *page, gfp_t gfp)
{
dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
page, page->index, PageDirty(page) ? "" : "not ");
/* Can we release the page from the cache? */
if (!ceph_release_fscache_page(page, g))
return 0;
if (PageFsCache(page)) {
if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
return 0;
wait_on_page_fscache(page);
}
return !PagePrivate(page);
}
/* read a single page, without unlocking it. */
static int ceph_do_readpage(struct file *filp, struct page *page)
static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
{
struct inode *inode = file_inode(filp);
struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_layout *lo = &ci->i_layout;
u32 blockoff;
u64 blockno;
/* Expand the start downward */
blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
rreq->start = blockno * lo->stripe_unit;
rreq->len += blockoff;
/* Now, round up the length to the next block */
rreq->len = roundup(rreq->len, lo->stripe_unit);
}
/*
 * netfs ->clamp_length hook.
 *
 * Shrinks @subreq->len so that one subrequest never exceeds the extent
 * length reported by ceph_calc_file_object_mapping() for the inode's
 * layout, nor the rsize mount option.  Always returns true.
 */
static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq)
{
	struct inode *inode = subreq->rreq->mapping->host;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
	u64 obj_num, obj_off;
	u32 extent_len;

	/* Truncate the extent at the end of the current block */
	ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
				      &obj_num, &obj_off, &extent_len);

	/* ...and cap it at the configured maximum read size */
	subreq->len = min(extent_len, fsc->mount_options->rsize);

	return true;
}
/*
 * OSD request completion callback for a read issued on behalf of a netfs
 * subrequest (the subrequest is carried in req->r_priv).
 *
 * Records read metrics, translates the OSD result into the value handed to
 * netfs_subreq_terminated(), then releases the page vector attached to the
 * request and drops the inode reference held in req->r_inode.
 */
static void finish_netfs_read(struct ceph_osd_request *req)
{
struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
struct netfs_read_subrequest *subreq = req->r_priv;
int num_pages;
int err = req->r_result;
ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result,
subreq->len, i_size_read(req->r_inode));
/* no object means success but no data */
if (err == -ENOENT)
err = 0;
else if (err == -EBLOCKLISTED)
fsc->blocklisted = true;
/*
 * Short (or empty) successful read: flag the subrequest with
 * NETFS_SREQ_CLEAR_TAIL.  NOTE(review): presumably this makes netfs zero
 * the unread remainder - confirm against the netfs library docs.
 */
if (err >= 0 && err < subreq->len)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
/* subreq is not touched again after this call */
netfs_subreq_terminated(subreq, err, true);
/* Release the page vector that was attached to op 0 of this request */
num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
ceph_put_page_vector(osd_data->pages, num_pages, false);
/* Drop the inode reference stored alongside the request */
iput(req->r_inode);
}
static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
{
struct netfs_read_request *rreq = subreq->rreq;
struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
struct ceph_vino vino = ceph_vino(inode);
struct iov_iter iter;
struct page **pages;
size_t page_off;
int err = 0;
u64 off = page_offset(page);
u64 len = PAGE_SIZE;
u64 len = subreq->len;
if (off >= i_size_read(inode)) {
zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
return 0;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
0, 1, CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
req = NULL;
goto out;
}
dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off);
if (err < 0) {
dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
goto out;
}
/* should always give us a page-aligned read */
WARN_ON_ONCE(page_off);
len = err;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
req->r_callback = finish_netfs_read;
req->r_priv = subreq;
req->r_inode = inode;
ihold(inode);
err = ceph_osdc_start_request(req->r_osdc, req, false);
if (err)
iput(inode);
out:
ceph_osdc_put_request(req);
if (err)
netfs_subreq_terminated(subreq, err, false);
dout("%s: result %d\n", __func__, err);
}
/*
 * netfs ->init_rreq hook.  Intentionally empty: no per-read-request state
 * is set up here; both @rreq and @file are unused.
 */
static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
{
}
/*
 * netfs ->cleanup hook.
 *
 * @priv is not a real pointer: it carries a cap bitmask packed into the
 * pointer value.  When that mask is non-zero, the corresponding cap
 * references are dropped on the inode that owns @mapping.
 */
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
	int caps = (uintptr_t)priv;

	if (!caps)
		return;

	ceph_put_cap_refs(ceph_inode(mapping->host), caps);
}
/*
 * Hook table passed to the netfs read helpers (netfs_readpage(),
 * netfs_readahead() and netfs_write_begin()) by this file's address-space
 * operations.
 *
 * NOTE(review): no user outside this file is visible from here; if there
 * is none, this object could be given static linkage - confirm before
 * changing.
 */
const struct netfs_read_request_ops ceph_netfs_read_ops = {
.init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,
.expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
.cleanup = ceph_readahead_cleanup,
};
/* read a single page, without unlocking it. */
static int ceph_readpage(struct file *file, struct page *page)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_vino vino = ceph_vino(inode);
u64 off = page_offset(page);
u64 len = thp_size(page);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
/*
* Uptodate inline data should have been added
* into page cache while getting Fcr caps.
*/
if (off == 0)
if (off == 0) {
unlock_page(page);
return -EINVAL;
zero_user_segment(page, 0, PAGE_SIZE);
}
zero_user_segment(page, 0, thp_size(page));
SetPageUptodate(page);
unlock_page(page);
return 0;
}
err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
return -EINPROGRESS;
dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
vino.ino, vino.snap, filp, off, len, page, page->index);
req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1,
CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
if (IS_ERR(req))
return PTR_ERR(req);
vino.ino, vino.snap, file, off, len, page, page->index);
osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
err = ceph_osdc_start_request(osdc, req, false);
if (!err)
err = ceph_osdc_wait_request(osdc, req);
ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
ceph_osdc_put_request(req);
dout("readpage result %d\n", err);
if (err == -ENOENT)
err = 0;
if (err < 0) {
ceph_fscache_readpage_cancel(inode, page);
if (err == -EBLOCKLISTED)
fsc->blocklisted = true;
goto out;
}
if (err < PAGE_SIZE)
/* zero fill remainder of page */
zero_user_segment(page, err, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
ceph_readpage_to_fscache(inode, page);
out:
return err < 0 ? err : 0;
return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
}
static int ceph_readpage(struct file *filp, struct page *page)
static void ceph_readahead(struct readahead_control *ractl)
{
int r = ceph_do_readpage(filp, page);
if (r != -EINPROGRESS)
unlock_page(page);
else
r = 0;
return r;
}
/*
* Finish an async read(ahead) op.
*/
static void finish_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_data *osd_data;
int rc = req->r_result <= 0 ? req->r_result : 0;
int bytes = req->r_result >= 0 ? req->r_result : 0;
int num_pages;
int i;
dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
if (rc == -EBLOCKLISTED)
ceph_inode_to_client(inode)->blocklisted = true;
/* unlock all pages, zeroing any data we didn't read */
osd_data = osd_req_op_extent_osd_data(req, 0);
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
num_pages = calc_pages_for((u64)osd_data->alignment,
(u64)osd_data->length);
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
if (rc < 0 && rc != -ENOENT) {
ceph_fscache_readpage_cancel(inode, page);
goto unlock;
}
if (bytes < (int)PAGE_SIZE) {
/* zero (remainder of) page */
int s = bytes < 0 ? 0 : bytes;
zero_user_segment(page, s, PAGE_SIZE);
}
dout("finish_read %p uptodate %p idx %lu\n", inode, page,
page->index);
flush_dcache_page(page);
SetPageUptodate(page);
ceph_readpage_to_fscache(inode, page);
unlock:
unlock_page(page);
put_page(page);
bytes -= PAGE_SIZE;
}
ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, rc);
kfree(osd_data->pages);
}
/*
* start an async read(ahead) operation. return nr_pages we submitted
* a read for on success, or negative error code.
*/
static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
struct list_head *page_list, int max)
{
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *page = lru_to_page(page_list);
struct ceph_vino vino;
struct ceph_osd_request *req;
u64 off;
u64 len;
int i;
struct page **pages;
pgoff_t next_index;
int nr_pages = 0;
struct inode *inode = file_inode(ractl->file);
struct ceph_file_info *fi = ractl->file->private_data;
struct ceph_rw_context *rw_ctx;
int got = 0;
int ret = 0;
if (!rw_ctx) {
/* caller of readpages does not hold buffer and read caps
* (fadvise, madvise and readahead cases) */
int want = CEPH_CAP_FILE_CACHE;
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
true, &got);
if (ret < 0) {
dout("start_read %p, error getting cap\n", inode);
} else if (!(got & want)) {
dout("start_read %p, no cache cap\n", inode);
ret = 0;
}
if (ret <= 0) {
if (got)
ceph_put_cap_refs(ci, got);
while (!list_empty(page_list)) {
page = lru_to_page(page_list);
list_del(&page->lru);
put_page(page);
}
return ret;
}
}
off = (u64) page_offset(page);
/* count pages */
next_index = page->index;
list_for_each_entry_reverse(page, page_list, lru) {
if (page->index != next_index)
break;
nr_pages++;
next_index++;
if (max && nr_pages == max)
break;
}
len = nr_pages << PAGE_SHIFT;
dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
off, len);
vino = ceph_vino(inode);
req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
0, 1, CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
goto out;
}
/* build page vector */
nr_pages = calc_pages_for(0, len);
pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
goto out_put;
}
for (i = 0; i < nr_pages; ++i) {
page = list_entry(page_list->prev, struct page, lru);
BUG_ON(PageLocked(page));
list_del(&page->lru);
dout("start_read %p adding %p idx %lu\n", inode, page,
page->index);
if (add_to_page_cache_lru(page, &inode->i_data, page->index,
GFP_KERNEL)) {
ceph_fscache_uncache_page(inode, page);
put_page(page);
dout("start_read %p add_to_page_cache failed %p\n",
inode, page);
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
}
pages[i] = page;
}
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
req->r_callback = finish_read;
req->r_inode = inode;
dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
ret = ceph_osdc_start_request(osdc, req, false);
if (ret < 0)
goto out_pages;
ceph_osdc_put_request(req);
/* After adding locked pages to page cache, the inode holds cache cap.
* So we can drop our cap refs. */
if (got)
ceph_put_cap_refs(ci, got);
return nr_pages;
out_pages:
for (i = 0; i < nr_pages; ++i) {
ceph_fscache_readpage_cancel(inode, pages[i]);
unlock_page(pages[i]);
}
ceph_put_page_vector(pages, nr_pages, false);
out_put:
ceph_osdc_put_request(req);
out:
if (got)
ceph_put_cap_refs(ci, got);
return ret;
}
/*
* Read multiple pages. Leave pages we don't read + unlock in page_list;
* the caller (VM) cleans them up.
*/
static int ceph_readpages(struct file *file, struct address_space *mapping,
struct list_head *page_list, unsigned nr_pages)
{
struct inode *inode = file_inode(file);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_file_info *fi = file->private_data;
struct ceph_rw_context *rw_ctx;
int rc = 0;
int max = 0;
if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
return -EINVAL;
rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
&nr_pages);
if (rc == 0)
goto out;
return;
rw_ctx = ceph_find_rw_context(fi);
max = fsc->mount_options->rsize >> PAGE_SHIFT;
dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
inode, file, rw_ctx, nr_pages, max);
while (!list_empty(page_list)) {
rc = start_read(inode, rw_ctx, page_list, max);
if (rc < 0)
goto out;
}
out:
ceph_fscache_readpages_cancel(inode, page_list);
if (!rw_ctx) {
/*
* readahead callers do not necessarily hold Fcb caps
* (e.g. fadvise, madvise).
*/
int want = CEPH_CAP_FILE_CACHE;
dout("readpages %p file %p ret %d\n", inode, file, rc);
return rc;
ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
if (ret < 0)
dout("start_read %p, error getting cap\n", inode);
else if (!(got & want))
dout("start_read %p, no cache cap\n", inode);
if (ret <= 0)
return;
}
netfs_readahead(ractl, &ceph_netfs_read_ops, (void *)(uintptr_t)got);
}
struct ceph_writeback_ctl
@ -585,8 +477,8 @@ static u64 get_writepages_data_length(struct inode *inode,
spin_unlock(&ci->i_ceph_lock);
WARN_ON(!found);
}
if (end > page_offset(page) + PAGE_SIZE)
end = page_offset(page) + PAGE_SIZE;
if (end > page_offset(page) + thp_size(page))
end = page_offset(page) + thp_size(page);
return end > start ? end - start : 0;
}
@ -604,7 +496,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
struct ceph_snap_context *snapc, *oldest;
loff_t page_off = page_offset(page);
int err;
loff_t len = PAGE_SIZE;
loff_t len = thp_size(page);
struct ceph_writeback_ctl ceph_wbc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
@ -632,7 +524,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
/* is this a partial page at end of file? */
if (page_off >= ceph_wbc.i_size) {
dout("%p page eof %llu\n", page, ceph_wbc.i_size);
page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
page->mapping->a_ops->invalidatepage(page, 0, thp_size(page));
return 0;
}
@ -658,7 +550,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
}
/* it may be a short write due to an object boundary */
WARN_ON_ONCE(len > PAGE_SIZE);
WARN_ON_ONCE(len > thp_size(page));
osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
@ -667,7 +559,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
if (!err)
err = ceph_osdc_wait_request(osdc, req);
ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
ceph_osdc_put_request(req);
@ -695,8 +587,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
dout("writepage cleaned page %p\n", page);
err = 0; /* vfs expects us to return 0 */
}
page->private = 0;
ClearPagePrivate(page);
oldest = detach_page_private(page);
WARN_ON_ONCE(oldest != snapc);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
@ -755,7 +647,7 @@ static void writepages_finish(struct ceph_osd_request *req)
ceph_clear_error_write(ci);
}
ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, rc);
/*
@ -788,11 +680,9 @@ static void writepages_finish(struct ceph_osd_request *req)
clear_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
ceph_put_snap_context(page_snap_context(page));
page->private = 0;
ClearPagePrivate(page);
dout("unlocking %p\n", page);
ceph_put_snap_context(detach_page_private(page));
end_page_writeback(page);
dout("unlocking %p\n", page);
if (remove_page)
generic_error_remove_page(inode->i_mapping,
@ -949,7 +839,7 @@ static int ceph_writepages_start(struct address_space *mapping,
page_offset(page) >= i_size_read(inode)) &&
clear_page_dirty_for_io(page))
mapping->a_ops->invalidatepage(page,
0, PAGE_SIZE);
0, thp_size(page));
unlock_page(page);
continue;
}
@ -1038,7 +928,7 @@ static int ceph_writepages_start(struct address_space *mapping,
pages[locked_pages++] = page;
pvec.pages[i] = NULL;
len += PAGE_SIZE;
len += thp_size(page);
}
/* did we get anything? */
@ -1087,7 +977,7 @@ static int ceph_writepages_start(struct address_space *mapping,
BUG_ON(IS_ERR(req));
}
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
PAGE_SIZE - offset);
thp_size(page) - offset);
req->r_callback = writepages_finish;
req->r_inode = inode;
@ -1117,7 +1007,7 @@ static int ceph_writepages_start(struct address_space *mapping,
}
set_page_writeback(pages[i]);
len += PAGE_SIZE;
len += thp_size(page);
}
if (ceph_wbc.size_stable) {
@ -1126,7 +1016,7 @@ static int ceph_writepages_start(struct address_space *mapping,
/* writepages_finish() clears writeback pages
* according to the data length, so make sure
* data length covers all locked pages */
u64 min_len = len + 1 - PAGE_SIZE;
u64 min_len = len + 1 - thp_size(page);
len = get_writepages_data_length(inode, pages[i - 1],
offset);
len = max(len, min_len);
@ -1302,6 +1192,31 @@ ceph_find_incompatible(struct page *page)
return NULL;
}
/*
 * netfs ->check_write_begin hook.
 *
 * Asks ceph_find_incompatible() whether @page may be written as-is.
 *
 * Returns 0 when nothing incompatible is reported; @page is left untouched.
 * Otherwise @page is unlocked and its reference dropped, and the result is:
 *   - the error encoded in the returned pointer, if it is an ERR_PTR;
 *   - the non-zero result of the killable wait, if the wait was interrupted;
 *   - -EAGAIN once the wait for the snap context completes successfully.
 *
 * @pos, @len and @_fsdata are unused.
 */
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
					struct page *page, void **_fsdata)
{
	struct inode *inode = file_inode(file);
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_snap_context *snapc = ceph_find_incompatible(page);
	int ret;

	/* Nothing in the way: the write may proceed on this page */
	if (!snapc)
		return 0;

	/* Every remaining path gives the page back first */
	unlock_page(page);
	put_page(page);

	if (IS_ERR(snapc))
		return PTR_ERR(snapc);

	/* Kick writeback, then wait until the old snap context is dealt with */
	ceph_queue_writeback(inode);
	ret = wait_event_killable(ci->i_cap_wq,
				  context_is_writeable_or_written(inode, snapc));
	ceph_put_snap_context(snapc);

	if (ret)
		return ret;

	return -EAGAIN;
}
/*
* We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context.
@ -1312,75 +1227,47 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
struct page *page = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
int pos_in_page = pos & ~PAGE_MASK;
int r = 0;
int r;
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
/*
* Uninlining should have already been done and everything updated, EXCEPT
* for inline_version sent to the MDS.
*/
if (ci->i_inline_version != CEPH_INLINE_NONE) {
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;
}
if (!page)
return -ENOMEM;
snapc = ceph_find_incompatible(page);
if (snapc) {
if (IS_ERR(snapc)) {
r = PTR_ERR(snapc);
break;
/*
* The inline_version on a new inode is set to 1. If that's the
* case, then the page is brand new and isn't yet Uptodate.
*/
r = 0;
if (index == 0 && ci->i_inline_version != 1) {
if (!PageUptodate(page)) {
WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
ci->i_inline_version);
r = -EINVAL;
}
unlock_page(page);
put_page(page);
page = NULL;
ceph_queue_writeback(inode);
r = wait_event_killable(ci->i_cap_wq,
context_is_writeable_or_written(inode, snapc));
ceph_put_snap_context(snapc);
if (r != 0)
break;
continue;
goto out;
}
if (PageUptodate(page)) {
dout(" page %p already uptodate\n", page);
break;
}
/*
* In some cases we don't need to read at all:
* - full page write
* - write that lies completely beyond EOF
* - write that covers the the page from start to EOF or beyond it
*/
if ((pos_in_page == 0 && len == PAGE_SIZE) ||
(pos >= i_size_read(inode)) ||
(pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
zero_user_segments(page, 0, pos_in_page,
pos_in_page + len, PAGE_SIZE);
break;
}
/*
* We need to read it. If we get back -EINPROGRESS, then the page was
* handed off to fscache and it will be unlocked when the read completes.
* Refind the page in that case so we can reacquire the page lock. Otherwise
* we got a hard error or the read was completed synchronously.
*/
r = ceph_do_readpage(file, page);
if (r != -EINPROGRESS)
break;
zero_user_segment(page, 0, thp_size(page));
SetPageUptodate(page);
goto out;
}
r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
&ceph_netfs_read_ops, NULL);
out:
if (r == 0)
wait_on_page_fscache(page);
if (r < 0) {
if (page) {
unlock_page(page);
if (page)
put_page(page);
}
} else {
WARN_ON_ONCE(!PageLocked(page));
*pagep = page;
}
return r;
@ -1438,7 +1325,7 @@ static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
const struct address_space_operations ceph_aops = {
.readpage = ceph_readpage,
.readpages = ceph_readpages,
.readahead = ceph_readahead,
.writepage = ceph_writepage,
.writepages = ceph_writepages_start,
.write_begin = ceph_write_begin,
@ -1470,7 +1357,6 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
int want, got, err;
sigset_t oldset;
@ -1478,21 +1364,20 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ceph_block_sigs(&oldset);
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
inode, ceph_vinop(inode), off);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
got = 0;
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
&got, &pinned_page);
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, &got);
if (err < 0)
goto out_restore;
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
dout("filemap_fault %p %llu got cap refs on %s\n",
inode, off, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
@ -1500,14 +1385,11 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
inode, off, (size_t)PAGE_SIZE,
ceph_cap_string(got), ret);
dout("filemap_fault %p %llu drop cap refs %s ret %x\n",
inode, off, ceph_cap_string(got), ret);
} else
err = -EAGAIN;
if (pinned_page)
put_page(pinned_page);
ceph_put_cap_refs(ci, got);
if (err != -EAGAIN)
@ -1542,8 +1424,8 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
inode, off, (size_t)PAGE_SIZE, ret);
dout("filemap_fault %p %llu read inline data ret %x\n",
inode, off, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
@ -1553,9 +1435,6 @@ static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
return ret;
}
/*
* Reuse write_begin here for simplicity.
*/
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
@ -1591,10 +1470,10 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
goto out_free;
}
if (off + PAGE_SIZE <= size)
len = PAGE_SIZE;
if (off + thp_size(page) <= size)
len = thp_size(page);
else
len = size & ~PAGE_MASK;
len = offset_in_thp(page, size);
dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
inode, ceph_vinop(inode), off, len, size);
@ -1604,8 +1483,7 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
got = 0;
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
&got, NULL);
err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, &got);
if (err < 0)
goto out_free;
@ -1832,7 +1710,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out_put:
@ -2057,6 +1935,10 @@ int ceph_pool_perm_check(struct inode *inode, int need)
s64 pool;
int ret, flags;
/* Only need to do this for regular files */
if (!S_ISREG(inode->i_mode))
return 0;
if (ci->i_vino.snap != CEPH_NOSNAP) {
/*
* Pool permission check needs to write to the first object.

View File

@ -173,7 +173,6 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
ci->fscache = NULL;
fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
fscache_relinquish_cookie(cookie, &ci->i_vino, false);
}
@ -194,7 +193,6 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
dout("fscache_file_set_cookie %p %p disabling cache\n",
inode, filp);
fscache_disable_cookie(ci->fscache, &ci->i_vino, false);
fscache_uncache_all_inode_pages(ci->fscache, inode);
} else {
fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode),
ceph_fscache_can_enable, inode);
@ -205,108 +203,6 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
}
}
static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error)
{
if (!error)
SetPageUptodate(page);
unlock_page(page);
}
static inline bool cache_valid(struct ceph_inode_info *ci)
{
return ci->i_fscache_gen == ci->i_rdcache_gen;
}
/* Atempt to read from the fscache,
*
* This function is called from the readpage_nounlock context. DO NOT attempt to
* unlock the page here (or in the callback).
*/
int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int ret;
if (!cache_valid(ci))
return -ENOBUFS;
ret = fscache_read_or_alloc_page(ci->fscache, page,
ceph_readpage_from_fscache_complete, NULL,
GFP_KERNEL);
switch (ret) {
case 0: /* Page found */
dout("page read submitted\n");
return 0;
case -ENOBUFS: /* Pages were not found, and can't be */
case -ENODATA: /* Pages were not found */
dout("page/inode not in cache\n");
return ret;
default:
dout("%s: unknown error ret = %i\n", __func__, ret);
return ret;
}
}
int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int ret;
if (!cache_valid(ci))
return -ENOBUFS;
ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
ceph_readpage_from_fscache_complete,
NULL, mapping_gfp_mask(mapping));
switch (ret) {
case 0: /* All pages found */
dout("all-page read submitted\n");
return 0;
case -ENOBUFS: /* Some pages were not found, and can't be */
case -ENODATA: /* some pages were not found */
dout("page/inode not in cache\n");
return ret;
default:
dout("%s: unknown error ret = %i\n", __func__, ret);
return ret;
}
}
void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int ret;
if (!PageFsCache(page))
return;
if (!cache_valid(ci))
return;
ret = fscache_write_page(ci->fscache, page, i_size_read(inode),
GFP_KERNEL);
if (ret)
fscache_uncache_page(ci->fscache, page);
}
/*
 * Drop a page from the fscache.
 *
 * Pages without PageFsCache set are ignored.  Otherwise any write of the
 * page to the cache is waited for before the page is uncached.
 */
void ceph_invalidate_fscache_page(struct inode *inode, struct page *page)
{
	struct ceph_inode_info *ci = ceph_inode(inode);

	if (PageFsCache(page)) {
		fscache_wait_on_page_write(ci->fscache, page);
		fscache_uncache_page(ci->fscache, page);
	}
}
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
if (fscache_cookie_valid(fsc->fscache)) {
@ -329,24 +225,3 @@ void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
}
fsc->fscache = NULL;
}
/*
 * Bring the inode's fscache generation back in step with its read-cache
 * generation, invalidating the cookie first when the consistency check
 * reports a problem.
 *
 * caller should hold CEPH_CAP_FILE_{RD,CACHE}
 */
void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
	/* Fast path: the generations already agree. */
	if (cache_valid(ci))
		return;

	/*
	 * Reuse i_truncate_mutex.  There should be no pending truncate
	 * while the caller holds CEPH_CAP_FILE_RD.
	 */
	mutex_lock(&ci->i_truncate_mutex);

	/* Check again now that the mutex is held. */
	if (cache_valid(ci))
		goto out_unlock;

	if (fscache_check_consistency(ci->fscache, &ci->i_vino))
		fscache_invalidate(ci->fscache);

	spin_lock(&ci->i_ceph_lock);
	ci->i_fscache_gen = ci->i_rdcache_gen;
	spin_unlock(&ci->i_ceph_lock);

out_unlock:
	mutex_unlock(&ci->i_truncate_mutex);
}

View File

@ -9,6 +9,8 @@
#ifndef _CEPH_CACHE_H
#define _CEPH_CACHE_H
#include <linux/netfs.h>
#ifdef CONFIG_CEPH_FSCACHE
extern struct fscache_netfs ceph_cache_netfs;
@ -29,13 +31,15 @@ int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages);
void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
/*
 * Reset the per-inode fscache state: generation zero and no cookie.
 */
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
	ci->i_fscache_gen = 0;
	ci->fscache = NULL;
}
/*
 * Return the fscache cookie pointer stored on the ceph inode (may be NULL).
 */
static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
return ci->fscache;
}
static inline void ceph_fscache_invalidate(struct inode *inode)
@ -43,40 +47,21 @@ static inline void ceph_fscache_invalidate(struct inode *inode)
fscache_invalidate(ceph_inode(inode)->fscache);
}
static inline void ceph_fscache_uncache_page(struct inode *inode,
struct page *page)
static inline bool ceph_is_cache_enabled(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
return fscache_uncache_page(ci->fscache, page);
struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(inode));
if (!cookie)
return false;
return fscache_cookie_enabled(cookie);
}
static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
struct inode* inode = page->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
return fscache_maybe_release_page(ci->fscache, page, gfp);
}
struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
static inline void ceph_fscache_readpage_cancel(struct inode *inode,
struct page *page)
{
struct ceph_inode_info *ci = ceph_inode(inode);
if (fscache_cookie_valid(ci->fscache) && PageFsCache(page))
__fscache_uncache_page(ci->fscache, page);
return fscache_begin_read_operation(rreq, cookie);
}
static inline void ceph_fscache_readpages_cancel(struct inode *inode,
struct list_head *pages)
{
struct ceph_inode_info *ci = ceph_inode(inode);
return fscache_readpages_cancel(ci->fscache, pages);
}
/*
 * Set the fscache generation to one less than the read-cache generation,
 * so the two no longer compare equal.
 */
static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
{
ci->i_fscache_gen = ci->i_rdcache_gen - 1;
}
#else
static inline int ceph_fscache_register(void)
@ -102,6 +87,11 @@ static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
}
/* Stub for builds without CONFIG_CEPH_FSCACHE: there is never a cookie. */
static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
return NULL;
}
/* Stub for builds without CONFIG_CEPH_FSCACHE: does nothing. */
static inline void ceph_fscache_register_inode_cookie(struct inode *inode)
{
}
@ -115,62 +105,19 @@ static inline void ceph_fscache_file_set_cookie(struct inode *inode,
{
}
/*
 * Stubs for builds without CONFIG_CEPH_FSCACHE.  The void helpers do
 * nothing; the read helpers always return -ENOBUFS.
 */
static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
{
}
static inline void ceph_fscache_uncache_page(struct inode *inode,
struct page *pages)
{
}
/* Always returns -ENOBUFS. */
static inline int ceph_readpage_from_fscache(struct inode* inode,
struct page *page)
{
return -ENOBUFS;
}
/* Always returns -ENOBUFS. */
static inline int ceph_readpages_from_fscache(struct inode *inode,
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages)
{
return -ENOBUFS;
}
static inline void ceph_readpage_to_fscache(struct inode *inode,
struct page *page)
{
}
static inline void ceph_fscache_invalidate(struct inode *inode)
{
}
static inline void ceph_invalidate_fscache_page(struct inode *inode,
struct page *page)
static inline bool ceph_is_cache_enabled(struct inode *inode)
{
return false;
}
static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
return 1;
return -ENOBUFS;
}
/* Stubs for builds without CONFIG_CEPH_FSCACHE: each of these does nothing. */
static inline void ceph_fscache_readpage_cancel(struct inode *inode,
struct page *page)
{
}
static inline void ceph_fscache_readpages_cancel(struct inode *inode,
struct list_head *pages)
{
}
static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
{
}
#endif
#endif
#endif /* _CEPH_CACHE_H */

View File

@ -1390,7 +1390,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
arg->size = inode->i_size;
arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
if (cap == ci->i_auth_cap) {
@ -1867,6 +1867,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
u32 invalidating_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
ceph_fscache_invalidate(inode);
invalidate_mapping_pages(&inode->i_data, 0, -1);
spin_lock(&ci->i_ceph_lock);
@ -1884,7 +1885,7 @@ static int try_nonblocking_invalidate(struct inode *inode)
bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
loff_t size = ci->vfs_inode.i_size;
loff_t size = i_size_read(&ci->vfs_inode);
/* mds will adjust max size according to the reported size */
if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
return false;
@ -2730,10 +2731,6 @@ static int try_get_cap_refs(struct inode *inode, int need, int want,
*got = need | want;
else
*got = need;
if (S_ISREG(inode->i_mode) &&
(need & CEPH_CAP_FILE_RD) &&
!(*got & CEPH_CAP_FILE_CACHE))
ceph_disable_fscache_readpage(ci);
ceph_take_cap_refs(ci, *got, true);
ret = 1;
}
@ -2858,8 +2855,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want,
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
int ceph_get_caps(struct file *filp, int need, int want,
loff_t endoff, int *got, struct page **pinned_page)
int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
{
struct ceph_file_info *fi = filp->private_data;
struct inode *inode = file_inode(filp);
@ -2957,11 +2953,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
struct page *page =
find_get_page(inode->i_mapping, 0);
if (page) {
if (PageUptodate(page)) {
*pinned_page = page;
break;
}
bool uptodate = PageUptodate(page);
put_page(page);
if (uptodate)
break;
}
/*
* drop cap refs first because getattr while
@ -2983,11 +2979,6 @@ int ceph_get_caps(struct file *filp, int need, int want,
}
break;
}
if (S_ISREG(ci->vfs_inode.i_mode) &&
(_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
ceph_fscache_revalidate_cookie(ci);
*got = _got;
return 0;
}
@ -3308,7 +3299,7 @@ static void handle_cap_grant(struct inode *inode,
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
inode->i_size);
i_size_read(inode));
/*

View File

@ -162,34 +162,34 @@ static int metric_show(struct seq_file *s, void *p)
seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n");
seq_printf(s, "-----------------------------------------------------------------------------------\n");
spin_lock(&m->read_latency_lock);
spin_lock(&m->read_metric_lock);
total = m->total_reads;
sum = m->read_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->read_latency_min;
max = m->read_latency_max;
sq = m->read_latency_sq_sum;
spin_unlock(&m->read_latency_lock);
spin_unlock(&m->read_metric_lock);
CEPH_METRIC_SHOW("read", total, avg, min, max, sq);
spin_lock(&m->write_latency_lock);
spin_lock(&m->write_metric_lock);
total = m->total_writes;
sum = m->write_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->write_latency_min;
max = m->write_latency_max;
sq = m->write_latency_sq_sum;
spin_unlock(&m->write_latency_lock);
spin_unlock(&m->write_metric_lock);
CEPH_METRIC_SHOW("write", total, avg, min, max, sq);
spin_lock(&m->metadata_latency_lock);
spin_lock(&m->metadata_metric_lock);
total = m->total_metadatas;
sum = m->metadata_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->metadata_latency_min;
max = m->metadata_latency_max;
sq = m->metadata_latency_sq_sum;
spin_unlock(&m->metadata_latency_lock);
spin_unlock(&m->metadata_metric_lock);
CEPH_METRIC_SHOW("metadata", total, avg, min, max, sq);
seq_printf(s, "\n");

View File

@ -631,10 +631,12 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
break;
case SEEK_SET:
break;
case SEEK_END:
retval = -EOPNOTSUPP;
goto out;
default:
goto out;
}
@ -665,8 +667,8 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence)
/*
* Handle lookups for the hidden .snap directory.
*/
int ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err)
struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
@ -674,18 +676,17 @@ int ceph_handle_snapdir(struct ceph_mds_request *req,
/* .snap dir? */
if (err == -ENOENT &&
ceph_snap(parent) == CEPH_NOSNAP &&
strcmp(dentry->d_name.name,
fsc->mount_options->snapdir_name) == 0) {
strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
struct dentry *res;
struct inode *inode = ceph_get_snapdir(parent);
if (IS_ERR(inode))
return PTR_ERR(inode);
dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
dentry, dentry, inode);
BUG_ON(!d_unhashed(dentry));
d_add(dentry, inode);
err = 0;
res = d_splice_alias(inode, dentry);
dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n",
dentry, dentry, inode, res);
if (res)
dentry = res;
}
return err;
return dentry;
}
/*
@ -741,6 +742,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
struct dentry *res;
int op;
int mask;
int err;
@ -791,7 +793,13 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
res = ceph_handle_snapdir(req, dentry, err);
if (IS_ERR(res)) {
err = PTR_ERR(res);
} else {
dentry = res;
err = 0;
}
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);

View File

@ -129,6 +129,10 @@ static struct inode *__lookup_inode(struct super_block *sb, u64 ino)
vino.ino = ino;
vino.snap = CEPH_NOSNAP;
if (ceph_vino_is_reserved(vino))
return ERR_PTR(-ESTALE);
inode = ceph_find_inode(sb, vino);
if (!inode) {
struct ceph_mds_request *req;
@ -178,8 +182,10 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
return ERR_CAST(inode);
/* We need LINK caps to reliably check i_nlink */
err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
if (err)
if (err) {
iput(inode);
return ERR_PTR(err);
}
/* -ESTALE if inode has been unlinked and no file is open */
if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
iput(inode);
@ -212,6 +218,10 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb,
vino.ino = sfh->ino;
vino.snap = sfh->snapid;
}
if (ceph_vino_is_reserved(vino))
return ERR_PTR(-ESTALE);
inode = ceph_find_inode(sb, vino);
if (inode)
return d_obtain_alias(inode);

View File

@ -739,9 +739,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
err = ceph_handle_snapdir(req, dentry, err);
if (err)
dentry = ceph_handle_snapdir(req, dentry, err);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
goto out_req;
}
err = 0;
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
@ -892,7 +895,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
ceph_update_read_latency(&fsc->mdsc->metric,
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
ret);
@ -1034,16 +1037,6 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
dout("ceph_aio_complete_req %p rc %d bytes %u\n",
inode, rc, osd_data->bvec_pos.iter.bi_size);
/* r_start_latency == 0 means the request was not submitted */
if (req->r_start_latency) {
if (aio_req->write)
ceph_update_write_latency(metric, req->r_start_latency,
req->r_end_latency, rc);
else
ceph_update_read_latency(metric, req->r_start_latency,
req->r_end_latency, rc);
}
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
BUG_ON(!aio_req->write);
@ -1086,6 +1079,16 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req)
}
}
/* r_start_latency == 0 means the request was not submitted */
if (req->r_start_latency) {
if (aio_req->write)
ceph_update_write_metrics(metric, req->r_start_latency,
req->r_end_latency, rc);
else
ceph_update_read_metrics(metric, req->r_start_latency,
req->r_end_latency, rc);
}
put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
aio_req->should_dirty);
ceph_osdc_put_request(req);
@ -1290,10 +1293,10 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
if (write)
ceph_update_write_latency(metric, req->r_start_latency,
ceph_update_write_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
else
ceph_update_read_latency(metric, req->r_start_latency,
ceph_update_read_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
size = i_size_read(inode);
@ -1467,7 +1470,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, ret);
out:
ceph_osdc_put_request(req);
@ -1510,7 +1513,6 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct page *pinned_page = NULL;
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
int want, got = 0;
@ -1529,8 +1531,7 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
&got, &pinned_page);
ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
if (iocb->ki_flags & IOCB_DIRECT)
ceph_end_io_direct(inode);
@ -1571,10 +1572,6 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
if (pinned_page) {
put_page(pinned_page);
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
if (direct_lock)
@ -1753,8 +1750,7 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
&got, NULL);
err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
goto out;
@ -2083,7 +2079,7 @@ static long ceph_fallocate(struct file *file, int mode,
else
want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got);
if (ret < 0)
goto unlock;
@ -2121,7 +2117,7 @@ static int get_rd_wr_caps(struct file *src_filp, int *src_got,
retry_caps:
ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
dst_endoff, dst_got, NULL);
dst_endoff, dst_got);
if (ret < 0)
return ret;
@ -2143,7 +2139,7 @@ static int get_rd_wr_caps(struct file *src_filp, int *src_got,
return ret;
}
ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
CEPH_CAP_FILE_SHARED, -1, src_got);
if (ret < 0)
return ret;
/*... drop src_ci caps too, and retry */

View File

@ -56,6 +56,9 @@ struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
{
struct inode *inode;
if (ceph_vino_is_reserved(vino))
return ERR_PTR(-EREMOTEIO);
inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
ceph_set_ino_cb, &vino);
if (!inode)
@ -99,14 +102,15 @@ struct inode *ceph_get_snapdir(struct inode *parent)
inode->i_mtime = parent->i_mtime;
inode->i_ctime = parent->i_ctime;
inode->i_atime = parent->i_atime;
inode->i_op = &ceph_snapdir_iops;
inode->i_fop = &ceph_snapdir_fops;
ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
if (inode->i_state & I_NEW)
if (inode->i_state & I_NEW) {
inode->i_op = &ceph_snapdir_iops;
inode->i_fop = &ceph_snapdir_fops;
ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
unlock_new_inode(inode);
}
return inode;
}
@ -628,10 +632,11 @@ int ceph_fill_file_size(struct inode *inode, int issued,
{
struct ceph_inode_info *ci = ceph_inode(inode);
int queue_trunc = 0;
loff_t isize = i_size_read(inode);
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
(truncate_seq == ci->i_truncate_seq && size > isize)) {
dout("size %lld -> %llu\n", isize, size);
if (size > 0 && S_ISDIR(inode->i_mode)) {
pr_err("fill_file_size non-zero size for directory\n");
size = 0;
@ -925,6 +930,7 @@ int ceph_fill_inode(struct inode *inode, struct page *locked_page,
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ci->i_dir_pin = iinfo->dir_pin;
ci->i_rsnaps = iinfo->rsnaps;
ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
@ -1818,7 +1824,7 @@ bool ceph_inode_set_size(struct inode *inode, loff_t size)
bool ret;
spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size);
i_size_write(inode, size);
inode->i_blocks = calc_inode_blocks(size);
@ -1894,6 +1900,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
orig_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
ceph_fscache_invalidate(inode);
if (invalidate_inode_pages2(inode->i_mapping) < 0) {
pr_err("invalidate_pages %p fails\n", inode);
}
@ -2124,20 +2131,19 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
}
}
if (ia_valid & ATTR_SIZE) {
dout("setattr %p size %lld -> %lld\n", inode,
inode->i_size, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) &&
attr->ia_size > inode->i_size) {
loff_t isize = i_size_read(inode);
dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > isize) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
ia_valid |= ATTR_MTIME;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
attr->ia_size != inode->i_size) {
attr->ia_size != isize) {
req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
req->r_args.setattr.old_size =
cpu_to_le64(inode->i_size);
req->r_args.setattr.old_size = cpu_to_le64(isize);
mask |= CEPH_SETATTR_SIZE;
release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
@ -2247,7 +2253,7 @@ int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size > max(inode->i_size, fsc->max_file_size))
attr->ia_size > max(i_size_read(inode), fsc->max_file_size))
return -EFBIG;
if ((attr->ia_valid & ATTR_SIZE) &&

View File

@ -118,7 +118,7 @@ static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
}
/**
* ceph_end_io_direct - declare the file is being used for direct i/o
* ceph_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure

View File

@ -176,6 +176,13 @@ static int parse_reply_info_in(void **p, void *end,
memset(&info->snap_btime, 0, sizeof(info->snap_btime));
}
/* snapshot count, remains zero for v<=3 */
if (struct_v >= 4) {
ceph_decode_64_safe(p, end, info->rsnaps, bad);
} else {
info->rsnaps = 0;
}
*p = end;
} else {
if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
@ -214,7 +221,7 @@ static int parse_reply_info_in(void **p, void *end,
}
info->dir_pin = -ENODATA;
/* info->snap_btime remains zero */
/* info->snap_btime and info->rsnaps remain zero */
}
return 0;
bad:
@ -433,6 +440,13 @@ static int ceph_parse_deleg_inos(void **p, void *end,
ceph_decode_64_safe(p, end, start, bad);
ceph_decode_64_safe(p, end, len, bad);
/* Don't accept a delegation of system inodes */
if (start < CEPH_INO_SYSTEM_BASE) {
pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n",
start, len);
continue;
}
while (len--) {
int err = xa_insert(&s->s_delegated_inos, ino = start++,
DELEGATED_INO_AVAILABLE,
@ -3306,7 +3320,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* kick calling process */
complete_request(mdsc, req);
ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency,
ceph_update_metadata_metrics(&mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out:
ceph_mdsc_put_request(req);
@ -3780,7 +3794,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
rec.v1.size = cpu_to_le64(inode->i_size);
rec.v1.size = cpu_to_le64(i_size_read(inode));
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);

View File

@ -88,6 +88,7 @@ struct ceph_mds_reply_info_in {
s32 dir_pin;
struct ceph_timespec btime;
struct ceph_timespec snap_btime;
u64 rsnaps;
u64 change_attr;
};

View File

@ -17,6 +17,9 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_opened_files *files;
struct ceph_pinned_icaps *icaps;
struct ceph_opened_inodes *inodes;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
@ -26,7 +29,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta) + sizeof(*dlease);
+ sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
+ sizeof(*icaps) + sizeof(*inodes);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
@ -95,6 +99,38 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
sum = percpu_counter_sum(&m->total_inodes);
/* encode the opened files metric */
files = (struct ceph_opened_files *)(dlease + 1);
files->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
files->ver = 1;
files->compat = 1;
files->data_len = cpu_to_le32(sizeof(*files) - 10);
files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
files->total = cpu_to_le64(sum);
items++;
/* encode the pinned icaps metric */
icaps = (struct ceph_pinned_icaps *)(files + 1);
icaps->type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
icaps->ver = 1;
icaps->compat = 1;
icaps->data_len = cpu_to_le32(sizeof(*icaps) - 10);
icaps->pinned_icaps = cpu_to_le64(nr_caps);
icaps->total = cpu_to_le64(sum);
items++;
/* encode the opened inodes metric */
inodes = (struct ceph_opened_inodes *)(icaps + 1);
inodes->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
inodes->ver = 1;
inodes->compat = 1;
inodes->data_len = cpu_to_le32(sizeof(*inodes) - 10);
inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
inodes->total = cpu_to_le64(sum);
items++;
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);
@ -183,21 +219,21 @@ int ceph_metric_init(struct ceph_client_metric *m)
if (ret)
goto err_i_caps_mis;
spin_lock_init(&m->read_latency_lock);
spin_lock_init(&m->read_metric_lock);
m->read_latency_sq_sum = 0;
m->read_latency_min = KTIME_MAX;
m->read_latency_max = 0;
m->total_reads = 0;
m->read_latency_sum = 0;
spin_lock_init(&m->write_latency_lock);
spin_lock_init(&m->write_metric_lock);
m->write_latency_sq_sum = 0;
m->write_latency_min = KTIME_MAX;
m->write_latency_max = 0;
m->total_writes = 0;
m->write_latency_sum = 0;
spin_lock_init(&m->metadata_latency_lock);
spin_lock_init(&m->metadata_metric_lock);
m->metadata_latency_sq_sum = 0;
m->metadata_latency_min = KTIME_MAX;
m->metadata_latency_max = 0;
@ -274,7 +310,7 @@ static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,
*sq_sump += sq;
}
void ceph_update_read_latency(struct ceph_client_metric *m,
void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@ -283,14 +319,14 @@ void ceph_update_read_latency(struct ceph_client_metric *m,
if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
return;
spin_lock(&m->read_latency_lock);
spin_lock(&m->read_metric_lock);
__update_latency(&m->total_reads, &m->read_latency_sum,
&m->read_latency_min, &m->read_latency_max,
&m->read_latency_sq_sum, lat);
spin_unlock(&m->read_latency_lock);
spin_unlock(&m->read_metric_lock);
}
void ceph_update_write_latency(struct ceph_client_metric *m,
void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@ -299,14 +335,14 @@ void ceph_update_write_latency(struct ceph_client_metric *m,
if (unlikely(rc && rc != -ETIMEDOUT))
return;
spin_lock(&m->write_latency_lock);
spin_lock(&m->write_metric_lock);
__update_latency(&m->total_writes, &m->write_latency_sum,
&m->write_latency_min, &m->write_latency_max,
&m->write_latency_sq_sum, lat);
spin_unlock(&m->write_latency_lock);
spin_unlock(&m->write_metric_lock);
}
void ceph_update_metadata_latency(struct ceph_client_metric *m,
void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
@ -315,9 +351,9 @@ void ceph_update_metadata_latency(struct ceph_client_metric *m,
if (unlikely(rc && rc != -ENOENT))
return;
spin_lock(&m->metadata_latency_lock);
spin_lock(&m->metadata_metric_lock);
__update_latency(&m->total_metadatas, &m->metadata_latency_sum,
&m->metadata_latency_min, &m->metadata_latency_max,
&m->metadata_latency_sq_sum, lat);
spin_unlock(&m->metadata_latency_lock);
spin_unlock(&m->metadata_metric_lock);
}

View File

@ -14,8 +14,11 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_WRITE_LATENCY,
CLIENT_METRIC_TYPE_METADATA_LATENCY,
CLIENT_METRIC_TYPE_DENTRY_LEASE,
CLIENT_METRIC_TYPE_OPENED_FILES,
CLIENT_METRIC_TYPE_PINNED_ICAPS,
CLIENT_METRIC_TYPE_OPENED_INODES,
CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_OPENED_INODES,
};
/*
@ -28,6 +31,9 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
CLIENT_METRIC_TYPE_OPENED_FILES, \
CLIENT_METRIC_TYPE_PINNED_ICAPS, \
CLIENT_METRIC_TYPE_OPENED_INODES, \
\
CLIENT_METRIC_TYPE_MAX, \
}
@ -94,6 +100,42 @@ struct ceph_metric_dlease {
__le64 total;
} __packed;
/*
 * Packed record for the CLIENT_METRIC_TYPE_OPENED_FILES metric: a 10-byte
 * header (type, ver, compat, data_len) followed by the payload.
 */
struct ceph_opened_files {
__le32 type; /* ceph metric type */
__u8 ver;
__u8 compat;
__le32 data_len; /* payload length in bytes: sizeof(opened_files) + sizeof(total) */
__le64 opened_files;
__le64 total;
} __packed;
/*
 * Packed record for the CLIENT_METRIC_TYPE_PINNED_ICAPS metric: a 10-byte
 * header (type, ver, compat, data_len) followed by the payload.
 */
struct ceph_pinned_icaps {
__le32 type; /* ceph metric type */
__u8 ver;
__u8 compat;
__le32 data_len; /* payload length in bytes: sizeof(pinned_icaps) + sizeof(total) */
__le64 pinned_icaps;
__le64 total;
} __packed;
/*
 * Packed record for the CLIENT_METRIC_TYPE_OPENED_INODES metric: a 10-byte
 * header (type, ver, compat, data_len) followed by the payload.
 */
struct ceph_opened_inodes {
__le32 type; /* ceph metric type */
__u8 ver;
__u8 compat;
__le32 data_len; /* payload length in bytes: sizeof(opened_inodes) + sizeof(total) */
__le64 opened_inodes;
__le64 total;
} __packed;
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;
@ -108,21 +150,21 @@ struct ceph_client_metric {
struct percpu_counter i_caps_hit;
struct percpu_counter i_caps_mis;
spinlock_t read_latency_lock;
spinlock_t read_metric_lock;
u64 total_reads;
ktime_t read_latency_sum;
ktime_t read_latency_sq_sum;
ktime_t read_latency_min;
ktime_t read_latency_max;
spinlock_t write_latency_lock;
spinlock_t write_metric_lock;
u64 total_writes;
ktime_t write_latency_sum;
ktime_t write_latency_sq_sum;
ktime_t write_latency_min;
ktime_t write_latency_max;
spinlock_t metadata_latency_lock;
spinlock_t metadata_metric_lock;
u64 total_metadatas;
ktime_t metadata_latency_sum;
ktime_t metadata_latency_sq_sum;
@ -162,13 +204,13 @@ static inline void ceph_update_cap_mis(struct ceph_client_metric *m)
percpu_counter_inc(&m->i_caps_mis);
}
extern void ceph_update_read_latency(struct ceph_client_metric *m,
extern void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
extern void ceph_update_write_latency(struct ceph_client_metric *m,
extern void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
extern void ceph_update_metadata_latency(struct ceph_client_metric *m,
extern void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
#endif /* _FS_CEPH_MDS_METRIC_H */

View File

@ -605,7 +605,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
BUG_ON(capsnap->writing);
capsnap->size = inode->i_size;
capsnap->size = i_size_read(inode);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
capsnap->ctime = inode->i_ctime;

View File

@ -21,6 +21,7 @@
#include <linux/ceph/libceph.h>
#ifdef CONFIG_CEPH_FSCACHE
#define FSCACHE_USE_NEW_IO_API
#include <linux/fscache.h>
#endif
@ -333,7 +334,7 @@ struct ceph_inode_info {
/* for dirs */
struct timespec64 i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_rbytes, i_rfiles, i_rsubdirs, i_rsnaps;
u64 i_files, i_subdirs;
/* quotas */
@ -427,7 +428,6 @@ struct ceph_inode_info {
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
u32 i_fscache_gen;
#endif
errseq_t i_meta_err;
@ -529,10 +529,34 @@ static inline int ceph_ino_compare(struct inode *inode, void *data)
ci->i_vino.snap == pvino->snap;
}
/*
* The MDS reserves a set of inodes for its own usage. These should never
* be accessible by clients, and so the MDS has no reason to ever hand these
* out. The range is CEPH_MDS_INO_MDSDIR_OFFSET..CEPH_INO_SYSTEM_BASE.
*
* These come from src/mds/mdstypes.h in the ceph sources.
*/
#define CEPH_MAX_MDS 0x100
#define CEPH_NUM_STRAY 10
#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
/*
 * Tell whether @vino lies inside the MDS-private inode number window,
 * i.e. at or above CEPH_MDS_INO_MDSDIR_OFFSET and strictly below
 * CEPH_INO_SYSTEM_BASE.
 *
 * Returns true (after firing WARN_RATELIMIT) for a reserved number and
 * false for everything else.
 */
static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
{
	/* Numbers outside the reserved window need no further attention. */
	if (vino.ino < CEPH_MDS_INO_MDSDIR_OFFSET ||
	    vino.ino >= CEPH_INO_SYSTEM_BASE)
		return false;

	WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
	return true;
}
static inline struct inode *ceph_find_inode(struct super_block *sb,
struct ceph_vino vino)
{
if (ceph_vino_is_reserved(vino))
return NULL;
/*
* NB: The hashval will be run through the fs/inode.c hash function
* anyway, so there is no need to squash the inode number down to
@ -1156,7 +1180,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
int mds, int drop, int unless);
extern int ceph_get_caps(struct file *filp, int need, int want,
loff_t endoff, int *got, struct page **pinned_page);
loff_t endoff, int *got);
extern int ceph_try_get_caps(struct inode *inode,
int need, int want, bool nonblock, int *got);
@ -1193,7 +1217,7 @@ extern const struct dentry_operations ceph_dentry_ops;
extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order);
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
extern int ceph_handle_snapdir(struct ceph_mds_request *req,
extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err);

View File

@ -233,6 +233,12 @@ static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
}
/*
 * vxattr callback for the directory "rsnaps" field: formats ci->i_rsnaps
 * into @val (capacity @size) through ceph_fmt_xattr() and hands back
 * whatever that helper returns.
 */
static ssize_t ceph_vxattrcb_dir_rsnaps(struct ceph_inode_info *ci, char *val,
					size_t size)
{
	ssize_t len;

	len = ceph_fmt_xattr(val, size, "%lld", ci->i_rsnaps);
	return len;
}
static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
size_t size)
{
@ -384,6 +390,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
XATTR_RSTAT_FIELD(dir, rsnaps),
XATTR_RSTAT_FIELD(dir, rbytes),
XATTR_RSTAT_FIELD(dir, rctime),
{

View File

@ -36,6 +36,20 @@ static int init_protocol(struct ceph_auth_client *ac, int proto)
}
}
/*
 * Store @global_id in the auth client.
 *
 * Two suspicious situations are reported with pr_err() but do not stop
 * the update: a zero @global_id, and a @global_id that differs from a
 * previously stored non-zero value.
 */
static void set_global_id(struct ceph_auth_client *ac, u64 global_id)
{
	const u64 prev_id = ac->global_id;

	dout("%s global_id %llu\n", __func__, global_id);

	if (global_id == 0)
		pr_err("got zero global_id\n");

	if (prev_id != 0 && prev_id != global_id)
		pr_err("global_id changed from %llu to %llu\n", prev_id,
		       global_id);

	/* The new value is recorded unconditionally. */
	ac->global_id = global_id;
}
/*
* setup, teardown.
*/
@ -222,11 +236,6 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
payload_end = payload + payload_len;
if (global_id && ac->global_id != global_id) {
dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
ac->global_id = global_id;
}
if (ac->negotiating) {
/* server does not support our protocols? */
if (!protocol && result < 0) {
@ -253,11 +262,16 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ret = ac->ops->handle_reply(ac, result, payload, payload_end,
NULL, NULL, NULL, NULL);
if (ret == -EAGAIN)
if (ret == -EAGAIN) {
ret = build_request(ac, true, reply_buf, reply_len);
else if (ret)
goto out;
} else if (ret) {
pr_err("auth protocol '%s' mauth authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
goto out;
}
set_global_id(ac, global_id);
out:
mutex_unlock(&ac->mutex);
@ -484,15 +498,11 @@ int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
int ret;
mutex_lock(&ac->mutex);
if (global_id && ac->global_id != global_id) {
dout("%s global_id %llu -> %llu\n", __func__, ac->global_id,
global_id);
ac->global_id = global_id;
}
ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
if (!ret)
set_global_id(ac, global_id);
mutex_unlock(&ac->mutex);
return ret;
}

View File

@ -526,7 +526,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0)
return ret;
auth->struct_v = 2; /* nautilus+ */
auth->struct_v = 3; /* nautilus+ */
auth->key = 0;
for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u;

View File

@ -4,6 +4,7 @@
#include <linux/inet.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h> /* for ceph_pr_addr() */
static int
ceph_decode_entity_addr_versioned(void **p, void *end,
@ -110,6 +111,7 @@ int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
}
ceph_decode_32_safe(p, end, addr_cnt, e_inval);
dout("%s addr_cnt %d\n", __func__, addr_cnt);
found = false;
for (i = 0; i < addr_cnt; i++) {
@ -117,6 +119,7 @@ int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
if (ret)
return ret;
dout("%s i %d addr %s\n", __func__, i, ceph_pr_addr(&tmp_addr));
if (tmp_addr.type == my_type) {
if (found) {
pr_err("another match of type %d in addrvec\n",
@ -128,13 +131,18 @@ int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
found = true;
}
}
if (!found && addr_cnt != 0) {
pr_err("no match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -ENOENT;
}
return 0;
if (found)
return 0;
if (!addr_cnt)
return 0; /* normal -- e.g. unused OSD id/slot */
if (addr_cnt == 1 && !memchr_inv(&tmp_addr, 0, sizeof(tmp_addr)))
return 0; /* weird but effectively the same as !addr_cnt */
pr_err("no match of type %d in addrvec\n", le32_to_cpu(my_type));
return -ENOENT;
e_inval:
return -EINVAL;