linux-next/net/ceph/cls_lock_client.c
Ilya Dryomov 22e8bd51bb rbd: support for object-map and fast-diff
Speed up reads, discards and zeroouts through RBD_OBJ_FLAG_MAY_EXIST
and RBD_OBJ_FLAG_NOOP_FOR_NONEXISTENT based on object map.

Invalid object maps are not trusted, but still updated.  Note that we
never iterate, resize or invalidate object maps.  If object-map feature
is enabled but object map fails to load, we just fail the requester
(either "rbd map" or I/O, by way of post-acquire action).

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
2019-07-08 14:01:45 +02:00

426 lines
12 KiB
C

// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/ceph/cls_lock_client.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/libceph.h>
/**
* ceph_cls_lock - grab rados lock for object
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @type: lock type (CEPH_CLS_LOCK_EXCLUSIVE or CEPH_CLS_LOCK_SHARED)
* @cookie: user-defined identifier for this instance of the lock
* @tag: user-defined tag
* @desc: user-defined lock description
* @flags: lock flags
*
* All operations on the same lock should use the same tag.
*/
int ceph_cls_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 type, char *cookie,
char *tag, char *desc, u8 flags)
{
int lock_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
int tag_len = strlen(tag);
int desc_len = strlen(desc);
void *p, *end;
struct page *lock_op_page;
struct timespec64 mtime;
int ret;
lock_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
tag_len + sizeof(__le32) +
desc_len + sizeof(__le32) +
sizeof(struct ceph_timespec) +
/* flag and type */
sizeof(u8) + sizeof(u8) +
CEPH_ENCODING_START_BLK_LEN;
if (lock_op_buf_size > PAGE_SIZE)
return -E2BIG;
lock_op_page = alloc_page(GFP_NOIO);
if (!lock_op_page)
return -ENOMEM;
p = page_address(lock_op_page);
end = p + lock_op_buf_size;
/* encode cls_lock_lock_op struct */
ceph_start_encoding(&p, 1, 1,
lock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_8(&p, type);
ceph_encode_string(&p, end, cookie, cookie_len);
ceph_encode_string(&p, end, tag, tag_len);
ceph_encode_string(&p, end, desc, desc_len);
/* only support infinite duration */
memset(&mtime, 0, sizeof(mtime));
ceph_encode_timespec64(p, &mtime);
p += sizeof(struct ceph_timespec);
ceph_encode_8(&p, flags);
dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
__func__, lock_name, type, cookie, tag, desc, flags);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
CEPH_OSD_FLAG_WRITE, lock_op_page,
lock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(lock_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock);
/**
* ceph_cls_unlock - release rados lock for object
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
*/
int ceph_cls_unlock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie)
{
int unlock_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
void *p, *end;
struct page *unlock_op_page;
int ret;
unlock_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
CEPH_ENCODING_START_BLK_LEN;
if (unlock_op_buf_size > PAGE_SIZE)
return -E2BIG;
unlock_op_page = alloc_page(GFP_NOIO);
if (!unlock_op_page)
return -ENOMEM;
p = page_address(unlock_op_page);
end = p + unlock_op_buf_size;
/* encode cls_lock_unlock_op struct */
ceph_start_encoding(&p, 1, 1,
unlock_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_string(&p, end, cookie, cookie_len);
dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
CEPH_OSD_FLAG_WRITE, unlock_op_page,
unlock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(unlock_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_unlock);
/**
* ceph_cls_break_lock - release rados lock for object for specified client
* @oid, @oloc: object to lock
* @lock_name: the name of the lock
* @cookie: user-defined identifier for this instance of the lock
* @locker: current lock owner
*/
int ceph_cls_break_lock(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, char *cookie,
struct ceph_entity_name *locker)
{
int break_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
struct page *break_op_page;
void *p, *end;
int ret;
break_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
sizeof(u8) + sizeof(__le64) +
CEPH_ENCODING_START_BLK_LEN;
if (break_op_buf_size > PAGE_SIZE)
return -E2BIG;
break_op_page = alloc_page(GFP_NOIO);
if (!break_op_page)
return -ENOMEM;
p = page_address(break_op_page);
end = p + break_op_buf_size;
/* encode cls_lock_break_op struct */
ceph_start_encoding(&p, 1, 1,
break_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_copy(&p, locker, sizeof(*locker));
ceph_encode_string(&p, end, cookie, cookie_len);
dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
cookie, ENTITY_NAME(*locker));
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
CEPH_OSD_FLAG_WRITE, break_op_page,
break_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(break_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_break_lock);
int ceph_cls_set_cookie(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 type, char *old_cookie,
char *tag, char *new_cookie)
{
int cookie_op_buf_size;
int name_len = strlen(lock_name);
int old_cookie_len = strlen(old_cookie);
int tag_len = strlen(tag);
int new_cookie_len = strlen(new_cookie);
void *p, *end;
struct page *cookie_op_page;
int ret;
cookie_op_buf_size = name_len + sizeof(__le32) +
old_cookie_len + sizeof(__le32) +
tag_len + sizeof(__le32) +
new_cookie_len + sizeof(__le32) +
sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
if (cookie_op_buf_size > PAGE_SIZE)
return -E2BIG;
cookie_op_page = alloc_page(GFP_NOIO);
if (!cookie_op_page)
return -ENOMEM;
p = page_address(cookie_op_page);
end = p + cookie_op_buf_size;
/* encode cls_lock_set_cookie_op struct */
ceph_start_encoding(&p, 1, 1,
cookie_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_8(&p, type);
ceph_encode_string(&p, end, old_cookie, old_cookie_len);
ceph_encode_string(&p, end, tag, tag_len);
ceph_encode_string(&p, end, new_cookie, new_cookie_len);
dout("%s lock_name %s type %d old_cookie %s tag %s new_cookie %s\n",
__func__, lock_name, type, old_cookie, tag, new_cookie);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "set_cookie",
CEPH_OSD_FLAG_WRITE, cookie_op_page,
cookie_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(cookie_op_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_set_cookie);
void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers)
{
int i;
for (i = 0; i < num_lockers; i++)
kfree(lockers[i].id.cookie);
kfree(lockers);
}
EXPORT_SYMBOL(ceph_free_lockers);
static int decode_locker(void **p, void *end, struct ceph_locker *locker)
{
u8 struct_v;
u32 len;
char *s;
int ret;
ret = ceph_start_decoding(p, end, 1, "locker_id_t", &struct_v, &len);
if (ret)
return ret;
ceph_decode_copy(p, &locker->id.name, sizeof(locker->id.name));
s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
if (IS_ERR(s))
return PTR_ERR(s);
locker->id.cookie = s;
ret = ceph_start_decoding(p, end, 1, "locker_info_t", &struct_v, &len);
if (ret)
return ret;
*p += sizeof(struct ceph_timespec); /* skip expiration */
ret = ceph_decode_entity_addr(p, end, &locker->info.addr);
if (ret)
return ret;
len = ceph_decode_32(p);
*p += len; /* skip description */
dout("%s %s%llu cookie %s addr %s\n", __func__,
ENTITY_NAME(locker->id.name), locker->id.cookie,
ceph_pr_addr(&locker->info.addr));
return 0;
}
static int decode_lockers(void **p, void *end, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers)
{
u8 struct_v;
u32 struct_len;
char *s;
int i;
int ret;
ret = ceph_start_decoding(p, end, 1, "cls_lock_get_info_reply",
&struct_v, &struct_len);
if (ret)
return ret;
*num_lockers = ceph_decode_32(p);
*lockers = kcalloc(*num_lockers, sizeof(**lockers), GFP_NOIO);
if (!*lockers)
return -ENOMEM;
for (i = 0; i < *num_lockers; i++) {
ret = decode_locker(p, end, *lockers + i);
if (ret)
goto err_free_lockers;
}
*type = ceph_decode_8(p);
s = ceph_extract_encoded_string(p, end, NULL, GFP_NOIO);
if (IS_ERR(s)) {
ret = PTR_ERR(s);
goto err_free_lockers;
}
*tag = s;
return 0;
err_free_lockers:
ceph_free_lockers(*lockers, *num_lockers);
return ret;
}
/*
* On success, the caller is responsible for:
*
* kfree(tag);
* ceph_free_lockers(lockers, num_lockers);
*/
int ceph_cls_lock_info(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
struct ceph_object_locator *oloc,
char *lock_name, u8 *type, char **tag,
struct ceph_locker **lockers, u32 *num_lockers)
{
int get_info_op_buf_size;
int name_len = strlen(lock_name);
struct page *get_info_op_page, *reply_page;
size_t reply_len = PAGE_SIZE;
void *p, *end;
int ret;
get_info_op_buf_size = name_len + sizeof(__le32) +
CEPH_ENCODING_START_BLK_LEN;
if (get_info_op_buf_size > PAGE_SIZE)
return -E2BIG;
get_info_op_page = alloc_page(GFP_NOIO);
if (!get_info_op_page)
return -ENOMEM;
reply_page = alloc_page(GFP_NOIO);
if (!reply_page) {
__free_page(get_info_op_page);
return -ENOMEM;
}
p = page_address(get_info_op_page);
end = p + get_info_op_buf_size;
/* encode cls_lock_get_info_op struct */
ceph_start_encoding(&p, 1, 1,
get_info_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
dout("%s lock_name %s\n", __func__, lock_name);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "get_info",
CEPH_OSD_FLAG_READ, get_info_op_page,
get_info_op_buf_size, &reply_page, &reply_len);
dout("%s: status %d\n", __func__, ret);
if (ret >= 0) {
p = page_address(reply_page);
end = p + reply_len;
ret = decode_lockers(&p, end, type, tag, lockers, num_lockers);
}
__free_page(get_info_op_page);
__free_page(reply_page);
return ret;
}
EXPORT_SYMBOL(ceph_cls_lock_info);
int ceph_cls_assert_locked(struct ceph_osd_request *req, int which,
char *lock_name, u8 type, char *cookie, char *tag)
{
int assert_op_buf_size;
int name_len = strlen(lock_name);
int cookie_len = strlen(cookie);
int tag_len = strlen(tag);
struct page **pages;
void *p, *end;
int ret;
assert_op_buf_size = name_len + sizeof(__le32) +
cookie_len + sizeof(__le32) +
tag_len + sizeof(__le32) +
sizeof(u8) + CEPH_ENCODING_START_BLK_LEN;
if (assert_op_buf_size > PAGE_SIZE)
return -E2BIG;
ret = osd_req_op_cls_init(req, which, "lock", "assert_locked");
if (ret)
return ret;
pages = ceph_alloc_page_vector(1, GFP_NOIO);
if (IS_ERR(pages))
return PTR_ERR(pages);
p = page_address(pages[0]);
end = p + assert_op_buf_size;
/* encode cls_lock_assert_op struct */
ceph_start_encoding(&p, 1, 1,
assert_op_buf_size - CEPH_ENCODING_START_BLK_LEN);
ceph_encode_string(&p, end, lock_name, name_len);
ceph_encode_8(&p, type);
ceph_encode_string(&p, end, cookie, cookie_len);
ceph_encode_string(&p, end, tag, tag_len);
WARN_ON(p != end);
osd_req_op_cls_request_data_pages(req, which, pages, assert_op_buf_size,
0, false, true);
return 0;
}
EXPORT_SYMBOL(ceph_cls_assert_locked);