The big ticket item here is support for msgr2 on-wire protocol, which
 adds the option of full in-transit encryption using AES-GCM algorithm
 (myself).  On top of that we have a series to avoid intermittent
 errors during recovery with recover_session=clean and some MDS request
 encoding work from Jeff, a cap handling fix and assorted observability
 improvements from Luis and Xiubo and a good number of cleanups.  Luis
 also ran into a corner case with quotas which sadly means that we are
 back to denying cross-quota-realm renames.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAl/beWITHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi4i0CACnvd87l2n7dndig7p5d5lVsmo8tAFs
 wHYHaIVisWKMcqKoT+YJajSgzaonxjzvYiyCzwLxV7s7vI7cswAwjEfYT7tTDRp2
 pnO1+4N/1ftznnTk/1QdqwOQLUg5UtdgWvFCaXQF+Vr/YroZomKJPaK8fXK882pC
 9FBjoLNy1HWySsoXPCxJktmDzpEEyYRNJg0vquxm7mxwTgQErupWlwEFjNg5LBkm
 gC0UoKhCE3DeUrXnoq21Ga62RIajxHofTooNx7dg+JiSVgluW+nORaWDYJXNzwLC
 j5puSe4pWIah+gmcwIFuyNz4ddkvVL4URvsYPGkVFYXlEefQjErc10Jh
 =6b9f
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket item here is support for msgr2 on-wire protocol, which
  adds the option of full in-transit encryption using AES-GCM algorithm
  (myself).

  On top of that we have a series to avoid intermittent errors during
  recovery with recover_session=clean and some MDS request encoding work
  from Jeff, a cap handling fix and assorted observability improvements
  from Luis and Xiubo and a good number of cleanups.

  Luis also ran into a corner case with quotas which sadly means that we
  are back to denying cross-quota-realm renames"

* tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits)
  libceph: drop ceph_auth_{create,update}_authorizer()
  libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1
  libceph, ceph: implement msgr2.1 protocol (crc and secure modes)
  libceph: introduce connection modes and ms_mode option
  libceph, rbd: ignore addr->type while comparing in some cases
  libceph, ceph: get and handle cluster maps with addrvecs
  libceph: factor out finish_auth()
  libceph: drop ac->ops->name field
  libceph: amend cephx init_protocol() and build_request()
  libceph, ceph: incorporate nautilus cephx changes
  libceph: safer en/decoding of cephx requests and replies
  libceph: more insight into ticket expiry and invalidation
  libceph: move msgr1 protocol specific fields to its own struct
  libceph: move msgr1 protocol implementation to its own file
  libceph: separate msgr1 protocol implementation
  libceph: export remaining protocol independent infrastructure
  libceph: export zero_page
  libceph: rename and export con->flags bits
  libceph: rename and export con->state states
  libceph: make con->state an int
  ...
This commit is contained in:
Linus Torvalds 2020-12-17 11:53:52 -08:00
commit be695ee29e
41 changed files with 7225 additions and 2193 deletions

View File

@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
if (!memcmp(&watchers[i].addr, &locker->info.addr,
sizeof(locker->info.addr)) &&
/*
* Ignore addr->type while comparing. This mimics
* entity_addr_t::get_legacy_str() + strcmp().
*/
if (ceph_addr_equal_no_type(&watchers[i].addr,
&locker->info.addr) &&
watchers[i].cookie == cookie) {
struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num),

View File

@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO);
}
@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) {
page = grab_cache_page_write_begin(mapping, index, 0);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) {
r = -ENOMEM;
break;

View File

@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{
struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc =
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
struct ceph_mds_client *mdsc;
int removed = 0;
/* 'ci' being NULL means the removal has already occurred */
if (!ci) {
dout("%s: cap inode is NULL\n", __func__);
return;
}
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) {
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
!mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL;
}
@ -2746,7 +2754,7 @@ again:
goto out_unlock;
}
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO;
goto out_unlock;
@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 pool_ns_len;
/* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad);
ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */
ceph_decode_32_safe(&p, end, caller_uid, bad);
ceph_decode_32_safe(&p, end, caller_gid, bad);
ceph_decode_skip_32(&p, end, bad); // caller_uid
ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) {
@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
if (msg_version >= 11) {
u32 flags;
/* version >= 10 */
ceph_decode_32_safe(&p, end, flags, bad);
ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */
extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);

View File

@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0;
}
/*
 * seq_file show handler that reports the state of this ceph fs client.
 *
 * Writes two lines to @s: the client's entity name together with its
 * address and nonce, and whether the fs client is flagged as blocklisted.
 * The fs client is taken from s->private; @p is unused.
 *
 * Always returns 0.
 */
static int status_show(struct seq_file *s, void *p)
{
	struct ceph_fs_client *fsc = s->private;
	struct ceph_entity_addr *addr = ceph_client_addr(fsc->client);
	struct ceph_entity_inst *inst = &fsc->client->msgr.inst;

	/* "instance: <name> <addr>/<nonce>" */
	seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
		   ceph_pr_addr(addr), le32_to_cpu(addr->nonce));

	if (fsc->blocklisted)
		seq_printf(s, "blocklisted: %s\n", "true");
	else
		seq_printf(s, "blocklisted: %s\n", "false");

	return 0;
}
DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric);
DEFINE_SHOW_ATTRIBUTE(status);
/*
@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir,
fsc,
&caps_fops);
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
fsc,
&status_fops);
}

View File

@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP;
else
return -EROFS;
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
if (err)
return err;
}
/* don't allow cross-quota renames */
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);

View File

@ -1315,15 +1315,10 @@ retry_lookup:
}
if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
}
/* Should be filled in by handle_reply */
BUG_ON(!req->r_target_inode);
in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@ -1333,11 +1328,13 @@ retry_lookup:
if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in));
req->r_target_inode = NULL;
if (in->i_state & I_NEW)
discard_new_inode(in);
else
iput(in);
goto done;
}
req->r_target_inode = in;
if (in->i_state & I_NEW)
unlock_new_inode(in);
}
@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn;
struct inode *in;
int err = 0, skipped = 0, ret, i;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
u32 frag = le32_to_cpu(rhead->args.readdir.frag);
u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 last_hash = 0;
u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {};
@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) {
/* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
}
}
@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO);
@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
}
/* Craft a mask of needed caps given a set of requested statx attrs. */
static int statx_to_caps(u32 want)
static int statx_to_caps(u32 want, umode_t mode)
{
int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED;
if (want & (STATX_NLINK|STATX_CTIME))
mask |= CEPH_CAP_LINK_SHARED;
if (want & (STATX_NLINK|STATX_CTIME)) {
/*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
*/
if (S_ISDIR(mode))
mask |= CEPH_CAP_FILE_SHARED;
else
mask |= CEPH_CAP_LINK_SHARED;
}
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS))
@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) {
err = ceph_do_getattr(inode, statx_to_caps(request_mask),
flags & AT_STATX_FORCE_SYNC);
err = ceph_do_getattr(inode,
statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC);
if (err)
return err;
}

View File

@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock,
};
/**
/*
* Implement fcntl and flock locking functions.
*/
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1;
}
/**
/*
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
*/
@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err;
}
/**
/*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC.
@ -458,7 +458,7 @@ fail:
return err;
}
/**
/*
* Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks.

View File

@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
/* Malformed reply? */
info->has_create_ino = false;
} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
u8 struct_v, struct_compat;
u32 len;
info->has_create_ino = true;
ceph_decode_8_safe(p, end, struct_v, bad);
ceph_decode_8_safe(p, end, struct_compat, bad);
ceph_decode_32_safe(p, end, len, bad);
/* struct_v, struct_compat, and len */
ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
ceph_decode_64_safe(p, end, info->ino, bad);
ret = ceph_parse_deleg_inos(p, end, s);
if (ret)
@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
kfree(req->r_path1);
kfree(req->r_path2);
put_cred(req->r_cred);
if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist);
put_request_session(req);
@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req);
insert_request(&mdsc->request_tree, req);
req->r_uid = current_fsuid();
req->r_gid = current_fsgid();
req->r_cred = get_current_cred();
if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
mdsc->oldest_tid = req->r_tid;
@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
{
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
int i = -1;
int i;
int extra_bytes = 0;
int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options;
@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc;
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0)
invalidate = true;
if (ci->i_wrbuffer_ref > 0)
@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
/*
* called under mdsc->mutex
*/
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_mds_request *req,
int mds, bool drop_cap_releases)
bool drop_cap_releases)
{
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg;
struct ceph_mds_request_head *head;
struct ceph_mds_request_head_old *head;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false;
int len;
int len, i;
u16 releases;
void *p, *end;
int ret;
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free1;
}
len = sizeof(*head) +
pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
if (legacy) {
/* Old style */
len = sizeof(*head);
} else {
/* New style: add gid_list and any later fields */
len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
(sizeof(u64) * req->r_cred->group_info->ngroups);
}
len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop)
len += pathlen1;
if (req->r_old_dentry_drop)
@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base;
p = msg->front.iov_base + sizeof(*head);
/*
* The old ceph_mds_request_header didn't contain a version field, and
* one was added when we moved the message version from 3->4.
*/
if (legacy) {
msg->hdr.version = cpu_to_le16(3);
head = msg->front.iov_base;
p = msg->front.iov_base + sizeof(*head);
} else {
struct ceph_mds_request_head *new_head = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4);
new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
p = msg->front.iov_base + sizeof(*new_head);
}
end = msg->front.iov_base + msg->front.iov_len;
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op);
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
req->r_cred->fsuid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
req->r_cred->fsgid));
head->ino = cpu_to_le64(req->r_deleg_ino);
head->args = req->r_args;
@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_copy(&p, &ts, sizeof(ts));
}
/* gid list */
if (!legacy) {
ceph_encode_32(&p, req->r_cred->group_info->ngroups);
for (i = 0; i < req->r_cred->group_info->ngroups; i++)
ceph_encode_64(&p, from_kgid(&init_user_ns,
req->r_cred->group_info->gid[i]));
}
if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg);
msg = ERR_PTR(-ERANGE);
@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
complete_all(&req->r_completion);
}
/*
 * Locate the old-style request head inside an encoded MDS request.
 *
 * @p:        start of the message front buffer
 * @features: peer feature bits of the session the message is built for
 *
 * When the peer lacks CEPH_FEATURE_FS_BTIME the buffer starts directly with
 * a struct ceph_mds_request_head_old, so @p itself is returned.  Otherwise
 * the buffer starts with a struct ceph_mds_request_head and the old-style
 * head is overlaid beginning at its oldest_client_tid member.
 */
static struct ceph_mds_request_head_old *
find_old_request_head(void *p, u64 features)
{
	struct ceph_mds_request_head *hdr = p;

	/* legacy peer: no leading fields in front of the old head */
	if (!(features & CEPH_FEATURE_FS_BTIME))
		return (struct ceph_mds_request_head_old *)p;

	return (struct ceph_mds_request_head_old *)&hdr->oldest_client_tid;
}
/*
* called under mdsc->mutex
*/
static int __prepare_send_request(struct ceph_mds_client *mdsc,
static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
int mds, bool drop_cap_releases)
bool drop_cap_releases)
{
struct ceph_mds_request_head *rhead;
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg;
int flags = 0;
@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p;
/*
* Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message.
@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
* d_move mangles the src name.
*/
msg = req->r_request;
rhead = msg->front.iov_base;
rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
flags = le32_to_cpu(rhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY;
@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request);
req->r_request = NULL;
}
msg = create_request_message(mdsc, req, mds, drop_cap_releases);
msg = create_request_message(session, req, drop_cap_releases);
if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg);
return PTR_ERR(msg);
}
req->r_request = msg;
rhead = msg->front.iov_base;
rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/*
* called under mdsc->mutex
*/
static int __send_request(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
static int __send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req,
bool drop_cap_releases)
{
int err;
err = __prepare_send_request(mdsc, req, session->s_mds,
drop_cap_releases);
err = __prepare_send_request(session, req, drop_cap_releases);
if (!err) {
ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request);
@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) {
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
err = -EACCES;
goto out_session;
}
/*
* We cannot queue async requests since the caps and delegated
* inodes are bound to the session. Just return -EJUKEBOX and
@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
err = -EJUKEBOX;
goto out_session;
}
/*
* If the session has been REJECTED, then return a hard error,
* unless it's a CLEANRECOVER mount, in which case we'll queue
* it to the mdsc queue.
*/
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
list_add(&req->r_wait, &mdsc->waiting_for_map);
else
err = -EACCES;
goto out_session;
}
if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING) {
err = __open_session(mdsc, session);
@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies;
err = __send_request(mdsc, session, req, false);
err = __send_request(session, req, false);
out_session:
ceph_put_mds_session(session);
@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
/* Must find target inode outside of mutexes to avoid deadlocks */
if ((err >= 0) && rinfo->head->is_target) {
struct inode *in;
struct ceph_vino tvino = {
.ino = le64_to_cpu(rinfo->targeti.in->ino),
.snap = le64_to_cpu(rinfo->targeti.in->snapid)
};
in = ceph_get_inode(mdsc->fsc->sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
mutex_lock(&session->s_mutex);
goto out_err;
}
req->r_target_inode = in;
}
mutex_lock(&session->s_mutex);
if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
__send_request(mdsc, session, req, true);
__send_request(session, req, true);
/*
* also re-send old requests when MDS enters reconnect stage. So that MDS
@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
ceph_mdsc_release_dir_caps_no_check(req);
__send_request(mdsc, session, req, true);
__send_request(session, req, true);
}
mutex_unlock(&mdsc->mutex);
}
@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
if (!READ_ONCE(fsc->blocklisted))
return;
if (fsc->last_auto_reconnect &&
time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
return;
pr_info("auto reconnect after blocklisted\n");
fsc->last_auto_reconnect = jiffies;
ceph_force_reconnect(fsc->sb);
}
@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
u64 want_tid, want_flush;
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
return;
dout("sync\n");
@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len;
u32 epoch;
u32 map_len;
u32 num_fs;
u32 mount_fscid = (u32)-1;
u8 struct_v, struct_cv;
int err = -EINVAL;
ceph_decode_need(&p, end, sizeof(u32), bad);
@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_fsmap epoch %u\n", epoch);
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
struct_v = ceph_decode_8(&p);
struct_cv = ceph_decode_8(&p);
map_len = ceph_decode_32(&p);
/* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
num_fs = ceph_decode_32(&p);
ceph_decode_32_safe(&p, end, num_fs, bad);
while (num_fs-- > 0) {
void *info_p, *info_end;
u32 info_len;
u8 info_v, info_cv;
u32 fscid, namelen;
ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
info_v = ceph_decode_8(&p);
info_cv = ceph_decode_8(&p);
p += 2; // info_v, info_cv
info_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, info_len, bad);
info_p = p;
@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return;
}
newmap = ceph_mdsmap_decode(&p, end);
newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
if (IS_ERR(newmap)) {
err = PTR_ERR(newmap);
goto bad_unlock;
@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
int ret;
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
force_new, proto, NULL, NULL);
if (ret)
return ERR_PTR(ret);
return auth;
}
@ -5118,8 +5167,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
@ -5133,6 +5185,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
}
/*
 * Connection callback: obtain an authorizer for an MDS session.
 *
 * Calls ceph_auth_get_authorizer() for CEPH_ENTITY_TYPE_MDS using the
 * session's auth handshake, forwarding @buf / @buf_len unchanged.  On
 * success, the handshake's authorizer buffer and its length are handed
 * back through @authorizer and @authorizer_len.
 *
 * Returns 0 on success, or the non-zero value returned by
 * ceph_auth_get_authorizer(), in which case the output pointers are left
 * untouched.
 */
static int mds_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_auth_handshake *hs = &session->s_auth;
	int err;

	err = ceph_auth_get_authorizer(session->s_mdsc->fsc->client->monc.auth,
				       hs, CEPH_ENTITY_TYPE_MDS, buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * Connection callback: process an intermediate auth reply for an MDS
 * session.
 *
 * Forwards @reply / @reply_len and @buf / @buf_len to
 * ceph_auth_handle_svc_reply_more() along with the session's auth
 * handshake.  On success, the handshake's authorizer buffer and its length
 * are handed back through @authorizer and @authorizer_len.
 *
 * Returns 0 on success, or the non-zero value returned by
 * ceph_auth_handle_svc_reply_more(), in which case the output pointers are
 * left untouched.
 */
static int mds_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_auth_handshake *hs = &session->s_auth;
	int err;

	err = ceph_auth_handle_svc_reply_more(
			session->s_mdsc->fsc->client->monc.auth, hs,
			reply, reply_len, buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * Connection callback: process the final auth reply for an MDS session.
 *
 * Thin wrapper that passes the reply and the session-key / connection-secret
 * output buffers straight through to ceph_auth_handle_svc_reply_done(),
 * using the auth client and handshake that belong to this session.
 * @global_id is not used here.
 *
 * Returns whatever ceph_auth_handle_svc_reply_done() returns.
 */
static int mds_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_mds_session *session = con->private;

	return ceph_auth_handle_svc_reply_done(
			session->s_mdsc->fsc->client->monc.auth,
			&session->s_auth, reply, reply_len,
			session_key, session_key_len,
			con_secret, con_secret_len);
}
/*
 * Connection callback: the peer rejected the auth method used for an MDS
 * session.
 *
 * The rejection details are passed to ceph_auth_handle_bad_authorizer().
 * If that returns non-zero, ceph_monc_validate_auth() is invoked on the
 * monitor client.
 *
 * This callback never reports success: it returns the error from
 * ceph_monc_validate_auth() if that call was made and failed, and -EACCES
 * in every other case.
 */
static int mds_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_mon_client *monc = &session->s_mdsc->fsc->client->monc;
	int err;

	if (!ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS,
					     used_proto, result,
					     allowed_protos, proto_cnt,
					     allowed_modes, mode_cnt))
		return -EACCES;

	err = ceph_monc_validate_auth(monc);
	if (err)
		return err;

	return -EACCES;
}
static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip)
{
@ -5182,6 +5308,10 @@ static const struct ceph_connection_operations mds_con_ops = {
.alloc_msg = mds_alloc_msg,
.sign_message = mds_sign_message,
.check_message_signature = mds_check_message_signature,
.get_auth_request = mds_get_auth_request,
.handle_auth_reply_more = mds_handle_auth_reply_more,
.handle_auth_done = mds_handle_auth_done,
.handle_auth_bad_method = mds_handle_auth_bad_method,
};
/* eof */

View File

@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid;
kgid_t r_gid;
const struct cred *r_cred;
int r_request_release_offset;
struct timespec64 r_stamp;

View File

@ -114,7 +114,7 @@ bad:
* Ignore any fields we don't care about (there are quite a few of
* them).
*/
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_mdsmap *m;
const void *start = *p;
@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen;
ceph_decode_need(p, end,
4*sizeof(u32) + sizeof(u64) +
sizeof(addr) + sizeof(struct ceph_timespec),
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
ceph_decode_32_safe(p, end, mds, bad);
ceph_decode_32_safe(p, end, inc, bad);
ceph_decode_32_safe(p, end, state, bad);
*p += sizeof(u64); /* state_seq */
err = ceph_decode_entity_addr(p, end, &addr);
if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr);
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
}
if (state <= 0) {
pr_warn("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
dout("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state));
continue;
}

View File

@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta);
+ sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1;
cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps);
items++;
@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec);
items++;
/* encode the dentry lease metric */
dlease = (struct ceph_metric_dlease *)(meta + 1);
dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
dlease->ver = 1;
dlease->compat = 1;
dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);

View File

@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\
CLIENT_METRIC_TYPE_MAX, \
}
@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec;
} __packed;
/*
 * Dentry lease metric record.
 *
 * ceph_mdsc_send_metrics() fills one of these in and places it in the
 * CEPH_MSG_CLIENT_METRICS message right after the metadata latency record.
 * The structure is packed: the first four fields (type, ver, compat,
 * data_len) take 4 + 1 + 1 + 4 = 10 bytes, which is why the sender sets
 * data_len to sizeof(struct ceph_metric_dlease) - 10.
 */
struct ceph_metric_dlease {
__le32 type; /* ceph metric type */
__u8 ver; /* the sender sets this to 1 */
__u8 compat; /* the sender sets this to 1 */
__le32 data_len; /* length of sizeof(hit + mis + total) */
__le64 hit; /* sum of the d_lease_hit per-cpu counter */
__le64 mis; /* sum of the d_lease_mis per-cpu counter */
__le64 total; /* value of the total_dentries counter */
} __packed;
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;

View File

@ -264,7 +264,7 @@ restart:
return NULL;
}
static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm;
@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated;
}
/*
 * ceph_quota_check_rename - decide whether a rename fits the target's quotas
 * @mdsc: MDS client instance (not referenced by the body)
 * @old: inode being renamed
 * @new: destination directory inode
 *
 * Nothing is checked when @old and @new are in the same quota realm.
 * Otherwise the rstat of @new is refreshed (and that of @old too, when @old
 * is a directory) and the max_bytes check is run first, followed by the
 * max_files check only if max_bytes passed.
 *
 * For a directory the recursive byte count and the recursive files + subdirs
 * count of @old are charged; for anything else the inode size and a single
 * file are charged.
 *
 * Returns 0 if the rename may proceed, the error from ceph_do_getattr() if
 * refreshing the rstat failed, -EXDEV if a quota would be exceeded and @old
 * is a directory, or -EDQUOT if a quota would be exceeded and it is not.
 */
int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
			    struct inode *old, struct inode *new)
{
	struct ceph_inode_info *ci_old = ceph_inode(old);
	int err;

	if (ceph_quota_is_same_realm(old, new))
		return 0;

	/* the target directory always needs an up-to-date rstat */
	err = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
	if (err)
		return err;

	if (!S_ISDIR(old->i_mode)) {
		/* non-directory: charge its size and one file */
		if (check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
					 i_size_read(old)) ||
		    check_quota_exceeded(new, QUOTA_CHECK_MAX_FILES_OP, 1))
			return -EDQUOT;
		return 0;
	}

	/* directory: its own recursive stats must be current as well */
	err = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
	if (err)
		return err;

	if (check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
				 ci_old->i_rbytes) ||
	    check_quota_exceeded(new, QUOTA_CHECK_MAX_FILES_OP,
				 ci_old->i_rfiles + ci_old->i_rsubdirs))
		return -EXDEV;

	return 0;
}

View File

@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister();
}
/*
 * Teardown steps shared by ceph_umount_begin() and ceph_force_reconnect():
 * abort the OSD client's requests with -EIO, force-umount the MDS client
 * and bump fsc->filp_gen.
 *
 * This helper does not touch fsc->mount_state; each caller sets it before
 * calling in (CEPH_MOUNT_SHUTDOWN for a forced umount, CEPH_MOUNT_RECOVER
 * for a forced reconnect).
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
}
/*
* ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s).
@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc)
return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
__ceph_umount_begin(fsc);
}
static const struct super_operations ceph_super_ops = {
@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
ceph_umount_begin(sb);
fsc->mount_state = CEPH_MOUNT_RECOVER;
__ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */

View File

@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options;
struct ceph_client *client;
unsigned long mount_state;
int mount_state;
unsigned long last_auto_reconnect;
bool blocklisted;
bool have_copy_from2;
@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric;
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions;
#endif
@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session,
struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf);
extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */

View File

@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2)
#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */
@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec);
}
/*
 * getxattr callback for "ceph.cluster_fsid": formats the fsid of the ceph
 * client behind this inode's superblock with the "%pU" conversion and
 * returns whatever ceph_fmt_xattr() returns.
 */
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
					  char *val, size_t size)
{
	struct super_block *sb = ci->vfs_inode.i_sb;
	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);

	return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
}
/*
 * getxattr callback for "ceph.client_id": formats "client" followed by the
 * global id reported by ceph_client_gid() for the ceph client behind this
 * inode's superblock, and returns whatever ceph_fmt_xattr() returns.
 */
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
				       char *val, size_t size)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_client *cl = fsc->client;

	return ceph_fmt_xattr(val, size, "client%lld", ceph_client_gid(cl));
}
/*
 * getxattr callback for "ceph.caps": reports the caps currently issued for
 * the inode as "<cap string>/0x<hex mask>".
 */
static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
				  size_t size)
{
	int caps;

	/* sample the issued caps under i_ceph_lock, format after dropping it */
	spin_lock(&ci->i_ceph_lock);
	caps = __ceph_caps_issued(ci, NULL);
	spin_unlock(&ci->i_ceph_lock);

	return ceph_fmt_xattr(val, size, "%s/0x%x", ceph_cap_string(caps),
			      caps);
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries, 0),
XATTR_NAME_CEPH(dir, files, 0),
XATTR_NAME_CEPH(dir, subdirs, 0),
XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
/*
 * Virtual xattrs kept in a table separate from the directory and file
 * tables.  ceph_match_vxattr() walks this table comparing names with
 * strcmp() and stops at the entry whose name is NULL, so the terminator
 * must stay last.
 *
 * name_size is sizeof() of the string literal, so it counts the trailing
 * NUL as well.
 */
static struct ceph_vxattr ceph_common_vxattrs[] = {
{
/* value produced by ceph_vxattrcb_cluster_fsid() */
.name = "ceph.cluster_fsid",
.name_size = sizeof("ceph.cluster_fsid"),
.getxattr_cb = ceph_vxattrcb_cluster_fsid,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{
/* value produced by ceph_vxattrcb_client_id() */
.name = "ceph.client_id",
.name_size = sizeof("ceph.client_id"),
.getxattr_cb = ceph_vxattrcb_client_id,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */
};
@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
}
}
vxattr = ceph_common_vxattrs;
while (vxattr->name) {
if (!strcmp(vxattr->name, name))
return vxattr;
vxattr++;
}
return NULL;
}
@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT;
if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true);
if (err)
return err;
@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR;
int err;
@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_args.setxattr.osdmap_epoch =
cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist;
pagelist = NULL;
}

View File

@ -32,8 +32,6 @@ struct ceph_auth_handshake {
};
struct ceph_auth_client_ops {
const char *name;
/*
* true if we are authenticated and can connect to
* services.
@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result,
void *buf, void *end);
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len);
/*
* Create authorizer for connecting to a service, and verify
@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf,
int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type);
@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */
int preferred_mode; /* CEPH_CON_MODE_* */
int fallback_mode; /* ditto */
struct mutex mutex;
};
extern struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key);
struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac);
@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len);
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *auth);
int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type);
@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg);
return 0;
}
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len);
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
#endif

View File

@ -8,7 +8,8 @@
* feature. Base case is 1 (first use).
*/
#define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M)
DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/
#define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \
@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \

View File

@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2
#define CEPH_AUTH_MODE_NONE 0
#define CEPH_AUTH_MODE_AUTHORIZER 1
#define CEPH_AUTH_MODE_MON 10
/* msgr2 protocol modes */
#define CEPH_CON_MODE_UNKNOWN 0x0
#define CEPH_CON_MODE_CRC 0x1
#define CEPH_CON_MODE_SECURE 0x2
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
const char *ceph_auth_proto_name(int proto);
const char *ceph_con_mode_name(int mode);
/*********************************************
* message layer
@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open;
struct {
__le32 flags;
__le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr;
struct {
struct ceph_file_layout_legacy layout;
@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino;
} __attribute__ ((packed));
/*
 * Extended MDS request arguments: the existing ceph_mds_request_args union
 * overlaid with a packed setattr variant that carries a btime field.
 *
 * NOTE(review): setattr_ext presumably repeats the legacy setattr arguments
 * with btime appended at the end -- confirm against the setattr member of
 * union ceph_mds_request_args.
 *
 * The outer union itself carries no packed attribute; both of its members
 * are packed types.
 */
union ceph_mds_request_args_ext {
union ceph_mds_request_args old;
struct {
__le32 mode;
__le32 uid;
__le32 gid;
struct ceph_timespec mtime;
struct ceph_timespec atime;
__le64 size, old_size; /* old_size needed by truncate */
__le32 mask; /* CEPH_SETATTR_* */
struct ceph_timespec btime;
} __attribute__ ((packed)) setattr_ext;
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head {
struct ceph_mds_request_head_old {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args;
} __attribute__ ((packed));
/*
 * NOTE(review): presumably the value written into
 * ceph_mds_request_head.version below -- confirm at the encoding site.
 */
#define CEPH_MDS_REQUEST_HEAD_VERSION 1

/*
 * MDS request header with a leading version field and the extended
 * argument union.  The layout is packed, so field order and widths are
 * part of the encoding and must not be rearranged.
 */
struct ceph_mds_request_head {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
__le16 num_releases; /* # include cap/lease release records */
__le32 op; /* mds op code */
__le32 caller_uid, caller_gid;
__le64 ino; /* use this ino for openc, mkdir, mknod,
etc. (if replaying) */
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */

View File

@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/
#define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{
@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
/*
* encoders
*/

View File

@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
int con_modes[2]; /* CEPH_CON_MODE_* */
/*
* any type that can't be simply compared or doesn't need
@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10
#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN,
CEPH_MOUNT_RECOVER,
};
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
/*
 * Returns true when the first entry of the client's con_modes option is
 * anything other than CEPH_CON_MODE_UNKNOWN.
 */
static inline bool ceph_msgr2(struct ceph_client *client)
{
	const int first_mode = client->options->con_modes[0];

	return first_mode != CEPH_CON_MODE_UNKNOWN;
}
/*
* snapshots

View File

@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
}
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);

View File

@ -3,6 +3,7 @@
#define __FS_CEPH_MESSENGER_H
#include <linux/bvec.h>
#include <linux/crypto.h>
#include <linux/kref.h>
#include <linux/mutex.h>
#include <linux/net.h>
@ -52,6 +53,23 @@ struct ceph_connection_operations {
int (*sign_message) (struct ceph_msg *msg);
int (*check_message_signature) (struct ceph_msg *msg);
/* msgr2 authentication exchange */
int (*get_auth_request)(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_reply_more)(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_done)(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
int (*handle_auth_bad_method)(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
};
/* use format string %s%lld */
@ -235,14 +253,171 @@ struct ceph_msg {
bool more_to_follow;
bool needs_out_seq;
int front_alloc_len;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool;
};
/*
* connection states
*/
#define CEPH_CON_S_CLOSED 1
#define CEPH_CON_S_PREOPEN 2
#define CEPH_CON_S_V1_BANNER 3
#define CEPH_CON_S_V1_CONNECT_MSG 4
#define CEPH_CON_S_V2_BANNER_PREFIX 5
#define CEPH_CON_S_V2_BANNER_PAYLOAD 6
#define CEPH_CON_S_V2_HELLO 7
#define CEPH_CON_S_V2_AUTH 8
#define CEPH_CON_S_V2_AUTH_SIGNATURE 9
#define CEPH_CON_S_V2_SESSION_CONNECT 10
#define CEPH_CON_S_V2_SESSION_RECONNECT 11
#define CEPH_CON_S_OPEN 12
#define CEPH_CON_S_STANDBY 13
/*
* ceph_connection flag bits
*/
#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop
messages on errors */
#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */
#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */
#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed
work */
/* ceph connection fault delay defaults, for exponential backoff */
#define BASE_DELAY_INTERVAL (HZ/2)
#define MAX_DELAY_INTERVAL (5 * 60 * HZ)
#define BASE_DELAY_INTERVAL (HZ / 4)
#define MAX_DELAY_INTERVAL (15 * HZ)
/*
 * Connection state that is specific to the v1 protocol code.
 *
 * struct ceph_connection holds this as the "v1" member of an anonymous
 * union whose other member is struct ceph_connection_v2_info, so the two
 * share storage.
 */
struct ceph_connection_v1_info {
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
bool out_more; /* there is more data after the kvecs */
bool out_msg_done;
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
/* connection negotiation temps */
u8 in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_entity_addr actual_peer_addr;
struct ceph_entity_addr peer_addr_for_me;
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
int in_base_pos; /* bytes read */
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
__le64 in_temp_ack; /* for reading an ack */
/* message out temps */
struct ceph_msg_header out_hdr;
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
u32 connect_seq; /* identify the most recent connection
attempt for this session */
u32 peer_global_seq; /* peer's global seq for this connection */
};
#define CEPH_CRC_LEN 4
#define CEPH_GCM_KEY_LEN 16
#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce)
#define CEPH_GCM_BLOCK_LEN 16
#define CEPH_GCM_TAG_LEN 16
#define CEPH_PREAMBLE_LEN 32
#define CEPH_PREAMBLE_INLINE_LEN 48
#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN
#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \
CEPH_PREAMBLE_INLINE_LEN + \
CEPH_GCM_TAG_LEN)
#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN)
#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN)
#define CEPH_FRAME_MAX_SEGMENT_COUNT 4
/*
 * Description of a single frame: its tag plus a length and an alignment
 * for each segment.  Both arrays have room for
 * CEPH_FRAME_MAX_SEGMENT_COUNT entries.
 */
struct ceph_frame_desc {
int fd_tag; /* FRAME_TAG_* */
int fd_seg_cnt; /* presumably the number of entries in use below -- confirm */
int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */
int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT];
};
/*
 * Nonce layout used for AES-GCM: a 32-bit part followed by a 64-bit
 * counter, both little-endian.  CEPH_GCM_IV_LEN is defined as the size of
 * this structure, so changing the layout changes the IV length.
 */
struct ceph_gcm_nonce {
__le32 fixed;
/* __packed here keeps the counter directly after "fixed", with no padding */
__le64 counter __packed;
};
/*
 * Connection state that is specific to the v2 protocol code.
 *
 * struct ceph_connection holds this as the "v2" member of an anonymous
 * union whose other member is struct ceph_connection_v1_info, so the two
 * share storage.
 */
struct ceph_connection_v2_info {
/* receive side */
struct iov_iter in_iter;
struct kvec in_kvecs[5]; /* recvmsg */
struct bio_vec in_bvec; /* recvmsg (in_cursor) */
int in_kvec_cnt;
int in_state; /* IN_S_* */
/* send side */
struct iov_iter out_iter;
struct kvec out_kvecs[8]; /* sendmsg */
struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero),
sendmsg (out_enc_pages) */
int out_kvec_cnt;
int out_state; /* OUT_S_* */
int out_zero; /* # of zero bytes to send */
bool out_iter_sendpage; /* use sendpage if possible */
struct ceph_frame_desc in_desc;
struct ceph_msg_data_cursor in_cursor;
struct ceph_msg_data_cursor out_cursor;
/* crypto handles and the nonce kept for each direction */
struct crypto_shash *hmac_tfm; /* post-auth signature */
struct crypto_aead *gcm_tfm; /* on-wire encryption */
struct aead_request *gcm_req;
struct crypto_wait gcm_wait;
struct ceph_gcm_nonce in_gcm_nonce;
struct ceph_gcm_nonce out_gcm_nonce;
struct page **out_enc_pages;
int out_enc_page_cnt;
int out_enc_resid;
int out_enc_i;
int con_mode; /* CEPH_CON_MODE_* */
void *conn_bufs[16];
int conn_buf_cnt;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
int in_sign_kvec_cnt;
int out_sign_kvec_cnt;
u64 client_cookie;
u64 server_cookie;
u64 global_seq;
u64 connect_seq;
u64 peer_global_seq;
/* each buffer is sized for the larger (secure mode) preamble */
u8 in_buf[CEPH_PREAMBLE_SECURE_LEN];
u8 out_buf[CEPH_PREAMBLE_SECURE_LEN];
/*
 * Outgoing epilogue.  late_status plus the three CRCs add up to
 * CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN); late_status plus
 * pad add up to CEPH_GCM_BLOCK_LEN.
 */
struct {
u8 late_status; /* FRAME_LATE_STATUS_* */
union {
struct {
u32 front_crc;
u32 middle_crc;
u32 data_crc;
} __packed;
u8 pad[CEPH_GCM_BLOCK_LEN - 1];
};
} out_epil;
};
/*
* A single connection with another host.
@ -258,24 +433,16 @@ struct ceph_connection {
struct ceph_messenger *msgr;
int state; /* CEPH_CON_S_* */
atomic_t sock_state;
struct socket *sock;
struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_addr peer_addr_for_me;
unsigned long flags;
unsigned long state;
unsigned long flags; /* CEPH_CON_F_* */
const char *error_msg; /* error message, if any */
struct ceph_entity_name peer_name; /* peer name */
struct ceph_entity_addr peer_addr; /* peer address */
u64 peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
struct mutex mutex;
@ -286,43 +453,80 @@ struct ceph_connection {
u64 in_seq, in_seq_acked; /* last message received, acked */
/* connection negotiation temps */
char in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
struct ceph_msg_header out_hdr;
struct ceph_msg *in_msg;
struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */
bool out_msg_done;
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
/* message in temps */
struct ceph_msg_header in_hdr;
struct ceph_msg *in_msg;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
char in_tag; /* protocol control byte */
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */
union {
struct ceph_connection_v1_info v1;
struct ceph_connection_v2_info v2;
};
};
extern struct page *ceph_zero_page;
void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag);
void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test_and_clear(struct ceph_connection *con,
unsigned long con_flag);
bool ceph_con_flag_test_and_set(struct ceph_connection *con,
unsigned long con_flag);
void ceph_encode_my_addr(struct ceph_messenger *msgr);
int ceph_tcp_connect(struct ceph_connection *con);
int ceph_con_close_socket(struct ceph_connection *con);
void ceph_con_reset_session(struct ceph_connection *con);
u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt);
void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq);
void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq);
void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
struct ceph_msg *msg, size_t length);
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
size_t *page_offset, size_t *length,
bool *last_piece);
void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes);
u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset,
unsigned int length);
bool ceph_addr_is_blank(const struct ceph_entity_addr *addr);
int ceph_addr_port(const struct ceph_entity_addr *addr);
void ceph_addr_set_port(struct ceph_entity_addr *addr, int p);
void ceph_con_process_message(struct ceph_connection *con);
int ceph_con_in_msg_alloc(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip);
void ceph_con_get_out_msg(struct ceph_connection *con);
/* messenger_v1.c */
int ceph_con_v1_try_read(struct ceph_connection *con);
int ceph_con_v1_try_write(struct ceph_connection *con);
void ceph_con_v1_revoke(struct ceph_connection *con);
void ceph_con_v1_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v1_opened(struct ceph_connection *con);
void ceph_con_v1_reset_session(struct ceph_connection *con);
void ceph_con_v1_reset_protocol(struct ceph_connection *con);
/* messenger_v2.c */
int ceph_con_v2_try_read(struct ceph_connection *con);
int ceph_con_v2_try_write(struct ceph_connection *con);
void ceph_con_v2_revoke(struct ceph_connection *con);
void ceph_con_v2_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v2_opened(struct ceph_connection *con);
void ceph_con_v2_reset_session(struct ceph_connection *con);
void ceph_con_v2_reset_protocol(struct ceph_connection *con);
extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
@ -330,7 +534,6 @@ extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr,
int max_count, int *count);
extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void);

View File

@ -8,24 +8,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */
/*
* client-side processes will try to bind to ports in this
* range, simply for the benefit of tools like nmap or wireshark
* that would like to identify the protocol.
*/
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900
/*
* TCP connection banner.  It includes a protocol version; adjust it
* whenever the wire protocol changes.  Try to keep this string length
* constant.
*/
#define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30
/*
* Messenger V2 connection banner prefix.
* The full banner string has the form "ceph v2\n<le16>", where the
* trailing 2 bytes are the length of the remaining banner.
*/
#define CEPH_BANNER_V2 "ceph v2\n"
#define CEPH_BANNER_V2_LEN 8
#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
/*
* messenger V2 features
*/
#define CEPH_MSGR2_INCARNATION_1 (0ull)

/*
 * DEFINE_MSGR2_FEATURE - define the constants for one msgr2 feature bit
 * @bit: bit position of the feature
 * @incarnation: suffix of the CEPH_MSGR2_INCARNATION_* mask to OR in
 * @name: feature name
 *
 * Emits CEPH_MSGR2_FEATURE_<name> (just the bit) and
 * CEPH_MSGR2_FEATUREMASK_<name> (the bit ORed with the incarnation mask).
 *
 * Both are static const objects defined in a header, so every file that
 * includes it gets its own copy whether it uses it or not; they are marked
 * __maybe_unused, like the constants emitted by DEFINE_CEPH_FEATURE, so
 * that this does not produce unused-variable warnings.  @bit is
 * parenthesized so that an expression argument is shifted as a whole.
 */
#define DEFINE_MSGR2_FEATURE(bit, incarnation, name)			\
	static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = \
			(1ULL << (bit));				\
	static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name = \
			(1ULL << (bit) | CEPH_MSGR2_INCARNATION_##incarnation);

/*
 * HAVE_MSGR2_FEATURE - true if every bit of the feature mask for @name is
 * set in the feature word @x.
 */
#define HAVE_MSGR2_FEATURE(x, name) \
	(((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))
DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1
#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
/*
* Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1.
@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address
*/
struct ceph_entity_addr {
__le32 type;
__le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr;
} __attribute__ ((packed));
static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
const struct ceph_entity_addr *rhs)
{
return !memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) &&
lhs->nonce == rhs->nonce;
}
struct ceph_entity_inst {
struct ceph_entity_name name;
struct ceph_entity_addr addr;
@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */
} __attribute__ ((packed));
/*
 * Packed message header; every multi-byte field is little-endian.
 * NOTE(review): presumably the msgr2 counterpart of struct
 * ceph_msg_header -- confirm against messenger_v2.c.
 */
struct ceph_msg_header2 {
__le64 seq; /* message seq# for this session */
__le64 tid; /* transaction id */
__le16 type; /* message type */
__le16 priority; /* priority. higher value == higher priority */
__le16 version; /* version of message encoding */
__le32 data_pre_padding_len;
__le16 data_off; /* sender: include full offset;
receiver: mask against ~PAGE_MASK */
__le64 ack_seq;
__u8 flags;
/* oldest code we think can decode this. unknown if zero. */
__le16 compat_version;
__le16 reserved;
} __attribute__ ((packed));
#define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196

View File

@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
}
struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

View File

@ -5,6 +5,9 @@ config CEPH_LIB
select LIBCRC32C
select CRYPTO_AES
select CRYPTO_CBC
select CRYPTO_GCM
select CRYPTO_HMAC
select CRYPTO_SHA256
select CRYPTO
select KEYS
default n

View File

@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \
auth_x.o \
ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o
pagevec.o snapshot.o string_table.o \
messenger_v1.o messenger_v2.o

View File

@ -21,28 +21,31 @@ static u32 supported_protocols[] = {
CEPH_AUTH_CEPHX
};
static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol)
static int init_protocol(struct ceph_auth_client *ac, int proto)
{
switch (protocol) {
dout("%s proto %d\n", __func__, proto);
switch (proto) {
case CEPH_AUTH_NONE:
return ceph_auth_none_init(ac);
case CEPH_AUTH_CEPHX:
return ceph_x_init(ac);
default:
return -ENOENT;
pr_err("bad auth protocol %d\n", proto);
return -EINVAL;
}
}
/*
* setup, teardown.
*/
struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key)
struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes)
{
struct ceph_auth_client *ac;
int ret;
dout("auth_init name '%s'\n", name);
ret = -ENOMEM;
ac = kzalloc(sizeof(*ac), GFP_NOFS);
if (!ac)
@ -54,8 +57,12 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
ac->name = name;
else
ac->name = CEPH_AUTH_NAME_DEFAULT;
dout("auth_init name %s\n", ac->name);
ac->key = key;
ac->preferred_mode = con_modes[0];
ac->fallback_mode = con_modes[1];
dout("%s name '%s' preferred_mode %d fallback_mode %d\n", __func__,
ac->name, ac->preferred_mode, ac->fallback_mode);
return ac;
out:
@ -145,31 +152,35 @@ bad:
goto out;
}
static int ceph_build_auth_request(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len)
static int build_request(struct ceph_auth_client *ac, bool add_header,
void *buf, int buf_len)
{
struct ceph_mon_request_header *monhdr = msg_buf;
void *p = monhdr + 1;
void *end = msg_buf + msg_len;
void *end = buf + buf_len;
void *p;
int ret;
monhdr->have_version = 0;
monhdr->session_mon = cpu_to_le16(-1);
monhdr->session_mon_tid = 0;
ceph_encode_32(&p, ac->protocol);
p = buf;
if (add_header) {
/* struct ceph_mon_request_header + protocol */
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_16_safe(&p, end, -1, e_range);
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
}
ceph_encode_need(&p, end, sizeof(u32), e_range);
ret = ac->ops->build_request(ac, p + sizeof(u32), end);
if (ret < 0) {
pr_err("error %d building auth method %s request\n", ret,
ac->ops->name);
goto out;
pr_err("auth protocol '%s' building request failed: %d\n",
ceph_auth_proto_name(ac->protocol), ret);
return ret;
}
dout(" built request %d bytes\n", ret);
ceph_encode_32(&p, ret);
ret = p + ret - msg_buf;
out:
return ret;
return p + ret - buf;
e_range:
return -ERANGE;
}
/*
@ -229,10 +240,10 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->ops = NULL;
}
if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol);
ret = init_protocol(ac, protocol);
if (ret) {
pr_err("error %d on auth protocol %d init\n",
ret, protocol);
pr_err("auth protocol '%s' init failed: %d\n",
ceph_auth_proto_name(protocol), ret);
goto out;
}
}
@ -240,12 +251,13 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->negotiating = false;
}
ret = ac->ops->handle_reply(ac, result, payload, payload_end);
if (ret == -EAGAIN) {
ret = ceph_build_auth_request(ac, reply_buf, reply_len);
} else if (ret) {
pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
}
ret = ac->ops->handle_reply(ac, result, payload, payload_end,
NULL, NULL, NULL, NULL);
if (ret == -EAGAIN)
ret = build_request(ac, true, reply_buf, reply_len);
else if (ret)
pr_err("auth protocol '%s' mauth authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
out:
mutex_unlock(&ac->mutex);
@ -264,7 +276,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
mutex_lock(&ac->mutex);
if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len);
ret = build_request(ac, true, msg_buf, msg_len);
mutex_unlock(&ac->mutex);
return ret;
}
@ -281,19 +293,38 @@ int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
}
EXPORT_SYMBOL(ceph_auth_is_authenticated);
int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *auth)
int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode)
{
int ret = 0;
int ret;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->create_authorizer)
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer)
ret = ac->ops->create_authorizer(ac, peer_type, auth);
else if (ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, auth);
else
ret = 0;
if (ret)
goto out;
*proto = ac->protocol;
if (pref_mode && fallb_mode) {
*pref_mode = ac->preferred_mode;
*fallb_mode = ac->fallback_mode;
}
out:
mutex_unlock(&ac->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_auth_create_authorizer);
EXPORT_SYMBOL(__ceph_auth_get_authorizer);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
{
@ -301,20 +332,6 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
}
EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, a);
mutex_unlock(&ac->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
@ -332,13 +349,18 @@ int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->verify_authorizer_reply)
ret = ac->ops->verify_authorizer_reply(ac, a);
ret = ac->ops->verify_authorizer_reply(ac, a,
reply, reply_len, session_key, session_key_len,
con_secret, con_secret_len);
mutex_unlock(&ac->mutex);
return ret;
}
@ -352,3 +374,279 @@ void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
mutex_unlock(&ac->mutex);
}
EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
/*
* msgr2 authentication
*/
/*
 * Linear search: report whether @val occurs among the first @cnt
 * elements of @arr.  A non-positive @cnt yields false without touching
 * @arr.
 */
static bool contains(const int *arr, int cnt, int val)
{
	int idx = 0;

	while (idx < cnt) {
		if (arr[idx] == val)
			return true;
		idx++;
	}

	return false;
}
static int encode_con_modes(void **p, void *end, int pref_mode, int fallb_mode)
{
WARN_ON(pref_mode == CEPH_CON_MODE_UNKNOWN);
if (fallb_mode != CEPH_CON_MODE_UNKNOWN) {
ceph_encode_32_safe(p, end, 2, e_range);
ceph_encode_32_safe(p, end, pref_mode, e_range);
ceph_encode_32_safe(p, end, fallb_mode, e_range);
} else {
ceph_encode_32_safe(p, end, 1, e_range);
ceph_encode_32_safe(p, end, pref_mode, e_range);
}
return 0;
e_range:
return -ERANGE;
}
/*
 * Similar to ceph_auth_build_hello().
 *
 * Encode an initial authentication request into @buf (at most @buf_len
 * bytes): the auth protocol, the connection modes, and a 32-bit
 * length-prefixed payload holding CEPH_AUTH_MODE_MON, the entity name
 * and the current global_id.
 *
 * The protocol is CEPH_AUTH_CEPHX when a key is configured and
 * CEPH_AUTH_NONE otherwise.  It is initialized on first use; on later
 * calls the existing protocol state is reset instead.
 *
 * Returns the number of bytes written, or a negative error code
 * (-ERANGE when @buf is too small).  Takes and releases ac->mutex.
 */
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len)
{
int proto = ac->key ? CEPH_AUTH_CEPHX : CEPH_AUTH_NONE;
void *end = buf + buf_len;
void *lenp;
void *p;
int ret;
mutex_lock(&ac->mutex);
if (ac->protocol == CEPH_AUTH_UNKNOWN) {
ret = init_protocol(ac, proto);
if (ret) {
pr_err("auth protocol '%s' init failed: %d\n",
ceph_auth_proto_name(proto), ret);
goto out;
}
} else {
/* protocol was set up by an earlier call: start over with it */
WARN_ON(ac->protocol != proto);
ac->ops->reset(ac);
}
p = buf;
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
ret = encode_con_modes(&p, end, ac->preferred_mode, ac->fallback_mode);
if (ret)
goto out;
/* remember where the payload length goes; it is filled in last */
lenp = p;
p += 4; /* space for len */
ceph_encode_8_safe(&p, end, CEPH_AUTH_MODE_MON, e_range);
ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret)
goto out;
ceph_encode_64_safe(&p, end, ac->global_id, e_range);
/* payload length excludes the 4-byte length field itself */
ceph_encode_32(&lenp, p - lenp - 4);
ret = p - buf;
out:
mutex_unlock(&ac->mutex);
return ret;
e_range:
/* the *_safe encoders jump here when the buffer runs out */
ret = -ERANGE;
goto out;
}
/*
 * Feed an intermediate authentication reply to the protocol handler.
 * When the handler asks for another round (-EAGAIN), build the follow-up
 * request (without the mon request header) into @buf.
 *
 * Returns whatever build_request() returns in the -EAGAIN case,
 * otherwise the handler's result, which is expected to be a negative
 * error code (a non-negative result triggers a warning).
 */
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
				int reply_len, void *buf, int buf_len)
{
	void *reply_end = reply + reply_len;
	int err;

	mutex_lock(&ac->mutex);
	err = ac->ops->handle_reply(ac, 0, reply, reply_end,
				    NULL, NULL, NULL, NULL);
	if (err != -EAGAIN) {
		WARN_ON(err >= 0);
		goto unlock;
	}

	err = build_request(ac, false, buf, buf_len);

unlock:
	mutex_unlock(&ac->mutex);
	return err;
}
/*
 * Process the final authentication reply.  A non-zero @global_id that
 * differs from the stored one replaces it before the reply is handed to
 * the protocol handler, which may fill in the session key and the
 * connection secret through the output arguments.
 *
 * Returns the protocol handler's result.
 */
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	void *reply_end = reply + reply_len;
	int err;

	mutex_lock(&ac->mutex);

	if (global_id != 0 && global_id != ac->global_id) {
		dout("%s global_id %llu -> %llu\n", __func__, ac->global_id,
		     global_id);
		ac->global_id = global_id;
	}

	err = ac->ops->handle_reply(ac, 0, reply, reply_end,
				    session_key, session_key_len,
				    con_secret, con_secret_len);

	mutex_unlock(&ac->mutex);
	return err;
}
/*
 * Diagnose a rejected authentication attempt.
 *
 * For -EOPNOTSUPP, check whether our protocol and at least one of our
 * connection modes appear in the allowed lists; if not, log why and
 * return false.  In every other case log the failure and return true.
 */
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
				 int used_proto, int result,
				 const int *allowed_protos, int proto_cnt,
				 const int *allowed_modes, int mode_cnt)
{
	bool allowed = false;

	mutex_lock(&ac->mutex);
	WARN_ON(used_proto != ac->protocol);

	if (result == -EOPNOTSUPP) {
		bool no_fallb = ac->fallback_mode == CEPH_CON_MODE_UNKNOWN;

		if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
			pr_err("auth protocol '%s' not allowed\n",
			       ceph_auth_proto_name(ac->protocol));
			goto unlock;
		}

		/* neither the preferred nor the fallback mode is acceptable */
		if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
		    (no_fallb ||
		     !contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
			pr_err("preferred mode '%s' not allowed\n",
			       ceph_con_mode_name(ac->preferred_mode));
			if (no_fallb)
				pr_err("no fallback mode\n");
			else
				pr_err("fallback mode '%s' not allowed\n",
				       ceph_con_mode_name(ac->fallback_mode));
			goto unlock;
		}
	}

	WARN_ON(result == -EOPNOTSUPP || result >= 0);
	pr_err("auth protocol '%s' msgr authentication failed: %d\n",
	       ceph_auth_proto_name(ac->protocol), result);
	allowed = true;

unlock:
	mutex_unlock(&ac->mutex);
	return allowed;
}
/*
 * Obtain an authorizer for @peer_type and encode the request preamble
 * into @buf: the auth protocol, the connection modes and the 32-bit
 * authorizer length.  Only the length is written here; the authorizer
 * bytes themselves are not copied into @buf.
 *
 * @buf_len: in: capacity of @buf; out: number of bytes written.
 *
 * Returns 0 on success, -ERANGE if @buf is too small, or the error from
 * __ceph_auth_get_authorizer() / encode_con_modes().
 */
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len)
{
void *end = buf + *buf_len;
int pref_mode, fallb_mode;
int proto;
void *p;
int ret;
/* force_new == true: any existing authorizer is destroyed first */
ret = __ceph_auth_get_authorizer(ac, auth, peer_type, true, &proto,
&pref_mode, &fallb_mode);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, proto, e_range);
ret = encode_con_modes(&p, end, pref_mode, fallb_mode);
if (ret)
return ret;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_get_authorizer);
/*
 * Handle an intermediate reply during authorization: pass @reply to
 * ceph_auth_add_authorizer_challenge() and, on success, encode the
 * 32-bit authorizer length into @buf.
 *
 * @buf_len: in: capacity of @buf; out: number of bytes written.
 *
 * Returns 0 on success, -ERANGE if @buf is too small, or the error from
 * ceph_auth_add_authorizer_challenge().
 */
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len)
{
void *end = buf + *buf_len;
void *p;
int ret;
ret = ceph_auth_add_authorizer_challenge(ac, auth->authorizer,
reply, reply_len);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_more);
/*
 * Handle the final reply during authorization.  This is a thin wrapper
 * that forwards all arguments, with auth->authorizer, to
 * ceph_auth_verify_authorizer_reply() and returns its result.
 */
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
reply, reply_len, session_key, session_key_len,
con_secret, con_secret_len);
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_done);
/*
 * Diagnose an authorizer rejected by a peer of type @peer_type.
 *
 * For -EOPNOTSUPP, check whether our protocol and at least one of our
 * connection modes appear in the peer's allowed lists; if not, log why
 * and return false.  In every other case log the failure, invalidate
 * the authorizer for @peer_type (when the protocol provides a hook for
 * it) and return true.
 */
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
				     int peer_type, int used_proto, int result,
				     const int *allowed_protos, int proto_cnt,
				     const int *allowed_modes, int mode_cnt)
{
	bool allowed = false;

	mutex_lock(&ac->mutex);
	WARN_ON(used_proto != ac->protocol);

	if (result == -EOPNOTSUPP) {
		bool no_fallb = ac->fallback_mode == CEPH_CON_MODE_UNKNOWN;

		if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
			pr_err("auth protocol '%s' not allowed by %s\n",
			       ceph_auth_proto_name(ac->protocol),
			       ceph_entity_type_name(peer_type));
			goto unlock;
		}

		/* neither the preferred nor the fallback mode is acceptable */
		if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
		    (no_fallb ||
		     !contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
			pr_err("preferred mode '%s' not allowed by %s\n",
			       ceph_con_mode_name(ac->preferred_mode),
			       ceph_entity_type_name(peer_type));
			if (no_fallb)
				pr_err("no fallback mode\n");
			else
				pr_err("fallback mode '%s' not allowed by %s\n",
				       ceph_con_mode_name(ac->fallback_mode),
				       ceph_entity_type_name(peer_type));
			goto unlock;
		}
	}

	WARN_ON(result == -EOPNOTSUPP || result >= 0);
	pr_err("auth protocol '%s' authorization to %s failed: %d\n",
	       ceph_auth_proto_name(ac->protocol),
	       ceph_entity_type_name(peer_type), result);

	if (ac->ops->invalidate_authorizer)
		ac->ops->invalidate_authorizer(ac, peer_type);

	allowed = true;

unlock:
	mutex_unlock(&ac->mutex);
	return allowed;
}
EXPORT_SYMBOL(ceph_auth_handle_bad_authorizer);

View File

@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
* authenticate state, so nothing happens here.
*/
static int handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end)
void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len)
{
struct ceph_auth_none_info *xi = ac->private;
@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer(
}
static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.name = "none",
.reset = reset,
.destroy = destroy,
.is_authenticated = is_authenticated,

View File

@ -22,12 +22,15 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
{
struct ceph_x_info *xi = ac->private;
int need;
int missing;
int need; /* missing + need renewal */
ceph_x_validate_tickets(ac, &need);
dout("ceph_x_is_authenticated want=%d need=%d have=%d\n",
ac->want_keys, need, xi->have_keys);
return (ac->want_keys & xi->have_keys) == ac->want_keys;
missing = ac->want_keys & ~xi->have_keys;
WARN_ON((need & missing) != missing);
dout("%s want 0x%x have 0x%x missing 0x%x -> %d\n", __func__,
ac->want_keys, xi->have_keys, missing, !missing);
return !missing;
}
static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
@ -36,9 +39,9 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
int need;
ceph_x_validate_tickets(ac, &need);
dout("ceph_x_should_authenticate want=%d need=%d have=%d\n",
ac->want_keys, need, xi->have_keys);
return need != 0;
dout("%s want 0x%x have 0x%x need 0x%x -> %d\n", __func__,
ac->want_keys, xi->have_keys, need, !!need);
return !!need;
}
static int ceph_x_encrypt_offset(void)
@ -197,7 +200,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
dout(" decrypted %d bytes\n", ret);
dend = dp + ret;
tkt_struct_v = ceph_decode_8(&dp);
ceph_decode_8_safe(&dp, dend, tkt_struct_v, bad);
if (tkt_struct_v != 1)
goto bad;
@ -205,6 +208,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret)
goto out;
ceph_decode_need(&dp, dend, sizeof(struct ceph_timespec), bad);
ceph_decode_timespec64(&validity, dp);
dp += sizeof(struct ceph_timespec);
new_expires = ktime_get_real_seconds() + validity.tv_sec;
@ -265,22 +269,21 @@ out:
static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
struct ceph_crypto_key *secret,
void *buf, void *end)
void **p, void *end)
{
void *p = buf;
u8 reply_struct_v;
u32 num;
int ret;
ceph_decode_8_safe(&p, end, reply_struct_v, bad);
ceph_decode_8_safe(p, end, reply_struct_v, bad);
if (reply_struct_v != 1)
return -EINVAL;
ceph_decode_32_safe(&p, end, num, bad);
ceph_decode_32_safe(p, end, num, bad);
dout("%d tickets\n", num);
while (num--) {
ret = process_one_ticket(ac, secret, &p, end);
ret = process_one_ticket(ac, secret, p, end);
if (ret)
return ret;
}
@ -379,6 +382,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
}
}
au->service = th->service;
WARN_ON(!th->secret_id);
au->secret_id = th->secret_id;
msg_a = au->buf->vec.iov_base;
@ -442,9 +446,10 @@ static bool need_key(struct ceph_x_ticket_handler *th)
static bool have_key(struct ceph_x_ticket_handler *th)
{
if (th->have_key) {
if (ktime_get_real_seconds() >= th->expires)
th->have_key = false;
if (th->have_key && ktime_get_real_seconds() >= th->expires) {
dout("ticket %d (%s) secret_id %llu expired\n", th->service,
ceph_entity_type_name(th->service), th->secret_id);
th->have_key = false;
}
return th->have_key;
@ -486,6 +491,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
struct ceph_x_info *xi = ac->private;
int need;
struct ceph_x_request_header *head = buf;
void *p;
int ret;
struct ceph_x_ticket_handler *th =
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
@ -494,18 +500,17 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
return PTR_ERR(th);
ceph_x_validate_tickets(ac, &need);
dout("build_request want %x have %x need %x\n",
ac->want_keys, xi->have_keys, need);
dout("%s want 0x%x have 0x%x need 0x%x\n", __func__, ac->want_keys,
xi->have_keys, need);
if (need & CEPH_ENTITY_TYPE_AUTH) {
struct ceph_x_authenticate *auth = (void *)(head + 1);
void *p = auth + 1;
void *enc_buf = xi->auth_authorizer.enc_buf;
struct ceph_x_challenge_blob *blob = enc_buf +
ceph_x_encrypt_offset();
u64 *u;
p = auth + 1;
if (p > end)
return -ERANGE;
@ -521,7 +526,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0)
return ret;
auth->struct_v = 1;
auth->struct_v = 2; /* nautilus+ */
auth->key = 0;
for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u;
@ -534,39 +539,117 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0)
return ret;
/* nautilus+: request service tickets at the same time */
need = ac->want_keys & ~CEPH_ENTITY_TYPE_AUTH;
WARN_ON(!need);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf;
}
if (need) {
void *p = head + 1;
struct ceph_x_service_ticket_request *req;
if (p > end)
return -ERANGE;
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
dout(" get_principal_session_key\n");
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
if (ret)
return ret;
ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len);
req = p;
req->keys = cpu_to_le32(need);
p += sizeof(*req);
p = buf;
ceph_encode_16_safe(&p, end, CEPHX_GET_PRINCIPAL_SESSION_KEY,
e_range);
ceph_encode_copy_safe(&p, end,
xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len, e_range);
ceph_encode_8_safe(&p, end, 1, e_range);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf;
}
return 0;
e_range:
return -ERANGE;
}
/*
 * Process the body of a CEPHX_GET_AUTH_SESSION_KEY reply:
 *
 *   AUTH ticket + [connection secret blob + service tickets blob]
 *
 * The AUTH ticket is decrypted with the client secret.  If the reply
 * ends right after it, nothing else is processed and 0 is returned.
 * Otherwise the AUTH session key is copied out (when @session_key is
 * non-NULL), the connection secret is decrypted with that session key
 * and copied out (when @con_secret is non-NULL), and the service
 * tickets are processed.
 *
 * All length fields come off the wire and are validated against the
 * enclosing buffer before use.
 *
 * Returns 0 on success, -EINVAL on a malformed reply, or the error from
 * ticket processing / decryption.
 */
static int handle_auth_session_key(struct ceph_auth_client *ac,
				   void **p, void *end,
				   u8 *session_key, int *session_key_len,
				   u8 *con_secret, int *con_secret_len)
{
	struct ceph_x_info *xi = ac->private;
	struct ceph_x_ticket_handler *th;
	void *dp, *dend;
	int len;
	int ret;

	/* AUTH ticket */
	ret = ceph_x_proc_ticket_reply(ac, &xi->secret, p, end);
	if (ret)
		return ret;

	if (*p == end) {
		/* pre-nautilus (or didn't request service tickets!) */
		WARN_ON(session_key || con_secret);
		return 0;
	}

	th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
	if (IS_ERR(th))
		return PTR_ERR(th);

	if (session_key) {
		memcpy(session_key, th->session_key.key, th->session_key.len);
		*session_key_len = th->session_key.len;
	}

	/* connection secret */
	ceph_decode_32_safe(p, end, len, e_inval);
	dout("%s connection secret blob len %d\n", __func__, len);
	/* the blob must lie entirely within the reply */
	if (len < 0)
		goto e_inval;
	ceph_decode_need(p, end, len, e_inval);
	if (len > 0) {
		dp = *p + ceph_x_encrypt_offset();
		ret = ceph_x_decrypt(&th->session_key, p, *p + len);
		if (ret < 0)
			return ret;

		dout("%s decrypted %d bytes\n", __func__, ret);
		dend = dp + ret;

		ceph_decode_32_safe(&dp, dend, len, e_inval);
		/* len is signed: reject negative values as well */
		if (len < 0 || len > CEPH_MAX_CON_SECRET_LEN) {
			pr_err("connection secret too big %d\n", len);
			return -EINVAL;
		}
		/* the secret must fit in what was actually decrypted */
		ceph_decode_need(&dp, dend, len, e_inval);

		dout("%s connection secret len %d\n", __func__, len);
		if (con_secret) {
			memcpy(con_secret, dp, len);
			*con_secret_len = len;
		}
	}

	/* service tickets */
	ceph_decode_32_safe(p, end, len, e_inval);
	dout("%s service tickets blob len %d\n", __func__, len);
	/* the blob must lie entirely within the reply */
	if (len < 0)
		goto e_inval;
	ceph_decode_need(p, end, len, e_inval);
	if (len > 0) {
		ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
					       p, *p + len);
		if (ret)
			return ret;
	}

	return 0;

e_inval:
	return -EINVAL;
}
static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end)
void *buf, void *end,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_info *xi = ac->private;
struct ceph_x_reply_header *head = buf;
struct ceph_x_ticket_handler *th;
int len = end - buf;
void *p;
int op;
int ret;
@ -587,22 +670,25 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN;
}
op = le16_to_cpu(head->op);
result = le32_to_cpu(head->result);
p = buf;
ceph_decode_16_safe(&p, end, op, e_inval);
ceph_decode_32_safe(&p, end, result, e_inval);
dout("handle_reply op %d result %d\n", op, result);
switch (op) {
case CEPHX_GET_AUTH_SESSION_KEY:
/* verify auth key */
ret = ceph_x_proc_ticket_reply(ac, &xi->secret,
buf + sizeof(*head), end);
/* AUTH ticket + [connection secret] + service tickets */
ret = handle_auth_session_key(ac, &p, end, session_key,
session_key_len, con_secret,
con_secret_len);
break;
case CEPHX_GET_PRINCIPAL_SESSION_KEY:
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
if (IS_ERR(th))
return PTR_ERR(th);
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
buf + sizeof(*head), end);
/* service tickets */
ret = ceph_x_proc_ticket_reply(ac, &th->session_key, &p, end);
break;
default:
@ -613,6 +699,9 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
if (ac->want_keys == xi->have_keys)
return 0;
return -EAGAIN;
e_inval:
return -EINVAL;
}
static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
@ -678,40 +767,44 @@ static int ceph_x_update_authorizer(
return 0;
}
static int decrypt_authorize_challenge(struct ceph_x_authorizer *au,
void *challenge_buf,
int challenge_buf_len,
u64 *server_challenge)
/*
* CephXAuthorizeChallenge
*/
static int decrypt_authorizer_challenge(struct ceph_crypto_key *secret,
void *challenge, int challenge_len,
u64 *server_challenge)
{
struct ceph_x_authorize_challenge *ch =
challenge_buf + sizeof(struct ceph_x_encrypt_header);
void *dp, *dend;
int ret;
/* no leading len */
ret = __ceph_x_decrypt(&au->session_key, challenge_buf,
challenge_buf_len);
ret = __ceph_x_decrypt(secret, challenge, challenge_len);
if (ret < 0)
return ret;
if (ret < sizeof(*ch)) {
pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
return -EINVAL;
}
*server_challenge = le64_to_cpu(ch->server_challenge);
dout("%s decrypted %d bytes\n", __func__, ret);
dp = challenge + sizeof(struct ceph_x_encrypt_header);
dend = dp + ret;
ceph_decode_skip_8(&dp, dend, e_inval); /* struct_v */
ceph_decode_64_safe(&dp, dend, *server_challenge, e_inval);
dout("%s server_challenge %llu\n", __func__, *server_challenge);
return 0;
e_inval:
return -EINVAL;
}
static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *challenge_buf,
int challenge_buf_len)
void *challenge, int challenge_len)
{
struct ceph_x_authorizer *au = (void *)a;
u64 server_challenge;
int ret;
ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len,
&server_challenge);
ret = decrypt_authorizer_challenge(&au->session_key, challenge,
challenge_len, &server_challenge);
if (ret) {
pr_err("failed to decrypt authorize challenge: %d", ret);
return ret;
@ -726,29 +819,76 @@ static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
return 0;
}
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a)
/*
* CephXAuthorizeReply
*/
static int decrypt_authorizer_reply(struct ceph_crypto_key *secret,
void **p, void *end, u64 *nonce_plus_one,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_authorizer *au = (void *)a;
void *p = au->enc_buf;
struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset();
void *dp, *dend;
u8 struct_v;
int len;
int ret;
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN);
dp = *p + ceph_x_encrypt_offset();
ret = ceph_x_decrypt(secret, p, end);
if (ret < 0)
return ret;
if (ret < sizeof(*reply)) {
pr_err("bad size %d for ceph_x_authorize_reply\n", ret);
return -EINVAL;
dout("%s decrypted %d bytes\n", __func__, ret);
dend = dp + ret;
ceph_decode_8_safe(&dp, dend, struct_v, e_inval);
ceph_decode_64_safe(&dp, dend, *nonce_plus_one, e_inval);
dout("%s nonce_plus_one %llu\n", __func__, *nonce_plus_one);
if (struct_v >= 2) {
ceph_decode_32_safe(&dp, dend, len, e_inval);
if (len > CEPH_MAX_CON_SECRET_LEN) {
pr_err("connection secret too big %d\n", len);
return -EINVAL;
}
dout("%s connection secret len %d\n", __func__, len);
if (con_secret) {
memcpy(con_secret, dp, len);
*con_secret_len = len;
}
}
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one))
ret = -EPERM;
else
ret = 0;
dout("verify_authorizer_reply nonce %llx got %llx ret %d\n",
au->nonce, le64_to_cpu(reply->nonce_plus_one), ret);
return ret;
return 0;
e_inval:
return -EINVAL;
}
/*
 * Verify the peer's reply to authorizer @a.
 *
 * When @session_key is non-NULL the authorizer's session key is copied
 * out first, i.e. even if verification subsequently fails.
 * NOTE(review): no capacity is passed for @session_key, so the caller
 * must supply room for au->session_key.len bytes -- confirm at call
 * sites.
 *
 * The reply is then decrypted with the authorizer's session key; the
 * connection secret, if any, is returned through @con_secret by
 * decrypt_authorizer_reply().
 *
 * Returns 0 on success, -EPERM when the decrypted value is not our
 * nonce + 1, or the error from decrypt_authorizer_reply().
 */
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_authorizer *au = (void *)a;
u64 nonce_plus_one;
int ret;
if (session_key) {
memcpy(session_key, au->session_key.key, au->session_key.len);
*session_key_len = au->session_key.len;
}
ret = decrypt_authorizer_reply(&au->session_key, &reply,
reply + reply_len, &nonce_plus_one,
con_secret, con_secret_len);
if (ret)
return ret;
/* the peer proves it could decrypt the authorizer by echoing nonce + 1 */
if (nonce_plus_one != au->nonce + 1) {
pr_err("failed to authenticate server\n");
return -EPERM;
}
return 0;
}
static void ceph_x_reset(struct ceph_auth_client *ac)
@ -785,8 +925,15 @@ static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th))
if (IS_ERR(th))
return;
if (th->have_key) {
dout("ticket %d (%s) secret_id %llu invalidated\n",
th->service, ceph_entity_type_name(th->service),
th->secret_id);
th->have_key = false;
}
}
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
@ -911,7 +1058,6 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
}
static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
.is_authenticated = ceph_x_is_authenticated,
.should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request,

View File

@ -38,7 +38,8 @@ struct ceph_x_authenticate {
__u8 struct_v;
__le64 client_challenge;
__le64 key;
/* ticket blob */
/* old_ticket blob */
/* nautilus+: other_keys */
} __attribute__ ((packed));
struct ceph_x_service_ticket_request {

View File

@ -265,6 +265,7 @@ enum {
Opt_ip,
Opt_crush_location,
Opt_read_from_replica,
Opt_ms_mode,
/* string args above */
Opt_share,
Opt_crc,
@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = {
{}
};
/*
 * Values accepted by the "ms_mode" option.  ceph_parse_param() turns
 * each one into a (con_modes[0], con_modes[1]) pair: "legacy" leaves
 * both unknown, "crc"/"secure" set only the first entry, and the
 * "prefer-*" variants set the named mode first with the other mode as
 * the second entry.
 */
enum ceph_ms_mode {
Opt_ms_mode_legacy,
Opt_ms_mode_crc,
Opt_ms_mode_secure,
Opt_ms_mode_prefer_crc,
Opt_ms_mode_prefer_secure
};
/* option string -> enum ceph_ms_mode; the empty entry ends the table */
static const struct constant_table ceph_param_ms_mode[] = {
{"legacy", Opt_ms_mode_legacy},
{"crc", Opt_ms_mode_crc},
{"secure", Opt_ms_mode_secure},
{"prefer-crc", Opt_ms_mode_prefer_crc},
{"prefer-secure", Opt_ms_mode_prefer_secure},
{}
};
static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void)
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
return opt;
}
EXPORT_SYMBOL(ceph_alloc_options);
@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
BUG();
}
break;
case Opt_ms_mode:
switch (result.uint_32) {
case Opt_ms_mode_legacy:
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_prefer_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_SECURE;
break;
case Opt_ms_mode_prefer_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_CRC;
break;
default:
BUG();
}
break;
case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout");
@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,");
}
if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) {
if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=secure,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_SECURE) {
seq_puts(m, "ms_mode=prefer-crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_CRC) {
seq_puts(m, "ms_mode=prefer-secure,");
}
}
if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid);

View File

@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type)
}
EXPORT_SYMBOL(ceph_entity_type_name);
/*
 * Translate a CEPH_AUTH_* protocol id into a printable name.
 * Ids that are not listed here are reported as "???".
 */
const char *ceph_auth_proto_name(int proto)
{
	if (proto == CEPH_AUTH_UNKNOWN)
		return "unknown";
	if (proto == CEPH_AUTH_NONE)
		return "none";
	if (proto == CEPH_AUTH_CEPHX)
		return "cephx";

	return "???";
}
/*
 * Translate a CEPH_CON_MODE_* connection mode into a printable name.
 * Modes that are not listed here are reported as "???".
 */
const char *ceph_con_mode_name(int mode)
{
	if (mode == CEPH_CON_MODE_UNKNOWN)
		return "unknown";
	if (mode == CEPH_CON_MODE_CRC)
		return "crc";
	if (mode == CEPH_CON_MODE_SECURE)
		return "secure";

	return "???";
}
const char *ceph_osd_op_name(int op)
{
switch (op) {

View File

@ -5,6 +5,9 @@
#include <linux/ceph/types.h>
#include <linux/ceph/buffer.h>
#define CEPH_KEY_LEN 16
#define CEPH_MAX_CON_SECRET_LEN 64
/*
* cryptographic secret
*/

View File

@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/inet.h>
#include <linux/ceph/decode.h>
@ -82,3 +85,101 @@ bad:
}
EXPORT_SYMBOL(ceph_decode_entity_addr);
/*
 * Return addr of desired type (MSGR2 or LEGACY) or error.
 * Make sure there is only one match.
 *
 * Assume encoding with MSG_ADDR2.
 *
 * @p/@end:  decode cursor and end of the buffer
 * @msgr2:   look for a MSGR2 address if true, a LEGACY one otherwise
 * @addr:    output; receives the single matching address
 *
 * Returns 0 on success.  An empty addrvec (zero entries) is accepted
 * as success too; in that case @addr is zeroed so that a caller passing
 * an uninitialized buffer never ends up consuming garbage.
 *
 * Returns -EINVAL if the marker is not 2, if one of the bounds-checked
 * decode helpers bails out, or if more than one address of the desired
 * type is present; -ENOENT if the addrvec has entries but none of the
 * desired type; otherwise whatever ceph_decode_entity_addr() returned.
 */
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
			       struct ceph_entity_addr *addr)
{
	__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
				 CEPH_ENTITY_ADDR_TYPE_LEGACY;
	struct ceph_entity_addr tmp_addr;
	int addr_cnt;
	bool found;
	u8 marker;
	int ret;
	int i;

	ceph_decode_8_safe(p, end, marker, e_inval);
	if (marker != 2) {
		pr_err("bad addrvec marker %d\n", marker);
		return -EINVAL;
	}

	ceph_decode_32_safe(p, end, addr_cnt, e_inval);

	found = false;
	for (i = 0; i < addr_cnt; i++) {
		ret = ceph_decode_entity_addr(p, end, &tmp_addr);
		if (ret)
			return ret;

		if (tmp_addr.type == my_type) {
			if (found) {
				pr_err("another match of type %d in addrvec\n",
				       le32_to_cpu(my_type));
				return -EINVAL;
			}

			memcpy(addr, &tmp_addr, sizeof(*addr));
			found = true;
		}
	}
	if (found)
		return 0;

	if (addr_cnt != 0) {
		pr_err("no match of type %d in addrvec\n",
		       le32_to_cpu(my_type));
		return -ENOENT;
	}

	/*
	 * Empty addrvec: still a success, but don't leave @addr as the
	 * caller handed it to us -- it may be uninitialized memory.
	 */
	memset(addr, 0, sizeof(*addr));
	return 0;

e_inval:
	return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
/*
 * Number of sockaddr bytes that go on the wire for the given address
 * family: the exact structure size for IPv4 and IPv6, and the size of
 * the whole union below for anything else.
 */
static int get_sockaddr_encoding_len(sa_family_t family)
{
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} u;

	if (family == AF_INET)
		return sizeof(u.sin);
	if (family == AF_INET6)
		return sizeof(u.sin6);

	return sizeof(u);
}
/*
 * Compute how many bytes the encoded form of @addr takes.  The sum
 * follows the layout written by ceph_encode_entity_addr(): a one-byte
 * marker, the encoding start block, three 32-bit fields and the
 * family-dependent sockaddr.
 */
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr)
{
	sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
	int len;

	len = 1;				/* marker */
	len += CEPH_ENCODING_START_BLK_LEN;
	len += 4 + 4 + 4;			/* presumably type, nonce, sockaddr length */
	len += get_sockaddr_encoding_len(family);

	return len;
}
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
ceph_encode_8(p, 1); /* marker */
ceph_start_encoding(p, 1, 1, sizeof(addr->type) +
sizeof(addr->nonce) +
sizeof(u32) + addr_len);
ceph_encode_copy(p, &addr->type, sizeof(addr->type));
ceph_encode_copy(p, &addr->nonce, sizeof(addr->nonce));
ceph_encode_32(p, addr_len);
ceph_encode_16(p, family);
ceph_encode_copy(p, addr->in_addr.__data, addr_len - sizeof(family));
}

File diff suppressed because it is too large Load Diff

1506
net/ceph/messenger_v1.c Normal file

File diff suppressed because it is too large Load Diff

3443
net/ceph/messenger_v2.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops;
static int __validate_auth(struct ceph_mon_client *monc);
/*
 * Decode one "mon_info_t" structure: skip the monitor name, pull the
 * address of the wanted type (per @msgr2) out of the addrvec into
 * @addr, and then jump *p to the end of the structure so any trailing
 * fields are ignored.
 *
 * Returns 0 on success, -EINVAL if skipping the name fails, or the
 * error from ceph_start_decoding() / ceph_decode_entity_addrvec().
 */
static int decode_mon_info(void **p, void *end, bool msgr2,
			   struct ceph_entity_addr *addr)
{
	void *info_end;
	u32 len;
	u8 ver;
	int err;

	err = ceph_start_decoding(p, end, 1, "mon_info_t", &ver, &len);
	if (err)
		return err;

	/* remember where this structure stops before consuming any of it */
	info_end = *p + len;

	ceph_decode_skip_string(p, end, e_inval);  /* skip mon name */

	err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
	if (err)
		return err;

	*p = info_end;
	return 0;

e_inval:
	return -EINVAL;
}
/*
* Decode a monmap blob (e.g., during mount).
*
* Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
*/
static struct ceph_monmap *ceph_monmap_decode(void *p, void *end)
static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_monmap *m = NULL;
int i, err = -EINVAL;
struct ceph_monmap *monmap = NULL;
struct ceph_fsid fsid;
u32 epoch, num_mon;
u32 len;
u32 struct_len;
int blob_len;
int num_mon;
u8 struct_v;
u32 epoch;
int ret;
int i;
ceph_decode_32_safe(&p, end, len, bad);
ceph_decode_need(&p, end, len, bad);
ceph_decode_32_safe(p, end, blob_len, e_inval);
ceph_decode_need(p, end, blob_len, e_inval);
dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p));
p += sizeof(u16); /* skip version */
ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
if (ret)
goto fail;
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad);
ceph_decode_copy(&p, &fsid, sizeof(fsid));
epoch = ceph_decode_32(&p);
dout("%s struct_v %d\n", __func__, struct_v);
ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
ceph_decode_32_safe(p, end, epoch, e_inval);
if (struct_v >= 6) {
u32 feat_struct_len;
u8 feat_struct_v;
num_mon = ceph_decode_32(&p);
*p += sizeof(struct ceph_timespec); /* skip last_changed */
*p += sizeof(struct ceph_timespec); /* skip created */
if (num_mon > CEPH_MAX_MON)
goto bad;
m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS);
if (m == NULL)
return ERR_PTR(-ENOMEM);
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
for (i = 0; i < num_mon; ++i) {
struct ceph_entity_inst *inst = &m->mon_inst[i];
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;
/* copy name portion */
ceph_decode_copy_safe(&p, end, &inst->name,
sizeof(inst->name), bad);
err = ceph_decode_entity_addr(&p, end, &inst->addr);
if (err)
goto bad;
*p += feat_struct_len; /* skip persistent_features */
ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
&feat_struct_v, &feat_struct_len);
if (ret)
goto fail;
*p += feat_struct_len; /* skip optional_features */
}
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch,
m->num_mon);
for (i = 0; i < m->num_mon; i++)
dout("monmap_decode mon%d is %s\n", i,
ceph_pr_addr(&m->mon_inst[i].addr));
return m;
bad:
dout("monmap_decode failed with %d\n", err);
kfree(m);
return ERR_PTR(err);
ceph_decode_32_safe(p, end, num_mon, e_inval);
dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
num_mon);
if (num_mon > CEPH_MAX_MON)
goto e_inval;
monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
if (!monmap) {
ret = -ENOMEM;
goto fail;
}
monmap->fsid = fsid;
monmap->epoch = epoch;
monmap->num_mon = num_mon;
/* legacy_mon_addr map or mon_info map */
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monmap->mon_inst[i];
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
if (struct_v >= 6)
ret = decode_mon_info(p, end, msgr2, &inst->addr);
else
ret = ceph_decode_entity_addr(p, end, &inst->addr);
if (ret)
goto fail;
dout("%s mon%d addr %s\n", __func__, i,
ceph_pr_addr(&inst->addr));
}
return monmap;
e_inval:
ret = -EINVAL;
fail:
kfree(monmap);
return ERR_PTR(ret);
}
/*
@ -96,9 +161,11 @@ int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
{
int i;
for (i = 0; i < m->num_mon; i++)
if (memcmp(addr, &m->mon_inst[i].addr, sizeof(*addr)) == 0)
for (i = 0; i < m->num_mon; i++) {
if (ceph_addr_equal_no_type(addr, &m->mon_inst[i].addr))
return 1;
}
return 0;
}
@ -190,10 +257,16 @@ static void __open_session(struct ceph_mon_client *monc)
&monc->monmap->mon_inst[monc->cur_mon].addr);
/*
* send an initial keepalive to ensure our timestamp is valid
* by the time we are in an OPENED state
* Queue a keepalive to ensure that in case of an early fault
* the messenger doesn't put us into STANDBY state and instead
* retries. This also ensures that our timestamp is valid by
* the time we finish hunting and delayed_work() checks it.
*/
ceph_con_keepalive(&monc->con);
if (ceph_msgr2(monc->client)) {
monc->pending_auth = 1;
return;
}
/* initiate authentication handshake */
ret = ceph_auth_build_hello(monc->auth,
@ -476,7 +549,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
p = msg->front.iov_base;
end = p + msg->front.iov_len;
monmap = ceph_monmap_decode(p, end);
monmap = ceph_monmap_decode(&p, end, ceph_msgr2(client));
if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap));
@ -1052,8 +1125,9 @@ static void delayed_work(struct work_struct *work)
*/
static int build_initial_monmap(struct ceph_mon_client *monc)
{
__le32 my_type = ceph_msgr2(monc->client) ?
CEPH_ENTITY_ADDR_TYPE_MSGR2 : CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_options *opt = monc->client->options;
struct ceph_entity_addr *mon_addr = opt->mon_addr;
int num_mon = opt->num_mon;
int i;
@ -1062,12 +1136,16 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
GFP_KERNEL);
if (!monc->monmap)
return -ENOMEM;
for (i = 0; i < num_mon; i++) {
monc->monmap->mon_inst[i].addr = mon_addr[i];
monc->monmap->mon_inst[i].addr.nonce = 0;
monc->monmap->mon_inst[i].name.type =
CEPH_ENTITY_TYPE_MON;
monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
memcpy(&inst->addr.in_addr, &opt->mon_addr[i].in_addr,
sizeof(inst->addr.in_addr));
inst->addr.type = my_type;
inst->addr.nonce = 0;
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
}
monc->monmap->num_mon = num_mon;
return 0;
@ -1089,8 +1167,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
/* connection */
/* authentication */
monc->auth = ceph_auth_init(cl->options->name,
cl->options->key);
monc->auth = ceph_auth_init(cl->options->name, cl->options->key,
cl->options->con_modes);
if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth);
goto out_monmap;
@ -1194,30 +1272,22 @@ static void finish_hunting(struct ceph_mon_client *monc)
}
}
static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
static void finish_auth(struct ceph_mon_client *monc, int auth_err,
bool was_authed)
{
int ret;
int was_auth = 0;
dout("%s auth_err %d was_authed %d\n", __func__, auth_err, was_authed);
WARN_ON(auth_err > 0);
mutex_lock(&monc->mutex);
was_auth = ceph_auth_is_authenticated(monc->auth);
monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_alloc_len);
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
goto out;
if (auth_err) {
monc->client->auth_err = auth_err;
wake_up_all(&monc->client->auth_wq);
return;
}
finish_hunting(monc);
if (ret < 0) {
monc->client->auth_err = ret;
} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
if (!was_authed && ceph_auth_is_authenticated(monc->auth)) {
dout("%s authenticated, starting session global_id %llu\n",
__func__, monc->auth->global_id);
monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
monc->client->msgr.inst.name.num =
@ -1229,11 +1299,27 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
pr_info("mon%d %s session established\n", monc->cur_mon,
ceph_pr_addr(&monc->con.peer_addr));
}
}
out:
static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
bool was_authed;
int ret;
mutex_lock(&monc->mutex);
was_authed = ceph_auth_is_authenticated(monc->auth);
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_alloc_len);
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else {
finish_auth(monc, ret, was_authed);
finish_hunting(monc);
}
mutex_unlock(&monc->mutex);
if (monc->client->auth_err < 0)
wake_up_all(&monc->client->auth_wq);
}
static int __validate_auth(struct ceph_mon_client *monc)
@ -1262,6 +1348,88 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
}
EXPORT_SYMBOL(ceph_monc_validate_auth);
/*
 * Connection callback: build an auth request into @buf.
 *
 * ceph_auth_get_request() is called under monc->mutex.  On success
 * *buf_len is updated to the value it returned and no authorizer is
 * handed back (*authorizer = NULL, *authorizer_len = 0).  A negative
 * return from ceph_auth_get_request() is passed through unchanged.
 */
static int mon_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_mon_client *monc = con->private;
	int len;

	mutex_lock(&monc->mutex);
	len = ceph_auth_get_request(monc->auth, buf, *buf_len);
	mutex_unlock(&monc->mutex);

	if (len >= 0) {
		*buf_len = len;
		*authorizer = NULL;
		*authorizer_len = 0;
		return 0;
	}

	return len;
}
/*
 * Connection callback: process an intermediate auth reply and prepare
 * the follow-up request in @buf.
 *
 * ceph_auth_handle_reply_more() is called under monc->mutex.  On
 * success *buf_len is updated to the value it returned and no
 * authorizer is handed back.  A negative return is passed through
 * unchanged.
 */
static int mon_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_mon_client *monc = con->private;
	int len;

	mutex_lock(&monc->mutex);
	len = ceph_auth_handle_reply_more(monc->auth, reply, reply_len,
					  buf, *buf_len);
	mutex_unlock(&monc->mutex);

	if (len >= 0) {
		*buf_len = len;
		*authorizer = NULL;
		*authorizer_len = 0;
		return 0;
	}

	return len;
}
/*
 * Connection callback: process the final auth reply.
 *
 * Everything runs under monc->mutex.  The result of
 * ceph_auth_handle_reply_done() is handed to finish_auth() together
 * with whether we were already authenticated beforehand;
 * finish_hunting() is called only if that result was 0.
 *
 * Always returns 0 -- an auth failure is reported through
 * finish_auth(), not through the return value.
 */
static int mon_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_mon_client *monc = con->private;
	bool was_authed;
	int err;

	mutex_lock(&monc->mutex);

	WARN_ON(!monc->hunting);
	/* sample the state before the reply gets a chance to change it */
	was_authed = ceph_auth_is_authenticated(monc->auth);

	err = ceph_auth_handle_reply_done(monc->auth, global_id,
					  reply, reply_len,
					  session_key, session_key_len,
					  con_secret, con_secret_len);
	finish_auth(monc, err, was_authed);
	if (err == 0)
		finish_hunting(monc);

	mutex_unlock(&monc->mutex);
	return 0;
}
/*
 * Connection callback: the peer rejected the auth method we used.
 *
 * Under monc->mutex, forwards the allowed protocols and modes to
 * ceph_auth_handle_bad_method() and then completes the attempt via
 * finish_auth() with -EACCES.
 *
 * Always returns 0.
 */
static int mon_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_mon_client *monc = con->private;
	bool authed_before;

	mutex_lock(&monc->mutex);

	WARN_ON(!monc->hunting);
	authed_before = ceph_auth_is_authenticated(monc->auth);

	ceph_auth_handle_bad_method(monc->auth, used_proto, result,
				    allowed_protos, proto_cnt,
				    allowed_modes, mode_cnt);
	finish_auth(monc, -EACCES, authed_before);

	mutex_unlock(&monc->mutex);
	return 0;
}
/*
* handle incoming message
*/
@ -1412,4 +1580,8 @@ static const struct ceph_connection_operations mon_con_ops = {
.dispatch = dispatch,
.fault = mon_fault,
.alloc_msg = mon_alloc_msg,
.get_auth_request = mon_get_auth_request,
.handle_auth_reply_more = mon_handle_auth_reply_more,
.handle_auth_done = mon_handle_auth_done,
.handle_auth_bad_method = mon_handle_auth_bad_method,
};

View File

@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc);
if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap);
newmap = osdmap_apply_incremental(&p, end,
ceph_msgr2(osdc->client),
osdc->osdmap);
else
newmap = ceph_osdmap_decode(&p, end);
newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client));
if (IS_ERR(newmap))
return PTR_ERR(newmap);
@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con)
/*
* authentication
*/
/*
* Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it.
@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
force_new, proto, NULL, NULL);
if (ret)
return ERR_PTR(ret);
return auth;
}
@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer);
return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
}
static int invalidate_authorizer(struct ceph_connection *con)
@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc);
}
/*
 * Connection callback: obtain an OSD authorizer and the auth request
 * for this connection.
 *
 * @buf / @buf_len are passed straight to ceph_auth_get_authorizer().
 * On success the authorizer buffer held in the OSD's auth handshake is
 * returned through @authorizer / @authorizer_len.  A non-zero return
 * from ceph_auth_get_authorizer() is passed through and the outputs
 * are left untouched.
 */
static int osd_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *hs = &osd->o_auth;
	int err;

	err = ceph_auth_get_authorizer(osd->o_osdc->client->monc.auth, hs,
				       CEPH_ENTITY_TYPE_OSD, buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * Connection callback: process an intermediate service auth reply.
 *
 * Delegates to ceph_auth_handle_svc_reply_more().  On success the
 * authorizer buffer held in the OSD's auth handshake is returned
 * through @authorizer / @authorizer_len; on failure the error is
 * passed through and the outputs are left untouched.
 */
static int osd_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *hs = &osd->o_auth;
	int err;

	err = ceph_auth_handle_svc_reply_more(osd->o_osdc->client->monc.auth,
					      hs, reply, reply_len,
					      buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * Connection callback: process the final service auth reply.
 *
 * Thin wrapper around ceph_auth_handle_svc_reply_done() using the
 * client's auth state and this OSD's auth handshake.  @global_id is
 * not used here.  Returns whatever the helper returns.
 */
static int osd_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_osd *osd = con->private;

	return ceph_auth_handle_svc_reply_done(osd->o_osdc->client->monc.auth,
					       &osd->o_auth,
					       reply, reply_len,
					       session_key, session_key_len,
					       con_secret, con_secret_len);
}
/*
 * Connection callback: the OSD rejected the auth method we used.
 *
 * If ceph_auth_handle_bad_authorizer() returns non-zero,
 * ceph_monc_validate_auth() is called and its error, if any, is
 * returned.  In every other case the result is -EACCES; this function
 * never returns 0.
 */
static int osd_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_osd *osd = con->private;
	struct ceph_mon_client *monc = &osd->o_osdc->client->monc;
	int err;

	if (!ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD,
					     used_proto, result,
					     allowed_protos, proto_cnt,
					     allowed_modes, mode_cnt))
		return -EACCES;

	err = ceph_monc_validate_auth(monc);
	return err ? err : -EACCES;
}
static void osd_reencode_message(struct ceph_msg *msg)
{
int type = le16_to_cpu(msg->hdr.type);
@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = {
.sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature,
.fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
.handle_auth_bad_method = osd_handle_auth_bad_method,
};

View File

@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end,
/*
* decode a full map.
*/
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
static int osdmap_decode(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
u8 struct_v;
u32 epoch = 0;
@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
goto e_inval;
for (i = 0; i < map->max_osd; i++) {
err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
struct ceph_entity_addr *addr = &map->osd_addr[i];
if (struct_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
else
err = ceph_decode_entity_addr(p, end, addr);
if (err)
goto bad;
dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr));
}
/* pg_temp */
@ -1790,7 +1798,7 @@ bad:
/*
* Allocate and decode a full map.
*/
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{
struct ceph_osdmap *map;
int ret;
@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
if (!map)
return ERR_PTR(-ENOMEM);
ret = osdmap_decode(p, end, map);
ret = osdmap_decode(p, end, msgr2, map);
if (ret) {
ceph_osdmap_destroy(map);
return ERR_PTR(ret);
@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
* new_state: { osd=6, xorstate=EXISTS } # clear osd_state
*/
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_osdmap *map)
bool msgr2, struct ceph_osdmap *map)
{
void *new_up_client;
void *new_state;
void *new_weight_end;
u32 len;
int ret;
int i;
new_up_client = *p;
@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr;
ceph_decode_skip_32(p, end, e_inval);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
}
new_state = *p;
@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
while (len--) {
s32 osd;
u32 xorstate;
int ret;
osd = ceph_decode_32(p);
if (struct_v >= 5)
@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd);
if (ceph_decode_entity_addr(p, end, &addr))
goto e_inval;
if (struct_v >= 7)
ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr;
@ -1927,7 +1946,7 @@ e_inval:
/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{
struct ceph_fsid fsid;
@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end));
return ceph_osdmap_decode(p, min(*p+len, end), msgr2);
}
/* new crush? */
@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
/* new_up_client, new_state, new_weight */
err = decode_new_up_state_weight(p, end, struct_v, map);
err = decode_new_up_state_weight(p, end, struct_v, msgr2, map);
if (err)
goto bad;