The big ticket item here is support for msgr2 on-wire protocol, which
 adds the option of full in-transit encryption using AES-GCM algorithm
 (myself).  On top of that we have a series to avoid intermittent
 errors during recovery with recover_session=clean and some MDS request
 encoding work from Jeff, a cap handling fix and assorted observability
 improvements from Luis and Xiubo and a good number of cleanups.  Luis
 also ran into a corner case with quotas which sadly means that we are
 back to denying cross-quota-realm renames.
 -----BEGIN PGP SIGNATURE-----
 
 iQFHBAABCAAxFiEEydHwtzie9C7TfviiSn/eOAIR84sFAl/beWITHGlkcnlvbW92
 QGdtYWlsLmNvbQAKCRBKf944AhHzi4i0CACnvd87l2n7dndig7p5d5lVsmo8tAFs
 wHYHaIVisWKMcqKoT+YJajSgzaonxjzvYiyCzwLxV7s7vI7cswAwjEfYT7tTDRp2
 pnO1+4N/1ftznnTk/1QdqwOQLUg5UtdgWvFCaXQF+Vr/YroZomKJPaK8fXK882pC
 9FBjoLNy1HWySsoXPCxJktmDzpEEyYRNJg0vquxm7mxwTgQErupWlwEFjNg5LBkm
 gC0UoKhCE3DeUrXnoq21Ga62RIajxHofTooNx7dg+JiSVgluW+nORaWDYJXNzwLC
 j5puSe4pWIah+gmcwIFuyNz4ddkvVL4URvsYPGkVFYXlEefQjErc10Jh
 =6b9f
 -----END PGP SIGNATURE-----

Merge tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "The big ticket item here is support for msgr2 on-wire protocol, which
  adds the option of full in-transit encryption using AES-GCM algorithm
  (myself).

  On top of that we have a series to avoid intermittent errors during
  recovery with recover_session=clean and some MDS request encoding work
  from Jeff, a cap handling fix and assorted observability improvements
  from Luis and Xiubo and a good number of cleanups.

  Luis also ran into a corner case with quotas which sadly means that we
  are back to denying cross-quota-realm renames"

* tag 'ceph-for-5.11-rc1' of git://github.com/ceph/ceph-client: (59 commits)
  libceph: drop ceph_auth_{create,update}_authorizer()
  libceph, ceph: make use of __ceph_auth_get_authorizer() in msgr1
  libceph, ceph: implement msgr2.1 protocol (crc and secure modes)
  libceph: introduce connection modes and ms_mode option
  libceph, rbd: ignore addr->type while comparing in some cases
  libceph, ceph: get and handle cluster maps with addrvecs
  libceph: factor out finish_auth()
  libceph: drop ac->ops->name field
  libceph: amend cephx init_protocol() and build_request()
  libceph, ceph: incorporate nautilus cephx changes
  libceph: safer en/decoding of cephx requests and replies
  libceph: more insight into ticket expiry and invalidation
  libceph: move msgr1 protocol specific fields to its own struct
  libceph: move msgr1 protocol implementation to its own file
  libceph: separate msgr1 protocol implementation
  libceph: export remaining protocol independent infrastructure
  libceph: export zero_page
  libceph: rename and export con->flags bits
  libceph: rename and export con->state states
  libceph: make con->state an int
  ...
This commit is contained in:
Linus Torvalds 2020-12-17 11:53:52 -08:00
commit be695ee29e
41 changed files with 7225 additions and 2193 deletions

View File

@ -3925,8 +3925,12 @@ static int find_watcher(struct rbd_device *rbd_dev,
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie); sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) { for (i = 0; i < num_watchers; i++) {
if (!memcmp(&watchers[i].addr, &locker->info.addr, /*
sizeof(locker->info.addr)) && * Ignore addr->type while comparing. This mimics
* entity_addr_t::get_legacy_str() + strcmp().
*/
if (ceph_addr_equal_no_type(&watchers[i].addr,
&locker->info.addr) &&
watchers[i].cookie == cookie) { watchers[i].cookie == cookie) {
struct rbd_client_id cid = { struct rbd_client_id cid = {
.gid = le64_to_cpu(watchers[i].name.num), .gid = le64_to_cpu(watchers[i].name.num),

View File

@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" : wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) { if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited( pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n", "writepage_start %p %lld forced umount\n",
@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page); dout(" page %p forced umount\n", page);
return ERR_PTR(-EIO); return ERR_PTR(-EIO);
} }
@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len); dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
for (;;) { for (;;) {
page = grab_cache_page_write_begin(mapping, index, 0); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) { if (!page) {
r = -ENOMEM; r = -ENOMEM;
break; break;

View File

@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
{ {
struct ceph_mds_session *session = cap->session; struct ceph_mds_session *session = cap->session;
struct ceph_inode_info *ci = cap->ci; struct ceph_inode_info *ci = cap->ci;
struct ceph_mds_client *mdsc = struct ceph_mds_client *mdsc;
ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
int removed = 0; int removed = 0;
/* 'ci' being NULL means the remove have already occurred */
if (!ci) {
dout("%s: cap inode is NULL\n", __func__);
return;
}
dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */ /* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps); rb_erase(&cap->ci_node, &ci->i_caps);
if (ci->i_auth_cap == cap) { if (ci->i_auth_cap == cap) {
WARN_ON_ONCE(!list_empty(&ci->i_dirty_item)); WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
!mdsc->fsc->blocklisted);
ci->i_auth_cap = NULL; ci->i_auth_cap = NULL;
} }
@ -2746,7 +2754,7 @@ again:
goto out_unlock; goto out_unlock;
} }
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode); dout("get_cap_refs %p forced umount\n", inode);
ret = -EIO; ret = -EIO;
goto out_unlock; goto out_unlock;
@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
} }
if (msg_version >= 8) { if (msg_version >= 8) {
u64 flush_tid;
u32 caller_uid, caller_gid;
u32 pool_ns_len; u32 pool_ns_len;
/* version >= 6 */ /* version >= 6 */
ceph_decode_64_safe(&p, end, flush_tid, bad); ceph_decode_skip_64(&p, end, bad); // flush_tid
/* version >= 7 */ /* version >= 7 */
ceph_decode_32_safe(&p, end, caller_uid, bad); ceph_decode_skip_32(&p, end, bad); // caller_uid
ceph_decode_32_safe(&p, end, caller_gid, bad); ceph_decode_skip_32(&p, end, bad); // caller_gid
/* version >= 8 */ /* version >= 8 */
ceph_decode_32_safe(&p, end, pool_ns_len, bad); ceph_decode_32_safe(&p, end, pool_ns_len, bad);
if (pool_ns_len > 0) { if (pool_ns_len > 0) {
@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
} }
if (msg_version >= 11) { if (msg_version >= 11) {
u32 flags;
/* version >= 10 */ /* version >= 10 */
ceph_decode_32_safe(&p, end, flags, bad); ceph_decode_skip_32(&p, end, bad); // flags
/* version >= 11 */ /* version >= 11 */
extra_info.dirstat_valid = true; extra_info.dirstat_valid = true;
ceph_decode_64_safe(&p, end, extra_info.nfiles, bad); ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);

View File

@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
return 0; return 0;
} }
/*
 * seq_file show callback backing the "status" debugfs file.
 *
 * Prints two lines for the ceph_fs_client stored in s->private:
 *   instance:    the client's entity name, followed by the address
 *                returned by ceph_client_addr() and that address's nonce
 *   blocklisted: "true" or "false", from fsc->blocklisted
 *
 * The second parameter is unused.  Always returns 0.
 */
static int status_show(struct seq_file *s, void *p)
{
struct ceph_fs_client *fsc = s->private;
struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
/*
 * The format has four conversions but only three visible arguments, so
 * ENTITY_NAME() must supply both the %s and the %lld value.
 * NOTE(review): macro definition is not visible here -- confirm.
 * The nonce is stored little-endian and converted for printing.
 */
seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(mdsmap); DEFINE_SHOW_ATTRIBUTE(mdsmap);
DEFINE_SHOW_ATTRIBUTE(mdsc); DEFINE_SHOW_ATTRIBUTE(mdsc);
DEFINE_SHOW_ATTRIBUTE(caps); DEFINE_SHOW_ATTRIBUTE(caps);
DEFINE_SHOW_ATTRIBUTE(mds_sessions); DEFINE_SHOW_ATTRIBUTE(mds_sessions);
DEFINE_SHOW_ATTRIBUTE(metric); DEFINE_SHOW_ATTRIBUTE(metric);
DEFINE_SHOW_ATTRIBUTE(status);
/* /*
@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
fsc->client->debugfs_dir, fsc->client->debugfs_dir,
fsc, fsc,
&caps_fops); &caps_fops);
fsc->debugfs_status = debugfs_create_file("status",
0400,
fsc->client->debugfs_dir,
fsc,
&status_fops);
} }

View File

@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
op = CEPH_MDS_OP_RENAMESNAP; op = CEPH_MDS_OP_RENAMESNAP;
else else
return -EROFS; return -EROFS;
} else if (old_dir != new_dir) {
err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
new_dir);
if (err)
return err;
} }
/* don't allow cross-quota renames */
if ((old_dir != new_dir) &&
(!ceph_quota_is_same_realm(old_dir, new_dir)))
return -EXDEV;
dout("rename dir %p dentry %p to dir %p dentry %p\n", dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry); old_dir, old_dentry, new_dir, new_dentry);

View File

@ -1315,15 +1315,10 @@ retry_lookup:
} }
if (rinfo->head->is_target) { if (rinfo->head->is_target) {
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino); /* Should be filled in by handle_reply */
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid); BUG_ON(!req->r_target_inode);
in = ceph_get_inode(sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
goto done;
}
in = req->r_target_inode;
err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti, err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
NULL, session, NULL, session,
(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) && (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@ -1333,11 +1328,13 @@ retry_lookup:
if (err < 0) { if (err < 0) {
pr_err("ceph_fill_inode badness %p %llx.%llx\n", pr_err("ceph_fill_inode badness %p %llx.%llx\n",
in, ceph_vinop(in)); in, ceph_vinop(in));
req->r_target_inode = NULL;
if (in->i_state & I_NEW) if (in->i_state & I_NEW)
discard_new_inode(in); discard_new_inode(in);
else
iput(in);
goto done; goto done;
} }
req->r_target_inode = in;
if (in->i_state & I_NEW) if (in->i_state & I_NEW)
unlock_new_inode(in); unlock_new_inode(in);
} }
@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct dentry *dn; struct dentry *dn;
struct inode *in; struct inode *in;
int err = 0, skipped = 0, ret, i; int err = 0, skipped = 0, ret, i;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base; u32 frag = le32_to_cpu(req->r_args.readdir.frag);
u32 frag = le32_to_cpu(rhead->args.readdir.frag);
u32 last_hash = 0; u32 last_hash = 0;
u32 fpos_offset; u32 fpos_offset;
struct ceph_readdir_cache_control cache_ctl = {}; struct ceph_readdir_cache_control cache_ctl = {};
@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
} else if (rinfo->offset_hash) { } else if (rinfo->offset_hash) {
/* mds understands offset_hash */ /* mds understands offset_hash */
WARN_ON_ONCE(req->r_readdir_offset != 2); WARN_ON_ONCE(req->r_readdir_offset != 2);
last_hash = le32_to_cpu(rhead->args.readdir.offset_hash); last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
} }
} }
@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
mutex_lock(&ci->i_truncate_mutex); mutex_lock(&ci->i_truncate_mutex);
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n", pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
inode, ceph_ino(inode)); inode, ceph_ino(inode));
mapping_set_error(inode->i_mapping, -EIO); mapping_set_error(inode->i_mapping, -EIO);
@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
} }
/* Craft a mask of needed caps given a set of requested statx attrs. */ /* Craft a mask of needed caps given a set of requested statx attrs. */
static int statx_to_caps(u32 want) static int statx_to_caps(u32 want, umode_t mode)
{ {
int mask = 0; int mask = 0;
if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME)) if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
mask |= CEPH_CAP_AUTH_SHARED; mask |= CEPH_CAP_AUTH_SHARED;
if (want & (STATX_NLINK|STATX_CTIME)) if (want & (STATX_NLINK|STATX_CTIME)) {
mask |= CEPH_CAP_LINK_SHARED; /*
* The link count for directories depends on inode->i_subdirs,
* and that is only updated when Fs caps are held.
*/
if (S_ISDIR(mode))
mask |= CEPH_CAP_FILE_SHARED;
else
mask |= CEPH_CAP_LINK_SHARED;
}
if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE| if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
STATX_BLOCKS)) STATX_BLOCKS))
@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
/* Skip the getattr altogether if we're asked not to sync */ /* Skip the getattr altogether if we're asked not to sync */
if (!(flags & AT_STATX_DONT_SYNC)) { if (!(flags & AT_STATX_DONT_SYNC)) {
err = ceph_do_getattr(inode, statx_to_caps(request_mask), err = ceph_do_getattr(inode,
flags & AT_STATX_FORCE_SYNC); statx_to_caps(request_mask, inode->i_mode),
flags & AT_STATX_FORCE_SYNC);
if (err) if (err)
return err; return err;
} }

View File

@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
.fl_release_private = ceph_fl_release_lock, .fl_release_private = ceph_fl_release_lock,
}; };
/** /*
* Implement fcntl and flock locking functions. * Implement fcntl and flock locking functions.
*/ */
static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode, static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
return 1; return 1;
} }
/** /*
* Attempt to set an fcntl lock. * Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome. * For now, this just goes away to the server. Later it may be more awesome.
*/ */
@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
return err; return err;
} }
/** /*
* Encode the flock and fcntl locks for the given inode into the ceph_filelock * Encode the flock and fcntl locks for the given inode into the ceph_filelock
* array. Must be called with inode->i_lock already held. * array. Must be called with inode->i_lock already held.
* If we encounter more of a specific lock type than expected, return -ENOSPC. * If we encounter more of a specific lock type than expected, return -ENOSPC.
@ -458,7 +458,7 @@ fail:
return err; return err;
} }
/** /*
* Copy the encoded flock and fcntl locks into the pagelist. * Copy the encoded flock and fcntl locks into the pagelist.
* Format is: #fcntl locks, sequential fcntl locks, #flock locks, * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
* sequential flock locks. * sequential flock locks.

View File

@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
/* Malformed reply? */ /* Malformed reply? */
info->has_create_ino = false; info->has_create_ino = false;
} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) { } else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
u8 struct_v, struct_compat;
u32 len;
info->has_create_ino = true; info->has_create_ino = true;
ceph_decode_8_safe(p, end, struct_v, bad); /* struct_v, struct_compat, and len */
ceph_decode_8_safe(p, end, struct_compat, bad); ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
ceph_decode_32_safe(p, end, len, bad);
ceph_decode_64_safe(p, end, info->ino, bad); ceph_decode_64_safe(p, end, info->ino, bad);
ret = ceph_parse_deleg_inos(p, end, s); ret = ceph_parse_deleg_inos(p, end, s);
if (ret) if (ret)
@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
} }
kfree(req->r_path1); kfree(req->r_path1);
kfree(req->r_path2); kfree(req->r_path2);
put_cred(req->r_cred);
if (req->r_pagelist) if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist); ceph_pagelist_release(req->r_pagelist);
put_request_session(req); put_request_session(req);
@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
ceph_mdsc_get_request(req); ceph_mdsc_get_request(req);
insert_request(&mdsc->request_tree, req); insert_request(&mdsc->request_tree, req);
req->r_uid = current_fsuid(); req->r_cred = get_current_cred();
req->r_gid = current_fsgid();
if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK) if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
mdsc->oldest_tid = req->r_tid; mdsc->oldest_tid = req->r_tid;
@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
{ {
struct ceph_msg *msg; struct ceph_msg *msg;
struct ceph_mds_session_head *h; struct ceph_mds_session_head *h;
int i = -1; int i;
int extra_bytes = 0; int extra_bytes = 0;
int metadata_key_count = 0; int metadata_key_count = 0;
struct ceph_options *opt = mdsc->fsc->client->options; struct ceph_options *opt = mdsc->fsc->client->options;
@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_cap_flush *cf; struct ceph_cap_flush *cf;
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) { if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
if (inode->i_data.nrpages > 0) if (inode->i_data.nrpages > 0)
invalidate = true; invalidate = true;
if (ci->i_wrbuffer_ref > 0) if (ci->i_wrbuffer_ref > 0)
@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
/* /*
* called under mdsc->mutex * called under mdsc->mutex
*/ */
static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
struct ceph_mds_request *req, struct ceph_mds_request *req,
int mds, bool drop_cap_releases) bool drop_cap_releases)
{ {
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg; struct ceph_msg *msg;
struct ceph_mds_request_head *head; struct ceph_mds_request_head_old *head;
const char *path1 = NULL; const char *path1 = NULL;
const char *path2 = NULL; const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0; u64 ino1 = 0, ino2 = 0;
int pathlen1 = 0, pathlen2 = 0; int pathlen1 = 0, pathlen2 = 0;
bool freepath1 = false, freepath2 = false; bool freepath1 = false, freepath2 = false;
int len; int len, i;
u16 releases; u16 releases;
void *p, *end; void *p, *end;
int ret; int ret;
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
ret = set_request_path_attr(req->r_inode, req->r_dentry, ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino, req->r_parent, req->r_path1, req->r_ino1.ino,
@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free1; goto out_free1;
} }
len = sizeof(*head) + if (legacy) {
pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + /* Old style */
len = sizeof(*head);
} else {
/* New style: add gid_list and any later fields */
len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
(sizeof(u64) * req->r_cred->group_info->ngroups);
}
len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
sizeof(struct ceph_timespec); sizeof(struct ceph_timespec);
/* calculate (max) length for cap releases */ /* calculate (max) length for cap releases */
len += sizeof(struct ceph_mds_request_release) * len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop + (!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop); !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_dentry_drop) if (req->r_dentry_drop)
len += pathlen1; len += pathlen1;
if (req->r_old_dentry_drop) if (req->r_old_dentry_drop)
@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2; goto out_free2;
} }
msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid); msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base; /*
p = msg->front.iov_base + sizeof(*head); * The old ceph_mds_request_header didn't contain a version field, and
* one was added when we moved the message version from 3->4.
*/
if (legacy) {
msg->hdr.version = cpu_to_le16(3);
head = msg->front.iov_base;
p = msg->front.iov_base + sizeof(*head);
} else {
struct ceph_mds_request_head *new_head = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4);
new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
p = msg->front.iov_base + sizeof(*new_head);
}
end = msg->front.iov_base + msg->front.iov_len; end = msg->front.iov_base + msg->front.iov_len;
head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch); head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
head->op = cpu_to_le32(req->r_op); head->op = cpu_to_le32(req->r_op);
head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid)); head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid)); req->r_cred->fsuid));
head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
req->r_cred->fsgid));
head->ino = cpu_to_le64(req->r_deleg_ino); head->ino = cpu_to_le64(req->r_deleg_ino);
head->args = req->r_args; head->args = req->r_args;
@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
ceph_encode_copy(&p, &ts, sizeof(ts)); ceph_encode_copy(&p, &ts, sizeof(ts));
} }
/* gid list */
if (!legacy) {
ceph_encode_32(&p, req->r_cred->group_info->ngroups);
for (i = 0; i < req->r_cred->group_info->ngroups; i++)
ceph_encode_64(&p, from_kgid(&init_user_ns,
req->r_cred->group_info->gid[i]));
}
if (WARN_ON_ONCE(p > end)) { if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg); ceph_msg_put(msg);
msg = ERR_PTR(-ERANGE); msg = ERR_PTR(-ERANGE);
@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
complete_all(&req->r_completion); complete_all(&req->r_completion);
} }
/*
 * Locate the old-style request head inside a request buffer.
 *
 * @p:        start of the buffer holding the encoded request head
 * @features: feature bits used to decide which layout the buffer has
 *
 * If CEPH_FEATURE_FS_BTIME is not set in @features, the buffer is treated
 * as "legacy" and @p itself is returned as the old-style head.  Otherwise
 * the buffer is treated as a struct ceph_mds_request_head, and the
 * old-style head is taken to begin at its oldest_client_tid member.
 *
 * No validation of @p is performed; the result is only a reinterpreting
 * cast of an address inside the same buffer.
 */
static struct ceph_mds_request_head_old *
find_old_request_head(void *p, u64 features)
{
bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
struct ceph_mds_request_head *new_head;
if (legacy)
return (struct ceph_mds_request_head_old *)p;
new_head = (struct ceph_mds_request_head *)p;
return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
}
/* /*
* called under mdsc->mutex * called under mdsc->mutex
*/ */
static int __prepare_send_request(struct ceph_mds_client *mdsc, static int __prepare_send_request(struct ceph_mds_session *session,
struct ceph_mds_request *req, struct ceph_mds_request *req,
int mds, bool drop_cap_releases) bool drop_cap_releases)
{ {
struct ceph_mds_request_head *rhead; int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_mds_request_head_old *rhead;
struct ceph_msg *msg; struct ceph_msg *msg;
int flags = 0; int flags = 0;
@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) { if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
void *p; void *p;
/* /*
* Replay. Do not regenerate message (and rebuild * Replay. Do not regenerate message (and rebuild
* paths, etc.); just use the original message. * paths, etc.); just use the original message.
@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
* d_move mangles the src name. * d_move mangles the src name.
*/ */
msg = req->r_request; msg = req->r_request;
rhead = msg->front.iov_base; rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
flags = le32_to_cpu(rhead->flags); flags = le32_to_cpu(rhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY; flags |= CEPH_MDS_FLAG_REPLAY;
@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
ceph_msg_put(req->r_request); ceph_msg_put(req->r_request);
req->r_request = NULL; req->r_request = NULL;
} }
msg = create_request_message(mdsc, req, mds, drop_cap_releases); msg = create_request_message(session, req, drop_cap_releases);
if (IS_ERR(msg)) { if (IS_ERR(msg)) {
req->r_err = PTR_ERR(msg); req->r_err = PTR_ERR(msg);
return PTR_ERR(msg); return PTR_ERR(msg);
} }
req->r_request = msg; req->r_request = msg;
rhead = msg->front.iov_base; rhead = find_old_request_head(msg->front.iov_base,
session->s_con.peer_features);
rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc)); rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY; flags |= CEPH_MDS_FLAG_REPLAY;
@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
/* /*
* called under mdsc->mutex * called under mdsc->mutex
*/ */
static int __send_request(struct ceph_mds_client *mdsc, static int __send_request(struct ceph_mds_session *session,
struct ceph_mds_session *session,
struct ceph_mds_request *req, struct ceph_mds_request *req,
bool drop_cap_releases) bool drop_cap_releases)
{ {
int err; int err;
err = __prepare_send_request(mdsc, req, session->s_mds, err = __prepare_send_request(session, req, drop_cap_releases);
drop_cap_releases);
if (!err) { if (!err) {
ceph_msg_get(req->r_request); ceph_msg_get(req->r_request);
ceph_con_send(&session->s_con, req->r_request); ceph_con_send(&session->s_con, req->r_request);
@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
ceph_session_state_name(session->s_state)); ceph_session_state_name(session->s_state));
if (session->s_state != CEPH_MDS_SESSION_OPEN && if (session->s_state != CEPH_MDS_SESSION_OPEN &&
session->s_state != CEPH_MDS_SESSION_HUNG) { session->s_state != CEPH_MDS_SESSION_HUNG) {
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
err = -EACCES;
goto out_session;
}
/* /*
* We cannot queue async requests since the caps and delegated * We cannot queue async requests since the caps and delegated
* inodes are bound to the session. Just return -EJUKEBOX and * inodes are bound to the session. Just return -EJUKEBOX and
@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
err = -EJUKEBOX; err = -EJUKEBOX;
goto out_session; goto out_session;
} }
/*
* If the session has been REJECTED, then return a hard error,
* unless it's a CLEANRECOVER mount, in which case we'll queue
* it to the mdsc queue.
*/
if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
list_add(&req->r_wait, &mdsc->waiting_for_map);
else
err = -EACCES;
goto out_session;
}
if (session->s_state == CEPH_MDS_SESSION_NEW || if (session->s_state == CEPH_MDS_SESSION_NEW ||
session->s_state == CEPH_MDS_SESSION_CLOSING) { session->s_state == CEPH_MDS_SESSION_CLOSING) {
err = __open_session(mdsc, session); err = __open_session(mdsc, session);
@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
if (req->r_request_started == 0) /* note request start time */ if (req->r_request_started == 0) /* note request start time */
req->r_request_started = jiffies; req->r_request_started = jiffies;
err = __send_request(mdsc, session, req, false); err = __send_request(session, req, false);
out_session: out_session:
ceph_put_mds_session(session); ceph_put_mds_session(session);
@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features); err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
/* Must find target inode outside of mutexes to avoid deadlocks */
if ((err >= 0) && rinfo->head->is_target) {
struct inode *in;
struct ceph_vino tvino = {
.ino = le64_to_cpu(rinfo->targeti.in->ino),
.snap = le64_to_cpu(rinfo->targeti.in->snapid)
};
in = ceph_get_inode(mdsc->fsc->sb, tvino);
if (IS_ERR(in)) {
err = PTR_ERR(in);
mutex_lock(&session->s_mutex);
goto out_err;
}
req->r_target_inode = in;
}
mutex_lock(&session->s_mutex); mutex_lock(&session->s_mutex);
if (err < 0) { if (err < 0) {
pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid); pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item) list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
__send_request(mdsc, session, req, true); __send_request(session, req, true);
/* /*
* also re-send old requests when MDS enters reconnect stage. So that MDS * also re-send old requests when MDS enters reconnect stage. So that MDS
@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
ceph_mdsc_release_dir_caps_no_check(req); ceph_mdsc_release_dir_caps_no_check(req);
__send_request(mdsc, session, req, true); __send_request(session, req, true);
} }
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
} }
@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
if (!READ_ONCE(fsc->blocklisted)) if (!READ_ONCE(fsc->blocklisted))
return; return;
if (fsc->last_auto_reconnect &&
time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
return;
pr_info("auto reconnect after blocklisted\n"); pr_info("auto reconnect after blocklisted\n");
fsc->last_auto_reconnect = jiffies;
ceph_force_reconnect(fsc->sb); ceph_force_reconnect(fsc->sb);
} }
@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{ {
u64 want_tid, want_flush; u64 want_tid, want_flush;
if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
return; return;
dout("sync\n"); dout("sync\n");
@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
void *p = msg->front.iov_base; void *p = msg->front.iov_base;
void *end = p + msg->front.iov_len; void *end = p + msg->front.iov_len;
u32 epoch; u32 epoch;
u32 map_len;
u32 num_fs; u32 num_fs;
u32 mount_fscid = (u32)-1; u32 mount_fscid = (u32)-1;
u8 struct_v, struct_cv;
int err = -EINVAL; int err = -EINVAL;
ceph_decode_need(&p, end, sizeof(u32), bad); ceph_decode_need(&p, end, sizeof(u32), bad);
@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_fsmap epoch %u\n", epoch); dout("handle_fsmap epoch %u\n", epoch);
ceph_decode_need(&p, end, 2 + sizeof(u32), bad); /* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
struct_v = ceph_decode_8(&p); ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
struct_cv = ceph_decode_8(&p);
map_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, sizeof(u32) * 3, bad); ceph_decode_32_safe(&p, end, num_fs, bad);
p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
num_fs = ceph_decode_32(&p);
while (num_fs-- > 0) { while (num_fs-- > 0) {
void *info_p, *info_end; void *info_p, *info_end;
u32 info_len; u32 info_len;
u8 info_v, info_cv;
u32 fscid, namelen; u32 fscid, namelen;
ceph_decode_need(&p, end, 2 + sizeof(u32), bad); ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
info_v = ceph_decode_8(&p); p += 2; // info_v, info_cv
info_cv = ceph_decode_8(&p);
info_len = ceph_decode_32(&p); info_len = ceph_decode_32(&p);
ceph_decode_need(&p, end, info_len, bad); ceph_decode_need(&p, end, info_len, bad);
info_p = p; info_p = p;
@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
return; return;
} }
newmap = ceph_mdsmap_decode(&p, end); newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
if (IS_ERR(newmap)) { if (IS_ERR(newmap)) {
err = PTR_ERR(newmap); err = PTR_ERR(newmap);
goto bad_unlock; goto bad_unlock;
@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth; struct ceph_auth_handshake *auth = &s->s_auth;
int ret;
if (force_new && auth->authorizer) { ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
ceph_auth_destroy_authorizer(auth->authorizer); force_new, proto, NULL, NULL);
auth->authorizer = NULL; if (ret)
} return ERR_PTR(ret);
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
return auth; return auth;
} }
@ -5118,8 +5167,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_mds_session *s = con->private; struct ceph_mds_session *s = con->private;
struct ceph_mds_client *mdsc = s->s_mdsc; struct ceph_mds_client *mdsc = s->s_mdsc;
struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
struct ceph_auth_handshake *auth = &s->s_auth;
return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer); return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
} }
static int invalidate_authorizer(struct ceph_connection *con) static int invalidate_authorizer(struct ceph_connection *con)
@ -5133,6 +5185,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&mdsc->fsc->client->monc); return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
} }
/*
 * .get_auth_request hook of mds_con_ops.
 *
 * Asks ceph_auth_get_authorizer() for a CEPH_ENTITY_TYPE_MDS authorizer,
 * using the session's s_auth handshake state and the caller's buf/buf_len.
 * On success the handshake's authorizer buffer and its length are handed
 * back through @authorizer and @authorizer_len.
 *
 * Returns 0 on success, otherwise the error from ceph_auth_get_authorizer()
 * (in which case the output pointers are left untouched).
 */
static int mds_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_auth_handshake *hs = &session->s_auth;
	int err;

	err = ceph_auth_get_authorizer(session->s_mdsc->fsc->client->monc.auth,
				       hs, CEPH_ENTITY_TYPE_MDS,
				       buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * .handle_auth_reply_more hook of mds_con_ops.
 *
 * Passes the peer's @reply to ceph_auth_handle_svc_reply_more() together
 * with the session's s_auth handshake state and the caller's buf/buf_len.
 * On success the handshake's authorizer buffer and its length are handed
 * back through @authorizer and @authorizer_len.
 *
 * Returns 0 on success, otherwise the error from
 * ceph_auth_handle_svc_reply_more() (output pointers left untouched).
 */
static int mds_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_auth_handshake *hs = &session->s_auth;
	int err;

	err = ceph_auth_handle_svc_reply_more(
			session->s_mdsc->fsc->client->monc.auth, hs,
			reply, reply_len, buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * .handle_auth_done hook of mds_con_ops.
 *
 * Thin forwarder: hands the final @reply plus the session-key and
 * connection-secret output buffers to ceph_auth_handle_svc_reply_done(),
 * using the session's s_auth handshake state.  @global_id is accepted to
 * satisfy the hook signature but is not used here.
 *
 * Returns whatever ceph_auth_handle_svc_reply_done() returns.
 */
static int mds_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_mds_session *session = con->private;

	return ceph_auth_handle_svc_reply_done(
			session->s_mdsc->fsc->client->monc.auth,
			&session->s_auth, reply, reply_len,
			session_key, session_key_len,
			con_secret, con_secret_len);
}
/*
 * .handle_auth_bad_method hook of mds_con_ops.
 *
 * Reports the rejected protocol/mode lists to
 * ceph_auth_handle_bad_authorizer().  When that helper returns true,
 * ceph_monc_validate_auth() is invoked on the monitor client and any error
 * from it is propagated.
 *
 * This hook never returns 0: the result is either the
 * ceph_monc_validate_auth() error or -EACCES.
 */
static int mds_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_mds_session *session = con->private;
	struct ceph_mon_client *monc = &session->s_mdsc->fsc->client->monc;
	int err;

	if (!ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS,
					     used_proto, result,
					     allowed_protos, proto_cnt,
					     allowed_modes, mode_cnt))
		return -EACCES;

	err = ceph_monc_validate_auth(monc);
	if (err)
		return err;

	return -EACCES;
}
static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con, static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip) struct ceph_msg_header *hdr, int *skip)
{ {
@ -5182,6 +5308,10 @@ static const struct ceph_connection_operations mds_con_ops = {
.alloc_msg = mds_alloc_msg, .alloc_msg = mds_alloc_msg,
.sign_message = mds_sign_message, .sign_message = mds_sign_message,
.check_message_signature = mds_check_message_signature, .check_message_signature = mds_check_message_signature,
.get_auth_request = mds_get_auth_request,
.handle_auth_reply_more = mds_handle_auth_reply_more,
.handle_auth_done = mds_handle_auth_done,
.handle_auth_bad_method = mds_handle_auth_bad_method,
}; };
/* eof */ /* eof */

View File

@ -275,8 +275,7 @@ struct ceph_mds_request {
union ceph_mds_request_args r_args; union ceph_mds_request_args r_args;
int r_fmode; /* file mode, if expecting cap */ int r_fmode; /* file mode, if expecting cap */
kuid_t r_uid; const struct cred *r_cred;
kgid_t r_gid;
int r_request_release_offset; int r_request_release_offset;
struct timespec64 r_stamp; struct timespec64 r_stamp;

View File

@ -114,7 +114,7 @@ bad:
* Ignore any fields we don't care about (there are quite a few of * Ignore any fields we don't care about (there are quite a few of
* them). * them).
*/ */
struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
{ {
struct ceph_mdsmap *m; struct ceph_mdsmap *m;
const void *start = *p; const void *start = *p;
@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
namelen = ceph_decode_32(p); /* skip mds name */ namelen = ceph_decode_32(p); /* skip mds name */
*p += namelen; *p += namelen;
ceph_decode_need(p, end, ceph_decode_32_safe(p, end, mds, bad);
4*sizeof(u32) + sizeof(u64) + ceph_decode_32_safe(p, end, inc, bad);
sizeof(addr) + sizeof(struct ceph_timespec), ceph_decode_32_safe(p, end, state, bad);
bad);
mds = ceph_decode_32(p);
inc = ceph_decode_32(p);
state = ceph_decode_32(p);
*p += sizeof(u64); /* state_seq */ *p += sizeof(u64); /* state_seq */
err = ceph_decode_entity_addr(p, end, &addr); if (info_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
err = ceph_decode_entity_addr(p, end, &addr);
if (err) if (err)
goto corrupt; goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
bad);
laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0; laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32); *p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad); ceph_decode_32_safe(p, end, namelen, bad);
@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
} }
if (state <= 0) { if (state <= 0) {
pr_warn("mdsmap_decode got incorrect state(%s)\n", dout("mdsmap_decode got incorrect state(%s)\n",
ceph_mds_state_name(state)); ceph_mds_state_name(state));
continue; continue;
} }

View File

@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
struct ceph_metric_read_latency *read; struct ceph_metric_read_latency *read;
struct ceph_metric_write_latency *write; struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta; struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
struct ceph_client_metric *m = &mdsc->metric; struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps); u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg; struct ceph_msg *msg;
@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
s32 len; s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write) len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
+ sizeof(*meta); + sizeof(*meta) + sizeof(*dlease);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true); msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) { if (!msg) {
@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
cap->ver = 1; cap->ver = 1;
cap->compat = 1; cap->compat = 1;
cap->data_len = cpu_to_le32(sizeof(*cap) - 10); cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit)); cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis)); cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
cap->total = cpu_to_le64(nr_caps); cap->total = cpu_to_le64(nr_caps);
items++; items++;
@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
meta->nsec = cpu_to_le32(ts.tv_nsec); meta->nsec = cpu_to_le32(ts.tv_nsec);
items++; items++;
/* encode the dentry lease metric */
dlease = (struct ceph_metric_dlease *)(meta + 1);
dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
dlease->ver = 1;
dlease->compat = 1;
dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
put_unaligned_le32(items, &head->num); put_unaligned_le32(items, &head->num);
msg->front.iov_len = len; msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1); msg->hdr.version = cpu_to_le16(1);

View File

@ -27,6 +27,7 @@ enum ceph_metric_type {
CLIENT_METRIC_TYPE_READ_LATENCY, \ CLIENT_METRIC_TYPE_READ_LATENCY, \
CLIENT_METRIC_TYPE_WRITE_LATENCY, \ CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \ CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
\ \
CLIENT_METRIC_TYPE_MAX, \ CLIENT_METRIC_TYPE_MAX, \
} }
@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
__le32 nsec; __le32 nsec;
} __packed; } __packed;
/*
 * metric dentry lease header
 *
 * Packed record appended after the metadata-latency metric in the
 * CEPH_MSG_CLIENT_METRICS message built by ceph_mdsc_send_metrics().
 * The sender fills data_len with sizeof(struct) - 10, i.e. the bytes that
 * follow the type/ver/compat/data_len fields (4 + 1 + 1 + 4).
 */
struct ceph_metric_dlease {
__le32 type; /* ceph metric type */
__u8 ver; /* sender writes 1 */
__u8 compat; /* sender writes 1 */
__le32 data_len; /* length of sizeof(hit + mis + total) */
__le64 hit; /* summed from the d_lease_hit per-cpu counter */
__le64 mis; /* summed from the d_lease_mis per-cpu counter */
__le64 total; /* read from the total_dentries atomic counter */
} __packed;
struct ceph_metric_head { struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */ __le32 num; /* the number of metrics that will be sent */
} __packed; } __packed;

View File

@ -264,7 +264,7 @@ restart:
return NULL; return NULL;
} }
static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new) bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
{ {
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb); struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
struct ceph_snap_realm *old_realm, *new_realm; struct ceph_snap_realm *old_realm, *new_realm;
@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
return is_updated; return is_updated;
} }
/*
 * ceph_quota_check_rename - decide whether a rename fits the target's quotas
 * @mdsc: MDS client instance (not referenced in the body)
 * @old: inode being renamed
 * @new: destination inode (directory)
 *
 * Nothing is checked when ceph_quota_is_same_realm() reports that @old and
 * @new share a quota realm.  Otherwise CEPH_STAT_RSTAT attributes are
 * requested for @new (and for @old as well when it is a directory), and the
 * max_bytes and max_files limits of @new are tested against what the rename
 * would add: the recursive byte count and files + subdirs count for a
 * directory, or the file size and a count of one for anything else.
 *
 * Returns 0 if the rename may proceed, the ceph_do_getattr() error if an
 * attribute fetch fails, -EXDEV if a directory would exceed a limit, or
 * -EDQUOT if a non-directory would.
 */
int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
			    struct inode *old, struct inode *new)
{
	struct ceph_inode_info *src_ci = ceph_inode(old);
	int err;

	if (ceph_quota_is_same_realm(old, new))
		return 0;

	/* Fetch rstat for the target directory first. */
	err = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
	if (err)
		return err;

	if (!S_ISDIR(old->i_mode)) {
		/* Plain file: account for its size and a single entry. */
		if (check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
					 i_size_read(old)) ||
		    check_quota_exceeded(new, QUOTA_CHECK_MAX_FILES_OP, 1))
			return -EDQUOT;
		return 0;
	}

	/* Directory: its own rstat is needed for the recursive totals. */
	err = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
	if (err)
		return err;

	if (check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
				 src_ci->i_rbytes) ||
	    check_quota_exceeded(new, QUOTA_CHECK_MAX_FILES_OP,
				 src_ci->i_rfiles + src_ci->i_rsubdirs))
		return -EXDEV;

	return 0;
}

View File

@ -831,6 +831,13 @@ static void destroy_caches(void)
ceph_fscache_unregister(); ceph_fscache_unregister();
} }
/*
 * Teardown steps shared by ceph_umount_begin() and ceph_force_reconnect().
 * Both callers set fsc->mount_state themselves before calling in
 * (CEPH_MOUNT_SHUTDOWN and CEPH_MOUNT_RECOVER respectively), so this helper
 * deliberately leaves mount_state alone.
 */
static void __ceph_umount_begin(struct ceph_fs_client *fsc)
{
/* abort requests on the OSD client, passing -EIO as the error code */
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
}
/* /*
* ceph_umount_begin - initiate forced umount. Tear down the * ceph_umount_begin - initiate forced umount. Tear down the
* mount, skipping steps that may hang while waiting for server(s). * mount, skipping steps that may hang while waiting for server(s).
@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
if (!fsc) if (!fsc)
return; return;
fsc->mount_state = CEPH_MOUNT_SHUTDOWN; fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
ceph_osdc_abort_requests(&fsc->client->osdc, -EIO); __ceph_umount_begin(fsc);
ceph_mdsc_force_umount(fsc->mdsc);
fsc->filp_gen++; // invalidate open files
} }
static const struct super_operations ceph_super_ops = { static const struct super_operations ceph_super_ops = {
@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
struct ceph_fs_client *fsc = ceph_sb_to_client(sb); struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0; int err = 0;
ceph_umount_begin(sb); fsc->mount_state = CEPH_MOUNT_RECOVER;
__ceph_umount_begin(fsc);
/* Make sure all page caches get invalidated. /* Make sure all page caches get invalidated.
* see remove_session_caps_cb() */ * see remove_session_caps_cb() */

View File

@ -106,9 +106,8 @@ struct ceph_fs_client {
struct ceph_mount_options *mount_options; struct ceph_mount_options *mount_options;
struct ceph_client *client; struct ceph_client *client;
unsigned long mount_state; int mount_state;
unsigned long last_auto_reconnect;
bool blocklisted; bool blocklisted;
bool have_copy_from2; bool have_copy_from2;
@ -129,6 +128,7 @@ struct ceph_fs_client {
struct dentry *debugfs_bdi; struct dentry *debugfs_bdi;
struct dentry *debugfs_mdsc, *debugfs_mdsmap; struct dentry *debugfs_mdsc, *debugfs_mdsmap;
struct dentry *debugfs_metric; struct dentry *debugfs_metric;
struct dentry *debugfs_status;
struct dentry *debugfs_mds_sessions; struct dentry *debugfs_mds_sessions;
#endif #endif
@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session, struct ceph_mds_session *session,
struct ceph_msg *msg); struct ceph_msg *msg);
extern bool ceph_quota_is_max_files_exceeded(struct inode *inode); extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode, extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
loff_t newlen); loff_t newlen);
extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode, extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
loff_t newlen); loff_t newlen);
extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
struct kstatfs *buf); struct kstatfs *buf);
extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
struct inode *old, struct inode *new);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc); extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */ #endif /* _FS_CEPH_SUPER_H */

View File

@ -42,6 +42,7 @@ struct ceph_vxattr {
#define VXATTR_FLAG_READONLY (1<<0) #define VXATTR_FLAG_READONLY (1<<0)
#define VXATTR_FLAG_HIDDEN (1<<1) #define VXATTR_FLAG_HIDDEN (1<<1)
#define VXATTR_FLAG_RSTAT (1<<2) #define VXATTR_FLAG_RSTAT (1<<2)
#define VXATTR_FLAG_DIRSTAT (1<<3)
/* layouts */ /* layouts */
@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
ci->i_snap_btime.tv_nsec); ci->i_snap_btime.tv_nsec);
} }
/*
 * getxattr callback behind the "ceph.cluster_fsid" vxattr.
 *
 * Looks up the fs client from the inode's superblock and formats its
 * client->fsid with the "%pU" specifier into @val (at most @size bytes,
 * as enforced by ceph_fmt_xattr()).  Returns ceph_fmt_xattr()'s result.
 */
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
					  char *val, size_t size)
{
	return ceph_fmt_xattr(val, size, "%pU",
			      &ceph_sb_to_client(ci->vfs_inode.i_sb)->client->fsid);
}
/*
 * getxattr callback behind the "ceph.client_id" vxattr.
 *
 * Emits "client" followed by the value of ceph_client_gid() for the client
 * that owns the inode's superblock.  Returns ceph_fmt_xattr()'s result.
 */
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
				       char *val, size_t size)
{
	return ceph_fmt_xattr(val, size, "client%lld",
			      ceph_client_gid(ceph_sb_to_client(ci->vfs_inode.i_sb)->client));
}
/*
 * getxattr callback behind the "ceph.caps" vxattr.
 *
 * Samples the issued caps mask under ci->i_ceph_lock, then formats it as
 * "<cap string>/0x<hex mask>" outside the lock.  Returns ceph_fmt_xattr()'s
 * result.
 */
static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
				  size_t size)
{
	const char *caps_str;
	int caps_mask;

	spin_lock(&ci->i_ceph_lock);
	caps_mask = __ceph_caps_issued(ci, NULL);
	spin_unlock(&ci->i_ceph_lock);

	caps_str = ceph_cap_string(caps_mask);
	return ceph_fmt_xattr(val, size, "%s/0x%x", caps_str, caps_mask);
}
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name #define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \ #define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
XATTR_LAYOUT_FIELD(dir, layout, object_size), XATTR_LAYOUT_FIELD(dir, layout, object_size),
XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_LAYOUT_FIELD(dir, layout, pool),
XATTR_LAYOUT_FIELD(dir, layout, pool_namespace), XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
XATTR_NAME_CEPH(dir, entries, 0), XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, files, 0), XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
XATTR_NAME_CEPH(dir, subdirs, 0), XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
XATTR_RSTAT_FIELD(dir, rentries), XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles), XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs), XATTR_RSTAT_FIELD(dir, rsubdirs),
@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists, .exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY, .flags = VXATTR_FLAG_READONLY,
}, },
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */ { .name = NULL, 0 } /* Required table terminator */
}; };
@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
.exists_cb = ceph_vxattrcb_snap_btime_exists, .exists_cb = ceph_vxattrcb_snap_btime_exists,
.flags = VXATTR_FLAG_READONLY, .flags = VXATTR_FLAG_READONLY,
}, },
{
.name = "ceph.caps",
.name_size = sizeof("ceph.caps"),
.getxattr_cb = ceph_vxattrcb_caps,
.exists_cb = NULL,
.flags = VXATTR_FLAG_HIDDEN,
},
{ .name = NULL, 0 } /* Required table terminator */
};
static struct ceph_vxattr ceph_common_vxattrs[] = {
{
.name = "ceph.cluster_fsid",
.name_size = sizeof("ceph.cluster_fsid"),
.getxattr_cb = ceph_vxattrcb_cluster_fsid,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{
.name = "ceph.client_id",
.name_size = sizeof("ceph.client_id"),
.getxattr_cb = ceph_vxattrcb_client_id,
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
{ .name = NULL, 0 } /* Required table terminator */ { .name = NULL, 0 } /* Required table terminator */
}; };
@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
} }
} }
vxattr = ceph_common_vxattrs;
while (vxattr->name) {
if (!strcmp(vxattr->name, name))
return vxattr;
vxattr++;
}
return NULL; return NULL;
} }
@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
int mask = 0; int mask = 0;
if (vxattr->flags & VXATTR_FLAG_RSTAT) if (vxattr->flags & VXATTR_FLAG_RSTAT)
mask |= CEPH_STAT_RSTAT; mask |= CEPH_STAT_RSTAT;
if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
mask |= CEPH_CAP_FILE_SHARED;
err = ceph_do_getattr(inode, mask, true); err = ceph_do_getattr(inode, mask, true);
if (err) if (err)
return err; return err;
@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_request *req; struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_pagelist *pagelist = NULL; struct ceph_pagelist *pagelist = NULL;
int op = CEPH_MDS_OP_SETXATTR; int op = CEPH_MDS_OP_SETXATTR;
int err; int err;
@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
if (op == CEPH_MDS_OP_SETXATTR) { if (op == CEPH_MDS_OP_SETXATTR) {
req->r_args.setxattr.flags = cpu_to_le32(flags); req->r_args.setxattr.flags = cpu_to_le32(flags);
req->r_args.setxattr.osdmap_epoch =
cpu_to_le32(osdc->osdmap->epoch);
req->r_pagelist = pagelist; req->r_pagelist = pagelist;
pagelist = NULL; pagelist = NULL;
} }

View File

@ -32,8 +32,6 @@ struct ceph_auth_handshake {
}; };
struct ceph_auth_client_ops { struct ceph_auth_client_ops {
const char *name;
/* /*
* true if we are authenticated and can connect to * true if we are authenticated and can connect to
* services. * services.
@ -53,7 +51,9 @@ struct ceph_auth_client_ops {
*/ */
int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end); int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
int (*handle_reply)(struct ceph_auth_client *ac, int result, int (*handle_reply)(struct ceph_auth_client *ac, int result,
void *buf, void *end); void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len);
/* /*
* Create authorizer for connecting to a service, and verify * Create authorizer for connecting to a service, and verify
@ -69,7 +69,10 @@ struct ceph_auth_client_ops {
void *challenge_buf, void *challenge_buf,
int challenge_buf_len); int challenge_buf_len);
int (*verify_authorizer_reply)(struct ceph_auth_client *ac, int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
struct ceph_authorizer *a); struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
void (*invalidate_authorizer)(struct ceph_auth_client *ac, void (*invalidate_authorizer)(struct ceph_auth_client *ac,
int peer_type); int peer_type);
@ -95,11 +98,15 @@ struct ceph_auth_client {
const struct ceph_crypto_key *key; /* our secret key */ const struct ceph_crypto_key *key; /* our secret key */
unsigned want_keys; /* which services we want */ unsigned want_keys; /* which services we want */
int preferred_mode; /* CEPH_CON_MODE_* */
int fallback_mode; /* ditto */
struct mutex mutex; struct mutex mutex;
}; };
extern struct ceph_auth_client *ceph_auth_init(const char *name, struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key); const struct ceph_crypto_key *key,
const int *con_modes);
extern void ceph_auth_destroy(struct ceph_auth_client *ac); extern void ceph_auth_destroy(struct ceph_auth_client *ac);
extern void ceph_auth_reset(struct ceph_auth_client *ac); extern void ceph_auth_reset(struct ceph_auth_client *ac);
@ -113,21 +120,22 @@ int ceph_auth_entity_name_encode(const char *name, void **p, void *end);
extern int ceph_build_auth(struct ceph_auth_client *ac, extern int ceph_build_auth(struct ceph_auth_client *ac,
void *msg_buf, size_t msg_len); void *msg_buf, size_t msg_len);
extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);
extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac,
int peer_type, int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth); struct ceph_auth_handshake *auth,
int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); void ceph_auth_destroy_authorizer(struct ceph_authorizer *a);
extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a, struct ceph_authorizer *a,
void *challenge_buf, void *challenge_buf,
int challenge_buf_len); int challenge_buf_len);
extern int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a); struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, extern void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac,
int peer_type); int peer_type);
@ -147,4 +155,34 @@ int ceph_auth_check_message_signature(struct ceph_auth_handshake *auth,
return auth->check_message_signature(auth, msg); return auth->check_message_signature(auth, msg);
return 0; return 0;
} }
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len);
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
int reply_len, void *buf, int buf_len);
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len);
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
#endif #endif

View File

@ -8,7 +8,8 @@
* feature. Base case is 1 (first use). * feature. Base case is 1 (first use).
*/ */
#define CEPH_FEATURE_INCARNATION_1 (0ull) #define CEPH_FEATURE_INCARNATION_1 (0ull)
#define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // SERVER_JEWEL
#define CEPH_FEATURE_INCARNATION_3 ((1ull<<57)|(1ull<<28)) // SERVER_MIMIC
#define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \
static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \ static const uint64_t __maybe_unused CEPH_FEATURE_##name = (1ULL<<bit); \
@ -75,7 +76,7 @@
DEFINE_CEPH_FEATURE( 0, 1, UID) DEFINE_CEPH_FEATURE( 0, 1, UID)
DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR) DEFINE_CEPH_FEATURE( 1, 1, NOSRCADDR)
DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS) DEFINE_CEPH_FEATURE_RETIRED( 2, 1, MONCLOCKCHECK, JEWEL, LUMINOUS)
DEFINE_CEPH_FEATURE( 2, 3, SERVER_NAUTILUS)
DEFINE_CEPH_FEATURE( 3, 1, FLOCK) DEFINE_CEPH_FEATURE( 3, 1, FLOCK)
DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2) DEFINE_CEPH_FEATURE( 4, 1, SUBSCRIBE2)
DEFINE_CEPH_FEATURE( 5, 1, MONNAMES) DEFINE_CEPH_FEATURE( 5, 1, MONNAMES)
@ -114,7 +115,7 @@ DEFINE_CEPH_FEATURE(25, 1, CRUSH_TUNABLES2)
DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID) DEFINE_CEPH_FEATURE(26, 1, CREATEPOOLID)
DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE) DEFINE_CEPH_FEATURE(27, 1, REPLY_CREATE_INODE)
DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL) DEFINE_CEPH_FEATURE_RETIRED(28, 1, OSD_HBMSGS, HAMMER, JEWEL)
DEFINE_CEPH_FEATURE(28, 2, SERVER_M) DEFINE_CEPH_FEATURE(28, 2, SERVER_MIMIC)
DEFINE_CEPH_FEATURE(29, 1, MDSENC) DEFINE_CEPH_FEATURE(29, 1, MDSENC)
DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL) DEFINE_CEPH_FEATURE(30, 1, OSDHASHPSPOOL)
DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me DEFINE_CEPH_FEATURE(31, 1, MON_SINGLE_PAXOS) // deprecate me
@ -177,13 +178,16 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
*/ */
#define CEPH_FEATURES_SUPPORTED_DEFAULT \ #define CEPH_FEATURES_SUPPORTED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \ (CEPH_FEATURE_NOSRCADDR | \
CEPH_FEATURE_SERVER_NAUTILUS | \
CEPH_FEATURE_FLOCK | \ CEPH_FEATURE_FLOCK | \
CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_SUBSCRIBE2 | \
CEPH_FEATURE_MONNAMES | \
CEPH_FEATURE_RECONNECT_SEQ | \ CEPH_FEATURE_RECONNECT_SEQ | \
CEPH_FEATURE_DIRLAYOUTHASH | \ CEPH_FEATURE_DIRLAYOUTHASH | \
CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGID64 | \
CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_PGPOOL3 | \
CEPH_FEATURE_OSDENC | \ CEPH_FEATURE_OSDENC | \
CEPH_FEATURE_MONENC | \
CEPH_FEATURE_CRUSH_TUNABLES | \ CEPH_FEATURE_CRUSH_TUNABLES | \
CEPH_FEATURE_SERVER_LUMINOUS | \ CEPH_FEATURE_SERVER_LUMINOUS | \
CEPH_FEATURE_RESEND_ON_SPLIT | \ CEPH_FEATURE_RESEND_ON_SPLIT | \
@ -193,6 +197,7 @@ DEFINE_CEPH_FEATURE_DEPRECATED(63, 1, RESERVED_BROKEN, LUMINOUS) // client-facin
CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_MSG_AUTH | \
CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \
CEPH_FEATURE_REPLY_CREATE_INODE | \ CEPH_FEATURE_REPLY_CREATE_INODE | \
CEPH_FEATURE_SERVER_MIMIC | \
CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_MDSENC | \
CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \

View File

@ -93,8 +93,19 @@ struct ceph_dir_layout {
#define CEPH_AUTH_NONE 0x1 #define CEPH_AUTH_NONE 0x1
#define CEPH_AUTH_CEPHX 0x2 #define CEPH_AUTH_CEPHX 0x2
#define CEPH_AUTH_MODE_NONE 0
#define CEPH_AUTH_MODE_AUTHORIZER 1
#define CEPH_AUTH_MODE_MON 10
/* msgr2 protocol modes */
#define CEPH_CON_MODE_UNKNOWN 0x0
#define CEPH_CON_MODE_CRC 0x1
#define CEPH_CON_MODE_SECURE 0x2
#define CEPH_AUTH_UID_DEFAULT ((__u64) -1) #define CEPH_AUTH_UID_DEFAULT ((__u64) -1)
const char *ceph_auth_proto_name(int proto);
const char *ceph_con_mode_name(int mode);
/********************************************* /*********************************************
* message layer * message layer
@ -424,6 +435,7 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) open; } __attribute__ ((packed)) open;
struct { struct {
__le32 flags; __le32 flags;
__le32 osdmap_epoch; /* used for setting file/dir layouts */
} __attribute__ ((packed)) setxattr; } __attribute__ ((packed)) setxattr;
struct { struct {
struct ceph_file_layout_legacy layout; struct ceph_file_layout_legacy layout;
@ -445,11 +457,25 @@ union ceph_mds_request_args {
} __attribute__ ((packed)) lookupino; } __attribute__ ((packed)) lookupino;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Extended MDS request arguments.
 *
 * Overlays the original union ceph_mds_request_args (member "old") with a
 * setattr variant that also carries a btime field.  The inner setattr_ext
 * struct is packed; the enclosing union carries no packed attribute of its
 * own.
 * NOTE(review): setattr_ext presumably mirrors the existing setattr args
 * with btime appended -- confirm against union ceph_mds_request_args.
 */
union ceph_mds_request_args_ext {
union ceph_mds_request_args old;
struct {
__le32 mode;
__le32 uid;
__le32 gid;
struct ceph_timespec mtime;
struct ceph_timespec atime;
__le64 size, old_size; /* old_size needed by truncate */
__le32 mask; /* CEPH_SETATTR_* */
struct ceph_timespec btime;
} __attribute__ ((packed)) setattr_ext;
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */ #define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
struct ceph_mds_request_head { struct ceph_mds_request_head_old {
__le64 oldest_client_tid; __le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */ __le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */ __le32 flags; /* CEPH_MDS_FLAG_* */
@ -462,6 +488,22 @@ struct ceph_mds_request_head {
union ceph_mds_request_args args; union ceph_mds_request_args args;
} __attribute__ ((packed)); } __attribute__ ((packed));
#define CEPH_MDS_REQUEST_HEAD_VERSION 1
/*
 * Versioned MDS request head.
 *
 * Packed structure that starts with a 16-bit struct version and ends with
 * the extended argument union (union ceph_mds_request_args_ext).
 *
 * NOTE(review): the fields between version and args appear to match
 * struct ceph_mds_request_head_old one-for-one -- confirm before relying
 * on converting between the two by skipping the version field.
 */
struct ceph_mds_request_head {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
__u8 num_retry, num_fwd; /* count retry, fwd attempts */
__le16 num_releases; /* # include cap/lease release records */
__le32 op; /* mds op code */
__le32 caller_uid, caller_gid;
__le64 ino; /* use this ino for openc, mkdir, mknod,
etc. (if replaying) */
/* extended args: legacy union plus the setattr_ext variant */
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
/* cap/lease release record */ /* cap/lease release record */
struct ceph_mds_request_release { struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */ __le64 ino, cap_id; /* ino and unique cap id */

View File

@ -220,6 +220,8 @@ static inline void ceph_encode_timespec64(struct ceph_timespec *tv,
*/ */
#define CEPH_ENTITY_ADDR_TYPE_NONE 0 #define CEPH_ENTITY_ADDR_TYPE_NONE 0
#define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1) #define CEPH_ENTITY_ADDR_TYPE_LEGACY __cpu_to_le32(1)
#define CEPH_ENTITY_ADDR_TYPE_MSGR2 __cpu_to_le32(2)
#define CEPH_ENTITY_ADDR_TYPE_ANY __cpu_to_le32(3)
static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a) static inline void ceph_encode_banner_addr(struct ceph_entity_addr *a)
{ {
@ -239,6 +241,12 @@ static inline void ceph_decode_banner_addr(struct ceph_entity_addr *a)
extern int ceph_decode_entity_addr(void **p, void *end, extern int ceph_decode_entity_addr(void **p, void *end,
struct ceph_entity_addr *addr); struct ceph_entity_addr *addr);
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr);
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr);
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr);
/* /*
* encoders * encoders
*/ */

View File

@ -31,10 +31,10 @@
#define CEPH_OPT_FSID (1<<0) #define CEPH_OPT_FSID (1<<0)
#define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */
#define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */
#define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes (msgr1) */
#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */
#define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */
#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ #define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs (msgr1) */
#define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */ #define CEPH_OPT_ABORT_ON_FULL (1<<7) /* abort w/ ENOSPC when full */
#define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY)
@ -53,6 +53,7 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */ unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */ unsigned long osd_request_timeout; /* jiffies */
u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */ u32 read_from_replica; /* CEPH_OSD_FLAG_BALANCE/LOCALIZE_READS */
int con_modes[2]; /* CEPH_CON_MODE_* */
/* /*
* any type that can't be simply compared or doesn't need * any type that can't be simply compared or doesn't need
@ -83,6 +84,7 @@ struct ceph_options {
#define CEPH_MONC_HUNT_BACKOFF 2 #define CEPH_MONC_HUNT_BACKOFF 2
#define CEPH_MONC_HUNT_MAX_MULT 10 #define CEPH_MONC_HUNT_MAX_MULT 10
#define CEPH_MSG_MAX_CONTROL_LEN (16*1024*1024)
#define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024)
#define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024) #define CEPH_MSG_MAX_MIDDLE_LEN (16*1024*1024)
@ -104,6 +106,7 @@ enum {
CEPH_MOUNT_UNMOUNTING, CEPH_MOUNT_UNMOUNTING,
CEPH_MOUNT_UNMOUNTED, CEPH_MOUNT_UNMOUNTED,
CEPH_MOUNT_SHUTDOWN, CEPH_MOUNT_SHUTDOWN,
CEPH_MOUNT_RECOVER,
}; };
static inline unsigned long ceph_timeout_jiffies(unsigned long timeout) static inline unsigned long ceph_timeout_jiffies(unsigned long timeout)
@ -150,6 +153,10 @@ struct ceph_client {
#define from_msgr(ms) container_of(ms, struct ceph_client, msgr) #define from_msgr(ms) container_of(ms, struct ceph_client, msgr)
/*
 * Tell whether @client has a connection mode configured: returns true
 * when the first entry of options->con_modes is anything other than
 * CEPH_CON_MODE_UNKNOWN, false otherwise.
 */
static inline bool ceph_msgr2(struct ceph_client *client)
{
	const int first_mode = client->options->con_modes[0];

	if (first_mode == CEPH_CON_MODE_UNKNOWN)
		return false;

	return true;
}
/* /*
* snapshots * snapshots

View File

@ -64,7 +64,7 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
} }
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m); extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);

View File

@ -3,6 +3,7 @@
#define __FS_CEPH_MESSENGER_H #define __FS_CEPH_MESSENGER_H
#include <linux/bvec.h> #include <linux/bvec.h>
#include <linux/crypto.h>
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/net.h> #include <linux/net.h>
@ -52,6 +53,23 @@ struct ceph_connection_operations {
int (*sign_message) (struct ceph_msg *msg); int (*sign_message) (struct ceph_msg *msg);
int (*check_message_signature) (struct ceph_msg *msg); int (*check_message_signature) (struct ceph_msg *msg);
/* msgr2 authentication exchange */
int (*get_auth_request)(struct ceph_connection *con,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_reply_more)(struct ceph_connection *con,
void *reply, int reply_len,
void *buf, int *buf_len,
void **authorizer, int *authorizer_len);
int (*handle_auth_done)(struct ceph_connection *con,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len);
int (*handle_auth_bad_method)(struct ceph_connection *con,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
}; };
/* use format string %s%lld */ /* use format string %s%lld */
@ -235,14 +253,171 @@ struct ceph_msg {
bool more_to_follow; bool more_to_follow;
bool needs_out_seq; bool needs_out_seq;
int front_alloc_len; int front_alloc_len;
unsigned long ack_stamp; /* tx: when we were acked */
struct ceph_msgpool *pool; struct ceph_msgpool *pool;
}; };
/*
* connection states
*/
#define CEPH_CON_S_CLOSED 1
#define CEPH_CON_S_PREOPEN 2
#define CEPH_CON_S_V1_BANNER 3
#define CEPH_CON_S_V1_CONNECT_MSG 4
#define CEPH_CON_S_V2_BANNER_PREFIX 5
#define CEPH_CON_S_V2_BANNER_PAYLOAD 6
#define CEPH_CON_S_V2_HELLO 7
#define CEPH_CON_S_V2_AUTH 8
#define CEPH_CON_S_V2_AUTH_SIGNATURE 9
#define CEPH_CON_S_V2_SESSION_CONNECT 10
#define CEPH_CON_S_V2_SESSION_RECONNECT 11
#define CEPH_CON_S_OPEN 12
#define CEPH_CON_S_STANDBY 13
/*
* ceph_connection flag bits
*/
#define CEPH_CON_F_LOSSYTX 0 /* we can close channel or drop
messages on errors */
#define CEPH_CON_F_KEEPALIVE_PENDING 1 /* we need to send a keepalive */
#define CEPH_CON_F_WRITE_PENDING 2 /* we have data ready to send */
#define CEPH_CON_F_SOCK_CLOSED 3 /* socket state changed to closed */
#define CEPH_CON_F_BACKOFF 4 /* need to retry queuing delayed
work */
/* ceph connection fault delay defaults, for exponential backoff */ /* ceph connection fault delay defaults, for exponential backoff */
#define BASE_DELAY_INTERVAL (HZ/2) #define BASE_DELAY_INTERVAL (HZ / 4)
#define MAX_DELAY_INTERVAL (5 * 60 * HZ) #define MAX_DELAY_INTERVAL (15 * HZ)
/*
 * Per-connection state that is specific to the v1 protocol code.
 *
 * struct ceph_connection embeds this in an anonymous union together with
 * struct ceph_connection_v2_info, so only one of the two is meaningful
 * for a given connection.
 */
struct ceph_connection_v1_info {
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
bool out_more; /* there is more data after the kvecs */
bool out_msg_done;
struct ceph_auth_handshake *auth;
/* declared int, but used as a flag per the comment below */
int auth_retry; /* true if we need a newer authorizer */
/* connection negotiation temps */
u8 in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_entity_addr actual_peer_addr;
struct ceph_entity_addr peer_addr_for_me;
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
int in_base_pos; /* bytes read */
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
__le64 in_temp_ack; /* for reading an ack */
/* message out temps */
struct ceph_msg_header out_hdr;
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
u32 connect_seq; /* identify the most recent connection
attempt for this session */
u32 peer_global_seq; /* peer's global seq for this connection */
};
#define CEPH_CRC_LEN 4
#define CEPH_GCM_KEY_LEN 16
#define CEPH_GCM_IV_LEN sizeof(struct ceph_gcm_nonce)
#define CEPH_GCM_BLOCK_LEN 16
#define CEPH_GCM_TAG_LEN 16
#define CEPH_PREAMBLE_LEN 32
#define CEPH_PREAMBLE_INLINE_LEN 48
#define CEPH_PREAMBLE_PLAIN_LEN CEPH_PREAMBLE_LEN
#define CEPH_PREAMBLE_SECURE_LEN (CEPH_PREAMBLE_LEN + \
CEPH_PREAMBLE_INLINE_LEN + \
CEPH_GCM_TAG_LEN)
#define CEPH_EPILOGUE_PLAIN_LEN (1 + 3 * CEPH_CRC_LEN)
#define CEPH_EPILOGUE_SECURE_LEN (CEPH_GCM_BLOCK_LEN + CEPH_GCM_TAG_LEN)
#define CEPH_FRAME_MAX_SEGMENT_COUNT 4
/*
 * Description of one frame: its tag plus the length and alignment of each
 * segment.  Both arrays have room for CEPH_FRAME_MAX_SEGMENT_COUNT
 * entries; fd_seg_cnt holds the segment count.
 *
 * NOTE(review): presumably only the first fd_seg_cnt entries of fd_lens
 * and fd_aligns are valid -- confirm against the code that fills them.
 */
struct ceph_frame_desc {
int fd_tag; /* FRAME_TAG_* */
int fd_seg_cnt;
int fd_lens[CEPH_FRAME_MAX_SEGMENT_COUNT]; /* logical */
int fd_aligns[CEPH_FRAME_MAX_SEGMENT_COUNT];
};
/*
 * GCM nonce: a 32-bit "fixed" field followed by a 64-bit counter, both
 * little-endian.  CEPH_GCM_IV_LEN is defined as sizeof() of this struct,
 * so the layout here determines the IV length.
 *
 * NOTE(review): __packed on counter presumably removes the padding that
 * would otherwise follow "fixed", giving a 12-byte struct -- confirm.
 */
struct ceph_gcm_nonce {
__le32 fixed;
__le64 counter __packed;
};
/*
 * Per-connection state that is specific to the v2 protocol code.
 *
 * struct ceph_connection embeds this in an anonymous union together with
 * struct ceph_connection_v1_info, so only one of the two is meaningful
 * for a given connection.
 */
struct ceph_connection_v2_info {
/* receive side: iterator and the vectors backing it */
struct iov_iter in_iter;
struct kvec in_kvecs[5]; /* recvmsg */
struct bio_vec in_bvec; /* recvmsg (in_cursor) */
int in_kvec_cnt;
int in_state; /* IN_S_* */
/* send side: iterator and the vectors backing it */
struct iov_iter out_iter;
struct kvec out_kvecs[8]; /* sendmsg */
struct bio_vec out_bvec; /* sendpage (out_cursor, out_zero),
sendmsg (out_enc_pages) */
int out_kvec_cnt;
int out_state; /* OUT_S_* */
int out_zero; /* # of zero bytes to send */
bool out_iter_sendpage; /* use sendpage if possible */
struct ceph_frame_desc in_desc;
struct ceph_msg_data_cursor in_cursor;
struct ceph_msg_data_cursor out_cursor;
/* crypto transforms and per-direction GCM nonces */
struct crypto_shash *hmac_tfm; /* post-auth signature */
struct crypto_aead *gcm_tfm; /* on-wire encryption */
struct aead_request *gcm_req;
struct crypto_wait gcm_wait;
struct ceph_gcm_nonce in_gcm_nonce;
struct ceph_gcm_nonce out_gcm_nonce;
/*
 * NOTE(review): out_enc_* presumably track pages holding encrypted
 * outgoing data and the progress through them -- confirm in
 * messenger_v2.c.
 */
struct page **out_enc_pages;
int out_enc_page_cnt;
int out_enc_resid;
int out_enc_i;
int con_mode; /* CEPH_CON_MODE_* */
void *conn_bufs[16];
int conn_buf_cnt;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
int in_sign_kvec_cnt;
int out_sign_kvec_cnt;
u64 client_cookie;
u64 server_cookie;
u64 global_seq;
u64 connect_seq;
u64 peer_global_seq;
/* each buffer is sized for the largest (secure mode) preamble */
u8 in_buf[CEPH_PREAMBLE_SECURE_LEN];
u8 out_buf[CEPH_PREAMBLE_SECURE_LEN];
/*
 * Outgoing epilogue: one status byte followed by either the three
 * CRCs or CEPH_GCM_BLOCK_LEN - 1 bytes of padding.
 * NOTE(review): presumably sized so the whole epilogue is exactly one
 * GCM block -- confirm.
 */
struct {
u8 late_status; /* FRAME_LATE_STATUS_* */
union {
struct {
u32 front_crc;
u32 middle_crc;
u32 data_crc;
} __packed;
u8 pad[CEPH_GCM_BLOCK_LEN - 1];
};
} out_epil;
};
/* /*
* A single connection with another host. * A single connection with another host.
@ -258,24 +433,16 @@ struct ceph_connection {
struct ceph_messenger *msgr; struct ceph_messenger *msgr;
int state; /* CEPH_CON_S_* */
atomic_t sock_state; atomic_t sock_state;
struct socket *sock; struct socket *sock;
struct ceph_entity_addr peer_addr; /* peer address */
struct ceph_entity_addr peer_addr_for_me;
unsigned long flags; unsigned long flags; /* CEPH_CON_F_* */
unsigned long state;
const char *error_msg; /* error message, if any */ const char *error_msg; /* error message, if any */
struct ceph_entity_name peer_name; /* peer name */ struct ceph_entity_name peer_name; /* peer name */
struct ceph_entity_addr peer_addr; /* peer address */
u64 peer_features; u64 peer_features;
u32 connect_seq; /* identify the most recent connection
attempt for this connection, client */
u32 peer_global_seq; /* peer's global seq for this connection */
struct ceph_auth_handshake *auth;
int auth_retry; /* true if we need a newer authorizer */
struct mutex mutex; struct mutex mutex;
@ -286,43 +453,80 @@ struct ceph_connection {
u64 in_seq, in_seq_acked; /* last message received, acked */ u64 in_seq, in_seq_acked; /* last message received, acked */
/* connection negotiation temps */ struct ceph_msg *in_msg;
char in_banner[CEPH_BANNER_MAX_LEN];
struct ceph_msg_connect out_connect;
struct ceph_msg_connect_reply in_reply;
struct ceph_entity_addr actual_peer_addr;
/* message out temps */
struct ceph_msg_header out_hdr;
struct ceph_msg *out_msg; /* sending message (== tail of struct ceph_msg *out_msg; /* sending message (== tail of
out_sent) */ out_sent) */
bool out_msg_done;
struct kvec out_kvec[8], /* sending header/footer data */
*out_kvec_cur;
int out_kvec_left; /* kvec's left in out_kvec */
int out_skip; /* skip this many bytes */
int out_kvec_bytes; /* total bytes left */
int out_more; /* there is more data after the kvecs */
__le64 out_temp_ack; /* for writing an ack */
struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2
stamp */
/* message in temps */
struct ceph_msg_header in_hdr;
struct ceph_msg *in_msg;
u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */
char in_tag; /* protocol control byte */
int in_base_pos; /* bytes read */
__le64 in_temp_ack; /* for reading an ack */
struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */
struct delayed_work work; /* send|recv work */ struct delayed_work work; /* send|recv work */
unsigned long delay; /* current delay interval */ unsigned long delay; /* current delay interval */
union {
struct ceph_connection_v1_info v1;
struct ceph_connection_v2_info v2;
};
}; };
extern struct page *ceph_zero_page;
void ceph_con_flag_clear(struct ceph_connection *con, unsigned long con_flag);
void ceph_con_flag_set(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test(struct ceph_connection *con, unsigned long con_flag);
bool ceph_con_flag_test_and_clear(struct ceph_connection *con,
unsigned long con_flag);
bool ceph_con_flag_test_and_set(struct ceph_connection *con,
unsigned long con_flag);
void ceph_encode_my_addr(struct ceph_messenger *msgr);
int ceph_tcp_connect(struct ceph_connection *con);
int ceph_con_close_socket(struct ceph_connection *con);
void ceph_con_reset_session(struct ceph_connection *con);
u32 ceph_get_global_seq(struct ceph_messenger *msgr, u32 gt);
void ceph_con_discard_sent(struct ceph_connection *con, u64 ack_seq);
void ceph_con_discard_requeued(struct ceph_connection *con, u64 reconnect_seq);
void ceph_msg_data_cursor_init(struct ceph_msg_data_cursor *cursor,
struct ceph_msg *msg, size_t length);
struct page *ceph_msg_data_next(struct ceph_msg_data_cursor *cursor,
size_t *page_offset, size_t *length,
bool *last_piece);
void ceph_msg_data_advance(struct ceph_msg_data_cursor *cursor, size_t bytes);
u32 ceph_crc32c_page(u32 crc, struct page *page, unsigned int page_offset,
unsigned int length);
bool ceph_addr_is_blank(const struct ceph_entity_addr *addr);
int ceph_addr_port(const struct ceph_entity_addr *addr);
void ceph_addr_set_port(struct ceph_entity_addr *addr, int p);
void ceph_con_process_message(struct ceph_connection *con);
int ceph_con_in_msg_alloc(struct ceph_connection *con,
struct ceph_msg_header *hdr, int *skip);
void ceph_con_get_out_msg(struct ceph_connection *con);
/* messenger_v1.c */
int ceph_con_v1_try_read(struct ceph_connection *con);
int ceph_con_v1_try_write(struct ceph_connection *con);
void ceph_con_v1_revoke(struct ceph_connection *con);
void ceph_con_v1_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v1_opened(struct ceph_connection *con);
void ceph_con_v1_reset_session(struct ceph_connection *con);
void ceph_con_v1_reset_protocol(struct ceph_connection *con);
/* messenger_v2.c */
int ceph_con_v2_try_read(struct ceph_connection *con);
int ceph_con_v2_try_write(struct ceph_connection *con);
void ceph_con_v2_revoke(struct ceph_connection *con);
void ceph_con_v2_revoke_incoming(struct ceph_connection *con);
bool ceph_con_v2_opened(struct ceph_connection *con);
void ceph_con_v2_reset_session(struct ceph_connection *con);
void ceph_con_v2_reset_protocol(struct ceph_connection *con);
extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr);
@ -330,7 +534,6 @@ extern int ceph_parse_ips(const char *c, const char *end,
struct ceph_entity_addr *addr, struct ceph_entity_addr *addr,
int max_count, int *count); int max_count, int *count);
extern int ceph_msgr_init(void); extern int ceph_msgr_init(void);
extern void ceph_msgr_exit(void); extern void ceph_msgr_exit(void);
extern void ceph_msgr_flush(void); extern void ceph_msgr_flush(void);

View File

@ -8,24 +8,45 @@
#define CEPH_MON_PORT 6789 /* default monitor port */ #define CEPH_MON_PORT 6789 /* default monitor port */
/*
* client-side processes will try to bind to ports in this
* range, simply for the benefit of tools like nmap or wireshark
* that would like to identify the protocol.
*/
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900
/* /*
* tcp connection banner. include a protocol version. and adjust * tcp connection banner. include a protocol version. and adjust
* whenever the wire protocol changes. try to keep this string length * whenever the wire protocol changes. try to keep this string length
* constant. * constant.
*/ */
#define CEPH_BANNER "ceph v027" #define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_LEN 9
#define CEPH_BANNER_MAX_LEN 30 #define CEPH_BANNER_MAX_LEN 30
/*
* messenger V2 connection banner prefix.
* The full banner string should have the form: "ceph v2\n<le16>"
* the 2 bytes are the length of the remaining banner.
*/
#define CEPH_BANNER_V2 "ceph v2\n"
#define CEPH_BANNER_V2_LEN 8
#define CEPH_BANNER_V2_PREFIX_LEN (CEPH_BANNER_V2_LEN + sizeof(__le16))
/*
* messenger V2 features
*/
#define CEPH_MSGR2_INCARNATION_1 (0ull)

/*
 * DEFINE_MSGR2_FEATURE(bit, incarnation, name) emits two constants:
 *
 *   CEPH_MSGR2_FEATURE_<name>      the single feature bit, 1 << bit
 *   CEPH_MSGR2_FEATUREMASK_<name>  that bit OR-ed with the bits of
 *                                  CEPH_MSGR2_INCARNATION_<incarnation>
 *
 * @bit is parenthesized at every use so that an expression argument
 * (e.g. "BASE | 1") is shifted as a whole instead of being split up by
 * operator precedence.
 */
#define DEFINE_MSGR2_FEATURE(bit, incarnation, name) \
	static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << (bit)); \
	static const uint64_t CEPH_MSGR2_FEATUREMASK_##name = \
		((1ULL << (bit)) | CEPH_MSGR2_INCARNATION_##incarnation);

/* True when every bit of the feature mask for @name is set in @x. */
#define HAVE_MSGR2_FEATURE(x, name) \
	(((x) & (CEPH_MSGR2_FEATUREMASK_##name)) == (CEPH_MSGR2_FEATUREMASK_##name))

DEFINE_MSGR2_FEATURE( 0, 1, REVISION_1) // msgr2.1

#define CEPH_MSGR2_SUPPORTED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
#define CEPH_MSGR2_REQUIRED_FEATURES (CEPH_MSGR2_FEATURE_REVISION_1)
/* /*
* Rollover-safe type and comparator for 32-bit sequence numbers. * Rollover-safe type and comparator for 32-bit sequence numbers.
* Comparator returns -1, 0, or 1. * Comparator returns -1, 0, or 1.
@ -61,11 +82,18 @@ extern const char *ceph_entity_type_name(int type);
* entity_addr -- network address * entity_addr -- network address
*/ */
struct ceph_entity_addr { struct ceph_entity_addr {
__le32 type; __le32 type; /* CEPH_ENTITY_ADDR_TYPE_* */
__le32 nonce; /* unique id for process (e.g. pid) */ __le32 nonce; /* unique id for process (e.g. pid) */
struct sockaddr_storage in_addr; struct sockaddr_storage in_addr;
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Compare two entity addresses while ignoring their ->type field.
 *
 * Returns true when the in_addr storage is bytewise identical and the
 * nonces match, false otherwise.
 */
static inline bool ceph_addr_equal_no_type(const struct ceph_entity_addr *lhs,
					   const struct ceph_entity_addr *rhs)
{
	if (memcmp(&lhs->in_addr, &rhs->in_addr, sizeof(lhs->in_addr)) != 0)
		return false;

	return lhs->nonce == rhs->nonce;
}
struct ceph_entity_inst { struct ceph_entity_inst {
struct ceph_entity_name name; struct ceph_entity_name name;
struct ceph_entity_addr addr; struct ceph_entity_addr addr;
@ -160,6 +188,24 @@ struct ceph_msg_header {
__le32 crc; /* header crc32c */ __le32 crc; /* header crc32c */
} __attribute__ ((packed)); } __attribute__ ((packed));
/*
 * Packed, little-endian message header.
 *
 * NOTE(review): judging by the name and the ack_seq field this is the
 * per-message header used by the msgr2 code, with the ack sequence
 * carried inline -- confirm against messenger_v2.c.
 */
struct ceph_msg_header2 {
__le64 seq; /* message seq# for this session */
__le64 tid; /* transaction id */
__le16 type; /* message type */
__le16 priority; /* priority. higher value == higher priority */
__le16 version; /* version of message encoding */
__le32 data_pre_padding_len;
__le16 data_off; /* sender: include full offset;
receiver: mask against ~PAGE_MASK */
__le64 ack_seq;
__u8 flags;
/* oldest code we think can decode this. unknown if zero. */
__le16 compat_version;
__le16 reserved;
} __attribute__ ((packed));
#define CEPH_MSG_PRIO_LOW 64 #define CEPH_MSG_PRIO_LOW 64
#define CEPH_MSG_PRIO_DEFAULT 127 #define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH 196 #define CEPH_MSG_PRIO_HIGH 196

View File

@ -251,8 +251,8 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
} }
struct ceph_osdmap *ceph_osdmap_alloc(void); struct ceph_osdmap *ceph_osdmap_alloc(void);
extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2);
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map); struct ceph_osdmap *map);
extern void ceph_osdmap_destroy(struct ceph_osdmap *map); extern void ceph_osdmap_destroy(struct ceph_osdmap *map);

View File

@ -5,6 +5,9 @@ config CEPH_LIB
select LIBCRC32C select LIBCRC32C
select CRYPTO_AES select CRYPTO_AES
select CRYPTO_CBC select CRYPTO_CBC
select CRYPTO_GCM
select CRYPTO_HMAC
select CRYPTO_SHA256
select CRYPTO select CRYPTO
select KEYS select KEYS
default n default n

View File

@ -14,4 +14,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
crypto.o armor.o \ crypto.o armor.o \
auth_x.o \ auth_x.o \
ceph_strings.o ceph_hash.o \ ceph_strings.o ceph_hash.o \
pagevec.o snapshot.o string_table.o pagevec.o snapshot.o string_table.o \
messenger_v1.o messenger_v2.o

View File

@ -21,28 +21,31 @@ static u32 supported_protocols[] = {
CEPH_AUTH_CEPHX CEPH_AUTH_CEPHX
}; };
static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) static int init_protocol(struct ceph_auth_client *ac, int proto)
{ {
switch (protocol) { dout("%s proto %d\n", __func__, proto);
switch (proto) {
case CEPH_AUTH_NONE: case CEPH_AUTH_NONE:
return ceph_auth_none_init(ac); return ceph_auth_none_init(ac);
case CEPH_AUTH_CEPHX: case CEPH_AUTH_CEPHX:
return ceph_x_init(ac); return ceph_x_init(ac);
default: default:
return -ENOENT; pr_err("bad auth protocol %d\n", proto);
return -EINVAL;
} }
} }
/* /*
* setup, teardown. * setup, teardown.
*/ */
struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_crypto_key *key) struct ceph_auth_client *ceph_auth_init(const char *name,
const struct ceph_crypto_key *key,
const int *con_modes)
{ {
struct ceph_auth_client *ac; struct ceph_auth_client *ac;
int ret; int ret;
dout("auth_init name '%s'\n", name);
ret = -ENOMEM; ret = -ENOMEM;
ac = kzalloc(sizeof(*ac), GFP_NOFS); ac = kzalloc(sizeof(*ac), GFP_NOFS);
if (!ac) if (!ac)
@ -54,8 +57,12 @@ struct ceph_auth_client *ceph_auth_init(const char *name, const struct ceph_cryp
ac->name = name; ac->name = name;
else else
ac->name = CEPH_AUTH_NAME_DEFAULT; ac->name = CEPH_AUTH_NAME_DEFAULT;
dout("auth_init name %s\n", ac->name);
ac->key = key; ac->key = key;
ac->preferred_mode = con_modes[0];
ac->fallback_mode = con_modes[1];
dout("%s name '%s' preferred_mode %d fallback_mode %d\n", __func__,
ac->name, ac->preferred_mode, ac->fallback_mode);
return ac; return ac;
out: out:
@ -145,31 +152,35 @@ bad:
goto out; goto out;
} }
static int ceph_build_auth_request(struct ceph_auth_client *ac, static int build_request(struct ceph_auth_client *ac, bool add_header,
void *msg_buf, size_t msg_len) void *buf, int buf_len)
{ {
struct ceph_mon_request_header *monhdr = msg_buf; void *end = buf + buf_len;
void *p = monhdr + 1; void *p;
void *end = msg_buf + msg_len;
int ret; int ret;
monhdr->have_version = 0; p = buf;
monhdr->session_mon = cpu_to_le16(-1); if (add_header) {
monhdr->session_mon_tid = 0; /* struct ceph_mon_request_header + protocol */
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_32(&p, ac->protocol); ceph_encode_16_safe(&p, end, -1, e_range);
ceph_encode_64_safe(&p, end, 0, e_range);
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
}
ceph_encode_need(&p, end, sizeof(u32), e_range);
ret = ac->ops->build_request(ac, p + sizeof(u32), end); ret = ac->ops->build_request(ac, p + sizeof(u32), end);
if (ret < 0) { if (ret < 0) {
pr_err("error %d building auth method %s request\n", ret, pr_err("auth protocol '%s' building request failed: %d\n",
ac->ops->name); ceph_auth_proto_name(ac->protocol), ret);
goto out; return ret;
} }
dout(" built request %d bytes\n", ret); dout(" built request %d bytes\n", ret);
ceph_encode_32(&p, ret); ceph_encode_32(&p, ret);
ret = p + ret - msg_buf; return p + ret - buf;
out:
return ret; e_range:
return -ERANGE;
} }
/* /*
@ -229,10 +240,10 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->ops = NULL; ac->ops = NULL;
} }
if (ac->protocol != protocol) { if (ac->protocol != protocol) {
ret = ceph_auth_init_protocol(ac, protocol); ret = init_protocol(ac, protocol);
if (ret) { if (ret) {
pr_err("error %d on auth protocol %d init\n", pr_err("auth protocol '%s' init failed: %d\n",
ret, protocol); ceph_auth_proto_name(protocol), ret);
goto out; goto out;
} }
} }
@ -240,12 +251,13 @@ int ceph_handle_auth_reply(struct ceph_auth_client *ac,
ac->negotiating = false; ac->negotiating = false;
} }
ret = ac->ops->handle_reply(ac, result, payload, payload_end); ret = ac->ops->handle_reply(ac, result, payload, payload_end,
if (ret == -EAGAIN) { NULL, NULL, NULL, NULL);
ret = ceph_build_auth_request(ac, reply_buf, reply_len); if (ret == -EAGAIN)
} else if (ret) { ret = build_request(ac, true, reply_buf, reply_len);
pr_err("auth method '%s' error %d\n", ac->ops->name, ret); else if (ret)
} pr_err("auth protocol '%s' mauth authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
out: out:
mutex_unlock(&ac->mutex); mutex_unlock(&ac->mutex);
@ -264,7 +276,7 @@ int ceph_build_auth(struct ceph_auth_client *ac,
mutex_lock(&ac->mutex); mutex_lock(&ac->mutex);
if (ac->ops->should_authenticate(ac)) if (ac->ops->should_authenticate(ac))
ret = ceph_build_auth_request(ac, msg_buf, msg_len); ret = build_request(ac, true, msg_buf, msg_len);
mutex_unlock(&ac->mutex); mutex_unlock(&ac->mutex);
return ret; return ret;
} }
@ -281,19 +293,38 @@ int ceph_auth_is_authenticated(struct ceph_auth_client *ac)
} }
EXPORT_SYMBOL(ceph_auth_is_authenticated); EXPORT_SYMBOL(ceph_auth_is_authenticated);
int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int __ceph_auth_get_authorizer(struct ceph_auth_client *ac,
int peer_type, struct ceph_auth_handshake *auth,
struct ceph_auth_handshake *auth) int peer_type, bool force_new,
int *proto, int *pref_mode, int *fallb_mode)
{ {
int ret = 0; int ret;
mutex_lock(&ac->mutex); mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->create_authorizer) if (force_new && auth->authorizer) {
ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer)
ret = ac->ops->create_authorizer(ac, peer_type, auth); ret = ac->ops->create_authorizer(ac, peer_type, auth);
else if (ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, auth);
else
ret = 0;
if (ret)
goto out;
*proto = ac->protocol;
if (pref_mode && fallb_mode) {
*pref_mode = ac->preferred_mode;
*fallb_mode = ac->fallback_mode;
}
out:
mutex_unlock(&ac->mutex); mutex_unlock(&ac->mutex);
return ret; return ret;
} }
EXPORT_SYMBOL(ceph_auth_create_authorizer); EXPORT_SYMBOL(__ceph_auth_get_authorizer);
void ceph_auth_destroy_authorizer(struct ceph_authorizer *a) void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
{ {
@ -301,20 +332,6 @@ void ceph_auth_destroy_authorizer(struct ceph_authorizer *a)
} }
EXPORT_SYMBOL(ceph_auth_destroy_authorizer); EXPORT_SYMBOL(ceph_auth_destroy_authorizer);
int ceph_auth_update_authorizer(struct ceph_auth_client *ac,
int peer_type,
struct ceph_auth_handshake *a)
{
int ret = 0;
mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->update_authorizer)
ret = ac->ops->update_authorizer(ac, peer_type, a);
mutex_unlock(&ac->mutex);
return ret;
}
EXPORT_SYMBOL(ceph_auth_update_authorizer);
int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac, int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a, struct ceph_authorizer *a,
void *challenge_buf, void *challenge_buf,
@ -332,13 +349,18 @@ int ceph_auth_add_authorizer_challenge(struct ceph_auth_client *ac,
EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge); EXPORT_SYMBOL(ceph_auth_add_authorizer_challenge);
int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac, int ceph_auth_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a) struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{ {
int ret = 0; int ret = 0;
mutex_lock(&ac->mutex); mutex_lock(&ac->mutex);
if (ac->ops && ac->ops->verify_authorizer_reply) if (ac->ops && ac->ops->verify_authorizer_reply)
ret = ac->ops->verify_authorizer_reply(ac, a); ret = ac->ops->verify_authorizer_reply(ac, a,
reply, reply_len, session_key, session_key_len,
con_secret, con_secret_len);
mutex_unlock(&ac->mutex); mutex_unlock(&ac->mutex);
return ret; return ret;
} }
@ -352,3 +374,279 @@ void ceph_auth_invalidate_authorizer(struct ceph_auth_client *ac, int peer_type)
mutex_unlock(&ac->mutex); mutex_unlock(&ac->mutex);
} }
EXPORT_SYMBOL(ceph_auth_invalidate_authorizer); EXPORT_SYMBOL(ceph_auth_invalidate_authorizer);
/*
* msgr2 authentication
*/
/*
 * Linear search: report whether @val occurs among the first @cnt entries
 * of @arr.  A non-positive @cnt yields false without touching @arr.
 */
static bool contains(const int *arr, int cnt, int val)
{
	for (; cnt > 0; cnt--, arr++) {
		if (*arr == val)
			return true;
	}

	return false;
}
/*
 * Encode the list of acceptable connection modes at *p: a 32-bit count
 * followed by that many 32-bit mode values, preferred mode first.  The
 * fallback mode is only included when it is not CEPH_CON_MODE_UNKNOWN.
 *
 * Returns 0 on success or -ERANGE when the buffer ending at @end has no
 * room for the next value.  Warns if @pref_mode is CEPH_CON_MODE_UNKNOWN.
 */
static int encode_con_modes(void **p, void *end, int pref_mode, int fallb_mode)
{
	const bool have_fallback = fallb_mode != CEPH_CON_MODE_UNKNOWN;

	WARN_ON(pref_mode == CEPH_CON_MODE_UNKNOWN);

	ceph_encode_32_safe(p, end, have_fallback ? 2 : 1, e_range);
	ceph_encode_32_safe(p, end, pref_mode, e_range);
	if (have_fallback) {
		ceph_encode_32_safe(p, end, fallb_mode, e_range);
	}

	return 0;

e_range:
	return -ERANGE;
}
/*
 * Similar to ceph_auth_build_hello().
 *
 * Build the initial authentication request into @buf (@buf_len bytes).
 *
 * The protocol is CEPH_AUTH_CEPHX when ac->key is set and CEPH_AUTH_NONE
 * otherwise.  If no protocol has been initialized yet it is initialized
 * here; otherwise the existing protocol state is reset.
 *
 * Encoded layout, in order:
 *   - 32-bit auth protocol
 *   - connection modes (see encode_con_modes())
 *   - 32-bit length of the payload that follows, back-filled at the end
 *   - payload: 8-bit CEPH_AUTH_MODE_MON, entity name, 64-bit global_id
 *
 * Returns the number of bytes written on success.  On failure returns
 * -ERANGE when @buf is too small, or whatever non-zero value
 * init_protocol(), encode_con_modes() or ceph_auth_entity_name_encode()
 * returned.  ac->mutex is held for the duration and released on every
 * path.
 */
int ceph_auth_get_request(struct ceph_auth_client *ac, void *buf, int buf_len)
{
int proto = ac->key ? CEPH_AUTH_CEPHX : CEPH_AUTH_NONE;
void *end = buf + buf_len;
void *lenp;
void *p;
int ret;
mutex_lock(&ac->mutex);
if (ac->protocol == CEPH_AUTH_UNKNOWN) {
ret = init_protocol(ac, proto);
if (ret) {
pr_err("auth protocol '%s' init failed: %d\n",
ceph_auth_proto_name(proto), ret);
goto out;
}
} else {
/* already initialized: warn on a protocol mismatch, then reset */
WARN_ON(ac->protocol != proto);
ac->ops->reset(ac);
}
p = buf;
ceph_encode_32_safe(&p, end, ac->protocol, e_range);
ret = encode_con_modes(&p, end, ac->preferred_mode, ac->fallback_mode);
if (ret)
goto out;
/*
 * Remember where the payload length goes and skip over it.
 * NOTE(review): this skip is not bounds-checked itself; the *_safe
 * encode right after it presumably fails if p ran past end -- confirm.
 */
lenp = p;
p += 4; /* space for len */
ceph_encode_8_safe(&p, end, CEPH_AUTH_MODE_MON, e_range);
ret = ceph_auth_entity_name_encode(ac->name, &p, end);
if (ret)
goto out;
ceph_encode_64_safe(&p, end, ac->global_id, e_range);
/* back-fill the payload length, excluding the 4-byte length itself */
ceph_encode_32(&lenp, p - lenp - 4);
ret = p - buf;
out:
mutex_unlock(&ac->mutex);
return ret;
e_range:
/* reached only via the *_safe encoders; funnel through the unlock */
ret = -ERANGE;
goto out;
}
/*
 * Feed an intermediate auth reply to the protocol handler.  If the
 * handler asks for another round (-EAGAIN), build the follow-up request
 * into @buf and return build_request()'s result.  Any other handler
 * result is returned as is; a non-negative one is unexpected at this
 * stage and triggers a warning.
 *
 * Takes ac->mutex for the duration of the call.
 */
int ceph_auth_handle_reply_more(struct ceph_auth_client *ac, void *reply,
				int reply_len, void *buf, int buf_len)
{
	void *reply_end = reply + reply_len;
	int ret;

	mutex_lock(&ac->mutex);

	ret = ac->ops->handle_reply(ac, 0, reply, reply_end,
				    NULL, NULL, NULL, NULL);
	if (ret != -EAGAIN) {
		WARN_ON(ret >= 0);
		goto unlock;
	}

	ret = build_request(ac, false, buf, buf_len);

unlock:
	mutex_unlock(&ac->mutex);
	return ret;
}
/*
 * Process the final auth reply.
 *
 * A non-zero @global_id that differs from ac->global_id replaces it.
 * The reply is then handed to the protocol's handle_reply() op together
 * with the caller's @session_key / @con_secret output buffers and their
 * length pointers.
 *
 * Takes ac->mutex for the duration of the call.
 *
 * Returns whatever the handle_reply() op returns.
 */
int ceph_auth_handle_reply_done(struct ceph_auth_client *ac,
u64 global_id, void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
int ret;
mutex_lock(&ac->mutex);
/* global_id == 0 means "nothing to update" */
if (global_id && ac->global_id != global_id) {
dout("%s global_id %llu -> %llu\n", __func__, ac->global_id,
global_id);
ac->global_id = global_id;
}
ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
mutex_unlock(&ac->mutex);
return ret;
}
/*
 * Handle a rejected auth method.
 *
 * @used_proto is expected to equal ac->protocol (warns otherwise).
 *
 * If @result is -EOPNOTSUPP, the configured protocol is checked against
 * @allowed_protos and the preferred/fallback connection modes against
 * @allowed_modes.  If the protocol is not listed, or neither the
 * preferred nor the (configured) fallback mode is listed, the reason is
 * logged and false is returned.
 *
 * In every other case the failure is logged and true is returned.
 * Reaching that point with -EOPNOTSUPP (i.e. everything we use is
 * listed as allowed) or with a non-negative @result is unexpected and
 * triggers a warning.
 *
 * NOTE(review): the return value presumably tells the caller whether
 * retrying makes sense -- confirm against the callers.
 *
 * Takes ac->mutex for the duration of the call.
 */
bool ceph_auth_handle_bad_method(struct ceph_auth_client *ac,
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
mutex_lock(&ac->mutex);
WARN_ON(used_proto != ac->protocol);
if (result == -EOPNOTSUPP) {
if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
pr_err("auth protocol '%s' not allowed\n",
ceph_auth_proto_name(ac->protocol));
goto not_allowed;
}
/* not allowed only if both preferred and fallback modes are out */
if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
(ac->fallback_mode == CEPH_CON_MODE_UNKNOWN ||
!contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
pr_err("preferred mode '%s' not allowed\n",
ceph_con_mode_name(ac->preferred_mode));
if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN)
pr_err("no fallback mode\n");
else
pr_err("fallback mode '%s' not allowed\n",
ceph_con_mode_name(ac->fallback_mode));
goto not_allowed;
}
}
/* -EOPNOTSUPP should have been explained by one of the checks above */
WARN_ON(result == -EOPNOTSUPP || result >= 0);
pr_err("auth protocol '%s' msgr authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
mutex_unlock(&ac->mutex);
return true;
not_allowed:
mutex_unlock(&ac->mutex);
return false;
}
/*
 * Obtain an authorizer for @peer_type via __ceph_auth_get_authorizer()
 * and encode the msgr2 auth request header into @buf.
 *
 * @buf_len is in/out: on entry the capacity of @buf, on success the
 * number of bytes written.
 *
 * Layout written to @buf:
 *   u32 protocol
 *   connection modes (see encode_con_modes())
 *   u32 auth->authorizer_buf_len
 *
 * Only the authorizer's length is written here, not its contents.
 * NOTE(review): presumably the caller sends auth->authorizer_buf
 * right after this header -- confirm against the caller.
 *
 * Returns 0 on success, -ERANGE if @buf is too small, or the error from
 * __ceph_auth_get_authorizer() / encode_con_modes().
 */
int ceph_auth_get_authorizer(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
int peer_type, void *buf, int *buf_len)
{
void *end = buf + *buf_len;
int pref_mode, fallb_mode;
int proto;
void *p;
int ret;
ret = __ceph_auth_get_authorizer(ac, auth, peer_type, true, &proto,
&pref_mode, &fallb_mode);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, proto, e_range);
ret = encode_con_modes(&p, end, pref_mode, fallb_mode);
if (ret)
return ret;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_get_authorizer);
/*
 * Handle an intermediate reply from a service: pass @reply to
 * ceph_auth_add_authorizer_challenge() for auth->authorizer and then
 * encode the follow-up header into @buf.
 *
 * @buf_len is in/out: on entry the capacity of @buf, on success the
 * number of bytes written.  The only thing written is
 * auth->authorizer_buf_len as a 32-bit value; the authorizer contents
 * are not copied here.
 *
 * Returns 0 on success, -ERANGE if @buf is too small, or the error from
 * ceph_auth_add_authorizer_challenge().
 */
int ceph_auth_handle_svc_reply_more(struct ceph_auth_client *ac,
struct ceph_auth_handshake *auth,
void *reply, int reply_len,
void *buf, int *buf_len)
{
void *end = buf + *buf_len;
void *p;
int ret;
ret = ceph_auth_add_authorizer_challenge(ac, auth->authorizer,
reply, reply_len);
if (ret)
return ret;
p = buf;
ceph_encode_32_safe(&p, end, auth->authorizer_buf_len, e_range);
*buf_len = p - buf;
return 0;
e_range:
return -ERANGE;
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_more);
/*
 * Handle the final reply from a service.  This is a thin wrapper that
 * verifies @reply against auth->authorizer and passes the caller's
 * session key / connection secret output buffers straight through.
 *
 * Returns the result of ceph_auth_verify_authorizer_reply().
 */
int ceph_auth_handle_svc_reply_done(struct ceph_auth_client *ac,
				    struct ceph_auth_handshake *auth,
				    void *reply, int reply_len,
				    u8 *session_key, int *session_key_len,
				    u8 *con_secret, int *con_secret_len)
{
	int ret;

	ret = ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
						reply, reply_len,
						session_key, session_key_len,
						con_secret, con_secret_len);
	return ret;
}
EXPORT_SYMBOL(ceph_auth_handle_svc_reply_done);
/*
 * Handle an authorizer rejected by a service of type @peer_type.
 *
 * @used_proto is expected to equal ac->protocol (warns otherwise).
 *
 * If @result is -EOPNOTSUPP, the configured protocol is checked against
 * @allowed_protos and the preferred/fallback connection modes against
 * @allowed_modes.  If the protocol is not listed, or neither the
 * preferred nor the (configured) fallback mode is listed, the reason is
 * logged and false is returned.
 *
 * In every other case the failure is logged, the authorizer for
 * @peer_type is invalidated (if the protocol provides an
 * invalidate_authorizer op) and true is returned.  Reaching that point
 * with -EOPNOTSUPP or with a non-negative @result is unexpected and
 * triggers a warning.
 *
 * NOTE(review): the return value presumably tells the caller whether
 * retrying makes sense -- confirm against the callers.
 *
 * Takes ac->mutex for the duration of the call.
 */
bool ceph_auth_handle_bad_authorizer(struct ceph_auth_client *ac,
int peer_type, int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt)
{
mutex_lock(&ac->mutex);
WARN_ON(used_proto != ac->protocol);
if (result == -EOPNOTSUPP) {
if (!contains(allowed_protos, proto_cnt, ac->protocol)) {
pr_err("auth protocol '%s' not allowed by %s\n",
ceph_auth_proto_name(ac->protocol),
ceph_entity_type_name(peer_type));
goto not_allowed;
}
/* not allowed only if both preferred and fallback modes are out */
if (!contains(allowed_modes, mode_cnt, ac->preferred_mode) &&
(ac->fallback_mode == CEPH_CON_MODE_UNKNOWN ||
!contains(allowed_modes, mode_cnt, ac->fallback_mode))) {
pr_err("preferred mode '%s' not allowed by %s\n",
ceph_con_mode_name(ac->preferred_mode),
ceph_entity_type_name(peer_type));
if (ac->fallback_mode == CEPH_CON_MODE_UNKNOWN)
pr_err("no fallback mode\n");
else
pr_err("fallback mode '%s' not allowed by %s\n",
ceph_con_mode_name(ac->fallback_mode),
ceph_entity_type_name(peer_type));
goto not_allowed;
}
}
/* -EOPNOTSUPP should have been explained by one of the checks above */
WARN_ON(result == -EOPNOTSUPP || result >= 0);
pr_err("auth protocol '%s' authorization to %s failed: %d\n",
ceph_auth_proto_name(ac->protocol),
ceph_entity_type_name(peer_type), result);
/* the op is optional */
if (ac->ops->invalidate_authorizer)
ac->ops->invalidate_authorizer(ac, peer_type);
mutex_unlock(&ac->mutex);
return true;
not_allowed:
mutex_unlock(&ac->mutex);
return false;
}
EXPORT_SYMBOL(ceph_auth_handle_bad_authorizer);

View File

@ -70,7 +70,9 @@ static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
* authenticate state, so nothing happens here. * authenticate state, so nothing happens here.
*/ */
static int handle_reply(struct ceph_auth_client *ac, int result, static int handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end) void *buf, void *end, u8 *session_key,
int *session_key_len, u8 *con_secret,
int *con_secret_len)
{ {
struct ceph_auth_none_info *xi = ac->private; struct ceph_auth_none_info *xi = ac->private;
@ -116,7 +118,6 @@ static int ceph_auth_none_create_authorizer(
} }
static const struct ceph_auth_client_ops ceph_auth_none_ops = { static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.name = "none",
.reset = reset, .reset = reset,
.destroy = destroy, .destroy = destroy,
.is_authenticated = is_authenticated, .is_authenticated = is_authenticated,

View File

@ -22,12 +22,15 @@ static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed);
static int ceph_x_is_authenticated(struct ceph_auth_client *ac) static int ceph_x_is_authenticated(struct ceph_auth_client *ac)
{ {
struct ceph_x_info *xi = ac->private; struct ceph_x_info *xi = ac->private;
int need; int missing;
int need; /* missing + need renewal */
ceph_x_validate_tickets(ac, &need); ceph_x_validate_tickets(ac, &need);
dout("ceph_x_is_authenticated want=%d need=%d have=%d\n", missing = ac->want_keys & ~xi->have_keys;
ac->want_keys, need, xi->have_keys); WARN_ON((need & missing) != missing);
return (ac->want_keys & xi->have_keys) == ac->want_keys; dout("%s want 0x%x have 0x%x missing 0x%x -> %d\n", __func__,
ac->want_keys, xi->have_keys, missing, !missing);
return !missing;
} }
static int ceph_x_should_authenticate(struct ceph_auth_client *ac) static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
@ -36,9 +39,9 @@ static int ceph_x_should_authenticate(struct ceph_auth_client *ac)
int need; int need;
ceph_x_validate_tickets(ac, &need); ceph_x_validate_tickets(ac, &need);
dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", dout("%s want 0x%x have 0x%x need 0x%x -> %d\n", __func__,
ac->want_keys, need, xi->have_keys); ac->want_keys, xi->have_keys, need, !!need);
return need != 0; return !!need;
} }
static int ceph_x_encrypt_offset(void) static int ceph_x_encrypt_offset(void)
@ -197,7 +200,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
dout(" decrypted %d bytes\n", ret); dout(" decrypted %d bytes\n", ret);
dend = dp + ret; dend = dp + ret;
tkt_struct_v = ceph_decode_8(&dp); ceph_decode_8_safe(&dp, dend, tkt_struct_v, bad);
if (tkt_struct_v != 1) if (tkt_struct_v != 1)
goto bad; goto bad;
@ -205,6 +208,7 @@ static int process_one_ticket(struct ceph_auth_client *ac,
if (ret) if (ret)
goto out; goto out;
ceph_decode_need(&dp, dend, sizeof(struct ceph_timespec), bad);
ceph_decode_timespec64(&validity, dp); ceph_decode_timespec64(&validity, dp);
dp += sizeof(struct ceph_timespec); dp += sizeof(struct ceph_timespec);
new_expires = ktime_get_real_seconds() + validity.tv_sec; new_expires = ktime_get_real_seconds() + validity.tv_sec;
@ -265,22 +269,21 @@ out:
static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac,
struct ceph_crypto_key *secret, struct ceph_crypto_key *secret,
void *buf, void *end) void **p, void *end)
{ {
void *p = buf;
u8 reply_struct_v; u8 reply_struct_v;
u32 num; u32 num;
int ret; int ret;
ceph_decode_8_safe(&p, end, reply_struct_v, bad); ceph_decode_8_safe(p, end, reply_struct_v, bad);
if (reply_struct_v != 1) if (reply_struct_v != 1)
return -EINVAL; return -EINVAL;
ceph_decode_32_safe(&p, end, num, bad); ceph_decode_32_safe(p, end, num, bad);
dout("%d tickets\n", num); dout("%d tickets\n", num);
while (num--) { while (num--) {
ret = process_one_ticket(ac, secret, &p, end); ret = process_one_ticket(ac, secret, p, end);
if (ret) if (ret)
return ret; return ret;
} }
@ -379,6 +382,7 @@ static int ceph_x_build_authorizer(struct ceph_auth_client *ac,
} }
} }
au->service = th->service; au->service = th->service;
WARN_ON(!th->secret_id);
au->secret_id = th->secret_id; au->secret_id = th->secret_id;
msg_a = au->buf->vec.iov_base; msg_a = au->buf->vec.iov_base;
@ -442,9 +446,10 @@ static bool need_key(struct ceph_x_ticket_handler *th)
static bool have_key(struct ceph_x_ticket_handler *th) static bool have_key(struct ceph_x_ticket_handler *th)
{ {
if (th->have_key) { if (th->have_key && ktime_get_real_seconds() >= th->expires) {
if (ktime_get_real_seconds() >= th->expires) dout("ticket %d (%s) secret_id %llu expired\n", th->service,
th->have_key = false; ceph_entity_type_name(th->service), th->secret_id);
th->have_key = false;
} }
return th->have_key; return th->have_key;
@ -486,6 +491,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
struct ceph_x_info *xi = ac->private; struct ceph_x_info *xi = ac->private;
int need; int need;
struct ceph_x_request_header *head = buf; struct ceph_x_request_header *head = buf;
void *p;
int ret; int ret;
struct ceph_x_ticket_handler *th = struct ceph_x_ticket_handler *th =
get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
@ -494,18 +500,17 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
return PTR_ERR(th); return PTR_ERR(th);
ceph_x_validate_tickets(ac, &need); ceph_x_validate_tickets(ac, &need);
dout("%s want 0x%x have 0x%x need 0x%x\n", __func__, ac->want_keys,
dout("build_request want %x have %x need %x\n", xi->have_keys, need);
ac->want_keys, xi->have_keys, need);
if (need & CEPH_ENTITY_TYPE_AUTH) { if (need & CEPH_ENTITY_TYPE_AUTH) {
struct ceph_x_authenticate *auth = (void *)(head + 1); struct ceph_x_authenticate *auth = (void *)(head + 1);
void *p = auth + 1;
void *enc_buf = xi->auth_authorizer.enc_buf; void *enc_buf = xi->auth_authorizer.enc_buf;
struct ceph_x_challenge_blob *blob = enc_buf + struct ceph_x_challenge_blob *blob = enc_buf +
ceph_x_encrypt_offset(); ceph_x_encrypt_offset();
u64 *u; u64 *u;
p = auth + 1;
if (p > end) if (p > end)
return -ERANGE; return -ERANGE;
@ -521,7 +526,7 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0) if (ret < 0)
return ret; return ret;
auth->struct_v = 1; auth->struct_v = 2; /* nautilus+ */
auth->key = 0; auth->key = 0;
for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++) for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u; auth->key ^= *(__le64 *)u;
@ -534,39 +539,117 @@ static int ceph_x_build_request(struct ceph_auth_client *ac,
if (ret < 0) if (ret < 0)
return ret; return ret;
/* nautilus+: request service tickets at the same time */
need = ac->want_keys & ~CEPH_ENTITY_TYPE_AUTH;
WARN_ON(!need);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf; return p - buf;
} }
if (need) { if (need) {
void *p = head + 1; dout(" get_principal_session_key\n");
struct ceph_x_service_ticket_request *req;
if (p > end)
return -ERANGE;
head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY);
ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer);
if (ret) if (ret)
return ret; return ret;
ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len);
req = p; p = buf;
req->keys = cpu_to_le32(need); ceph_encode_16_safe(&p, end, CEPHX_GET_PRINCIPAL_SESSION_KEY,
p += sizeof(*req); e_range);
ceph_encode_copy_safe(&p, end,
xi->auth_authorizer.buf->vec.iov_base,
xi->auth_authorizer.buf->vec.iov_len, e_range);
ceph_encode_8_safe(&p, end, 1, e_range);
ceph_encode_32_safe(&p, end, need, e_range);
return p - buf; return p - buf;
} }
return 0; return 0;
e_range:
return -ERANGE;
}
/*
 * Process the payload of a CEPHX_GET_AUTH_SESSION_KEY reply:
 *
 *   AUTH ticket + [connection secret] + service tickets
 *
 * @p / @end delimit the undecoded part of the reply; *p is advanced as
 * data is consumed.  If the reply ends right after the AUTH ticket,
 * there is nothing else to process and 0 is returned.
 *
 * @session_key / @session_key_len: if @session_key is non-NULL, the
 * AUTH ticket handler's session key and its length are copied out.
 * @con_secret / @con_secret_len: if @con_secret is non-NULL and the
 * reply carries a connection secret, the secret (at most
 * CEPH_MAX_CON_SECRET_LEN bytes) and its length are copied out.
 *
 * All lengths come from the peer, so each one is validated against the
 * buffer it refers to before that buffer is decrypted, parsed or
 * copied from.
 *
 * Returns 0 on success, -EINVAL on a malformed reply, or the error
 * from ticket processing / decryption / ticket handler lookup.
 */
static int handle_auth_session_key(struct ceph_auth_client *ac,
				   void **p, void *end,
				   u8 *session_key, int *session_key_len,
				   u8 *con_secret, int *con_secret_len)
{
	struct ceph_x_info *xi = ac->private;
	struct ceph_x_ticket_handler *th;
	void *dp, *dend;
	int len;
	int ret;

	/* AUTH ticket */
	ret = ceph_x_proc_ticket_reply(ac, &xi->secret, p, end);
	if (ret)
		return ret;

	if (*p == end) {
		/* pre-nautilus (or didn't request service tickets!) */
		WARN_ON(session_key || con_secret);
		return 0;
	}

	th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
	if (IS_ERR(th))
		return PTR_ERR(th);

	if (session_key) {
		memcpy(session_key, th->session_key.key, th->session_key.len);
		*session_key_len = th->session_key.len;
	}

	/* connection secret */
	ceph_decode_32_safe(p, end, len, e_inval);
	dout("%s connection secret blob len %d\n", __func__, len);
	if (len < 0)
		goto e_inval;
	/* the encrypted blob must lie entirely within the reply */
	ceph_decode_need(p, end, len, e_inval);
	if (len > 0) {
		dp = *p + ceph_x_encrypt_offset();
		ret = ceph_x_decrypt(&th->session_key, p, *p + len);
		if (ret < 0)
			return ret;

		dout("%s decrypted %d bytes\n", __func__, ret);
		dend = dp + ret;

		ceph_decode_32_safe(&dp, dend, len, e_inval);
		if (len > CEPH_MAX_CON_SECRET_LEN) {
			pr_err("connection secret too big %d\n", len);
			return -EINVAL;
		}
		if (len < 0)
			goto e_inval;
		/* the secret must lie entirely within the plaintext */
		ceph_decode_need(&dp, dend, len, e_inval);

		dout("%s connection secret len %d\n", __func__, len);
		if (con_secret) {
			memcpy(con_secret, dp, len);
			*con_secret_len = len;
		}
	}

	/* service tickets */
	ceph_decode_32_safe(p, end, len, e_inval);
	dout("%s service tickets blob len %d\n", __func__, len);
	if (len < 0)
		goto e_inval;
	/* the tickets blob must lie entirely within the reply */
	ceph_decode_need(p, end, len, e_inval);
	if (len > 0) {
		ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
					       p, *p + len);
		if (ret)
			return ret;
	}

	return 0;

e_inval:
	return -EINVAL;
}
static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
void *buf, void *end) void *buf, void *end,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{ {
struct ceph_x_info *xi = ac->private; struct ceph_x_info *xi = ac->private;
struct ceph_x_reply_header *head = buf;
struct ceph_x_ticket_handler *th; struct ceph_x_ticket_handler *th;
int len = end - buf; int len = end - buf;
void *p;
int op; int op;
int ret; int ret;
@ -587,22 +670,25 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
return -EAGAIN; return -EAGAIN;
} }
op = le16_to_cpu(head->op); p = buf;
result = le32_to_cpu(head->result); ceph_decode_16_safe(&p, end, op, e_inval);
ceph_decode_32_safe(&p, end, result, e_inval);
dout("handle_reply op %d result %d\n", op, result); dout("handle_reply op %d result %d\n", op, result);
switch (op) { switch (op) {
case CEPHX_GET_AUTH_SESSION_KEY: case CEPHX_GET_AUTH_SESSION_KEY:
/* verify auth key */ /* AUTH ticket + [connection secret] + service tickets */
ret = ceph_x_proc_ticket_reply(ac, &xi->secret, ret = handle_auth_session_key(ac, &p, end, session_key,
buf + sizeof(*head), end); session_key_len, con_secret,
con_secret_len);
break; break;
case CEPHX_GET_PRINCIPAL_SESSION_KEY: case CEPHX_GET_PRINCIPAL_SESSION_KEY:
th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH);
if (IS_ERR(th)) if (IS_ERR(th))
return PTR_ERR(th); return PTR_ERR(th);
ret = ceph_x_proc_ticket_reply(ac, &th->session_key,
buf + sizeof(*head), end); /* service tickets */
ret = ceph_x_proc_ticket_reply(ac, &th->session_key, &p, end);
break; break;
default: default:
@ -613,6 +699,9 @@ static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result,
if (ac->want_keys == xi->have_keys) if (ac->want_keys == xi->have_keys)
return 0; return 0;
return -EAGAIN; return -EAGAIN;
e_inval:
return -EINVAL;
} }
static void ceph_x_destroy_authorizer(struct ceph_authorizer *a) static void ceph_x_destroy_authorizer(struct ceph_authorizer *a)
@ -678,40 +767,44 @@ static int ceph_x_update_authorizer(
return 0; return 0;
} }
static int decrypt_authorize_challenge(struct ceph_x_authorizer *au, /*
void *challenge_buf, * CephXAuthorizeChallenge
int challenge_buf_len, */
u64 *server_challenge) static int decrypt_authorizer_challenge(struct ceph_crypto_key *secret,
void *challenge, int challenge_len,
u64 *server_challenge)
{ {
struct ceph_x_authorize_challenge *ch = void *dp, *dend;
challenge_buf + sizeof(struct ceph_x_encrypt_header);
int ret; int ret;
/* no leading len */ /* no leading len */
ret = __ceph_x_decrypt(&au->session_key, challenge_buf, ret = __ceph_x_decrypt(secret, challenge, challenge_len);
challenge_buf_len);
if (ret < 0) if (ret < 0)
return ret; return ret;
if (ret < sizeof(*ch)) {
pr_err("bad size %d for ceph_x_authorize_challenge\n", ret);
return -EINVAL;
}
*server_challenge = le64_to_cpu(ch->server_challenge); dout("%s decrypted %d bytes\n", __func__, ret);
dp = challenge + sizeof(struct ceph_x_encrypt_header);
dend = dp + ret;
ceph_decode_skip_8(&dp, dend, e_inval); /* struct_v */
ceph_decode_64_safe(&dp, dend, *server_challenge, e_inval);
dout("%s server_challenge %llu\n", __func__, *server_challenge);
return 0; return 0;
e_inval:
return -EINVAL;
} }
static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac, static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
struct ceph_authorizer *a, struct ceph_authorizer *a,
void *challenge_buf, void *challenge, int challenge_len)
int challenge_buf_len)
{ {
struct ceph_x_authorizer *au = (void *)a; struct ceph_x_authorizer *au = (void *)a;
u64 server_challenge; u64 server_challenge;
int ret; int ret;
ret = decrypt_authorize_challenge(au, challenge_buf, challenge_buf_len, ret = decrypt_authorizer_challenge(&au->session_key, challenge,
&server_challenge); challenge_len, &server_challenge);
if (ret) { if (ret) {
pr_err("failed to decrypt authorize challenge: %d", ret); pr_err("failed to decrypt authorize challenge: %d", ret);
return ret; return ret;
@ -726,29 +819,76 @@ static int ceph_x_add_authorizer_challenge(struct ceph_auth_client *ac,
return 0; return 0;
} }
static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, /*
struct ceph_authorizer *a) * CephXAuthorizeReply
*/
static int decrypt_authorizer_reply(struct ceph_crypto_key *secret,
void **p, void *end, u64 *nonce_plus_one,
u8 *con_secret, int *con_secret_len)
{ {
struct ceph_x_authorizer *au = (void *)a; void *dp, *dend;
void *p = au->enc_buf; u8 struct_v;
struct ceph_x_authorize_reply *reply = p + ceph_x_encrypt_offset(); int len;
int ret; int ret;
ret = ceph_x_decrypt(&au->session_key, &p, p + CEPHX_AU_ENC_BUF_LEN); dp = *p + ceph_x_encrypt_offset();
ret = ceph_x_decrypt(secret, p, end);
if (ret < 0) if (ret < 0)
return ret; return ret;
if (ret < sizeof(*reply)) {
pr_err("bad size %d for ceph_x_authorize_reply\n", ret); dout("%s decrypted %d bytes\n", __func__, ret);
return -EINVAL; dend = dp + ret;
ceph_decode_8_safe(&dp, dend, struct_v, e_inval);
ceph_decode_64_safe(&dp, dend, *nonce_plus_one, e_inval);
dout("%s nonce_plus_one %llu\n", __func__, *nonce_plus_one);
if (struct_v >= 2) {
ceph_decode_32_safe(&dp, dend, len, e_inval);
if (len > CEPH_MAX_CON_SECRET_LEN) {
pr_err("connection secret too big %d\n", len);
return -EINVAL;
}
dout("%s connection secret len %d\n", __func__, len);
if (con_secret) {
memcpy(con_secret, dp, len);
*con_secret_len = len;
}
} }
if (au->nonce + 1 != le64_to_cpu(reply->nonce_plus_one)) return 0;
ret = -EPERM;
else e_inval:
ret = 0; return -EINVAL;
dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", }
au->nonce, le64_to_cpu(reply->nonce_plus_one), ret);
return ret; static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac,
struct ceph_authorizer *a,
void *reply, int reply_len,
u8 *session_key, int *session_key_len,
u8 *con_secret, int *con_secret_len)
{
struct ceph_x_authorizer *au = (void *)a;
u64 nonce_plus_one;
int ret;
if (session_key) {
memcpy(session_key, au->session_key.key, au->session_key.len);
*session_key_len = au->session_key.len;
}
ret = decrypt_authorizer_reply(&au->session_key, &reply,
reply + reply_len, &nonce_plus_one,
con_secret, con_secret_len);
if (ret)
return ret;
if (nonce_plus_one != au->nonce + 1) {
pr_err("failed to authenticate server\n");
return -EPERM;
}
return 0;
} }
static void ceph_x_reset(struct ceph_auth_client *ac) static void ceph_x_reset(struct ceph_auth_client *ac)
@ -785,8 +925,15 @@ static void invalidate_ticket(struct ceph_auth_client *ac, int peer_type)
struct ceph_x_ticket_handler *th; struct ceph_x_ticket_handler *th;
th = get_ticket_handler(ac, peer_type); th = get_ticket_handler(ac, peer_type);
if (!IS_ERR(th)) if (IS_ERR(th))
return;
if (th->have_key) {
dout("ticket %d (%s) secret_id %llu invalidated\n",
th->service, ceph_entity_type_name(th->service),
th->secret_id);
th->have_key = false; th->have_key = false;
}
} }
static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac,
@ -911,7 +1058,6 @@ static int ceph_x_check_message_signature(struct ceph_auth_handshake *auth,
} }
static const struct ceph_auth_client_ops ceph_x_ops = { static const struct ceph_auth_client_ops ceph_x_ops = {
.name = "x",
.is_authenticated = ceph_x_is_authenticated, .is_authenticated = ceph_x_is_authenticated,
.should_authenticate = ceph_x_should_authenticate, .should_authenticate = ceph_x_should_authenticate,
.build_request = ceph_x_build_request, .build_request = ceph_x_build_request,

View File

@ -38,7 +38,8 @@ struct ceph_x_authenticate {
__u8 struct_v; __u8 struct_v;
__le64 client_challenge; __le64 client_challenge;
__le64 key; __le64 key;
/* ticket blob */ /* old_ticket blob */
/* nautilus+: other_keys */
} __attribute__ ((packed)); } __attribute__ ((packed));
struct ceph_x_service_ticket_request { struct ceph_x_service_ticket_request {

View File

@ -265,6 +265,7 @@ enum {
Opt_ip, Opt_ip,
Opt_crush_location, Opt_crush_location,
Opt_read_from_replica, Opt_read_from_replica,
Opt_ms_mode,
/* string args above */ /* string args above */
Opt_share, Opt_share,
Opt_crc, Opt_crc,
@ -287,6 +288,23 @@ static const struct constant_table ceph_param_read_from_replica[] = {
{} {}
}; };
/*
 * Values of the "ms_mode" option.  ceph_parse_param() maps each of them
 * to a (preferred, fallback) pair of connection modes in
 * opt->con_modes[]; "legacy" leaves both entries at
 * CEPH_CON_MODE_UNKNOWN.
 */
enum ceph_ms_mode {
Opt_ms_mode_legacy,
Opt_ms_mode_crc,
Opt_ms_mode_secure,
Opt_ms_mode_prefer_crc,
Opt_ms_mode_prefer_secure
};
/* option string -> enum ceph_ms_mode, terminated by an empty entry */
static const struct constant_table ceph_param_ms_mode[] = {
{"legacy", Opt_ms_mode_legacy},
{"crc", Opt_ms_mode_crc},
{"secure", Opt_ms_mode_secure},
{"prefer-crc", Opt_ms_mode_prefer_crc},
{"prefer-secure", Opt_ms_mode_prefer_secure},
{}
};
static const struct fs_parameter_spec ceph_parameters[] = { static const struct fs_parameter_spec ceph_parameters[] = {
fsparam_flag ("abort_on_full", Opt_abort_on_full), fsparam_flag ("abort_on_full", Opt_abort_on_full),
fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures), fsparam_flag_no ("cephx_require_signatures", Opt_cephx_require_signatures),
@ -305,6 +323,8 @@ static const struct fs_parameter_spec ceph_parameters[] = {
fs_param_deprecated, NULL), fs_param_deprecated, NULL),
fsparam_enum ("read_from_replica", Opt_read_from_replica, fsparam_enum ("read_from_replica", Opt_read_from_replica,
ceph_param_read_from_replica), ceph_param_read_from_replica),
fsparam_enum ("ms_mode", Opt_ms_mode,
ceph_param_ms_mode),
fsparam_string ("secret", Opt_secret), fsparam_string ("secret", Opt_secret),
fsparam_flag_no ("share", Opt_share), fsparam_flag_no ("share", Opt_share),
fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay), fsparam_flag_no ("tcp_nodelay", Opt_tcp_nodelay),
@ -333,6 +353,8 @@ struct ceph_options *ceph_alloc_options(void)
opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT;
opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT;
opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT; opt->read_from_replica = CEPH_READ_FROM_REPLICA_DEFAULT;
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
return opt; return opt;
} }
EXPORT_SYMBOL(ceph_alloc_options); EXPORT_SYMBOL(ceph_alloc_options);
@ -503,6 +525,32 @@ int ceph_parse_param(struct fs_parameter *param, struct ceph_options *opt,
BUG(); BUG();
} }
break; break;
case Opt_ms_mode:
switch (result.uint_32) {
case Opt_ms_mode_legacy:
opt->con_modes[0] = CEPH_CON_MODE_UNKNOWN;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_UNKNOWN;
break;
case Opt_ms_mode_prefer_crc:
opt->con_modes[0] = CEPH_CON_MODE_CRC;
opt->con_modes[1] = CEPH_CON_MODE_SECURE;
break;
case Opt_ms_mode_prefer_secure:
opt->con_modes[0] = CEPH_CON_MODE_SECURE;
opt->con_modes[1] = CEPH_CON_MODE_CRC;
break;
default:
BUG();
}
break;
case Opt_osdtimeout: case Opt_osdtimeout:
warn_plog(&log, "Ignoring osdtimeout"); warn_plog(&log, "Ignoring osdtimeout");
@ -616,6 +664,21 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client,
} else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) { } else if (opt->read_from_replica == CEPH_OSD_FLAG_LOCALIZE_READS) {
seq_puts(m, "read_from_replica=localize,"); seq_puts(m, "read_from_replica=localize,");
} }
if (opt->con_modes[0] != CEPH_CON_MODE_UNKNOWN) {
if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_UNKNOWN) {
seq_puts(m, "ms_mode=secure,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_CRC &&
opt->con_modes[1] == CEPH_CON_MODE_SECURE) {
seq_puts(m, "ms_mode=prefer-crc,");
} else if (opt->con_modes[0] == CEPH_CON_MODE_SECURE &&
opt->con_modes[1] == CEPH_CON_MODE_CRC) {
seq_puts(m, "ms_mode=prefer-secure,");
}
}
if (opt->flags & CEPH_OPT_FSID) if (opt->flags & CEPH_OPT_FSID)
seq_printf(m, "fsid=%pU,", &opt->fsid); seq_printf(m, "fsid=%pU,", &opt->fsid);

View File

@ -18,6 +18,34 @@ const char *ceph_entity_type_name(int type)
} }
EXPORT_SYMBOL(ceph_entity_type_name); EXPORT_SYMBOL(ceph_entity_type_name);
/*
 * Map a CEPH_AUTH_* protocol number to a printable name; unrecognized
 * values yield "???".
 */
const char *ceph_auth_proto_name(int proto)
{
	if (proto == CEPH_AUTH_UNKNOWN)
		return "unknown";
	if (proto == CEPH_AUTH_NONE)
		return "none";
	if (proto == CEPH_AUTH_CEPHX)
		return "cephx";

	return "???";
}
/*
 * Map a CEPH_CON_MODE_* connection mode to a printable name;
 * unrecognized values yield "???".
 */
const char *ceph_con_mode_name(int mode)
{
	if (mode == CEPH_CON_MODE_UNKNOWN)
		return "unknown";
	if (mode == CEPH_CON_MODE_CRC)
		return "crc";
	if (mode == CEPH_CON_MODE_SECURE)
		return "secure";

	return "???";
}
const char *ceph_osd_op_name(int op) const char *ceph_osd_op_name(int op)
{ {
switch (op) { switch (op) {

View File

@ -5,6 +5,9 @@
#include <linux/ceph/types.h> #include <linux/ceph/types.h>
#include <linux/ceph/buffer.h> #include <linux/ceph/buffer.h>
#define CEPH_KEY_LEN 16
#define CEPH_MAX_CON_SECRET_LEN 64
/* /*
* cryptographic secret * cryptographic secret
*/ */

View File

@ -1,4 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>
#include <linux/inet.h>
#include <linux/ceph/decode.h> #include <linux/ceph/decode.h>
@ -82,3 +85,101 @@ bad:
} }
EXPORT_SYMBOL(ceph_decode_entity_addr); EXPORT_SYMBOL(ceph_decode_entity_addr);
/*
 * Return addr of desired type (MSGR2 or LEGACY) or error.
 * Make sure there is only one match.
 *
 * Assume encoding with MSG_ADDR2.
 *
 * @p is advanced past every address that was decoded.  On a match the
 * address is copied to @addr.  An empty addrvec (count of 0) is not an
 * error: 0 is returned and @addr is left untouched.
 *
 * Returns 0 on success, -EINVAL on a bad marker, truncated input or
 * more than one address of the desired type, -ENOENT if the addrvec is
 * non-empty but has no address of the desired type, or the error from
 * ceph_decode_entity_addr().
 */
int ceph_decode_entity_addrvec(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr)
{
__le32 my_type = msgr2 ? CEPH_ENTITY_ADDR_TYPE_MSGR2 :
CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_entity_addr tmp_addr;
int addr_cnt;
bool found;
u8 marker;
int ret;
int i;
/* an addrvec starts with a marker byte of 2 */
ceph_decode_8_safe(p, end, marker, e_inval);
if (marker != 2) {
pr_err("bad addrvec marker %d\n", marker);
return -EINVAL;
}
ceph_decode_32_safe(p, end, addr_cnt, e_inval);
found = false;
/* decode every entry, even after a match, to detect duplicates */
for (i = 0; i < addr_cnt; i++) {
ret = ceph_decode_entity_addr(p, end, &tmp_addr);
if (ret)
return ret;
if (tmp_addr.type == my_type) {
if (found) {
pr_err("another match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -EINVAL;
}
memcpy(addr, &tmp_addr, sizeof(*addr));
found = true;
}
}
/* only a non-empty addrvec without a match is an error */
if (!found && addr_cnt != 0) {
pr_err("no match of type %d in addrvec\n",
le32_to_cpu(my_type));
return -ENOENT;
}
return 0;
e_inval:
return -EINVAL;
}
EXPORT_SYMBOL(ceph_decode_entity_addrvec);
/*
 * Number of sockaddr bytes that go on the wire for @family: the size
 * of sockaddr_in for AF_INET, of sockaddr_in6 for AF_INET6 and the size
 * of the largest of the known sockaddr variants for anything else.
 */
static int get_sockaddr_encoding_len(sa_family_t family)
{
	union {
		struct sockaddr sa;
		struct sockaddr_in sin;
		struct sockaddr_in6 sin6;
	} any;

	if (family == AF_INET)
		return sizeof(any.sin);
	if (family == AF_INET6)
		return sizeof(any.sin6);

	return sizeof(any);
}
/*
 * Number of bytes ceph_encode_entity_addr() emits for @addr: a 1-byte
 * marker, the encoding start block, three 4-byte fields (presumably
 * type, nonce and the sockaddr length -- compare with
 * ceph_encode_entity_addr()) and the family-dependent sockaddr bytes.
 */
int ceph_entity_addr_encoding_len(const struct ceph_entity_addr *addr)
{
	int fixed_len = 1 + CEPH_ENCODING_START_BLK_LEN + 4 + 4 + 4;
	sa_family_t family;

	/* ss_family may not be naturally aligned within the entity addr */
	family = get_unaligned(&addr->in_addr.ss_family);

	return fixed_len + get_sockaddr_encoding_len(family);
}
/*
 * Encode @addr at *p and advance *p past it.
 *
 * Layout: marker byte (1), encoding start block (versions 1/1 and the
 * length of what follows), type, nonce, 32-bit sockaddr length, 16-bit
 * address family and the remainder of the sockaddr.
 *
 * No end pointer is taken, so nothing here checks the space available
 * at *p; the caller must have reserved enough room.
 */
void ceph_encode_entity_addr(void **p, const struct ceph_entity_addr *addr)
{
sa_family_t family = get_unaligned(&addr->in_addr.ss_family);
int addr_len = get_sockaddr_encoding_len(family);
ceph_encode_8(p, 1); /* marker */
ceph_start_encoding(p, 1, 1, sizeof(addr->type) +
sizeof(addr->nonce) +
sizeof(u32) + addr_len);
/* type and nonce are copied as stored, without conversion */
ceph_encode_copy(p, &addr->type, sizeof(addr->type));
ceph_encode_copy(p, &addr->nonce, sizeof(addr->nonce));
ceph_encode_32(p, addr_len);
/* sockaddr: family as an encoded 16-bit value, then the rest raw */
ceph_encode_16(p, family);
ceph_encode_copy(p, addr->in_addr.__data, addr_len - sizeof(family));
}

File diff suppressed because it is too large Load Diff

1506
net/ceph/messenger_v1.c Normal file

File diff suppressed because it is too large Load Diff

3443
net/ceph/messenger_v2.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -36,57 +36,122 @@ static const struct ceph_connection_operations mon_con_ops;
static int __validate_auth(struct ceph_mon_client *monc); static int __validate_auth(struct ceph_mon_client *monc);
/*
 * Decode one "mon_info_t" structure and extract the monitor's address
 * of the desired type (msgr2 or legacy) into @addr.
 *
 * Only the monitor name (skipped) and the addrvec are looked at; *p is
 * then moved to the end of the structure as given by its encoded
 * length, which skips any fields that follow the addrvec.
 *
 * Returns 0 on success, -EINVAL on truncated input, or the error from
 * ceph_start_decoding() / ceph_decode_entity_addrvec().
 *
 * NOTE(review): the name and addrvec are bounds-checked against @end
 * rather than against the end of this structure -- confirm that this is
 * intended.
 */
static int decode_mon_info(void **p, void *end, bool msgr2,
struct ceph_entity_addr *addr)
{
void *mon_info_end;
u32 struct_len;
u8 struct_v;
int ret;
ret = ceph_start_decoding(p, end, 1, "mon_info_t", &struct_v,
&struct_len);
if (ret)
return ret;
/* remember where this structure ends so the rest can be skipped */
mon_info_end = *p + struct_len;
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
ret = ceph_decode_entity_addrvec(p, end, msgr2, addr);
if (ret)
return ret;
*p = mon_info_end;
return 0;
e_inval:
return -EINVAL;
}
/* /*
* Decode a monmap blob (e.g., during mount). * Decode a monmap blob (e.g., during mount).
*
* Assume MonMap v3 (i.e. encoding with MONNAMES and MONENC).
*/ */
static struct ceph_monmap *ceph_monmap_decode(void *p, void *end) static struct ceph_monmap *ceph_monmap_decode(void **p, void *end, bool msgr2)
{ {
struct ceph_monmap *m = NULL; struct ceph_monmap *monmap = NULL;
int i, err = -EINVAL;
struct ceph_fsid fsid; struct ceph_fsid fsid;
u32 epoch, num_mon; u32 struct_len;
u32 len; int blob_len;
int num_mon;
u8 struct_v;
u32 epoch;
int ret;
int i;
ceph_decode_32_safe(&p, end, len, bad); ceph_decode_32_safe(p, end, blob_len, e_inval);
ceph_decode_need(&p, end, len, bad); ceph_decode_need(p, end, blob_len, e_inval);
dout("monmap_decode %p %p len %d (%d)\n", p, end, len, (int)(end-p)); ret = ceph_start_decoding(p, end, 6, "monmap", &struct_v, &struct_len);
p += sizeof(u16); /* skip version */ if (ret)
goto fail;
ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); dout("%s struct_v %d\n", __func__, struct_v);
ceph_decode_copy(&p, &fsid, sizeof(fsid)); ceph_decode_copy_safe(p, end, &fsid, sizeof(fsid), e_inval);
epoch = ceph_decode_32(&p); ceph_decode_32_safe(p, end, epoch, e_inval);
if (struct_v >= 6) {
u32 feat_struct_len;
u8 feat_struct_v;
num_mon = ceph_decode_32(&p); *p += sizeof(struct ceph_timespec); /* skip last_changed */
*p += sizeof(struct ceph_timespec); /* skip created */
if (num_mon > CEPH_MAX_MON) ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
goto bad; &feat_struct_v, &feat_struct_len);
m = kmalloc(struct_size(m, mon_inst, num_mon), GFP_NOFS); if (ret)
if (m == NULL) goto fail;
return ERR_PTR(-ENOMEM);
m->fsid = fsid;
m->epoch = epoch;
m->num_mon = num_mon;
for (i = 0; i < num_mon; ++i) {
struct ceph_entity_inst *inst = &m->mon_inst[i];
/* copy name portion */ *p += feat_struct_len; /* skip persistent_features */
ceph_decode_copy_safe(&p, end, &inst->name,
sizeof(inst->name), bad); ret = ceph_start_decoding(p, end, 1, "mon_feature_t",
err = ceph_decode_entity_addr(&p, end, &inst->addr); &feat_struct_v, &feat_struct_len);
if (err) if (ret)
goto bad; goto fail;
*p += feat_struct_len; /* skip optional_features */
} }
dout("monmap_decode epoch %d, num_mon %d\n", m->epoch, ceph_decode_32_safe(p, end, num_mon, e_inval);
m->num_mon);
for (i = 0; i < m->num_mon; i++) dout("%s fsid %pU epoch %u num_mon %d\n", __func__, &fsid, epoch,
dout("monmap_decode mon%d is %s\n", i, num_mon);
ceph_pr_addr(&m->mon_inst[i].addr)); if (num_mon > CEPH_MAX_MON)
return m; goto e_inval;
bad:
dout("monmap_decode failed with %d\n", err); monmap = kmalloc(struct_size(monmap, mon_inst, num_mon), GFP_NOIO);
kfree(m); if (!monmap) {
return ERR_PTR(err); ret = -ENOMEM;
goto fail;
}
monmap->fsid = fsid;
monmap->epoch = epoch;
monmap->num_mon = num_mon;
/* legacy_mon_addr map or mon_info map */
for (i = 0; i < num_mon; i++) {
struct ceph_entity_inst *inst = &monmap->mon_inst[i];
ceph_decode_skip_string(p, end, e_inval); /* skip mon name */
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
if (struct_v >= 6)
ret = decode_mon_info(p, end, msgr2, &inst->addr);
else
ret = ceph_decode_entity_addr(p, end, &inst->addr);
if (ret)
goto fail;
dout("%s mon%d addr %s\n", __func__, i,
ceph_pr_addr(&inst->addr));
}
return monmap;
e_inval:
ret = -EINVAL;
fail:
kfree(monmap);
return ERR_PTR(ret);
} }
/* /*
@ -96,9 +161,11 @@ int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr)
{ {
int i; int i;
for (i = 0; i < m->num_mon; i++) for (i = 0; i < m->num_mon; i++) {
if (memcmp(addr, &m->mon_inst[i].addr, sizeof(*addr)) == 0) if (ceph_addr_equal_no_type(addr, &m->mon_inst[i].addr))
return 1; return 1;
}
return 0; return 0;
} }
@ -190,10 +257,16 @@ static void __open_session(struct ceph_mon_client *monc)
&monc->monmap->mon_inst[monc->cur_mon].addr); &monc->monmap->mon_inst[monc->cur_mon].addr);
/* /*
* send an initial keepalive to ensure our timestamp is valid * Queue a keepalive to ensure that in case of an early fault
* by the time we are in an OPENED state * the messenger doesn't put us into STANDBY state and instead
* retries. This also ensures that our timestamp is valid by
* the time we finish hunting and delayed_work() checks it.
*/ */
ceph_con_keepalive(&monc->con); ceph_con_keepalive(&monc->con);
if (ceph_msgr2(monc->client)) {
monc->pending_auth = 1;
return;
}
/* initiate authentication handshake */ /* initiate authentication handshake */
ret = ceph_auth_build_hello(monc->auth, ret = ceph_auth_build_hello(monc->auth,
@ -476,7 +549,7 @@ static void ceph_monc_handle_map(struct ceph_mon_client *monc,
p = msg->front.iov_base; p = msg->front.iov_base;
end = p + msg->front.iov_len; end = p + msg->front.iov_len;
monmap = ceph_monmap_decode(p, end); monmap = ceph_monmap_decode(&p, end, ceph_msgr2(client));
if (IS_ERR(monmap)) { if (IS_ERR(monmap)) {
pr_err("problem decoding monmap, %d\n", pr_err("problem decoding monmap, %d\n",
(int)PTR_ERR(monmap)); (int)PTR_ERR(monmap));
@ -1052,8 +1125,9 @@ static void delayed_work(struct work_struct *work)
*/ */
static int build_initial_monmap(struct ceph_mon_client *monc) static int build_initial_monmap(struct ceph_mon_client *monc)
{ {
__le32 my_type = ceph_msgr2(monc->client) ?
CEPH_ENTITY_ADDR_TYPE_MSGR2 : CEPH_ENTITY_ADDR_TYPE_LEGACY;
struct ceph_options *opt = monc->client->options; struct ceph_options *opt = monc->client->options;
struct ceph_entity_addr *mon_addr = opt->mon_addr;
int num_mon = opt->num_mon; int num_mon = opt->num_mon;
int i; int i;
@ -1062,12 +1136,16 @@ static int build_initial_monmap(struct ceph_mon_client *monc)
GFP_KERNEL); GFP_KERNEL);
if (!monc->monmap) if (!monc->monmap)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < num_mon; i++) { for (i = 0; i < num_mon; i++) {
monc->monmap->mon_inst[i].addr = mon_addr[i]; struct ceph_entity_inst *inst = &monc->monmap->mon_inst[i];
monc->monmap->mon_inst[i].addr.nonce = 0;
monc->monmap->mon_inst[i].name.type = memcpy(&inst->addr.in_addr, &opt->mon_addr[i].in_addr,
CEPH_ENTITY_TYPE_MON; sizeof(inst->addr.in_addr));
monc->monmap->mon_inst[i].name.num = cpu_to_le64(i); inst->addr.type = my_type;
inst->addr.nonce = 0;
inst->name.type = CEPH_ENTITY_TYPE_MON;
inst->name.num = cpu_to_le64(i);
} }
monc->monmap->num_mon = num_mon; monc->monmap->num_mon = num_mon;
return 0; return 0;
@ -1089,8 +1167,8 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
/* connection */ /* connection */
/* authentication */ /* authentication */
monc->auth = ceph_auth_init(cl->options->name, monc->auth = ceph_auth_init(cl->options->name, cl->options->key,
cl->options->key); cl->options->con_modes);
if (IS_ERR(monc->auth)) { if (IS_ERR(monc->auth)) {
err = PTR_ERR(monc->auth); err = PTR_ERR(monc->auth);
goto out_monmap; goto out_monmap;
@ -1194,30 +1272,22 @@ static void finish_hunting(struct ceph_mon_client *monc)
} }
} }
static void handle_auth_reply(struct ceph_mon_client *monc, static void finish_auth(struct ceph_mon_client *monc, int auth_err,
struct ceph_msg *msg) bool was_authed)
{ {
int ret; dout("%s auth_err %d was_authed %d\n", __func__, auth_err, was_authed);
int was_auth = 0; WARN_ON(auth_err > 0);
mutex_lock(&monc->mutex);
was_auth = ceph_auth_is_authenticated(monc->auth);
monc->pending_auth = 0; monc->pending_auth = 0;
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base, if (auth_err) {
msg->front.iov_len, monc->client->auth_err = auth_err;
monc->m_auth->front.iov_base, wake_up_all(&monc->client->auth_wq);
monc->m_auth->front_alloc_len); return;
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
goto out;
} }
finish_hunting(monc); if (!was_authed && ceph_auth_is_authenticated(monc->auth)) {
dout("%s authenticated, starting session global_id %llu\n",
if (ret < 0) { __func__, monc->auth->global_id);
monc->client->auth_err = ret;
} else if (!was_auth && ceph_auth_is_authenticated(monc->auth)) {
dout("authenticated, starting session\n");
monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT; monc->client->msgr.inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
monc->client->msgr.inst.name.num = monc->client->msgr.inst.name.num =
@ -1229,11 +1299,27 @@ static void handle_auth_reply(struct ceph_mon_client *monc,
pr_info("mon%d %s session established\n", monc->cur_mon, pr_info("mon%d %s session established\n", monc->cur_mon,
ceph_pr_addr(&monc->con.peer_addr)); ceph_pr_addr(&monc->con.peer_addr));
} }
}
out: static void handle_auth_reply(struct ceph_mon_client *monc,
struct ceph_msg *msg)
{
bool was_authed;
int ret;
mutex_lock(&monc->mutex);
was_authed = ceph_auth_is_authenticated(monc->auth);
ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
msg->front.iov_len,
monc->m_auth->front.iov_base,
monc->m_auth->front_alloc_len);
if (ret > 0) {
__send_prepared_auth_request(monc, ret);
} else {
finish_auth(monc, ret, was_authed);
finish_hunting(monc);
}
mutex_unlock(&monc->mutex); mutex_unlock(&monc->mutex);
if (monc->client->auth_err < 0)
wake_up_all(&monc->client->auth_wq);
} }
static int __validate_auth(struct ceph_mon_client *monc) static int __validate_auth(struct ceph_mon_client *monc)
@ -1262,6 +1348,88 @@ int ceph_monc_validate_auth(struct ceph_mon_client *monc)
} }
EXPORT_SYMBOL(ceph_monc_validate_auth); EXPORT_SYMBOL(ceph_monc_validate_auth);
/*
 * ->get_auth_request() connection op for the monitor connection.
 *
 * Asks the auth client to build a request into @buf (capacity
 * *@buf_len) while holding monc->mutex.  On success *@buf_len is set to
 * the non-negative value returned by ceph_auth_get_request() and no
 * authorizer is reported (*@authorizer = NULL, *@authorizer_len = 0).
 *
 * Returns 0 on success or the negative value from
 * ceph_auth_get_request().
 */
static int mon_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_mon_client *monc = con->private;
	int req_len;

	mutex_lock(&monc->mutex);
	req_len = ceph_auth_get_request(monc->auth, buf, *buf_len);
	mutex_unlock(&monc->mutex);

	if (req_len >= 0) {
		*buf_len = req_len;
		*authorizer = NULL;
		*authorizer_len = 0;
		return 0;
	}

	return req_len;
}
/*
 * ->handle_auth_reply_more() connection op for the monitor connection.
 *
 * Passes @reply to ceph_auth_handle_reply_more() under monc->mutex,
 * with @buf (capacity *@buf_len) as the output buffer.  On success
 * *@buf_len is set to the non-negative value that call returned and no
 * authorizer is reported (*@authorizer = NULL, *@authorizer_len = 0).
 *
 * Returns 0 on success or the negative value from
 * ceph_auth_handle_reply_more().
 */
static int mon_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_mon_client *monc = con->private;
	int out_len;

	mutex_lock(&monc->mutex);
	out_len = ceph_auth_handle_reply_more(monc->auth, reply, reply_len,
					      buf, *buf_len);
	mutex_unlock(&monc->mutex);

	if (out_len < 0)
		return out_len;

	*authorizer_len = 0;
	*authorizer = NULL;
	*buf_len = out_len;
	return 0;
}
/*
 * ->handle_auth_done() connection op for the monitor connection.
 *
 * Under monc->mutex: warns if we are not hunting, hands the reply to
 * ceph_auth_handle_reply_done(), forwards its result to finish_auth()
 * together with whether we were already authenticated before the call,
 * and calls finish_hunting() only if that result was 0.
 *
 * Always returns 0; the auth result is reported through finish_auth()
 * rather than through the return value.
 */
static int mon_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_mon_client *monc = con->private;
	bool authed_before;
	int auth_err;

	mutex_lock(&monc->mutex);
	WARN_ON(!monc->hunting);

	/* sample the state before the reply can change it */
	authed_before = ceph_auth_is_authenticated(monc->auth);
	auth_err = ceph_auth_handle_reply_done(monc->auth, global_id,
					       reply, reply_len,
					       session_key, session_key_len,
					       con_secret, con_secret_len);
	finish_auth(monc, auth_err, authed_before);
	if (auth_err == 0)
		finish_hunting(monc);

	mutex_unlock(&monc->mutex);
	return 0;
}
/*
 * ->handle_auth_bad_method() connection op for the monitor connection.
 *
 * Under monc->mutex: warns if we are not hunting, passes the rejected
 * protocol and the allowed protocol/mode lists on to
 * ceph_auth_handle_bad_method(), then finishes authentication with
 * -EACCES via finish_auth().
 *
 * Always returns 0.
 */
static int mon_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_mon_client *monc = con->private;
	bool authed_before;

	mutex_lock(&monc->mutex);
	WARN_ON(!monc->hunting);

	authed_before = ceph_auth_is_authenticated(monc->auth);
	ceph_auth_handle_bad_method(monc->auth, used_proto, result,
				    allowed_protos, proto_cnt,
				    allowed_modes, mode_cnt);
	finish_auth(monc, -EACCES, authed_before);

	mutex_unlock(&monc->mutex);
	return 0;
}
/* /*
* handle incoming message * handle incoming message
*/ */
@ -1412,4 +1580,8 @@ static const struct ceph_connection_operations mon_con_ops = {
.dispatch = dispatch, .dispatch = dispatch,
.fault = mon_fault, .fault = mon_fault,
.alloc_msg = mon_alloc_msg, .alloc_msg = mon_alloc_msg,
.get_auth_request = mon_get_auth_request,
.handle_auth_reply_more = mon_handle_auth_reply_more,
.handle_auth_done = mon_handle_auth_done,
.handle_auth_bad_method = mon_handle_auth_bad_method,
}; };

View File

@ -3918,9 +3918,11 @@ static int handle_one_map(struct ceph_osd_client *osdc,
set_pool_was_full(osdc); set_pool_was_full(osdc);
if (incremental) if (incremental)
newmap = osdmap_apply_incremental(&p, end, osdc->osdmap); newmap = osdmap_apply_incremental(&p, end,
ceph_msgr2(osdc->client),
osdc->osdmap);
else else
newmap = ceph_osdmap_decode(&p, end); newmap = ceph_osdmap_decode(&p, end, ceph_msgr2(osdc->client));
if (IS_ERR(newmap)) if (IS_ERR(newmap))
return PTR_ERR(newmap); return PTR_ERR(newmap);
@ -5575,6 +5577,7 @@ static void put_osd_con(struct ceph_connection *con)
/* /*
* authentication * authentication
*/ */
/* /*
* Note: returned pointer is the address of a structure that's * Note: returned pointer is the address of a structure that's
* managed separately. Caller must *not* attempt to free it. * managed separately. Caller must *not* attempt to free it.
@ -5586,23 +5589,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_osd_client *osdc = o->o_osdc; struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth; struct ceph_auth_handshake *auth = &o->o_auth;
int ret;
if (force_new && auth->authorizer) { ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_OSD,
ceph_auth_destroy_authorizer(auth->authorizer); force_new, proto, NULL, NULL);
auth->authorizer = NULL; if (ret)
} return ERR_PTR(ret);
if (!auth->authorizer) {
int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
} else {
int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_OSD,
auth);
if (ret)
return ERR_PTR(ret);
}
*proto = ac->protocol;
return auth; return auth;
} }
@ -5623,8 +5615,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
struct ceph_osd *o = con->private; struct ceph_osd *o = con->private;
struct ceph_osd_client *osdc = o->o_osdc; struct ceph_osd_client *osdc = o->o_osdc;
struct ceph_auth_client *ac = osdc->client->monc.auth; struct ceph_auth_client *ac = osdc->client->monc.auth;
struct ceph_auth_handshake *auth = &o->o_auth;
return ceph_auth_verify_authorizer_reply(ac, o->o_auth.authorizer); return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
NULL, NULL, NULL, NULL);
} }
static int invalidate_authorizer(struct ceph_connection *con) static int invalidate_authorizer(struct ceph_connection *con)
@ -5637,6 +5632,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
return ceph_monc_validate_auth(&osdc->client->monc); return ceph_monc_validate_auth(&osdc->client->monc);
} }
/*
 * ->get_auth_request() connection op for an OSD connection.
 *
 * Calls ceph_auth_get_authorizer() for CEPH_ENTITY_TYPE_OSD with this
 * OSD's handshake state (o_auth), @buf and @buf_len.  On success the
 * handshake's authorizer buffer and its length are reported through
 * @authorizer / @authorizer_len.
 *
 * Returns 0 on success or the non-zero value from
 * ceph_auth_get_authorizer(), in which case the out parameters are not
 * written by this function.
 */
static int osd_get_auth_request(struct ceph_connection *con,
				void *buf, int *buf_len,
				void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *hs = &osd->o_auth;
	int err;

	err = ceph_auth_get_authorizer(osd->o_osdc->client->monc.auth, hs,
				       CEPH_ENTITY_TYPE_OSD, buf, buf_len);
	if (!err) {
		*authorizer = hs->authorizer_buf;
		*authorizer_len = hs->authorizer_buf_len;
	}

	return err;
}
/*
 * ->handle_auth_reply_more() connection op for an OSD connection.
 *
 * Passes @reply to ceph_auth_handle_svc_reply_more() along with this
 * OSD's handshake state (o_auth), @buf and @buf_len.  On success the
 * handshake's authorizer buffer and its length are reported through
 * @authorizer / @authorizer_len.
 *
 * Returns 0 on success or the non-zero value from
 * ceph_auth_handle_svc_reply_more(), in which case the out parameters
 * are not written by this function.
 */
static int osd_handle_auth_reply_more(struct ceph_connection *con,
				      void *reply, int reply_len,
				      void *buf, int *buf_len,
				      void **authorizer, int *authorizer_len)
{
	struct ceph_osd *osd = con->private;
	struct ceph_auth_handshake *hs = &osd->o_auth;
	int err;

	err = ceph_auth_handle_svc_reply_more(osd->o_osdc->client->monc.auth,
					      hs, reply, reply_len,
					      buf, buf_len);
	if (err)
		return err;

	*authorizer_len = hs->authorizer_buf_len;
	*authorizer = hs->authorizer_buf;
	return 0;
}
/*
 * ->handle_auth_done() connection op for an OSD connection.
 *
 * Thin wrapper: forwards the reply and the key/secret output buffers
 * to ceph_auth_handle_svc_reply_done() together with this OSD's
 * handshake state (o_auth) and returns that call's result unchanged.
 * @global_id is not used here.
 */
static int osd_handle_auth_done(struct ceph_connection *con,
				u64 global_id, void *reply, int reply_len,
				u8 *session_key, int *session_key_len,
				u8 *con_secret, int *con_secret_len)
{
	struct ceph_osd *osd = con->private;
	int ret;

	ret = ceph_auth_handle_svc_reply_done(osd->o_osdc->client->monc.auth,
					      &osd->o_auth, reply, reply_len,
					      session_key, session_key_len,
					      con_secret, con_secret_len);
	return ret;
}
/*
 * ->handle_auth_bad_method() connection op for an OSD connection.
 *
 * Reports the rejection to ceph_auth_handle_bad_authorizer() for
 * CEPH_ENTITY_TYPE_OSD.  If that returns non-zero,
 * ceph_monc_validate_auth() is called as well.
 *
 * Never returns 0: the result is the error from
 * ceph_monc_validate_auth() if it was called and failed, and -EACCES
 * in every other case.
 */
static int osd_handle_auth_bad_method(struct ceph_connection *con,
				      int used_proto, int result,
				      const int *allowed_protos, int proto_cnt,
				      const int *allowed_modes, int mode_cnt)
{
	struct ceph_osd *osd = con->private;
	struct ceph_mon_client *monc = &osd->o_osdc->client->monc;
	int err;

	if (!ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_OSD,
					     used_proto, result,
					     allowed_protos, proto_cnt,
					     allowed_modes, mode_cnt))
		return -EACCES;

	err = ceph_monc_validate_auth(monc);
	return err ? err : -EACCES;
}
static void osd_reencode_message(struct ceph_msg *msg) static void osd_reencode_message(struct ceph_msg *msg)
{ {
int type = le16_to_cpu(msg->hdr.type); int type = le16_to_cpu(msg->hdr.type);
@ -5674,4 +5743,8 @@ static const struct ceph_connection_operations osd_con_ops = {
.sign_message = osd_sign_message, .sign_message = osd_sign_message,
.check_message_signature = osd_check_message_signature, .check_message_signature = osd_check_message_signature,
.fault = osd_fault, .fault = osd_fault,
.get_auth_request = osd_get_auth_request,
.handle_auth_reply_more = osd_handle_auth_reply_more,
.handle_auth_done = osd_handle_auth_done,
.handle_auth_bad_method = osd_handle_auth_bad_method,
}; };

View File

@ -1647,7 +1647,8 @@ static int decode_old_pg_upmap_items(void **p, void *end,
/* /*
* decode a full map. * decode a full map.
*/ */
static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map) static int osdmap_decode(void **p, void *end, bool msgr2,
struct ceph_osdmap *map)
{ {
u8 struct_v; u8 struct_v;
u32 epoch = 0; u32 epoch = 0;
@ -1718,9 +1719,16 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
goto e_inval; goto e_inval;
for (i = 0; i < map->max_osd; i++) { for (i = 0; i < map->max_osd; i++) {
err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]); struct ceph_entity_addr *addr = &map->osd_addr[i];
if (struct_v >= 8)
err = ceph_decode_entity_addrvec(p, end, msgr2, addr);
else
err = ceph_decode_entity_addr(p, end, addr);
if (err) if (err)
goto bad; goto bad;
dout("%s osd%d addr %s\n", __func__, i, ceph_pr_addr(addr));
} }
/* pg_temp */ /* pg_temp */
@ -1790,7 +1798,7 @@ bad:
/* /*
* Allocate and decode a full map. * Allocate and decode a full map.
*/ */
struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end) struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end, bool msgr2)
{ {
struct ceph_osdmap *map; struct ceph_osdmap *map;
int ret; int ret;
@ -1799,7 +1807,7 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
if (!map) if (!map)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ret = osdmap_decode(p, end, map); ret = osdmap_decode(p, end, msgr2, map);
if (ret) { if (ret) {
ceph_osdmap_destroy(map); ceph_osdmap_destroy(map);
return ERR_PTR(ret); return ERR_PTR(ret);
@ -1817,12 +1825,13 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
* new_state: { osd=6, xorstate=EXISTS } # clear osd_state * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
*/ */
static int decode_new_up_state_weight(void **p, void *end, u8 struct_v, static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_osdmap *map) bool msgr2, struct ceph_osdmap *map)
{ {
void *new_up_client; void *new_up_client;
void *new_state; void *new_state;
void *new_weight_end; void *new_weight_end;
u32 len; u32 len;
int ret;
int i; int i;
new_up_client = *p; new_up_client = *p;
@ -1831,8 +1840,12 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
struct ceph_entity_addr addr; struct ceph_entity_addr addr;
ceph_decode_skip_32(p, end, e_inval); ceph_decode_skip_32(p, end, e_inval);
if (ceph_decode_entity_addr(p, end, &addr)) if (struct_v >= 7)
goto e_inval; ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
} }
new_state = *p; new_state = *p;
@ -1874,7 +1887,6 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
while (len--) { while (len--) {
s32 osd; s32 osd;
u32 xorstate; u32 xorstate;
int ret;
osd = ceph_decode_32(p); osd = ceph_decode_32(p);
if (struct_v >= 5) if (struct_v >= 5)
@ -1910,8 +1922,15 @@ static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
osd = ceph_decode_32(p); osd = ceph_decode_32(p);
BUG_ON(osd >= map->max_osd); BUG_ON(osd >= map->max_osd);
if (ceph_decode_entity_addr(p, end, &addr)) if (struct_v >= 7)
goto e_inval; ret = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
else
ret = ceph_decode_entity_addr(p, end, &addr);
if (ret)
return ret;
dout("%s osd%d addr %s\n", __func__, osd, ceph_pr_addr(&addr));
pr_info("osd%d up\n", osd); pr_info("osd%d up\n", osd);
map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP; map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
map->osd_addr[osd] = addr; map->osd_addr[osd] = addr;
@ -1927,7 +1946,7 @@ e_inval:
/* /*
* decode and apply an incremental map update. * decode and apply an incremental map update.
*/ */
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, bool msgr2,
struct ceph_osdmap *map) struct ceph_osdmap *map)
{ {
struct ceph_fsid fsid; struct ceph_fsid fsid;
@ -1962,7 +1981,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
if (len > 0) { if (len > 0) {
dout("apply_incremental full map len %d, %p to %p\n", dout("apply_incremental full map len %d, %p to %p\n",
len, *p, end); len, *p, end);
return ceph_osdmap_decode(p, min(*p+len, end)); return ceph_osdmap_decode(p, min(*p+len, end), msgr2);
} }
/* new crush? */ /* new crush? */
@ -2014,7 +2033,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
} }
/* new_up_client, new_state, new_weight */ /* new_up_client, new_state, new_weight */
err = decode_new_up_state_weight(p, end, struct_v, map); err = decode_new_up_state_weight(p, end, struct_v, msgr2, map);
if (err) if (err)
goto bad; goto bad;