mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 06:43:09 +00:00
libceph: introduce ceph_osd_request_target, calc_target()
Introduce ceph_osd_request_target, containing all mapping-related fields of ceph_osd_request, and calc_target() for calculating mappings and populating it. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
This commit is contained in:
parent
04812acf57
commit
63244fa123
@ -1774,7 +1774,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
|
||||
wr_req->r_flags = CEPH_OSD_FLAG_WRITE |
|
||||
CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
|
||||
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
|
||||
wr_req->r_base_oloc.pool = pool;
|
||||
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
|
||||
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
|
||||
|
||||
err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS);
|
||||
|
@ -714,7 +714,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
|
||||
req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
|
||||
CEPH_OSD_FLAG_ONDISK |
|
||||
CEPH_OSD_FLAG_WRITE;
|
||||
req->r_base_oloc = orig_req->r_base_oloc;
|
||||
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
|
||||
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
|
||||
|
||||
ret = ceph_osdc_alloc_messages(req, GFP_NOFS);
|
||||
|
@ -24,6 +24,8 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *,
|
||||
struct ceph_msg *);
|
||||
typedef void (*ceph_osdc_unsafe_callback_t)(struct ceph_osd_request *, bool);
|
||||
|
||||
/* OSD id meaning "not mapped to any OSD" */
#define CEPH_HOMELESS_OSD	(-1)
|
||||
|
||||
/* a given osd we're communicating with */
|
||||
struct ceph_osd {
|
||||
atomic_t o_ref;
|
||||
@ -118,6 +120,27 @@ struct ceph_osd_req_op {
|
||||
};
|
||||
};
|
||||
|
||||
struct ceph_osd_request_target {
|
||||
struct ceph_object_id base_oid;
|
||||
struct ceph_object_locator base_oloc;
|
||||
struct ceph_object_id target_oid;
|
||||
struct ceph_object_locator target_oloc;
|
||||
|
||||
struct ceph_pg pgid;
|
||||
u32 pg_num;
|
||||
u32 pg_num_mask;
|
||||
struct ceph_osds acting;
|
||||
struct ceph_osds up;
|
||||
int size;
|
||||
int min_size;
|
||||
bool sort_bitwise;
|
||||
|
||||
unsigned int flags; /* CEPH_OSD_FLAG_* */
|
||||
bool paused;
|
||||
|
||||
int osd;
|
||||
};
|
||||
|
||||
/* an in-flight request */
|
||||
struct ceph_osd_request {
|
||||
u64 r_tid; /* unique for this client */
|
||||
|
@ -28,6 +28,7 @@ int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs);
|
||||
|
||||
#define CEPH_POOL_FLAG_HASHPSPOOL (1ULL << 0) /* hash pg seed and pool id
|
||||
together */
|
||||
#define CEPH_POOL_FLAG_FULL (1ULL << 1) /* pool is full */
|
||||
|
||||
struct ceph_pg_pool_info {
|
||||
struct rb_node node;
|
||||
@ -62,6 +63,22 @@ struct ceph_object_locator {
|
||||
s64 pool;
|
||||
};
|
||||
|
||||
static inline void ceph_oloc_init(struct ceph_object_locator *oloc)
|
||||
{
|
||||
oloc->pool = -1;
|
||||
}
|
||||
|
||||
static inline bool ceph_oloc_empty(const struct ceph_object_locator *oloc)
|
||||
{
|
||||
return oloc->pool == -1;
|
||||
}
|
||||
|
||||
static inline void ceph_oloc_copy(struct ceph_object_locator *dest,
|
||||
const struct ceph_object_locator *src)
|
||||
{
|
||||
dest->pool = src->pool;
|
||||
}
|
||||
|
||||
/*
|
||||
* Maximum object name length supported by the kernel client
|
||||
*
|
||||
@ -227,6 +244,23 @@ static inline void ceph_osds_init(struct ceph_osds *set)
|
||||
|
||||
void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src);
|
||||
|
||||
/*
 * Compare the old and new state of a PG mapping.  Returns true if the
 * acting or up set (members or primary) differs, if size or min_size
 * differs, if @pgid is split when going from @old_pg_num to
 * @new_pg_num, or if the sort_bitwise setting differs.
 */
bool ceph_is_new_interval(const struct ceph_osds *old_acting,
|
||||
const struct ceph_osds *new_acting,
|
||||
const struct ceph_osds *old_up,
|
||||
const struct ceph_osds *new_up,
|
||||
int old_size,
|
||||
int new_size,
|
||||
int old_min_size,
|
||||
int new_min_size,
|
||||
u32 old_pg_num,
|
||||
u32 new_pg_num,
|
||||
bool old_sort_bitwise,
|
||||
bool new_sort_bitwise,
|
||||
const struct ceph_pg *pgid);
|
||||
/*
 * Returns true if the primary of the acting set changed, or - only when
 * @any_change is set - if the members of the acting set differ at all.
 */
bool ceph_osds_changed(const struct ceph_osds *old_acting,
|
||||
const struct ceph_osds *new_acting,
|
||||
bool any_change);
|
||||
|
||||
/* calculate mapping of a file extent to an object */
|
||||
extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout,
|
||||
u64 off, u64 len,
|
||||
|
@ -153,6 +153,11 @@ extern const char *ceph_osd_state_name(int s);
|
||||
#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */
|
||||
#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */
|
||||
#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */
|
||||
#define CEPH_OSDMAP_NOSCRUB (1<<11) /* block periodic scrub */
|
||||
#define CEPH_OSDMAP_NODEEP_SCRUB (1<<12) /* block periodic deep-scrub */
|
||||
#define CEPH_OSDMAP_NOTIERAGENT (1<<13) /* disable tiering agent */
|
||||
#define CEPH_OSDMAP_NOREBALANCE (1<<14) /* block osd backfill unless pg is degraded */
|
||||
#define CEPH_OSDMAP_SORTBITWISE (1<<15) /* use bitwise hobject_t sort */
|
||||
|
||||
/*
|
||||
* The error code to return when an OSD can't handle a write
|
||||
|
@ -298,6 +298,30 @@ static void osd_req_op_data_release(struct ceph_osd_request *osd_req,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Assumes @t is zero-initialized.
|
||||
*/
|
||||
static void target_init(struct ceph_osd_request_target *t)
|
||||
{
|
||||
ceph_oid_init(&t->base_oid);
|
||||
ceph_oloc_init(&t->base_oloc);
|
||||
ceph_oid_init(&t->target_oid);
|
||||
ceph_oloc_init(&t->target_oloc);
|
||||
|
||||
ceph_osds_init(&t->acting);
|
||||
ceph_osds_init(&t->up);
|
||||
t->size = -1;
|
||||
t->min_size = -1;
|
||||
|
||||
t->osd = CEPH_HOMELESS_OSD;
|
||||
}
|
||||
|
||||
static void target_destroy(struct ceph_osd_request_target *t)
|
||||
{
|
||||
ceph_oid_destroy(&t->base_oid);
|
||||
ceph_oid_destroy(&t->target_oid);
|
||||
}
|
||||
|
||||
/*
|
||||
* requests
|
||||
*/
|
||||
@ -1273,6 +1297,11 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_osdc_set_request_linger);
|
||||
|
||||
static bool __pool_full(struct ceph_pg_pool_info *pi)
|
||||
{
|
||||
return pi->flags & CEPH_POOL_FLAG_FULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns whether a request should be blocked from being sent
|
||||
* based on the current osdmap and osd_client settings.
|
||||
@ -1289,6 +1318,20 @@ static bool __req_should_be_paused(struct ceph_osd_client *osdc,
|
||||
(req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr);
|
||||
}
|
||||
|
||||
static bool target_should_be_paused(struct ceph_osd_client *osdc,
|
||||
const struct ceph_osd_request_target *t,
|
||||
struct ceph_pg_pool_info *pi)
|
||||
{
|
||||
bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
|
||||
bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
|
||||
ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
|
||||
__pool_full(pi);
|
||||
|
||||
WARN_ON(pi->id != t->base_oloc.pool);
|
||||
return (t->flags & CEPH_OSD_FLAG_READ && pauserd) ||
|
||||
(t->flags & CEPH_OSD_FLAG_WRITE && pausewr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate mapping of a request to a PG. Takes tiering into account.
|
||||
*/
|
||||
@ -1328,6 +1371,116 @@ static int __calc_request_pg(struct ceph_osdmap *osdmap,
|
||||
&req->r_target_oloc, pg_out);
|
||||
}
|
||||
|
||||
/* Outcome of calc_target() */
enum calc_target_result {
	CALC_TARGET_NO_ACTION	= 0,	/* mapping unchanged */
	CALC_TARGET_NEED_RESEND	= 1,	/* mapping changed or target unpaused */
	CALC_TARGET_POOL_DNE	= 2,	/* pool not found in the osdmap */
};
|
||||
|
||||
static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
|
||||
struct ceph_osd_request_target *t,
|
||||
u32 *last_force_resend,
|
||||
bool any_change)
|
||||
{
|
||||
struct ceph_pg_pool_info *pi;
|
||||
struct ceph_pg pgid, last_pgid;
|
||||
struct ceph_osds up, acting;
|
||||
bool force_resend = false;
|
||||
bool need_check_tiering = false;
|
||||
bool need_resend = false;
|
||||
bool sort_bitwise = ceph_osdmap_flag(osdc->osdmap,
|
||||
CEPH_OSDMAP_SORTBITWISE);
|
||||
enum calc_target_result ct_res;
|
||||
int ret;
|
||||
|
||||
pi = ceph_pg_pool_by_id(osdc->osdmap, t->base_oloc.pool);
|
||||
if (!pi) {
|
||||
t->osd = CEPH_HOMELESS_OSD;
|
||||
ct_res = CALC_TARGET_POOL_DNE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (osdc->osdmap->epoch == pi->last_force_request_resend) {
|
||||
if (last_force_resend &&
|
||||
*last_force_resend < pi->last_force_request_resend) {
|
||||
*last_force_resend = pi->last_force_request_resend;
|
||||
force_resend = true;
|
||||
} else if (!last_force_resend) {
|
||||
force_resend = true;
|
||||
}
|
||||
}
|
||||
if (ceph_oid_empty(&t->target_oid) || force_resend) {
|
||||
ceph_oid_copy(&t->target_oid, &t->base_oid);
|
||||
need_check_tiering = true;
|
||||
}
|
||||
if (ceph_oloc_empty(&t->target_oloc) || force_resend) {
|
||||
ceph_oloc_copy(&t->target_oloc, &t->base_oloc);
|
||||
need_check_tiering = true;
|
||||
}
|
||||
|
||||
if (need_check_tiering &&
|
||||
(t->flags & CEPH_OSD_FLAG_IGNORE_OVERLAY) == 0) {
|
||||
if (t->flags & CEPH_OSD_FLAG_READ && pi->read_tier >= 0)
|
||||
t->target_oloc.pool = pi->read_tier;
|
||||
if (t->flags & CEPH_OSD_FLAG_WRITE && pi->write_tier >= 0)
|
||||
t->target_oloc.pool = pi->write_tier;
|
||||
}
|
||||
|
||||
ret = ceph_object_locator_to_pg(osdc->osdmap, &t->target_oid,
|
||||
&t->target_oloc, &pgid);
|
||||
if (ret) {
|
||||
WARN_ON(ret != -ENOENT);
|
||||
t->osd = CEPH_HOMELESS_OSD;
|
||||
ct_res = CALC_TARGET_POOL_DNE;
|
||||
goto out;
|
||||
}
|
||||
last_pgid.pool = pgid.pool;
|
||||
last_pgid.seed = ceph_stable_mod(pgid.seed, t->pg_num, t->pg_num_mask);
|
||||
|
||||
ceph_pg_to_up_acting_osds(osdc->osdmap, &pgid, &up, &acting);
|
||||
if (any_change &&
|
||||
ceph_is_new_interval(&t->acting,
|
||||
&acting,
|
||||
&t->up,
|
||||
&up,
|
||||
t->size,
|
||||
pi->size,
|
||||
t->min_size,
|
||||
pi->min_size,
|
||||
t->pg_num,
|
||||
pi->pg_num,
|
||||
t->sort_bitwise,
|
||||
sort_bitwise,
|
||||
&last_pgid))
|
||||
force_resend = true;
|
||||
|
||||
if (t->paused && !target_should_be_paused(osdc, t, pi)) {
|
||||
t->paused = false;
|
||||
need_resend = true;
|
||||
}
|
||||
|
||||
if (ceph_pg_compare(&t->pgid, &pgid) ||
|
||||
ceph_osds_changed(&t->acting, &acting, any_change) ||
|
||||
force_resend) {
|
||||
t->pgid = pgid; /* struct */
|
||||
ceph_osds_copy(&t->acting, &acting);
|
||||
ceph_osds_copy(&t->up, &up);
|
||||
t->size = pi->size;
|
||||
t->min_size = pi->min_size;
|
||||
t->pg_num = pi->pg_num;
|
||||
t->pg_num_mask = pi->pg_num_mask;
|
||||
t->sort_bitwise = sort_bitwise;
|
||||
|
||||
t->osd = acting.primary;
|
||||
need_resend = true;
|
||||
}
|
||||
|
||||
ct_res = need_resend ? CALC_TARGET_NEED_RESEND : CALC_TARGET_NO_ACTION;
|
||||
out:
|
||||
dout("%s t %p -> ct_res %d osd %d\n", __func__, t, ct_res, t->osd);
|
||||
return ct_res;
|
||||
}
|
||||
|
||||
static void __enqueue_request(struct ceph_osd_request *req)
|
||||
{
|
||||
struct ceph_osd_client *osdc = req->r_osdc;
|
||||
@ -1805,12 +1958,12 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg)
|
||||
redir.oloc.pool = -1;
|
||||
}
|
||||
|
||||
if (redir.oloc.pool != -1) {
|
||||
if (!ceph_oloc_empty(&redir.oloc)) {
|
||||
dout("redirect pool %lld\n", redir.oloc.pool);
|
||||
|
||||
__unregister_request(osdc, req);
|
||||
|
||||
req->r_target_oloc = redir.oloc; /* struct */
|
||||
ceph_oloc_copy(&req->r_target_oloc, &redir.oloc);
|
||||
|
||||
/*
|
||||
* Start redirect requests with nofail=true. If
|
||||
|
@ -1521,6 +1521,32 @@ void ceph_oid_destroy(struct ceph_object_id *oid)
|
||||
}
|
||||
EXPORT_SYMBOL(ceph_oid_destroy);
|
||||
|
||||
/*
|
||||
* osds only
|
||||
*/
|
||||
static bool __osds_equal(const struct ceph_osds *lhs,
|
||||
const struct ceph_osds *rhs)
|
||||
{
|
||||
if (lhs->size == rhs->size &&
|
||||
!memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0])))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* osds + primary
|
||||
*/
|
||||
static bool osds_equal(const struct ceph_osds *lhs,
|
||||
const struct ceph_osds *rhs)
|
||||
{
|
||||
if (__osds_equal(lhs, rhs) &&
|
||||
lhs->primary == rhs->primary)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool osds_valid(const struct ceph_osds *set)
|
||||
{
|
||||
/* non-empty set */
|
||||
@ -1553,6 +1579,101 @@ void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
|
||||
dest->primary = src->primary;
|
||||
}
|
||||
|
||||
static bool is_split(const struct ceph_pg *pgid,
|
||||
u32 old_pg_num,
|
||||
u32 new_pg_num)
|
||||
{
|
||||
int old_bits = calc_bits_of(old_pg_num);
|
||||
int old_mask = (1 << old_bits) - 1;
|
||||
int n;
|
||||
|
||||
WARN_ON(pgid->seed >= old_pg_num);
|
||||
if (new_pg_num <= old_pg_num)
|
||||
return false;
|
||||
|
||||
for (n = 1; ; n++) {
|
||||
int next_bit = n << (old_bits - 1);
|
||||
u32 s = next_bit | pgid->seed;
|
||||
|
||||
if (s < old_pg_num || s == pgid->seed)
|
||||
continue;
|
||||
if (s >= new_pg_num)
|
||||
break;
|
||||
|
||||
s = ceph_stable_mod(s, old_pg_num, old_mask);
|
||||
if (s == pgid->seed)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ceph_is_new_interval(const struct ceph_osds *old_acting,
|
||||
const struct ceph_osds *new_acting,
|
||||
const struct ceph_osds *old_up,
|
||||
const struct ceph_osds *new_up,
|
||||
int old_size,
|
||||
int new_size,
|
||||
int old_min_size,
|
||||
int new_min_size,
|
||||
u32 old_pg_num,
|
||||
u32 new_pg_num,
|
||||
bool old_sort_bitwise,
|
||||
bool new_sort_bitwise,
|
||||
const struct ceph_pg *pgid)
|
||||
{
|
||||
return !osds_equal(old_acting, new_acting) ||
|
||||
!osds_equal(old_up, new_up) ||
|
||||
old_size != new_size ||
|
||||
old_min_size != new_min_size ||
|
||||
is_split(pgid, old_pg_num, new_pg_num) ||
|
||||
old_sort_bitwise != new_sort_bitwise;
|
||||
}
|
||||
|
||||
static int calc_pg_rank(int osd, const struct ceph_osds *acting)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < acting->size; i++) {
|
||||
if (acting->osds[i] == osd)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static bool primary_changed(const struct ceph_osds *old_acting,
|
||||
const struct ceph_osds *new_acting)
|
||||
{
|
||||
if (!old_acting->size && !new_acting->size)
|
||||
return false; /* both still empty */
|
||||
|
||||
if (!old_acting->size ^ !new_acting->size)
|
||||
return true; /* was empty, now not, or vice versa */
|
||||
|
||||
if (old_acting->primary != new_acting->primary)
|
||||
return true; /* primary changed */
|
||||
|
||||
if (calc_pg_rank(old_acting->primary, old_acting) !=
|
||||
calc_pg_rank(new_acting->primary, new_acting))
|
||||
return true;
|
||||
|
||||
return false; /* same primary (tho replicas may have changed) */
|
||||
}
|
||||
|
||||
/*
 * Returns true if the acting primary changed.  With @any_change set, a
 * difference in the members of the acting set is reported as well.
 */
bool ceph_osds_changed(const struct ceph_osds *old_acting,
		       const struct ceph_osds *new_acting,
		       bool any_change)
{
	return primary_changed(old_acting, new_acting) ||
	       (any_change && !__osds_equal(old_acting, new_acting));
}
|
||||
|
||||
/*
|
||||
* calculate file layout from given offset, length.
|
||||
* fill in correct oid, logical length, and object extent
|
||||
|
Loading…
Reference in New Issue
Block a user