vmw_balloon: change batch/single lock abstractions

The current abstractions for batch vs single operations seem suboptimal
and complicate the implementation of additional features (OOM,
compaction).

The immediate problem of the current abstractions is that they cause
differences in how operations are handled when batching is on or off.
For example, the refused_alloc counter is not updated when batching is
on. These discrepancies are caused by code redundancies.

Instead, this patch presents three type of operations, according to
whether batching is on or off: (1) add page, (2) communication with the
hypervisor and (3) retrieving the status of a page.

To avoid the overhead of virtual functions, and since we do not expect
additional interfaces for communication with the hypervisor, we use
static keys instead of virtual functions.

Finally, while we are at it, change vmballoon_init_batching() to return
int instead of bool, to be consistent in the return type and avoid
potential coding errors.

Reviewed-by: Xavier Deguillard <xdeguillard@vmware.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Nadav Amit 2018-09-20 10:30:12 -07:00 committed by Greg Kroah-Hartman
parent 622074a9f6
commit df8d0d42af

View File

@ -165,15 +165,7 @@ struct vmballoon_stats {
#define STATS_INC(stat) #define STATS_INC(stat)
#endif #endif
struct vmballoon; static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
struct vmballoon_ops {
void (*add_page)(struct vmballoon *b, int idx, struct page *p);
int (*lock)(struct vmballoon *b, unsigned int num_pages,
bool is_2m_pages);
int (*unlock)(struct vmballoon *b, unsigned int num_pages,
bool is_2m_pages);
};
struct vmballoon_page_size { struct vmballoon_page_size {
/* list of reserved physical pages */ /* list of reserved physical pages */
@ -223,8 +215,6 @@ struct vmballoon {
unsigned int batch_max_pages; unsigned int batch_max_pages;
struct page *page; struct page *page;
const struct vmballoon_ops *ops;
#ifdef CONFIG_DEBUG_FS #ifdef CONFIG_DEBUG_FS
/* statistics */ /* statistics */
struct vmballoon_stats stats; struct vmballoon_stats stats;
@ -393,53 +383,6 @@ static bool vmballoon_send_get_target(struct vmballoon *b)
return false; return false;
} }
/*
* Notify the host about allocated page so that host can use it without
* fear that guest will need it. Host may reject some pages, we need to
* check the return value and maybe submit a different page.
*/
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
unsigned int *hv_status, bool lock)
{
unsigned long status, cmd;
u32 pfn32;
pfn32 = (u32)pfn;
if (pfn32 != pfn)
return -EINVAL;
cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
*hv_status = status = vmballoon_cmd(b, cmd, pfn, 0);
if (status == VMW_BALLOON_SUCCESS)
return 0;
return -EIO;
}
static int vmballoon_send_batched_lock(struct vmballoon *b,
unsigned int num_pages, bool is_2m_pages,
bool lock)
{
unsigned long pfn = PHYS_PFN(virt_to_phys(b->batch_page));
unsigned long status, cmd;
if (lock)
cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
VMW_BALLOON_CMD_BATCHED_LOCK;
else
cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
VMW_BALLOON_CMD_BATCHED_UNLOCK;
status = vmballoon_cmd(b, cmd, pfn, num_pages);
if (status == VMW_BALLOON_SUCCESS)
return 0;
return 1;
}
static struct page *vmballoon_alloc_page(bool is_2m_page) static struct page *vmballoon_alloc_page(bool is_2m_page)
{ {
if (is_2m_page) if (is_2m_page)
@ -488,88 +431,126 @@ static void vmballoon_pop(struct vmballoon *b)
b->batch_page = NULL; b->batch_page = NULL;
} }
/* /**
* Notify the host of a ballooned page. If host rejects the page put it on the * vmballoon_status_page - returns the status of (un)lock operation
* refuse list, those refused page are then released at the end of the *
* inflation cycle. * @b: pointer to the balloon.
* @idx: index for the page for which the operation is performed.
* @p: pointer to where the page struct is returned.
*
* Following a lock or unlock operation, returns the status of the operation for
* an individual page. Provides the page that the operation was performed on on
* the @page argument.
*
* Returns: The status of a lock or unlock operation for an individual page.
*/ */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages, static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
bool is_2m_pages) struct page **p)
{ {
int locked, hv_status; if (static_branch_likely(&vmw_balloon_batching)) {
struct page *page = b->page; /* batching mode */
struct vmballoon_page_size *page_size = &b->page_sizes[false]; *p = pfn_to_page(b->batch_page[idx].pfn);
return b->batch_page[idx].status;
/* is_2m_pages can never happen as 2m pages support implies batching */
locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
true);
if (locked) {
STATS_INC(b->stats.refused_alloc[false]);
if (locked == -EIO &&
(hv_status == VMW_BALLOON_ERROR_RESET ||
hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED)) {
vmballoon_free_page(page, false);
return -EIO;
}
/*
* Place page on the list of non-balloonable pages
* and retry allocation, unless we already accumulated
* too many of them, in which case take a breather.
*/
if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
page_size->n_refused_pages++;
list_add(&page->lru, &page_size->refused_pages);
} else {
vmballoon_free_page(page, false);
}
return locked;
} }
/* track allocated page */ /* non-batching mode */
list_add(&page->lru, &page_size->pages); *p = b->page;
/* update balloon size */ /*
b->size++; * If a failure occurs, the indication will be provided in the status
* of the entire operation, which is considered before the individual
return 0; * page status. So for non-batching mode, the indication is always of
* success.
*/
return VMW_BALLOON_SUCCESS;
} }
static int vmballoon_lock_batched_page(struct vmballoon *b, /**
unsigned int num_pages, bool is_2m_pages) * vmballoon_lock_op - notifies the host about inflated/deflated pages.
* @b: pointer to the balloon.
* @num_pages: number of inflated/deflated pages.
* @is_2m_pages: whether the page(s) are 2M (or 4k).
* @lock: whether the operation is lock (or unlock).
*
* Notify the host about page(s) that were ballooned (or removed from the
* balloon) so that host can use it without fear that guest will need it (or
* stop using them since the VM does). Host may reject some pages, we need to
* check the return value and maybe submit a different page. The pages that are
* inflated/deflated are pointed by @b->page.
*
* Return: result as provided by the hypervisor.
*/
static unsigned long vmballoon_lock_op(struct vmballoon *b,
unsigned int num_pages,
bool is_2m_pages, bool lock)
{ {
int locked, i; unsigned long cmd, pfn;
u16 size_per_page = vmballoon_page_size(is_2m_pages);
locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, true); if (static_branch_likely(&vmw_balloon_batching)) {
if (lock)
cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_LOCK :
VMW_BALLOON_CMD_BATCHED_LOCK;
else
cmd = is_2m_pages ? VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
VMW_BALLOON_CMD_BATCHED_UNLOCK;
if (locked > 0) { pfn = PHYS_PFN(virt_to_phys(b->batch_page));
for (i = 0; i < num_pages; i++) { } else {
struct page *p = pfn_to_page(b->batch_page[i].pfn); cmd = lock ? VMW_BALLOON_CMD_LOCK : VMW_BALLOON_CMD_UNLOCK;
pfn = page_to_pfn(b->page);
vmballoon_free_page(p, is_2m_pages); /* In non-batching mode, PFNs must fit in 32-bit */
} if (unlikely(pfn != (u32)pfn))
return VMW_BALLOON_ERROR_PPN_INVALID;
return -EIO;
} }
return vmballoon_cmd(b, cmd, pfn, num_pages);
}
static int vmballoon_lock(struct vmballoon *b, unsigned int num_pages,
bool is_2m_pages)
{
unsigned long batch_status;
int i;
u16 size_per_page = vmballoon_page_size(is_2m_pages);
batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, true);
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *p = pfn_to_page(b->batch_page[i].pfn); unsigned long status;
struct page *p;
struct vmballoon_page_size *page_size = struct vmballoon_page_size *page_size =
&b->page_sizes[is_2m_pages]; &b->page_sizes[is_2m_pages];
locked = b->batch_page[i].status; status = vmballoon_status_page(b, i, &p);
switch (locked) { /*
case VMW_BALLOON_SUCCESS: * Failure of the whole batch overrides a single operation
* results.
*/
if (batch_status != VMW_BALLOON_SUCCESS)
status = batch_status;
if (status == VMW_BALLOON_SUCCESS) {
/* track allocated page */
list_add(&p->lru, &page_size->pages); list_add(&p->lru, &page_size->pages);
/* update balloon size */
b->size += size_per_page; b->size += size_per_page;
break; continue;
}
/* Error occurred */
STATS_INC(b->stats.refused_alloc[is_2m_pages]);
switch (status) {
case VMW_BALLOON_ERROR_PPN_PINNED: case VMW_BALLOON_ERROR_PPN_PINNED:
case VMW_BALLOON_ERROR_PPN_INVALID: case VMW_BALLOON_ERROR_PPN_INVALID:
/*
* Place page on the list of non-balloonable pages
* and retry allocation, unless we already accumulated
* too many of them, in which case take a breather.
*/
if (page_size->n_refused_pages if (page_size->n_refused_pages
< VMW_BALLOON_MAX_REFUSED) { < VMW_BALLOON_MAX_REFUSED) {
list_add(&p->lru, &page_size->refused_pages); list_add(&p->lru, &page_size->refused_pages);
@ -587,7 +568,7 @@ static int vmballoon_lock_batched_page(struct vmballoon *b,
} }
} }
return 0; return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
} }
/* /*
@ -595,51 +576,31 @@ static int vmballoon_lock_batched_page(struct vmballoon *b,
* the host so it can make sure the page will be available for the guest * the host so it can make sure the page will be available for the guest
* to use, if needed. * to use, if needed.
*/ */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages, static int vmballoon_unlock(struct vmballoon *b, unsigned int num_pages,
bool is_2m_pages) bool is_2m_pages)
{ {
struct page *page = b->page; int i;
struct vmballoon_page_size *page_size = &b->page_sizes[false]; unsigned long batch_status;
unsigned int hv_status;
/* is_2m_pages can never happen as 2m pages support implies batching */
if (!vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
false)) {
list_add(&page->lru, &page_size->pages);
return -EIO;
}
/* deallocate page */
vmballoon_free_page(page, false);
STATS_INC(b->stats.free[false]);
/* update balloon size */
b->size--;
return 0;
}
static int vmballoon_unlock_batched_page(struct vmballoon *b,
unsigned int num_pages, bool is_2m_pages)
{
int locked, i, ret = 0;
bool hv_success;
u16 size_per_page = vmballoon_page_size(is_2m_pages); u16 size_per_page = vmballoon_page_size(is_2m_pages);
hv_success = vmballoon_send_batched_lock(b, num_pages, is_2m_pages, batch_status = vmballoon_lock_op(b, num_pages, is_2m_pages, false);
false);
if (!hv_success)
ret = -EIO;
for (i = 0; i < num_pages; i++) { for (i = 0; i < num_pages; i++) {
struct page *p = pfn_to_page(b->batch_page[i].pfn); struct vmballoon_page_size *page_size;
struct vmballoon_page_size *page_size = unsigned long status;
&b->page_sizes[is_2m_pages]; struct page *p;
locked = b->batch_page[i].status; status = vmballoon_status_page(b, i, &p);
if (!hv_success || locked != VMW_BALLOON_SUCCESS) { page_size = &b->page_sizes[is_2m_pages];
/*
* Failure of the whole batch overrides a single operation
* results.
*/
if (batch_status != VMW_BALLOON_SUCCESS)
status = batch_status;
if (status != VMW_BALLOON_SUCCESS) {
/* /*
* That page wasn't successfully unlocked by the * That page wasn't successfully unlocked by the
* hypervisor, re-add it to the list of pages owned by * hypervisor, re-add it to the list of pages owned by
@ -656,7 +617,7 @@ static int vmballoon_unlock_batched_page(struct vmballoon *b,
} }
} }
return ret; return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
} }
/* /*
@ -681,14 +642,11 @@ static void vmballoon_release_refused_pages(struct vmballoon *b,
static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p) static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{ {
b->page = p; if (static_branch_likely(&vmw_balloon_batching))
} b->batch_page[idx] = (struct vmballoon_batch_entry)
static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
struct page *p)
{
b->batch_page[idx] = (struct vmballoon_batch_entry)
{ .pfn = page_to_pfn(p) }; { .pfn = page_to_pfn(p) };
else
b->page = p;
} }
/* /*
@ -737,7 +695,7 @@ static void vmballoon_inflate(struct vmballoon *b)
if (!page) { if (!page) {
STATS_INC(b->stats.alloc_fail[is_2m_pages]); STATS_INC(b->stats.alloc_fail[is_2m_pages]);
if (is_2m_pages) { if (is_2m_pages) {
b->ops->lock(b, num_pages, true); vmballoon_lock(b, num_pages, true);
/* /*
* ignore errors from locking as we now switch * ignore errors from locking as we now switch
@ -752,9 +710,9 @@ static void vmballoon_inflate(struct vmballoon *b)
break; break;
} }
b->ops->add_page(b, num_pages++, page); vmballoon_add_page(b, num_pages++, page);
if (num_pages == b->batch_max_pages) { if (num_pages == b->batch_max_pages) {
error = b->ops->lock(b, num_pages, is_2m_pages); error = vmballoon_lock(b, num_pages, is_2m_pages);
num_pages = 0; num_pages = 0;
if (error) if (error)
@ -765,7 +723,7 @@ static void vmballoon_inflate(struct vmballoon *b)
} }
if (num_pages > 0) if (num_pages > 0)
b->ops->lock(b, num_pages, is_2m_pages); vmballoon_lock(b, num_pages, is_2m_pages);
vmballoon_release_refused_pages(b, true); vmballoon_release_refused_pages(b, true);
vmballoon_release_refused_pages(b, false); vmballoon_release_refused_pages(b, false);
@ -797,12 +755,12 @@ static void vmballoon_deflate(struct vmballoon *b)
break; break;
list_del(&page->lru); list_del(&page->lru);
b->ops->add_page(b, num_pages++, page); vmballoon_add_page(b, num_pages++, page);
if (num_pages == b->batch_max_pages) { if (num_pages == b->batch_max_pages) {
int error; int error;
error = b->ops->unlock(b, num_pages, error = vmballoon_unlock(b, num_pages,
is_2m_pages); is_2m_pages);
num_pages = 0; num_pages = 0;
if (error) if (error)
@ -813,32 +771,50 @@ static void vmballoon_deflate(struct vmballoon *b)
} }
if (num_pages > 0) if (num_pages > 0)
b->ops->unlock(b, num_pages, is_2m_pages); vmballoon_unlock(b, num_pages, is_2m_pages);
} }
} }
static const struct vmballoon_ops vmballoon_basic_ops = { /**
.add_page = vmballoon_add_page, * vmballoon_deinit_batching - disables batching mode.
.lock = vmballoon_lock_page, *
.unlock = vmballoon_unlock_page * @b: pointer to &struct vmballoon.
}; *
* Disables batching, by deallocating the page for communication with the
* hypervisor and disabling the static key to indicate that batching is off.
*/
static void vmballoon_deinit_batching(struct vmballoon *b)
{
free_page((unsigned long)b->batch_page);
b->batch_page = NULL;
static_branch_disable(&vmw_balloon_batching);
b->batch_max_pages = 1;
}
static const struct vmballoon_ops vmballoon_batched_ops = { /**
.add_page = vmballoon_add_batched_page, * vmballoon_init_batching - enable batching mode.
.lock = vmballoon_lock_batched_page, *
.unlock = vmballoon_unlock_batched_page * @b: pointer to &struct vmballoon.
}; *
* Enables batching, by allocating a page for communication with the hypervisor
static bool vmballoon_init_batching(struct vmballoon *b) * and enabling the static_key to use batching.
*
* Return: zero on success or an appropriate error-code.
*/
static int vmballoon_init_batching(struct vmballoon *b)
{ {
struct page *page; struct page *page;
page = alloc_page(GFP_KERNEL | __GFP_ZERO); page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) if (!page)
return false; return -ENOMEM;
b->batch_page = page_address(page); b->batch_page = page_address(page);
return true; b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);
static_branch_enable(&vmw_balloon_batching);
return 0;
} }
/* /*
@ -915,10 +891,7 @@ static void vmballoon_reset(struct vmballoon *b)
return; return;
if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) { if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
b->ops = &vmballoon_batched_ops; if (vmballoon_init_batching(b)) {
b->batch_max_pages = PAGE_SIZE / sizeof(struct
vmballoon_batch_entry);
if (!vmballoon_init_batching(b)) {
/* /*
* We failed to initialize batching, inform the monitor * We failed to initialize batching, inform the monitor
* about it by sending a null capability. * about it by sending a null capability.
@ -929,8 +902,7 @@ static void vmballoon_reset(struct vmballoon *b)
return; return;
} }
} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) { } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
b->ops = &vmballoon_basic_ops; vmballoon_deinit_batching(b);
b->batch_max_pages = 1;
} }
b->reset_required = false; b->reset_required = false;