lguest: per-vcpu lguest pgdir management

This patch makes the pgdir management per-vcpu. The pgdirs pool
is still guest-wide (although it'll probably need to grow when we
are really executing more vcpus), but the pgdidx index is gone,
since it makes no sense anymore. Instead, we use a per-vcpu
index.

Signed-off-by: Glauber de Oliveira Costa <gcosta@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
This commit is contained in:
Glauber de Oliveira Costa 2008-01-07 11:05:37 -02:00 committed by Rusty Russell
parent 5e232f4f42
commit 1713608f28
5 changed files with 44 additions and 42 deletions

View File

@ -62,7 +62,7 @@ static void do_hcall(struct lg_cpu *cpu, struct hcall_args *args)
if (args->arg1) if (args->arg1)
guest_pagetable_clear_all(cpu); guest_pagetable_clear_all(cpu);
else else
guest_pagetable_flush_user(lg); guest_pagetable_flush_user(cpu);
break; break;
/* All these calls simply pass the arguments through to the right /* All these calls simply pass the arguments through to the right

View File

@ -76,7 +76,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
virtstack = cpu->esp1; virtstack = cpu->esp1;
ss = cpu->ss1; ss = cpu->ss1;
origstack = gstack = guest_pa(lg, virtstack); origstack = gstack = guest_pa(cpu, virtstack);
/* We push the old stack segment and pointer onto the new /* We push the old stack segment and pointer onto the new
* stack: when the Guest does an "iret" back from the interrupt * stack: when the Guest does an "iret" back from the interrupt
* handler the CPU will notice they're dropping privilege * handler the CPU will notice they're dropping privilege
@ -88,7 +88,7 @@ static void set_guest_interrupt(struct lg_cpu *cpu, u32 lo, u32 hi, int has_err)
virtstack = cpu->regs->esp; virtstack = cpu->regs->esp;
ss = cpu->regs->ss; ss = cpu->regs->ss;
origstack = gstack = guest_pa(lg, virtstack); origstack = gstack = guest_pa(cpu, virtstack);
} }
/* Remember that we never let the Guest actually disable interrupts, so /* Remember that we never let the Guest actually disable interrupts, so
@ -323,7 +323,7 @@ void pin_stack_pages(struct lg_cpu *cpu)
* start of the page after the kernel stack. Subtract one to * start of the page after the kernel stack. Subtract one to
* get back onto the first stack page, and keep subtracting to * get back onto the first stack page, and keep subtracting to
* get to the rest of the stack pages. */ * get to the rest of the stack pages. */
pin_page(lg, cpu->esp1 - 1 - i * PAGE_SIZE); pin_page(cpu, cpu->esp1 - 1 - i * PAGE_SIZE);
} }
/* Direct traps also mean that we need to know whenever the Guest wants to use /* Direct traps also mean that we need to know whenever the Guest wants to use

View File

@ -57,6 +57,8 @@ struct lg_cpu {
unsigned long regs_page; unsigned long regs_page;
struct lguest_regs *regs; struct lguest_regs *regs;
int cpu_pgd; /* which pgd this cpu is currently using */
/* If a hypercall was asked for, this points to the arguments. */ /* If a hypercall was asked for, this points to the arguments. */
struct hcall_args *hcall; struct hcall_args *hcall;
u32 next_hcall; u32 next_hcall;
@ -92,8 +94,6 @@ struct lguest
int changed; int changed;
struct lguest_pages *last_pages; struct lguest_pages *last_pages;
/* We keep a small number of these. */
u32 pgdidx;
struct pgdir pgdirs[4]; struct pgdir pgdirs[4];
unsigned long noirq_start, noirq_end; unsigned long noirq_start, noirq_end;
@ -169,13 +169,13 @@ void free_guest_pagetable(struct lguest *lg);
void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable); void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable);
void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i); void guest_set_pmd(struct lguest *lg, unsigned long gpgdir, u32 i);
void guest_pagetable_clear_all(struct lg_cpu *cpu); void guest_pagetable_clear_all(struct lg_cpu *cpu);
void guest_pagetable_flush_user(struct lguest *lg); void guest_pagetable_flush_user(struct lg_cpu *cpu);
void guest_set_pte(struct lguest *lg, unsigned long gpgdir, void guest_set_pte(struct lguest *lg, unsigned long gpgdir,
unsigned long vaddr, pte_t val); unsigned long vaddr, pte_t val);
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages); void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages);
int demand_page(struct lguest *info, unsigned long cr2, int errcode); int demand_page(struct lg_cpu *cpu, unsigned long cr2, int errcode);
void pin_page(struct lguest *lg, unsigned long vaddr); void pin_page(struct lg_cpu *cpu, unsigned long vaddr);
unsigned long guest_pa(struct lguest *lg, unsigned long vaddr); unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr);
void page_table_guest_data_init(struct lguest *lg); void page_table_guest_data_init(struct lguest *lg);
/* <arch>/core.c: */ /* <arch>/core.c: */

View File

@ -94,10 +94,10 @@ static pte_t *spte_addr(struct lguest *lg, pgd_t spgd, unsigned long vaddr)
/* These two functions just like the above two, except they access the Guest /* These two functions just like the above two, except they access the Guest
* page tables. Hence they return a Guest address. */ * page tables. Hence they return a Guest address. */
static unsigned long gpgd_addr(struct lguest *lg, unsigned long vaddr) static unsigned long gpgd_addr(struct lg_cpu *cpu, unsigned long vaddr)
{ {
unsigned int index = vaddr >> (PGDIR_SHIFT); unsigned int index = vaddr >> (PGDIR_SHIFT);
return lg->pgdirs[lg->pgdidx].gpgdir + index * sizeof(pgd_t); return cpu->lg->pgdirs[cpu->cpu_pgd].gpgdir + index * sizeof(pgd_t);
} }
static unsigned long gpte_addr(struct lguest *lg, static unsigned long gpte_addr(struct lguest *lg,
@ -200,22 +200,23 @@ static void check_gpgd(struct lguest *lg, pgd_t gpgd)
* *
* If we fixed up the fault (ie. we mapped the address), this routine returns * If we fixed up the fault (ie. we mapped the address), this routine returns
* true. Otherwise, it was a real fault and we need to tell the Guest. */ * true. Otherwise, it was a real fault and we need to tell the Guest. */
int demand_page(struct lguest *lg, unsigned long vaddr, int errcode) int demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
{ {
pgd_t gpgd; pgd_t gpgd;
pgd_t *spgd; pgd_t *spgd;
unsigned long gpte_ptr; unsigned long gpte_ptr;
pte_t gpte; pte_t gpte;
pte_t *spte; pte_t *spte;
struct lguest *lg = cpu->lg;
/* First step: get the top-level Guest page table entry. */ /* First step: get the top-level Guest page table entry. */
gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); gpgd = lgread(lg, gpgd_addr(cpu, vaddr), pgd_t);
/* Toplevel not present? We can't map it in. */ /* Toplevel not present? We can't map it in. */
if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
return 0; return 0;
/* Now look at the matching shadow entry. */ /* Now look at the matching shadow entry. */
spgd = spgd_addr(lg, lg->pgdidx, vaddr); spgd = spgd_addr(lg, cpu->cpu_pgd, vaddr);
if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) { if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
/* No shadow entry: allocate a new shadow PTE page. */ /* No shadow entry: allocate a new shadow PTE page. */
unsigned long ptepage = get_zeroed_page(GFP_KERNEL); unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
@ -297,19 +298,19 @@ int demand_page(struct lguest *lg, unsigned long vaddr, int errcode)
* *
* This is a quick version which answers the question: is this virtual address * This is a quick version which answers the question: is this virtual address
* mapped by the shadow page tables, and is it writable? */ * mapped by the shadow page tables, and is it writable? */
static int page_writable(struct lguest *lg, unsigned long vaddr) static int page_writable(struct lg_cpu *cpu, unsigned long vaddr)
{ {
pgd_t *spgd; pgd_t *spgd;
unsigned long flags; unsigned long flags;
/* Look at the current top level entry: is it present? */ /* Look at the current top level entry: is it present? */
spgd = spgd_addr(lg, lg->pgdidx, vaddr); spgd = spgd_addr(cpu->lg, cpu->cpu_pgd, vaddr);
if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
return 0; return 0;
/* Check the flags on the pte entry itself: it must be present and /* Check the flags on the pte entry itself: it must be present and
* writable. */ * writable. */
flags = pte_flags(*(spte_addr(lg, *spgd, vaddr))); flags = pte_flags(*(spte_addr(cpu->lg, *spgd, vaddr)));
return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW); return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
} }
@ -317,10 +318,10 @@ static int page_writable(struct lguest *lg, unsigned long vaddr)
/* So, when pin_stack_pages() asks us to pin a page, we check if it's already /* So, when pin_stack_pages() asks us to pin a page, we check if it's already
* in the page tables, and if not, we call demand_page() with error code 2 * in the page tables, and if not, we call demand_page() with error code 2
* (meaning "write"). */ * (meaning "write"). */
void pin_page(struct lguest *lg, unsigned long vaddr) void pin_page(struct lg_cpu *cpu, unsigned long vaddr)
{ {
if (!page_writable(lg, vaddr) && !demand_page(lg, vaddr, 2)) if (!page_writable(cpu, vaddr) && !demand_page(cpu, vaddr, 2))
kill_guest(lg, "bad stack page %#lx", vaddr); kill_guest(cpu->lg, "bad stack page %#lx", vaddr);
} }
/*H:450 If we chase down the release_pgd() code, it looks like this: */ /*H:450 If we chase down the release_pgd() code, it looks like this: */
@ -358,28 +359,28 @@ static void flush_user_mappings(struct lguest *lg, int idx)
* *
* The Guest has a hypercall to throw away the page tables: it's used when a * The Guest has a hypercall to throw away the page tables: it's used when a
* large number of mappings have been changed. */ * large number of mappings have been changed. */
void guest_pagetable_flush_user(struct lguest *lg) void guest_pagetable_flush_user(struct lg_cpu *cpu)
{ {
/* Drop the userspace part of the current page table. */ /* Drop the userspace part of the current page table. */
flush_user_mappings(lg, lg->pgdidx); flush_user_mappings(cpu->lg, cpu->cpu_pgd);
} }
/*:*/ /*:*/
/* We walk down the guest page tables to get a guest-physical address */ /* We walk down the guest page tables to get a guest-physical address */
unsigned long guest_pa(struct lguest *lg, unsigned long vaddr) unsigned long guest_pa(struct lg_cpu *cpu, unsigned long vaddr)
{ {
pgd_t gpgd; pgd_t gpgd;
pte_t gpte; pte_t gpte;
/* First step: get the top-level Guest page table entry. */ /* First step: get the top-level Guest page table entry. */
gpgd = lgread(lg, gpgd_addr(lg, vaddr), pgd_t); gpgd = lgread(cpu->lg, gpgd_addr(cpu, vaddr), pgd_t);
/* Toplevel not present? We can't map it in. */ /* Toplevel not present? We can't map it in. */
if (!(pgd_flags(gpgd) & _PAGE_PRESENT)) if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
kill_guest(lg, "Bad address %#lx", vaddr); kill_guest(cpu->lg, "Bad address %#lx", vaddr);
gpte = lgread(lg, gpte_addr(lg, gpgd, vaddr), pte_t); gpte = lgread(cpu->lg, gpte_addr(cpu->lg, gpgd, vaddr), pte_t);
if (!(pte_flags(gpte) & _PAGE_PRESENT)) if (!(pte_flags(gpte) & _PAGE_PRESENT))
kill_guest(lg, "Bad address %#lx", vaddr); kill_guest(cpu->lg, "Bad address %#lx", vaddr);
return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK); return pte_pfn(gpte) * PAGE_SIZE | (vaddr & ~PAGE_MASK);
} }
@ -399,11 +400,12 @@ static unsigned int find_pgdir(struct lguest *lg, unsigned long pgtable)
/*H:435 And this is us, creating the new page directory. If we really do /*H:435 And this is us, creating the new page directory. If we really do
* allocate a new one (and so the kernel parts are not there), we set * allocate a new one (and so the kernel parts are not there), we set
* blank_pgdir. */ * blank_pgdir. */
static unsigned int new_pgdir(struct lguest *lg, static unsigned int new_pgdir(struct lg_cpu *cpu,
unsigned long gpgdir, unsigned long gpgdir,
int *blank_pgdir) int *blank_pgdir)
{ {
unsigned int next; unsigned int next;
struct lguest *lg = cpu->lg;
/* We pick one entry at random to throw out. Choosing the Least /* We pick one entry at random to throw out. Choosing the Least
* Recently Used might be better, but this is easy. */ * Recently Used might be better, but this is easy. */
@ -413,7 +415,7 @@ static unsigned int new_pgdir(struct lguest *lg,
lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL); lg->pgdirs[next].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
/* If the allocation fails, just keep using the one we have */ /* If the allocation fails, just keep using the one we have */
if (!lg->pgdirs[next].pgdir) if (!lg->pgdirs[next].pgdir)
next = lg->pgdidx; next = cpu->cpu_pgd;
else else
/* This is a blank page, so there are no kernel /* This is a blank page, so there are no kernel
* mappings: caller must map the stack! */ * mappings: caller must map the stack! */
@ -442,9 +444,9 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
/* If not, we allocate or mug an existing one: if it's a fresh one, /* If not, we allocate or mug an existing one: if it's a fresh one,
* repin gets set to 1. */ * repin gets set to 1. */
if (newpgdir == ARRAY_SIZE(lg->pgdirs)) if (newpgdir == ARRAY_SIZE(lg->pgdirs))
newpgdir = new_pgdir(lg, pgtable, &repin); newpgdir = new_pgdir(cpu, pgtable, &repin);
/* Change the current pgd index to the new one. */ /* Change the current pgd index to the new one. */
lg->pgdidx = newpgdir; cpu->cpu_pgd = newpgdir;
/* If it was completely blank, we map in the Guest kernel stack */ /* If it was completely blank, we map in the Guest kernel stack */
if (repin) if (repin)
pin_stack_pages(cpu); pin_stack_pages(cpu);
@ -591,11 +593,11 @@ int init_guest_pagetable(struct lguest *lg, unsigned long pgtable)
{ {
/* We start on the first shadow page table, and give it a blank PGD /* We start on the first shadow page table, and give it a blank PGD
* page. */ * page. */
lg->pgdidx = 0; lg->pgdirs[0].gpgdir = pgtable;
lg->pgdirs[lg->pgdidx].gpgdir = pgtable; lg->pgdirs[0].pgdir = (pgd_t *)get_zeroed_page(GFP_KERNEL);
lg->pgdirs[lg->pgdidx].pgdir = (pgd_t*)get_zeroed_page(GFP_KERNEL); if (!lg->pgdirs[0].pgdir)
if (!lg->pgdirs[lg->pgdidx].pgdir)
return -ENOMEM; return -ENOMEM;
lg->cpus[0].cpu_pgd = 0;
return 0; return 0;
} }
@ -607,7 +609,7 @@ void page_table_guest_data_init(struct lguest *lg)
/* We tell the Guest that it can't use the top 4MB of virtual /* We tell the Guest that it can't use the top 4MB of virtual
* addresses used by the Switcher. */ * addresses used by the Switcher. */
|| put_user(4U*1024*1024, &lg->lguest_data->reserve_mem) || put_user(4U*1024*1024, &lg->lguest_data->reserve_mem)
|| put_user(lg->pgdirs[lg->pgdidx].gpgdir,&lg->lguest_data->pgdir)) || put_user(lg->pgdirs[0].gpgdir, &lg->lguest_data->pgdir))
kill_guest(lg, "bad guest page %p", lg->lguest_data); kill_guest(lg, "bad guest page %p", lg->lguest_data);
/* In flush_user_mappings() we loop from 0 to /* In flush_user_mappings() we loop from 0 to
@ -637,7 +639,6 @@ void free_guest_pagetable(struct lguest *lg)
* Guest is about to run on this CPU. */ * Guest is about to run on this CPU. */
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages) void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
{ {
struct lguest *lg = cpu->lg;
pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages); pte_t *switcher_pte_page = __get_cpu_var(switcher_pte_pages);
pgd_t switcher_pgd; pgd_t switcher_pgd;
pte_t regs_pte; pte_t regs_pte;
@ -647,7 +648,7 @@ void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
* page for this CPU (with appropriate flags). */ * page for this CPU (with appropriate flags). */
switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL); switcher_pgd = __pgd(__pa(switcher_pte_page) | _PAGE_KERNEL);
lg->pgdirs[lg->pgdidx].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd; cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
/* We also change the Switcher PTE page. When we're running the Guest, /* We also change the Switcher PTE page. When we're running the Guest,
* we want the Guest's "regs" page to appear where the first Switcher * we want the Guest's "regs" page to appear where the first Switcher

View File

@ -145,7 +145,7 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages)
* 0-th argument above, ie "a"). %ebx contains the * 0-th argument above, ie "a"). %ebx contains the
* physical address of the Guest's top-level page * physical address of the Guest's top-level page
* directory. */ * directory. */
: "0"(pages), "1"(__pa(lg->pgdirs[lg->pgdidx].pgdir)) : "0"(pages), "1"(__pa(lg->pgdirs[cpu->cpu_pgd].pgdir))
/* We tell gcc that all these registers could change, /* We tell gcc that all these registers could change,
* which means we don't have to save and restore them in * which means we don't have to save and restore them in
* the Switcher. */ * the Switcher. */
@ -223,7 +223,7 @@ static int emulate_insn(struct lg_cpu *cpu)
unsigned int insnlen = 0, in = 0, shift = 0; unsigned int insnlen = 0, in = 0, shift = 0;
/* The eip contains the *virtual* address of the Guest's instruction: /* The eip contains the *virtual* address of the Guest's instruction:
* guest_pa just subtracts the Guest's page_offset. */ * guest_pa just subtracts the Guest's page_offset. */
unsigned long physaddr = guest_pa(lg, cpu->regs->eip); unsigned long physaddr = guest_pa(cpu, cpu->regs->eip);
/* This must be the Guest kernel trying to do something, not userspace! /* This must be the Guest kernel trying to do something, not userspace!
* The bottom two bits of the CS segment register are the privilege * The bottom two bits of the CS segment register are the privilege
@ -305,7 +305,8 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu)
* *
* The errcode tells whether this was a read or a write, and * The errcode tells whether this was a read or a write, and
* whether kernel or userspace code. */ * whether kernel or userspace code. */
if (demand_page(lg,cpu->arch.last_pagefault,cpu->regs->errcode)) if (demand_page(cpu, cpu->arch.last_pagefault,
cpu->regs->errcode))
return; return;
/* OK, it's really not there (or not OK): the Guest needs to /* OK, it's really not there (or not OK): the Guest needs to