mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-12 08:00:09 +00:00
[PATCH] ARM: Fix Xscale copy_page implementation
The ARM copypage changes in 2.6.12-rc4-git1 removed the preempt locking from the copypage functions which broke the XScale implementation. This patch fixes the locking on XScale and removes the now unneeded minicache code. Signed-off-by: Russell King <rmk@arm.linux.org.uk> Checked-by: Richard Purdie
This commit is contained in:
parent
17d82fcc6a
commit
f8f98a9335
@ -228,7 +228,6 @@ config CPU_SA1100
|
||||
select CPU_CACHE_V4WB
|
||||
select CPU_CACHE_VIVT
|
||||
select CPU_TLB_V4WB
|
||||
select CPU_MINICACHE
|
||||
|
||||
# XScale
|
||||
config CPU_XSCALE
|
||||
@ -239,7 +238,6 @@ config CPU_XSCALE
|
||||
select CPU_ABRT_EV5T
|
||||
select CPU_CACHE_VIVT
|
||||
select CPU_TLB_V4WBI
|
||||
select CPU_MINICACHE
|
||||
|
||||
# ARMv6
|
||||
config CPU_V6
|
||||
@ -345,11 +343,6 @@ config CPU_TLB_V4WBI
|
||||
config CPU_TLB_V6
|
||||
bool
|
||||
|
||||
config CPU_MINICACHE
|
||||
bool
|
||||
help
|
||||
Processor has a minicache.
|
||||
|
||||
comment "Processor Features"
|
||||
|
||||
config ARM_THUMB
|
||||
|
@ -31,8 +31,6 @@ obj-$(CONFIG_CPU_COPY_V6) += copypage-v6.o mmu.o
|
||||
obj-$(CONFIG_CPU_SA1100) += copypage-v4mc.o
|
||||
obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o
|
||||
|
||||
obj-$(CONFIG_CPU_MINICACHE) += minicache.o
|
||||
|
||||
obj-$(CONFIG_CPU_TLB_V3) += tlb-v3.o
|
||||
obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o
|
||||
obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o
|
||||
|
@ -1,113 +0,0 @@
|
||||
/*
|
||||
* linux/arch/arm/lib/copypage-xscale.S
|
||||
*
|
||||
* Copyright (C) 2001 Russell King
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <linux/init.h>
|
||||
#include <asm/constants.h>
|
||||
|
||||
/*
|
||||
* General note:
|
||||
* We don't really want write-allocate cache behaviour for these functions
|
||||
* since that will just eat through 8K of the cache.
|
||||
*/
|
||||
|
||||
.text
|
||||
.align 5
|
||||
/*
 * XScale optimised copy_user_page
 *  r0 = destination
 *  r1 = source
 *  r2 = virtual user address of ultimate destination page
 *
 * The source page may have some clean entries in the cache already, but we
 * can safely ignore them - break_cow() will flush them out of the cache
 * if we eventually end up using our copied page.
 *
 * What we could do is use the mini-cache to buffer reads from the source
 * page.  We rely on the mini-cache being smaller than one page, so we'll
 * cycle through the complete cache anyway.
 *
 * Outline: the source address is handed to map_page_minicache() and the
 * value left in r0 by that call is used as the read pointer; r1 becomes
 * the write pointer.  Each pass of the loop moves 64 bytes as two groups
 * of four ldrd/strd transfers.  After each 32-byte group, the address at
 * which that group started (kept in ip) is cleaned and then invalidated
 * in the D cache.  lr counts the passes: it starts at PAGE_SZ/64-1,
 * "bgt 1b" repeats with prefetching, and the final pass is entered through
 * "beq 2b", so it skips the pld instructions.
 *
 * r2-r5, ip and lr are used as scratch.  r4, r5 and lr are pushed on entry
 * and popped on exit (the saved lr is loaded straight into pc).  The
 * incoming r2 value is overwritten without being read.
 */
|
||||
ENTRY(xscale_mc_copy_user_page)
|
||||
stmfd sp!, {r4, r5, lr}
|
||||
/* keep the destination in r5 across the call; pass the source in r0 */
mov r5, r0
|
||||
mov r0, r1
|
||||
bl map_page_minicache
|
||||
/* r0 is now the read pointer left by the call; r1 = destination */
mov r1, r5
|
||||
/* lr = number of 64-byte passes, less one */
mov lr, #PAGE_SZ/64-1
|
||||
|
||||
/*
|
||||
* Strangely enough, best performance is achieved
|
||||
* when prefetching destination as well. (NP)
|
||||
*/
|
||||
pld [r0, #0]
|
||||
pld [r0, #32]
|
||||
pld [r1, #0]
|
||||
pld [r1, #32]
|
||||
|
||||
/* 1: prefetch 64 and 96 bytes ahead of both pointers */
1: pld [r0, #64]
|
||||
pld [r0, #96]
|
||||
pld [r1, #64]
|
||||
pld [r1, #96]
|
||||
|
||||
/* 2: copy 64 bytes; ip records where each 32-byte group of stores began */
2: ldrd r2, [r0], #8
|
||||
ldrd r4, [r0], #8
|
||||
mov ip, r1
|
||||
strd r2, [r1], #8
|
||||
ldrd r2, [r0], #8
|
||||
strd r4, [r1], #8
|
||||
ldrd r4, [r0], #8
|
||||
strd r2, [r1], #8
|
||||
strd r4, [r1], #8
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line
|
||||
ldrd r2, [r0], #8
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
|
||||
ldrd r4, [r0], #8
|
||||
mov ip, r1
|
||||
strd r2, [r1], #8
|
||||
ldrd r2, [r0], #8
|
||||
strd r4, [r1], #8
|
||||
ldrd r4, [r0], #8
|
||||
strd r2, [r1], #8
|
||||
strd r4, [r1], #8
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line
|
||||
subs lr, lr, #1
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
|
||||
/* both branches test the flags set by the subs above */
bgt 1b
|
||||
beq 2b
|
||||
|
||||
/* restore r4/r5 and return by loading the saved lr into pc */
ldmfd sp!, {r4, r5, pc}
|
||||
|
||||
.align 5
|
||||
/*
 * XScale optimised clear_user_page
 *  r0 = destination
 *  r1 = virtual user address of ultimate destination page
 *
 * Zero-fills PAGE_SZ bytes starting at r0.  r1 is immediately reused as
 * the loop counter (PAGE_SZ/32 passes), so the incoming user address is
 * not read.  Each pass stores 32 bytes of zeros with four strd
 * instructions from the r2/r3 pair, then cleans and invalidates the D
 * cache entry for the address at which that pass started (held in ip).
 *
 * Uses r1-r3 and ip as scratch; nothing is pushed on the stack and the
 * routine returns with "mov pc, lr".
 */
|
||||
ENTRY(xscale_mc_clear_user_page)
|
||||
mov r1, #PAGE_SZ/32
|
||||
mov r2, #0
|
||||
mov r3, #0
|
||||
/* 1: one 32-byte pass; ip = start address of this pass */
1: mov ip, r0
|
||||
strd r2, [r0], #8
|
||||
strd r2, [r0], #8
|
||||
strd r2, [r0], #8
|
||||
strd r2, [r0], #8
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line
|
||||
subs r1, r1, #1
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line
|
||||
/* bne tests the flags set by the subs above */
bne 1b
|
||||
mov pc, lr
|
||||
|
||||
/*
 * Table of the two page routines defined above, emitted after the
 * __INITDATA directive: first the clear routine, then the copy routine,
 * one 32-bit word each.  The .type/.size directives mark the symbol as a
 * data object covering exactly these two words.
 */
__INITDATA
|
||||
|
||||
.type xscale_mc_user_fns, #object
|
||||
ENTRY(xscale_mc_user_fns)
|
||||
.long xscale_mc_clear_user_page
|
||||
.long xscale_mc_copy_user_page
|
||||
.size xscale_mc_user_fns, . - xscale_mc_user_fns
|
131
arch/arm/mm/copypage-xscale.c
Normal file
131
arch/arm/mm/copypage-xscale.c
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* linux/arch/arm/mm/copypage-xscale.c
|
||||
*
|
||||
* Copyright (C) 1995-2005 Russell King
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This handles the mini data cache, as found on SA11x0 and XScale
|
||||
* processors. When we copy a user page, we map it in such a way
|
||||
* that accesses to this page will not touch the main data cache, but
|
||||
* will be cached in the mini data cache. This prevents us thrashing
|
||||
* the main data cache on page faults.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 *
 * COPYPAGE_MINICACHE is the kernel virtual address at which
 * xscale_mc_copy_user_page() temporarily maps the source page.
 */
|
||||
#define COPYPAGE_MINICACHE 0xffff8000
|
||||
|
||||
/*
 * Protection bits for the temporary mapping: present, young and cacheable.
 * NOTE(review): presumably this attribute combination is what routes
 * accesses through the mini data cache, as the file header describes -
 * confirm against the CPU documentation.
 */
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
|
||||
L_PTE_CACHEABLE)
|
||||
|
||||
/*
 * Kernel PTE slot for address x, looked up through top_pmd.  top_pmd is
 * not defined in this file; it must be provided elsewhere.
 */
#define TOP_PTE(x) pte_offset_kernel(top_pmd, x)
|
||||
|
||||
/*
 * Held by xscale_mc_copy_user_page() from before the COPYPAGE_MINICACHE
 * PTE is rewritten until the copy through that mapping has finished.
 */
static DEFINE_SPINLOCK(minicache_lock);
|
||||
|
||||
/*
|
||||
* XScale mini-dcache optimised copy_user_page
|
||||
*
|
||||
* We flush the destination cache lines just before we write the data into the
|
||||
* corresponding address. Since the Dcache is read-allocate, this removes the
|
||||
* Dcache aliasing issue. The writes will be forwarded to the write buffer,
|
||||
* and merged as appropriate.
|
||||
*/
|
||||
static void __attribute__((naked))
|
||||
mc_copy_user_page(void *from, void *to)
|
||||
{
|
||||
/*
|
||||
* Strangely enough, best performance is achieved
|
||||
* when prefetching destination as well. (NP)
|
||||
*/
|
||||
asm volatile(
|
||||
"stmfd sp!, {r4, r5, lr} \n\
|
||||
mov lr, %2 \n\
|
||||
pld [r0, #0] \n\
|
||||
pld [r0, #32] \n\
|
||||
pld [r1, #0] \n\
|
||||
pld [r1, #32] \n\
|
||||
1: pld [r0, #64] \n\
|
||||
pld [r0, #96] \n\
|
||||
pld [r1, #64] \n\
|
||||
pld [r1, #96] \n\
|
||||
2: ldrd r2, [r0], #8 \n\
|
||||
ldrd r4, [r0], #8 \n\
|
||||
mov ip, r1 \n\
|
||||
strd r2, [r1], #8 \n\
|
||||
ldrd r2, [r0], #8 \n\
|
||||
strd r4, [r1], #8 \n\
|
||||
ldrd r4, [r0], #8 \n\
|
||||
strd r2, [r1], #8 \n\
|
||||
strd r4, [r1], #8 \n\
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
|
||||
ldrd r2, [r0], #8 \n\
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
|
||||
ldrd r4, [r0], #8 \n\
|
||||
mov ip, r1 \n\
|
||||
strd r2, [r1], #8 \n\
|
||||
ldrd r2, [r0], #8 \n\
|
||||
strd r4, [r1], #8 \n\
|
||||
ldrd r4, [r0], #8 \n\
|
||||
strd r2, [r1], #8 \n\
|
||||
strd r4, [r1], #8 \n\
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
|
||||
subs lr, lr, #1 \n\
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
|
||||
bgt 1b \n\
|
||||
beq 2b \n\
|
||||
ldmfd sp!, {r4, r5, pc} "
|
||||
:
|
||||
: "r" (from), "r" (to), "I" (PAGE_SIZE / 64 - 1));
|
||||
}
|
||||
|
||||
/*
 * Copy one page from kfrom to kto, reading the source through the
 * temporary mapping at COPYPAGE_MINICACHE.
 *
 * kto:   kernel address of the destination page.
 * kfrom: kernel address of the source page; it is converted with __pa(),
 *        so it must be an address that __pa() can translate.
 * vaddr: not used by this implementation.
 *
 * minicache_lock is held across both the remap and the copy, so the single
 * COPYPAGE_MINICACHE mapping cannot be re-pointed by another caller while
 * a copy through it is in progress.
 */
void xscale_mc_copy_user_page(void *kto, const void *kfrom, unsigned long vaddr)
|
||||
{
|
||||
spin_lock(&minicache_lock);
|
||||
|
||||
/* point the COPYPAGE_MINICACHE PTE at the source page, then drop any stale TLB entry for it */
set_pte(TOP_PTE(COPYPAGE_MINICACHE), pfn_pte(__pa(kfrom) >> PAGE_SHIFT, minicache_pgprot));
|
||||
flush_tlb_kernel_page(COPYPAGE_MINICACHE);
|
||||
|
||||
/* source is read via the temporary mapping; destination is written via kto */
mc_copy_user_page((void *)COPYPAGE_MINICACHE, kto);
|
||||
|
||||
spin_unlock(&minicache_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* XScale optimised clear_user_page
|
||||
*/
|
||||
void __attribute__((naked))
|
||||
xscale_mc_clear_user_page(void *kaddr, unsigned long vaddr)
|
||||
{
|
||||
asm volatile(
|
||||
"mov r1, %0 \n\
|
||||
mov r2, #0 \n\
|
||||
mov r3, #0 \n\
|
||||
1: mov ip, r0 \n\
|
||||
strd r2, [r0], #8 \n\
|
||||
strd r2, [r0], #8 \n\
|
||||
strd r2, [r0], #8 \n\
|
||||
strd r2, [r0], #8 \n\
|
||||
mcr p15, 0, ip, c7, c10, 1 @ clean D line\n\
|
||||
subs r1, r1, #1 \n\
|
||||
mcr p15, 0, ip, c7, c6, 1 @ invalidate D line\n\
|
||||
bne 1b \n\
|
||||
mov pc, lr"
|
||||
:
|
||||
: "I" (PAGE_SIZE / 32));
|
||||
}
|
||||
|
||||
/*
 * The clear and copy routines defined in this file, packaged as a
 * struct cpu_user_fns.  The object is marked __initdata.
 * NOTE(review): since it is __initdata, presumably the consumer copies it
 * during boot rather than keeping a pointer to it - confirm against the
 * code that references xscale_mc_user_fns.
 */
struct cpu_user_fns xscale_mc_user_fns __initdata = {
|
||||
.cpu_clear_user_page = xscale_mc_clear_user_page,
|
||||
.cpu_copy_user_page = xscale_mc_copy_user_page,
|
||||
};
|
@ -1,73 +0,0 @@
|
||||
/*
|
||||
* linux/arch/arm/mm/minicache.c
|
||||
*
|
||||
* Copyright (C) 2001 Russell King
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This handles the mini data cache, as found on SA11x0 and XScale
|
||||
* processors. When we copy a user page, we map it in such a way
|
||||
* that accesses to this page will not touch the main data cache, but
|
||||
* will be cached in the mini data cache. This prevents us thrashing
|
||||
* the main data cache on page faults.
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/mm.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/*
 * 0xffff8000 to 0xffffffff is reserved for any ARM architecture
 * specific hacks for copying pages efficiently.
 *
 * minicache_address is the kernel virtual address at which
 * map_page_minicache() installs its temporary mapping.
 */
|
||||
#define minicache_address (0xffff8000)
|
||||
/* Protection bits for that mapping: present, young and cacheable. */
#define minicache_pgprot __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | \
|
||||
L_PTE_CACHEABLE)
|
||||
|
||||
/*
 * PTE slot covering minicache_address.  Set once by minicache_init() and
 * rewritten by every map_page_minicache() call.
 */
static pte_t *minicache_pte;
|
||||
|
||||
/*
 * Map the page containing 'virt' at minicache_address and return
 * minicache_address.
 *
 * Note that this is intended to be called only from the copy_user_page
 * asm code; anything else will require special locking to prevent the
 * mini-cache space being re-used.  (Note: probably preempt unsafe).
 *
 * We rely on the fact that the minicache is 2K, and we'll be pushing
 * 4K of data through it, so we don't actually have to specifically
 * flush the minicache when we change the mapping.
 *
 * Note also: assert(PAGE_OFFSET <= virt < high_memory).
 * Unsafe: preempt, kmap.
 *
 * virt: kernel virtual address of the page to map; it is converted with
 *       __pa(), hence the range requirement above.
 *
 * No lock is taken here: the shared PTE is simply overwritten and the TLB
 * entry for minicache_address flushed.
 */
|
||||
unsigned long map_page_minicache(unsigned long virt)
|
||||
{
|
||||
set_pte(minicache_pte, pfn_pte(__pa(virt) >> PAGE_SHIFT, minicache_pgprot));
|
||||
flush_tlb_kernel_page(minicache_address);
|
||||
|
||||
return minicache_address;
|
||||
}
|
||||
|
||||
/*
 * Boot-time setup, registered with core_initcall() below: allocate the
 * page-table levels covering minicache_address in init_mm and remember the
 * resulting PTE slot in minicache_pte, so that map_page_minicache() only
 * has to write that PTE.
 *
 * Either allocation failing is treated as fatal (BUG()).  Always returns 0.
 */
static int __init minicache_init(void)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
pmd_t *pmd;
|
||||
|
||||
/* the page-table allocations below are made with init_mm's page_table_lock held */
spin_lock(&init_mm.page_table_lock);
|
||||
|
||||
pgd = pgd_offset_k(minicache_address);
|
||||
pmd = pmd_alloc(&init_mm, pgd, minicache_address);
|
||||
if (!pmd)
|
||||
BUG();
|
||||
minicache_pte = pte_alloc_kernel(&init_mm, pmd, minicache_address);
|
||||
if (!minicache_pte)
|
||||
BUG();
|
||||
|
||||
spin_unlock(&init_mm.page_table_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
core_initcall(minicache_init);
|
Loading…
x
Reference in New Issue
Block a user