From 23759dc6430428897a36c4d493f611eca55c9481 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Sun, 2 Apr 2006 00:07:39 +0100 Subject: [PATCH] [ARM] 3439/2: xsc3: add I/O coherency support Patch from Lennert Buytenhek This patch adds support for the I/O coherent cache available on the xsc3. The approach is to provide a simple API to determine whether the chipset supports coherency by calling arch_is_coherent() and then setting the appropriate system memory PTE and PMD bits. In addition, we call this API on dma_alloc_coherent() and dma_map_single() calls. A generic version exists that will compile out all the coherency-related code that is not needed on the majority of ARM systems. Note that we do not check for coherency in the dma_alloc_writecombine() function as that still requires a special PTE setting. We also don't touch dma_mmap_coherent() as that is a special ARM-only API that is by definition only used on non-coherent system. Signed-off-by: Deepak Saxena Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 3 +++ arch/arm/mach-ixp23xx/pci.c | 6 ++++++ arch/arm/mm/consistent.c | 17 +++++++++++++++++ arch/arm/mm/mm-armv.c | 11 +++++++++++ arch/arm/mm/proc-xsc3.S | 2 +- include/asm-arm/arch-ixp23xx/memory.h | 17 +++++++++++++++++ include/asm-arm/dma-mapping.h | 22 +++++++++++++++------- include/asm-arm/memory.h | 8 ++++++++ include/asm-arm/pgtable-hwdef.h | 1 + include/asm-arm/pgtable.h | 1 + 10 files changed, 80 insertions(+), 8 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index b7cd280bfd63..437528403959 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -252,6 +252,9 @@ static void __init dump_cpu_info(int cpu) dump_cache("cache", cpu, CACHE_ISIZE(info)); } } + + if (arch_is_coherent()) + printk("Cache coherency enabled\n"); } int cpu_architecture(void) diff --git a/arch/arm/mach-ixp23xx/pci.c b/arch/arm/mach-ixp23xx/pci.c index ba6b4367a1d5..ac72f94c5b4d 100644 --- a/arch/arm/mach-ixp23xx/pci.c +++ b/arch/arm/mach-ixp23xx/pci.c @@ -219,6 +219,12 @@ static void __init ixp23xx_pci_common_init(void) *IXP23XX_PCI_CPP_ADDR_BITS &= ~(1 << 1); } else { *IXP23XX_PCI_CPP_ADDR_BITS |= (1 << 1); + + /* + * Enable coherency on A2 silicon. + */ + if (arch_is_coherent()) + *IXP23XX_CPP2XSI_CURR_XFER_REG3 &= ~IXP23XX_CPP2XSI_COH_OFF; } } diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c index 8a1bfcd50087..50e6b6bfb2e2 100644 --- a/arch/arm/mm/consistent.c +++ b/arch/arm/mm/consistent.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -272,6 +273,17 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp) { + if (arch_is_coherent()) { + void *virt; + + virt = kmalloc(size, gfp); + if (!virt) + return NULL; + *handle = virt_to_dma(dev, virt); + + return virt; + } + return __dma_alloc(dev, size, handle, gfp, pgprot_noncached(pgprot_kernel)); } @@ -350,6 +362,11 @@ void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr WARN_ON(irqs_disabled()); + if (arch_is_coherent()) { + kfree(cpu_addr); + return; + } + size = PAGE_ALIGN(size); spin_lock_irqsave(&consistent_lock, flags); diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index 5e5d05bcad50..f14b2d0f3690 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -388,6 +388,17 @@ void __init build_mem_type_table(void) cp = &cache_policies[cachepolicy]; kern_pgprot = user_pgprot = cp->pte; + /* + * Enable CPU-specific coherency if supported. + * (Only available on XSC3 at the moment.) + */ + if (arch_is_coherent()) { + if (cpu_is_xsc3()) { + mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_COHERENT; + } + } + /* * ARMv6 and above have extended page tables. */ diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index b9dfce57c272..80873b36c3f7 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -371,7 +371,7 @@ ENTRY(cpu_xsc3_switch_mm) ENTRY(cpu_xsc3_set_pte) str r1, [r0], #-2048 @ linux version - bic r2, r1, #0xff0 + bic r2, r1, #0xdf0 @ Keep C, B, coherency bits orr r2, r2, #PTE_TYPE_EXT @ extended page eor r3, r1, #L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_WRITE | L_PTE_DIRTY diff --git a/include/asm-arm/arch-ixp23xx/memory.h b/include/asm-arm/arch-ixp23xx/memory.h index bebcf0aa0d72..6e19f46d54d1 100644 --- a/include/asm-arm/arch-ixp23xx/memory.h +++ b/include/asm-arm/arch-ixp23xx/memory.h @@ -28,6 +28,7 @@ * to an address that the kernel can use. */ #ifndef __ASSEMBLY__ +#include #define __virt_to_bus(v) \ ({ unsigned int ret; \ @@ -40,6 +41,22 @@ data = *((volatile int *)IXP23XX_PCI_SDRAM_BAR); \ __phys_to_virt((((b - (data & 0xfffffff0)) + 0x00000000))); }) +/* + * Coherency support. Only supported on A2 CPUs or on A1 + * systems that have the cache coherency workaround. + */ +static inline int __ixp23xx_arch_is_coherent(void) +{ + extern unsigned int processor_id; + + if (((processor_id & 15) >= 2) || machine_is_roadrunner()) + return 1; + + return 0; +} + +#define arch_is_coherent() __ixp23xx_arch_is_coherent() + #endif diff --git a/include/asm-arm/dma-mapping.h b/include/asm-arm/dma-mapping.h index e3e8541ee63b..63ca7412a462 100644 --- a/include/asm-arm/dma-mapping.h +++ b/include/asm-arm/dma-mapping.h @@ -47,7 +47,7 @@ static inline int dma_get_cache_alignment(void) static inline int dma_is_consistent(dma_addr_t handle) { - return 0; + return !!arch_is_coherent(); } /* @@ -145,7 +145,9 @@ static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size, enum dma_data_direction dir) { - consistent_sync(cpu_addr, size, dir); + if (!arch_is_coherent()) + consistent_sync(cpu_addr, size, dir); + return virt_to_dma(dev, (unsigned long)cpu_addr); } #else @@ -255,7 +257,9 @@ dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, sg->dma_address = page_to_dma(dev, sg->page) + sg->offset; virt = page_address(sg->page) + sg->offset; - consistent_sync(virt, sg->length, dir); + + if (!arch_is_coherent()) + consistent_sync(virt, sg->length, dir); } return nents; @@ -310,14 +314,16 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - consistent_sync((void *)dma_to_virt(dev, handle), size, dir); + if (!arch_is_coherent()) + consistent_sync((void *)dma_to_virt(dev, handle), size, dir); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir) { - consistent_sync((void *)dma_to_virt(dev, handle), size, dir); + if (!arch_is_coherent()) + consistent_sync((void *)dma_to_virt(dev, handle), size, dir); } #else extern void dma_sync_single_for_cpu(struct device*, dma_addr_t, size_t, enum dma_data_direction); @@ -347,7 +353,8 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, for (i = 0; i < nents; i++, sg++) { char *virt = page_address(sg->page) + sg->offset; - consistent_sync(virt, sg->length, dir); + if (!arch_is_coherent()) + consistent_sync(virt, sg->length, dir); } } @@ -359,7 +366,8 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, for (i = 0; i < nents; i++, sg++) { char *virt = page_address(sg->page) + sg->offset; - consistent_sync(virt, sg->length, dir); + if (!arch_is_coherent()) + consistent_sync(virt, sg->length, dir); } } #else diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h index afa5c3ea077c..2b3cf69b3ed9 100644 --- a/include/asm-arm/memory.h +++ b/include/asm-arm/memory.h @@ -234,6 +234,14 @@ static inline __deprecated void *bus_to_virt(unsigned long x) #define virt_to_dma(dev, addr) (__arch_virt_to_dma(dev, addr)) #endif +/* + * Optional coherency support. Currently used only by selected + * Intel XSC3-based systems. + */ +#ifndef arch_is_coherent +#define arch_is_coherent() 0 +#endif + #endif #include diff --git a/include/asm-arm/pgtable-hwdef.h b/include/asm-arm/pgtable-hwdef.h index 1d033495cc75..1bc1f997bda2 100644 --- a/include/asm-arm/pgtable-hwdef.h +++ b/include/asm-arm/pgtable-hwdef.h @@ -73,6 +73,7 @@ #define PTE_EXT_AP_URW_SRW (PTE_EXT_AP1|PTE_EXT_AP0) #define PTE_EXT_TEX(x) ((x) << 6) /* v5 */ #define PTE_EXT_APX (1 << 9) /* v6 */ +#define PTE_EXT_COHERENT (1 << 9) /* XScale3 */ #define PTE_EXT_SHARED (1 << 10) /* v6 */ #define PTE_EXT_NG (1 << 11) /* v6 */ diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index e595ae24efe2..e85c08d78dda 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -156,6 +156,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define L_PTE_WRITE (1 << 5) #define L_PTE_EXEC (1 << 6) #define L_PTE_DIRTY (1 << 7) +#define L_PTE_COHERENT (1 << 9) /* I/O coherent (xsc3) */ #define L_PTE_SHARED (1 << 10) /* shared between CPUs (v6) */ #define L_PTE_ASID (1 << 11) /* non-global (use ASID, v6) */