Yinghai Lu 752bea4abb x86: reserve dma32 early for gart
a system with 256 GB of RAM, when NUMA is disabled crashes the
following way:

Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Cannot allocate aperture memory hole (ffff8101c0000000,65536K)
Kernel panic - not syncing: Not enough memory for aperture
Pid: 0, comm: swapper Not tainted 2.6.25-rc4-x86-latest.git #33

Call Trace:
 [<ffffffff84037c62>] panic+0xb2/0x190
 [<ffffffff840381fc>] ? release_console_sem+0x7c/0x250
 [<ffffffff847b1628>] ? __alloc_bootmem_nopanic+0x48/0x90
 [<ffffffff847b0ac9>] ? free_bootmem+0x29/0x50
 [<ffffffff847ac1f7>] gart_iommu_hole_init+0x5e7/0x680
 [<ffffffff847b255b>] ? alloc_large_system_hash+0x16b/0x310
 [<ffffffff84506a2f>] ? _etext+0x0/0x1
 [<ffffffff847a2e8c>] pci_iommu_alloc+0x1c/0x40
 [<ffffffff847ac795>] mem_init+0x45/0x1a0
 [<ffffffff8479ff35>] start_kernel+0x295/0x380
 [<ffffffff8479f1c2>] _sinittext+0x1c2/0x230

the root cause is : memmap PMD is too big,
[ffffe200e0600000-ffffe200e07fffff] PMD ->ffff81383c000000 on node 0
almost near 4G..., and vmemmap_alloc_block will use up the ram under 4G.

solution will be:
1. make memmap allocation get memory above 4G...
2. reserve some dma32 range early before we try to set up memmap for all.
and release that before pci_iommu_alloc, so gart or swiotlb could get some
range under 4g limit for sure.

the patch is using method 2.
because method1 may need more code to handle SPARSEMEM and SPASEMEM_VMEMMAP

will get
Your BIOS doesn't leave a aperture memory hole
Please enable the IOMMU option in the BIOS setup
This costs you 64 MB of RAM
Mapping aperture over 65536 KB of RAM @ 4000000
Memory: 264245736k/268959744k available (8484k kernel code, 4187464k reserved, 4004k data, 724k init)

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-04-19 19:19:55 +02:00

67 lines
1.8 KiB
C

#ifndef __x8664_PCI_H
#define __x8664_PCI_H
#ifdef __KERNEL__
#ifdef CONFIG_CALGARY_IOMMU
static inline void *pci_iommu(struct pci_bus *bus)
{
struct pci_sysdata *sd = bus->sysdata;
return sd->iommu;
}
static inline void set_pci_iommu(struct pci_bus *bus, void *val)
{
struct pci_sysdata *sd = bus->sysdata;
sd->iommu = val;
}
#endif /* CONFIG_CALGARY_IOMMU */
extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
int reg, int len, u32 *value);
extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
int reg, int len, u32 value);
extern void dma32_reserve_bootmem(void);
extern void pci_iommu_alloc(void);
/* The PCI address space does equal the physical memory
* address space. The networking and block device layers use
* this boolean for bounce buffer decisions
*
* On AMD64 it mostly equals, but we set it to zero if a hardware
* IOMMU (gart) of sotware IOMMU (swiotlb) is available.
*/
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
__u32 LEN_NAME;
#define pci_unmap_addr(PTR, ADDR_NAME) \
((PTR)->ADDR_NAME)
#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
(((PTR)->ADDR_NAME) = (VAL))
#define pci_unmap_len(PTR, LEN_NAME) \
((PTR)->LEN_NAME)
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
(((PTR)->LEN_NAME) = (VAL))
#else
/* No IOMMU */
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
#define pci_unmap_addr(PTR, ADDR_NAME) (0)
#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0)
#define pci_unmap_len(PTR, LEN_NAME) (0)
#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0)
#endif
#endif /* __KERNEL__ */
#endif /* __x8664_PCI_H */