From 1ecd3c7ea76488c63b4b0a2561fd7eaf96cc8028 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Wed, 8 Feb 2012 17:13:37 -0800 Subject: [PATCH 1/5] nilfs2: avoid overflowing segment numbers in nilfs_ioctl_clean_segments() nsegs is read from userspace. Limit its value and avoid overflowing nsegs * sizeof(__u64) in the subsequent call to memdup_user(). This patch complements 481fe17e973fb9 ("nilfs2: potential integer overflow in nilfs_ioctl_clean_segments()"). Signed-off-by: Xi Wang Cc: Haogang Chen Acked-by: Ryusuke Konishi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nilfs2/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 886649627c3d..2a70fce70c65 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -603,6 +603,8 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; + if (nsegs > UINT_MAX / sizeof(__u64)) + goto out; /* * argv[4] points to segment numbers this ioctl cleans. We From dc9086004b3d5db75997a645b3fe08d9138b7ad0 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 8 Feb 2012 17:13:38 -0800 Subject: [PATCH 2/5] mm: compaction: check for overlapping nodes during isolation for migration When isolating pages for migration, migration starts at the start of a zone while the free scanner starts at the end of the zone. Migration avoids entering a new zone by never going beyond the free scanned. Unfortunately, in very rare cases nodes can overlap. When this happens, migration isolates pages without the LRU lock held, corrupting lists which will trigger errors in reclaim or during page free such as in the following oops BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: [] free_pcppages_bulk+0xcc/0x450 PGD 1dda554067 PUD 1e1cb58067 PMD 0 Oops: 0000 [#1] SMP CPU 37 Pid: 17088, comm: memcg_process_s Tainted: G X RIP: free_pcppages_bulk+0xcc/0x450 Process memcg_process_s (pid: 17088, threadinfo ffff881c2926e000, task ffff881c2926c0c0) Call Trace: free_hot_cold_page+0x17e/0x1f0 __pagevec_free+0x90/0xb0 release_pages+0x22a/0x260 pagevec_lru_move_fn+0xf3/0x110 putback_lru_page+0x66/0xe0 unmap_and_move+0x156/0x180 migrate_pages+0x9e/0x1b0 compact_zone+0x1f3/0x2f0 compact_zone_order+0xa2/0xe0 try_to_compact_pages+0xdf/0x110 __alloc_pages_direct_compact+0xee/0x1c0 __alloc_pages_slowpath+0x370/0x830 __alloc_pages_nodemask+0x1b1/0x1c0 alloc_pages_vma+0x9b/0x160 do_huge_pmd_anonymous_page+0x160/0x270 do_page_fault+0x207/0x4c0 page_fault+0x25/0x30 The "X" in the taint flag means that external modules were loaded but but is unrelated to the bug triggering. The real problem was because the PFN layout looks like this Zone PFN ranges: DMA 0x00000010 -> 0x00001000 DMA32 0x00001000 -> 0x00100000 Normal 0x00100000 -> 0x01e80000 Movable zone start PFN for each node early_node_map[14] active PFN ranges 0: 0x00000010 -> 0x0000009b 0: 0x00000100 -> 0x0007a1ec 0: 0x0007a354 -> 0x0007a379 0: 0x0007f7ff -> 0x0007f800 0: 0x00100000 -> 0x00680000 1: 0x00680000 -> 0x00e80000 0: 0x00e80000 -> 0x01080000 1: 0x01080000 -> 0x01280000 0: 0x01280000 -> 0x01480000 1: 0x01480000 -> 0x01680000 0: 0x01680000 -> 0x01880000 1: 0x01880000 -> 0x01a80000 0: 0x01a80000 -> 0x01c80000 1: 0x01c80000 -> 0x01e80000 The fix is straight-forward. isolate_migratepages() has to make a similar check to isolate_freepage to ensure that it never isolates pages from a zone it does not hold the LRU lock for. This was discovered in a 3.0-based kernel but it affects 3.1.x, 3.2.x and current mainline. Signed-off-by: Mel Gorman Acked-by: Michal Nazarewicz Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/compaction.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/mm/compaction.c b/mm/compaction.c index bd939a574b84..d9ebebe1a2aa 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -330,8 +330,17 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, continue; nr_scanned++; - /* Get the page and skip if free */ + /* + * Get the page and ensure the page is within the same zone. + * See the comment in isolate_freepages about overlapping + * nodes. It is deliberate that the new zone lock is not taken + * as memory compaction should not move pages between nodes. + */ page = pfn_to_page(low_pfn); + if (page_zone(page) != zone) + continue; + + /* Skip if free */ if (PageBuddy(page)) continue; From ec44fd429879f7d4f28021f1a7f9fb0b5f831aab Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 8 Feb 2012 17:13:39 -0800 Subject: [PATCH 3/5] drivers/leds/leds-lm3530.c: fix setting pltfm->als_vmax In current code, pltfm->als_vmin is set to LM3530_ALS_WINDOW_mV and pltfm->als_vmax is 0. This does not make sense. I think what we want here is setting pltfm->als_vmax to LM3530_ALS_WINDOW_mV. Both als_vmin and als_vmax local variables will be set to pltfm->als_vmin and pltfm->als_vmax by a few lines latter. Thus also remove a redundant assignment for als_vmin and als_vmax in this patch. Signed-off-by: Axel Lin Cc: Shreshtha Kumar Sahu Acked-by: Milo(Woogyom) Kim Tested-by: Milo(Woogyom) Kim Cc: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/leds/leds-lm3530.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c index 45e6878d7374..e59c166a0ce2 100644 --- a/drivers/leds/leds-lm3530.c +++ b/drivers/leds/leds-lm3530.c @@ -164,8 +164,8 @@ static int lm3530_init_registers(struct lm3530_data *drvdata) if (drvdata->mode == LM3530_BL_MODE_ALS) { if (pltfm->als_vmax == 0) { - pltfm->als_vmin = als_vmin = 0; - pltfm->als_vmin = als_vmax = LM3530_ALS_WINDOW_mV; + pltfm->als_vmin = 0; + pltfm->als_vmax = LM3530_ALS_WINDOW_mV; } als_vmin = pltfm->als_vmin; From b9980cdcf2524c5fe15d8cbae9c97b3ed6385563 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 8 Feb 2012 17:13:40 -0800 Subject: [PATCH 4/5] mm: fix UP THP spin_is_locked BUGs Fix CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_SMP=n CONFIG_DEBUG_VM=y CONFIG_DEBUG_SPINLOCK=n kernel: spin_is_locked() is then always false, and so triggers some BUGs in Transparent HugePage codepaths. asm-generic/bug.h mentions this problem, and provides a WARN_ON_SMP(x); but being too lazy to add VM_BUG_ON_SMP, BUG_ON_SMP, WARN_ON_SMP_ONCE, VM_WARN_ON_SMP_ONCE, just test NR_CPUS != 1 in the existing VM_BUG_ONs. Signed-off-by: Hugh Dickins Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 ++-- mm/swap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index b3ffc21ce801..91d3efb25d15 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2083,7 +2083,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) { struct mm_struct *mm = mm_slot->mm; - VM_BUG_ON(!spin_is_locked(&khugepaged_mm_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); if (khugepaged_test_exit(mm)) { /* free mm_slot */ @@ -2113,7 +2113,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int progress = 0; VM_BUG_ON(!pages); - VM_BUG_ON(!spin_is_locked(&khugepaged_mm_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); if (khugepaged_scan.mm_slot) mm_slot = khugepaged_scan.mm_slot; diff --git a/mm/swap.c b/mm/swap.c index b0f529b38979..fff1ff7fb9ad 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -659,7 +659,7 @@ void lru_add_page_tail(struct zone* zone, VM_BUG_ON(!PageHead(page)); VM_BUG_ON(PageCompound(page_tail)); VM_BUG_ON(PageLRU(page_tail)); - VM_BUG_ON(!spin_is_locked(&zone->lru_lock)); + VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock)); SetPageLRU(page_tail); From 025e4ab3db07fcbf62c01e4f30d1012234beb980 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 8 Feb 2012 17:13:41 -0800 Subject: [PATCH 5/5] pcmcia: fix socket refcount decrementing on each resume This fixes a memory-corrupting bug: not only does it cause the warning, but as a result of dropping the refcount to zero, it causes the pcmcia_socket0 device structure to be freed while it still has references, causing slab caches corruption. A fatal oops quickly follows this warning - often even just a 'dmesg' following the warning causes the kernel to oops. While testing suspend/resume on an ARM device with PCMCIA support, and a CF card inserted, I found that after five suspend and resumes, the kernel would complain, and shortly die after with slab corruption. WARNING: at include/linux/kref.h:41 kobject_get+0x28/0x50() As the message doesn't give a clue about which kobject, and the built-in debugging in drivers/base/power/main.c happens too late, this was added right before each get_device(): printk("%s: %p [%s] %u\n", __func__, dev, kobject_name(&dev->kobj), atomic_read(&dev->kobj.kref.refcount)); and on the 3rd s2ram cycle, the following behaviour observed: On the 3rd suspend/resume cycle: dpm_prepare: c1a0d998 [pcmcia_socket0] 3 dpm_suspend: c1a0d998 [pcmcia_socket0] 3 dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 3 dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 3 dpm_resume: c1a0d998 [pcmcia_socket0] 3 dpm_complete: c1a0d998 [pcmcia_socket0] 2 4th: dpm_prepare: c1a0d998 [pcmcia_socket0] 2 dpm_suspend: c1a0d998 [pcmcia_socket0] 2 dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 2 dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 2 dpm_resume: c1a0d998 [pcmcia_socket0] 2 dpm_complete: c1a0d998 [pcmcia_socket0] 1 5th: dpm_prepare: c1a0d998 [pcmcia_socket0] 1 dpm_suspend: c1a0d998 [pcmcia_socket0] 1 dpm_suspend_noirq: c1a0d998 [pcmcia_socket0] 1 dpm_resume_noirq: c1a0d998 [pcmcia_socket0] 1 dpm_resume: c1a0d998 [pcmcia_socket0] 1 dpm_complete: c1a0d998 [pcmcia_socket0] 0 ------------[ cut here ]------------ WARNING: at include/linux/kref.h:41 kobject_get+0x28/0x50() Modules linked in: ucb1x00_core Backtrace: [] (dump_backtrace+0x0/0x110) from [] (dump_stack+0x18/0x1c) [] (dump_stack+0x0/0x1c) from [] (warn_slowpath_common+0x50/0x68) [] (warn_slowpath_common+0x0/0x68) from [] (warn_slowpath_null+0x24/0x28) [] (warn_slowpath_null+0x0/0x28) from [] (kobject_get+0x28/0x50) [] (kobject_get+0x0/0x50) from [] (get_device+0x1c/0x24) [] (dpm_complete+0x0/0x1a0) from [] (dpm_resume_end+0x1c/0x20) ... Looking at commit 7b24e7988263 ("pcmcia: split up central event handler"), the following change was made to cs.c: return 0; } #endif - - send_event(skt, CS_EVENT_PM_RESUME, CS_EVENT_PRI_LOW); + if (!(skt->state & SOCKET_CARDBUS) && (skt->callback)) + skt->callback->early_resume(skt); return 0; } And the corresponding change in ds.c is from: -static int ds_event(struct pcmcia_socket *skt, event_t event, int priority) -{ - struct pcmcia_socket *s = pcmcia_get_socket(skt); ... - switch (event) { ... - case CS_EVENT_PM_RESUME: - if (verify_cis_cache(skt) != 0) { - dev_dbg(&skt->dev, "cis mismatch - different card\n"); - /* first, remove the card */ - ds_event(skt, CS_EVENT_CARD_REMOVAL, CS_EVENT_PRI_HIGH); - mutex_lock(&s->ops_mutex); - destroy_cis_cache(skt); - kfree(skt->fake_cis); - skt->fake_cis = NULL; - s->functions = 0; - mutex_unlock(&s->ops_mutex); - /* now, add the new card */ - ds_event(skt, CS_EVENT_CARD_INSERTION, - CS_EVENT_PRI_LOW); - } - break; ... - } - pcmcia_put_socket(s); - return 0; -} /* ds_event */ to: +static int pcmcia_bus_early_resume(struct pcmcia_socket *skt) +{ + if (!verify_cis_cache(skt)) { + pcmcia_put_socket(skt); + return 0; + } + dev_dbg(&skt->dev, "cis mismatch - different card\n"); + /* first, remove the card */ + pcmcia_bus_remove(skt); + mutex_lock(&skt->ops_mutex); + destroy_cis_cache(skt); + kfree(skt->fake_cis); + skt->fake_cis = NULL; + skt->functions = 0; + mutex_unlock(&skt->ops_mutex); + /* now, add the new card */ + pcmcia_bus_add(skt); + return 0; +} As can be seen, the original function called pcmcia_get_socket() and pcmcia_put_socket() around the guts, whereas the replacement code calls pcmcia_put_socket() only in one path. This creates an imbalance in the refcounting. Testing with pcmcia_put_socket() put removed shows that the bug is gone: dpm_suspend: c1a10998 [pcmcia_socket0] 5 dpm_suspend_noirq: c1a10998 [pcmcia_socket0] 5 dpm_resume_noirq: c1a10998 [pcmcia_socket0] 5 dpm_resume: c1a10998 [pcmcia_socket0] 5 dpm_complete: c1a10998 [pcmcia_socket0] 5 Tested-by: Russell King Signed-off-by: Russell King Cc: Dominik Brodowski Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pcmcia/ds.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 749c2a16012c..1932029de48d 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -1269,10 +1269,8 @@ static int pcmcia_bus_add(struct pcmcia_socket *skt) static int pcmcia_bus_early_resume(struct pcmcia_socket *skt) { - if (!verify_cis_cache(skt)) { - pcmcia_put_socket(skt); + if (!verify_cis_cache(skt)) return 0; - } dev_dbg(&skt->dev, "cis mismatch - different card\n");