From 3964acd0dbec123aa0a621973a2a0580034b4788 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 31 Jul 2013 13:53:28 -0700 Subject: [PATCH 01/20] mm: mempolicy: fix mbind_range() && vma_adjust() interaction vma_adjust() does vma_set_policy(vma, vma_policy(next)) and this is doubly wrong: 1. This leaks vma->vm_policy if it is not NULL and not equal to next->vm_policy. This can happen if vma_merge() expands "area", not prev (case 8). 2. This sets the wrong policy if vma_merge() joins prev and area, area is the vma the caller needs to update and it still has the old policy. Revert commit 1444f92c8498 ("mm: merging memory blocks resets mempolicy") which introduced these problems. Change mbind_range() to recheck mpol_equal() after vma_merge() to fix the problem that commit tried to address. Signed-off-by: Oleg Nesterov Acked-by: KOSAKI Motohiro Cc: Steven T Hampson Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: Andi Kleen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 6 +++++- mm/mmap.c | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 74310017296e..4baf12e534d1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -732,7 +732,10 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (prev) { vma = prev; next = vma->vm_next; - continue; + if (mpol_equal(vma_policy(vma), new_pol)) + continue; + /* vma_merge() joined vma && vma->next, case 8 */ + goto replace; } if (vma->vm_start != vmstart) { err = split_vma(vma->vm_mm, vma, vmstart, 1); @@ -744,6 +747,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, if (err) goto out; } + replace: err = vma_replace_policy(vma, new_pol); if (err) goto out; diff --git a/mm/mmap.c b/mm/mmap.c index fbad7b091090..1edbaa3136c3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -865,7 +865,7 @@ again: remove_next = 1 + (end > next->vm_end); if (next->anon_vma) anon_vma_merge(vma, next); mm->map_count--; - vma_set_policy(vma, vma_policy(next)); + mpol_put(vma_policy(next)); kmem_cache_free(vm_area_cachep, next); /* * In mprotect's case 6 (see comments on vma_merge), From 62c610460d73fcfa5637cb497c6923ef1c42b12d Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Wed, 31 Jul 2013 13:53:29 -0700 Subject: [PATCH 02/20] ocfs2/refcounttree: add the missing NULL check of the return value of find_or_create_page() Add the missing NULL check of the return value of find_or_create_page() in function ocfs2_duplicate_clusters_by_page(). [akpm@linux-foundation.org: fix layout, per Joel] Signed-off-by: Gu Zheng Acked-by: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 998b17eda09d..9f6b96a09615 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2965,6 +2965,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, to = map_end & (PAGE_CACHE_SIZE - 1); page = find_or_create_page(mapping, page_index, GFP_NOFS); + if (!page) { + ret = -ENOMEM; + mlog_errno(ret); + break; + } /* * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page From d39de28c95876f8becb559d242eefe718ea1f747 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 31 Jul 2013 13:53:30 -0700 Subject: [PATCH 03/20] dmi_scan: add comments on dmi_present() and the loop in dmi_scan_machine() My previous refactoring in commit 79bae42d51a5 ("dmi_scan: refactor dmi_scan_machine(), {smbios,dmi}_present()") resulted in slightly tricky code (though I think it's more elegant). Explain what it's doing. Signed-off-by: Ben Hutchings Cc: Zhenzhong Duan Cc: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/firmware/dmi_scan.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index eb760a218da4..232fa8fce26a 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -419,6 +419,13 @@ static void __init dmi_format_ids(char *buf, size_t len) dmi_get_system_info(DMI_BIOS_DATE)); } +/* + * Check for DMI/SMBIOS headers in the system firmware image. Any + * SMBIOS header must start 16 bytes before the DMI header, so take a + * 32 byte buffer and check for DMI at offset 16 and SMBIOS at offset + * 0. If the DMI header is present, set dmi_ver accordingly (SMBIOS + * takes precedence) and return 0. Otherwise return 1. + */ static int __init dmi_present(const u8 *buf) { int smbios_ver; @@ -506,6 +513,13 @@ void __init dmi_scan_machine(void) if (p == NULL) goto error; + /* + * Iterate over all possible DMI header addresses q. + * Maintain the 32 bytes around q in buf. On the + * first iteration, substitute zero for the + * out-of-range bytes so there is no chance of falsely + * detecting an SMBIOS header. + */ memset(buf, 0, 16); for (q = p; q < p + 0x10000; q += 16) { memcpy_fromio(buf + 16, q, 16); From 5c4a97d1dabb3443da7acc309ae61c5de8494fa9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:32 -0700 Subject: [PATCH 04/20] MAINTAINERS: dynamic debug: Jason's not there... He must be too, umm, busy to update his own bouncing email address too. Signed-off-by: Joe Perches Acked-by: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a26b10e52aea..d83f70ffdbed 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2871,7 +2871,7 @@ F: drivers/media/usb/dvb-usb-v2/dvb_usb* F: drivers/media/usb/dvb-usb-v2/usb_urb.c DYNAMIC DEBUG -M: Jason Baron +M: Jason Baron S: Maintained F: lib/dynamic_debug.c F: include/linux/dynamic_debug.h From 4e5052948110140f1628dd986e27912b0e6b2065 Mon Sep 17 00:00:00 2001 From: Markus Trippelsdorf Date: Wed, 31 Jul 2013 13:53:33 -0700 Subject: [PATCH 05/20] .gitignore: ignore *.lz4 files Now that lz4 kernel compression is available, add *.lz4 to .gitignore. Signed-off-by: Markus Trippelsdorf Acked-by: Kyungsik Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 3b8b9b33be38..7e9932e55475 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ modules.builtin *.bz2 *.lzma *.xz +*.lz4 *.lzo *.patch *.gcno From f93f3c4ee48727713c3349b659adf8048fd4524b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 31 Jul 2013 13:53:34 -0700 Subject: [PATCH 06/20] rapidio: fix use after free in rio_unregister_scan() We're freeing the list iterator so we can't move to the next entry. Since there is only one matching mport_id, we can just break after finding it. Signed-off-by: Dan Carpenter Reviewed-by: Ryan Mallon Acked-by: Alexandre Bounine Cc: Matt Porter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rapidio/rio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c index f4f30af2df68..2e8a20cac588 100644 --- a/drivers/rapidio/rio.c +++ b/drivers/rapidio/rio.c @@ -1715,11 +1715,13 @@ int rio_unregister_scan(int mport_id, struct rio_scan *scan_ops) (mport_id == RIO_MPORT_ANY && port->nscan == scan_ops)) port->nscan = NULL; - list_for_each_entry(scan, &rio_scans, node) + list_for_each_entry(scan, &rio_scans, node) { if (scan->mport_id == mport_id) { list_del(&scan->node); kfree(scan); + break; } + } mutex_unlock(&rio_mport_list_lock); From 10e84b97ed799be404836dc7f71ab47d4571265a Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 31 Jul 2013 13:53:35 -0700 Subject: [PATCH 07/20] mm: sched: numa: fix NUMA balancing when !SCHED_DEBUG Commit 3105b86a9fee ("mm: sched: numa: Control enabling and disabling of NUMA balancing if !SCHED_DEBUG") defined numabalancing_enabled to control the enabling and disabling of automatic NUMA balancing, but it is never used. I believe the intention was to use this in place of sched_feat_numa(NUMA). Currently, if SCHED_DEBUG is not defined, sched_feat_numa(NUMA) will never be changed from the initial "false". Signed-off-by: Dave Kleikamp Acked-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/fair.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bb456f44b7b1..9565645e3202 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -851,7 +851,7 @@ void task_numa_fault(int node, int pages, bool migrated) { struct task_struct *p = current; - if (!sched_feat_numa(NUMA)) + if (!numabalancing_enabled) return; /* FIXME: Allocate task-specific structure for placement policy here */ @@ -5786,7 +5786,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) entity_tick(cfs_rq, se, queued); } - if (sched_feat_numa(NUMA)) + if (numabalancing_enabled) task_tick_numa(rq, curr); update_rq_runnable_avg(rq, 1); From 31a1b26f16e822577def5402ffc79cfe4aed2db9 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 31 Jul 2013 13:53:36 -0700 Subject: [PATCH 08/20] arch/x86/platform/ce4100/ce4100.c: include reboot.h Fix the build: arch/x86/platform/ce4100/ce4100.c: In function 'x86_ce4100_early_setup': arch/x86/platform/ce4100/ce4100.c:165:2: error: 'reboot_type' undeclared (first use in this function) Reported-by: Wu Fengguang Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/platform/ce4100/ce4100.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c index 643b8b5eee86..8244f5ec2f4c 100644 --- a/arch/x86/platform/ce4100/ce4100.c +++ b/arch/x86/platform/ce4100/ce4100.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include From ef2a2cbdda7e9d084a85846770fcc844958881f6 Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Wed, 31 Jul 2013 13:53:37 -0700 Subject: [PATCH 09/20] mm/swap.c: clear PageActive before adding pages onto unevictable list As a result of commit 13f7f78981e4 ("mm: pagevec: defer deciding which LRU to add a page to until pagevec drain time"), pages on unevictable lists can have both of PageActive and PageUnevictable set. This is not only confusing, but also corrupts page migration and shrink_[in]active_list. This patch fixes the problem by adding ClearPageActive before adding pages into unevictable list. It also cleans up VM_BUG_ONs. Signed-off-by: Naoya Horiguchi Cc: Mel Gorman Cc: KOSAKI Motohiro Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/mm/swap.c b/mm/swap.c index 4a1d0d2c52fa..d3982c062629 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -512,12 +512,7 @@ EXPORT_SYMBOL(__lru_cache_add); */ void lru_cache_add(struct page *page) { - if (PageActive(page)) { - VM_BUG_ON(PageUnevictable(page)); - } else if (PageUnevictable(page)) { - VM_BUG_ON(PageActive(page)); - } - + VM_BUG_ON(PageActive(page) && PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); __lru_cache_add(page); } @@ -539,6 +534,7 @@ void add_page_to_unevictable_list(struct page *page) spin_lock_irq(&zone->lru_lock); lruvec = mem_cgroup_page_lruvec(page, zone); + ClearPageActive(page); SetPageUnevictable(page); SetPageLRU(page); add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); @@ -833,7 +829,6 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, int active = PageActive(page); enum lru_list lru = page_lru(page); - VM_BUG_ON(PageUnevictable(page)); VM_BUG_ON(PageLRU(page)); SetPageLRU(page); From e180cf806a93ea1abbce47b245d25204ff557ce9 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 31 Jul 2013 13:53:39 -0700 Subject: [PATCH 10/20] thp, mm: avoid PageUnevictable on active/inactive lru lists active/inactive lru lists can contain unevicable pages (i.e. ramfs pages that have been placed on the LRU lists when first allocated), but these pages must not have PageUnevictable set - otherwise shrink_[in]active_list goes crazy: kernel BUG at /home/space/kas/git/public/linux-next/mm/vmscan.c:1122! 1090 static unsigned long isolate_lru_pages(unsigned long nr_to_scan, 1091 struct lruvec *lruvec, struct list_head *dst, 1092 unsigned long *nr_scanned, struct scan_control *sc, 1093 isolate_mode_t mode, enum lru_list lru) 1094 { ... 1108 switch (__isolate_lru_page(page, mode)) { 1109 case 0: ... 1116 case -EBUSY: ... 1121 default: 1122 BUG(); 1123 } 1124 } ... 1130 } __isolate_lru_page() returns EINVAL for PageUnevictable(page). For lru_add_page_tail(), it means we should not set PageUnevictable() for tail pages unless we're sure that it will go to LRU_UNEVICTABLE. Let's just copy PG_active and PG_unevictable from head page in __split_huge_page_refcount(), it will simplify lru_add_page_tail(). This will fix one more bug in lru_add_page_tail(): if page_evictable(page_tail) is false and PageLRU(page) is true, page_tail will go to the same lru as page, but nobody cares to sync page_tail active/inactive state with page. So we can end up with inactive page on active lru. The patch will fix it as well since we copy PG_active from head page. Signed-off-by: Kirill A. Shutemov Acked-by: Dave Hansen Cc: Naoya Horiguchi Cc: Mel Gorman Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/huge_memory.c | 4 +++- mm/swap.c | 20 ++------------------ 2 files changed, 5 insertions(+), 19 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 243e710c6039..a92012a71702 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1620,7 +1620,9 @@ static void __split_huge_page_refcount(struct page *page, ((1L << PG_referenced) | (1L << PG_swapbacked) | (1L << PG_mlocked) | - (1L << PG_uptodate))); + (1L << PG_uptodate) | + (1L << PG_active) | + (1L << PG_unevictable))); page_tail->flags |= (1L << PG_dirty); /* clear PageTail before overwriting first_page */ diff --git a/mm/swap.c b/mm/swap.c index d3982c062629..62b78a6e224f 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -770,8 +770,6 @@ EXPORT_SYMBOL(__pagevec_release); void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *list) { - int uninitialized_var(active); - enum lru_list lru; const int file = 0; VM_BUG_ON(!PageHead(page)); @@ -783,20 +781,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, if (!list) SetPageLRU(page_tail); - if (page_evictable(page_tail)) { - if (PageActive(page)) { - SetPageActive(page_tail); - active = 1; - lru = LRU_ACTIVE_ANON; - } else { - active = 0; - lru = LRU_INACTIVE_ANON; - } - } else { - SetPageUnevictable(page_tail); - lru = LRU_UNEVICTABLE; - } - if (likely(PageLRU(page))) list_add_tail(&page_tail->lru, &page->lru); else if (list) { @@ -812,13 +796,13 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, * Use the standard add function to put page_tail on the list, * but then correct its position so they all end up in order. */ - add_page_to_lru_list(page_tail, lruvec, lru); + add_page_to_lru_list(page_tail, lruvec, page_lru(page_tail)); list_head = page_tail->lru.prev; list_move_tail(&page_tail->lru, list_head); } if (!PageUnevictable(page)) - update_page_reclaim_stat(lruvec, file, active); + update_page_reclaim_stat(lruvec, file, PageActive(page_tail)); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ From 9d8c5b5284e4a0167c5f7b1193692492358b8700 Mon Sep 17 00:00:00 2001 From: Heesub Shin Date: Wed, 31 Jul 2013 13:53:40 -0700 Subject: [PATCH 11/20] mm: zbud: fix condition check on allocation size zbud_alloc() incorrectly verifies the size of allocation limit. It should deny the allocation request greater than (PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE), not (PAGE_SIZE - ZHDR_SIZE_ALIGNED) which has no remaining spaces for its buddy. There is no point in spending the entire zbud page storing only a single page, since we don't have any benefits. Signed-off-by: Heesub Shin Acked-by: Seth Jennings Cc: Bob Liu Cc: Dongjun Shin Cc: Sunae Seo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zbud.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/zbud.c b/mm/zbud.c index 9bb4710e3589..ad1e781284fd 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -257,7 +257,7 @@ int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp, if (size <= 0 || gfp & __GFP_HIGHMEM) return -EINVAL; - if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED) + if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE) return -ENOSPC; chunks = size_to_chunks(size); spin_lock(&pool->lock); From b99b94b52339dc186810e29f1f6472d86c42d2d9 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Wed, 31 Jul 2013 13:53:41 -0700 Subject: [PATCH 12/20] drivers/rtc/rtc-twl.c: fix: rtcX/wakealarm attribute isn't created The device_init_wakeup() should be called before rtc_device_register(). Otherwise, sysfs "sys/class/rtc/rtcX/wakealarm" attribute will not be seen from User space. Signed-off-by: Grygorii Strashko Cc: Kevin Hilman Cc: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-twl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 02faf3c4e0d5..c2e80d7ca5e2 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -524,6 +524,8 @@ static int twl_rtc_probe(struct platform_device *pdev) if (ret < 0) goto out1; + device_init_wakeup(&pdev->dev, 1); + rtc = rtc_device_register(pdev->name, &pdev->dev, &twl_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { @@ -542,7 +544,6 @@ static int twl_rtc_probe(struct platform_device *pdev) } platform_set_drvdata(pdev, rtc); - device_init_wakeup(&pdev->dev, 1); return 0; out2: From b9ee979e9d770dc10f94936ef6ff9efddc23c911 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:42 -0700 Subject: [PATCH 13/20] printk: move to separate directory for easier modification Make it easier to break up printk into bite-sized chunks. Remove printk path/filename from comment. Signed-off-by: Joe Perches Cc: Samuel Thibault Cc: Ming Lei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DocBook/device-drivers.tmpl | 2 +- kernel/Makefile | 3 ++- kernel/printk/Makefile | 1 + kernel/{ => printk}/printk.c | 0 4 files changed, 4 insertions(+), 2 deletions(-) create mode 100644 kernel/printk/Makefile rename kernel/{ => printk}/printk.c (100%) diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl index cbfdf5486639..fe397f90a34f 100644 --- a/Documentation/DocBook/device-drivers.tmpl +++ b/Documentation/DocBook/device-drivers.tmpl @@ -84,7 +84,7 @@ X!Iinclude/linux/kobject.h Kernel utility functions !Iinclude/linux/kernel.h -!Ekernel/printk.c +!Ekernel/printk/printk.c !Ekernel/panic.c !Ekernel/sys.c !Ekernel/rcupdate.c diff --git a/kernel/Makefile b/kernel/Makefile index 470839d1a30e..35ef1185e359 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel. # -obj-y = fork.o exec_domain.o panic.o printk.o \ +obj-y = fork.o exec_domain.o panic.o \ cpu.o exit.o itimer.o time.o softirq.o resource.o \ sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \ signal.o sys.o kmod.o workqueue.o pid.o task_work.o \ @@ -24,6 +24,7 @@ endif obj-y += sched/ obj-y += power/ +obj-y += printk/ obj-y += cpu/ obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile new file mode 100644 index 000000000000..36d306d9273c --- /dev/null +++ b/kernel/printk/Makefile @@ -0,0 +1 @@ +obj-y = printk.o diff --git a/kernel/printk.c b/kernel/printk/printk.c similarity index 100% rename from kernel/printk.c rename to kernel/printk/printk.c From d197c43d04decb6b1298fa3ef26ea04a9ca7c977 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:44 -0700 Subject: [PATCH 14/20] printk: add console_cmdline.h Add an include file for the console_cmdline struct so that the braille console driver can be separated. Signed-off-by: Joe Perches Cc: Samuel Thibault Cc: Ming Lei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/console_cmdline.h | 14 ++++++++++++++ kernel/printk/printk.c | 12 +++--------- 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 kernel/printk/console_cmdline.h diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h new file mode 100644 index 000000000000..cbd69d842341 --- /dev/null +++ b/kernel/printk/console_cmdline.h @@ -0,0 +1,14 @@ +#ifndef _CONSOLE_CMDLINE_H +#define _CONSOLE_CMDLINE_H + +struct console_cmdline +{ + char name[8]; /* Name of the driver */ + int index; /* Minor dev. to use */ + char *options; /* Options for the driver */ +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + char *brl_options; /* Options for braille driver */ +#endif +}; + +#endif diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 69b0890ed7e5..4da2b2c7f67d 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -51,6 +51,8 @@ #define CREATE_TRACE_POINTS #include +#include "console_cmdline.h" + /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -105,19 +107,11 @@ static struct console *exclusive_console; /* * Array of consoles built from command line options (console=) */ -struct console_cmdline -{ - char name[8]; /* Name of the driver */ - int index; /* Minor dev. to use */ - char *options; /* Options for the driver */ -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - char *brl_options; /* Options for braille driver */ -#endif -}; #define MAX_CMDLINECONSOLES 8 static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES]; + static int selected_console = -1; static int preferred_console = -1; int console_set_on_cmdline; From bbeddf52adc1b4207674ab88686cbbe58c24f721 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:45 -0700 Subject: [PATCH 15/20] printk: move braille console support into separate braille.[ch] files Create files with prototypes and static inlines for braille support. Make braille_console functions return 1 on success. Corrected CONFIG_A11Y_BRAILLE_CONSOLE=n _braille_console_setup return value to NULL. Signed-off-by: Joe Perches Reviewed-by: Samuel Thibault Cc: Ming Lei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../accessibility/braille/braille_console.c | 9 +++- kernel/printk/Makefile | 1 + kernel/printk/braille.c | 48 +++++++++++++++++++ kernel/printk/braille.h | 48 +++++++++++++++++++ kernel/printk/printk.c | 44 +++++------------ 5 files changed, 117 insertions(+), 33 deletions(-) create mode 100644 kernel/printk/braille.c create mode 100644 kernel/printk/braille.h diff --git a/drivers/accessibility/braille/braille_console.c b/drivers/accessibility/braille/braille_console.c index d21167bfc865..dc34a5b8bcee 100644 --- a/drivers/accessibility/braille/braille_console.c +++ b/drivers/accessibility/braille/braille_console.c @@ -359,6 +359,9 @@ int braille_register_console(struct console *console, int index, char *console_options, char *braille_options) { int ret; + + if (!(console->flags & CON_BRL)) + return 0; if (!console_options) /* Only support VisioBraille for now */ console_options = "57600o8"; @@ -374,15 +377,17 @@ int braille_register_console(struct console *console, int index, braille_co = console; register_keyboard_notifier(&keyboard_notifier_block); register_vt_notifier(&vt_notifier_block); - return 0; + return 1; } int braille_unregister_console(struct console *console) { if (braille_co != console) return -EINVAL; + if (!(console->flags & CON_BRL)) + return 0; unregister_keyboard_notifier(&keyboard_notifier_block); unregister_vt_notifier(&vt_notifier_block); braille_co = NULL; - return 0; + return 1; } diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 36d306d9273c..85405bdcf2b3 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1 +1,2 @@ obj-y = printk.o +obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c new file mode 100644 index 000000000000..b51087fb9ace --- /dev/null +++ b/kernel/printk/braille.c @@ -0,0 +1,48 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +#include "console_cmdline.h" +#include "braille.h" + +char *_braille_console_setup(char **str, char **brl_options) +{ + if (!memcmp(*str, "brl,", 4)) { + *brl_options = ""; + *str += 4; + } else if (!memcmp(str, "brl=", 4)) { + *brl_options = *str + 4; + *str = strchr(*brl_options, ','); + if (!*str) + pr_err("need port name after brl=\n"); + else + *((*str)++) = 0; + } + + return *str; +} + +int +_braille_register_console(struct console *console, struct console_cmdline *c) +{ + int rtn = 0; + + if (c->brl_options) { + console->flags |= CON_BRL; + rtn = braille_register_console(console, c->index, c->options, + c->brl_options); + } + + return rtn; +} + +int +_braille_unregister_console(struct console *console) +{ + if (console->flags & CON_BRL) + return braille_unregister_console(console); + + return 0; +} diff --git a/kernel/printk/braille.h b/kernel/printk/braille.h new file mode 100644 index 000000000000..769d771145c8 --- /dev/null +++ b/kernel/printk/braille.h @@ -0,0 +1,48 @@ +#ifndef _PRINTK_BRAILLE_H +#define _PRINTK_BRAILLE_H + +#ifdef CONFIG_A11Y_BRAILLE_CONSOLE + +static inline void +braille_set_options(struct console_cmdline *c, char *brl_options) +{ + c->brl_options = brl_options; +} + +char * +_braille_console_setup(char **str, char **brl_options); + +int +_braille_register_console(struct console *console, struct console_cmdline *c); + +int +_braille_unregister_console(struct console *console); + +#else + +static inline void +braille_set_options(struct console_cmdline *c, char *brl_options) +{ +} + +static inline char * +_braille_console_setup(char **str, char **brl_options) +{ + return NULL; +} + +static inline int +_braille_register_console(struct console *console, struct console_cmdline *c) +{ + return 0; +} + +static inline int +_braille_unregister_console(struct console *console) +{ + return 0; +} + +#endif + +#endif diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 4da2b2c7f67d..5a022e0c654c 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -52,6 +52,7 @@ #include #include "console_cmdline.h" +#include "braille.h" /* printk's without a loglevel use this.. */ #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL @@ -1769,9 +1770,8 @@ static int __add_preferred_console(char *name, int idx, char *options, c = &console_cmdline[i]; strlcpy(c->name, name, sizeof(c->name)); c->options = options; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - c->brl_options = brl_options; -#endif + braille_set_options(c, brl_options); + c->index = idx; return 0; } @@ -1784,20 +1784,8 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (!memcmp(str, "brl,", 4)) { - brl_options = ""; - str += 4; - } else if (!memcmp(str, "brl=", 4)) { - brl_options = str + 4; - str = strchr(brl_options, ','); - if (!str) { - printk(KERN_ERR "need port name after brl=\n"); - return 1; - } - *(str++) = 0; - } -#endif + if (_braille_console_setup(&str, &brl_options)) + return 1; /* * Decode str into name, index, options. @@ -2291,16 +2279,10 @@ void register_console(struct console *newcon) continue; if (newcon->index < 0) newcon->index = console_cmdline[i].index; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (console_cmdline[i].brl_options) { - newcon->flags |= CON_BRL; - braille_register_console(newcon, - console_cmdline[i].index, - console_cmdline[i].options, - console_cmdline[i].brl_options); + + if (_braille_register_console(newcon, &console_cmdline[i])) return; - } -#endif + if (newcon->setup && newcon->setup(newcon, console_cmdline[i].options) != 0) break; @@ -2388,13 +2370,13 @@ EXPORT_SYMBOL(register_console); int unregister_console(struct console *console) { struct console *a, *b; - int res = 1; + int res; -#ifdef CONFIG_A11Y_BRAILLE_CONSOLE - if (console->flags & CON_BRL) - return braille_unregister_console(console); -#endif + res = _braille_unregister_console(console); + if (res) + return res; + res = 1; console_lock(); if (console_drivers == console) { console_drivers=console->next; From 23475408c618ecd5b44b7e069fd65ec73d17d9f0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:46 -0700 Subject: [PATCH 16/20] printk: use pointer for console_cmdline indexing Make the code a bit more compact by always using a pointer for the active console_cmdline. Move overly indented code to correct indent level. Signed-off-by: Joe Perches Cc: Samuel Thibault Cc: Ming Lei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 49 ++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a022e0c654c..8f1fb50aa3ce 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1756,18 +1756,19 @@ static int __add_preferred_console(char *name, int idx, char *options, * See if this tty is not yet registered, and * if we have a slot free. */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - if (!brl_options) - selected_console = i; - return 0; + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { + if (strcmp(c->name, name) == 0 && c->index == idx) { + if (!brl_options) + selected_console = i; + return 0; } + } if (i == MAX_CMDLINECONSOLES) return -E2BIG; if (!brl_options) selected_console = i; - c = &console_cmdline[i]; strlcpy(c->name, name, sizeof(c->name)); c->options = options; braille_set_options(c, brl_options); @@ -1840,15 +1841,15 @@ int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, cha struct console_cmdline *c; int i; - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; i++) - if (strcmp(console_cmdline[i].name, name) == 0 && - console_cmdline[i].index == idx) { - c = &console_cmdline[i]; - strlcpy(c->name, name_new, sizeof(c->name)); - c->name[sizeof(c->name) - 1] = 0; - c->options = options; - c->index = idx_new; - return i; + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) + if (strcmp(c->name, name) == 0 && c->index == idx) { + strlcpy(c->name, name_new, sizeof(c->name)); + c->name[sizeof(c->name) - 1] = 0; + c->options = options; + c->index = idx_new; + return i; } /* not found */ return -1; @@ -2223,6 +2224,7 @@ void register_console(struct console *newcon) int i; unsigned long flags; struct console *bcon = NULL; + struct console_cmdline *c; /* * before we register a new CON_BOOT console, make sure we don't @@ -2270,24 +2272,25 @@ void register_console(struct console *newcon) * See if this console matches one we selected on * the command line. */ - for (i = 0; i < MAX_CMDLINECONSOLES && console_cmdline[i].name[0]; - i++) { - if (strcmp(console_cmdline[i].name, newcon->name) != 0) + for (i = 0, c = console_cmdline; + i < MAX_CMDLINECONSOLES && c->name[0]; + i++, c++) { + if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && - newcon->index != console_cmdline[i].index) + newcon->index != c->index) continue; if (newcon->index < 0) - newcon->index = console_cmdline[i].index; + newcon->index = c->index; - if (_braille_register_console(newcon, &console_cmdline[i])) + if (_braille_register_console(newcon, c)) return; if (newcon->setup && newcon->setup(newcon, console_cmdline[i].options) != 0) break; newcon->flags |= CON_ENABLED; - newcon->index = console_cmdline[i].index; + newcon->index = c->index; if (i == selected_console) { newcon->flags |= CON_CONSDEV; preferred_console = selected_console; From 62e32ac3505a0cab1c5ef8ea2c0eab3b26ed855f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Wed, 31 Jul 2013 13:53:47 -0700 Subject: [PATCH 17/20] printk: rename struct log to struct printk_log Rename the struct to enable moving portions of printk.c to separate files. The rename changes output of /proc/vmcoreinfo. Signed-off-by: Joe Perches Cc: Samuel Thibault Cc: Ming Lei Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/printk/printk.c | 80 +++++++++++++++++++++--------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 8f1fb50aa3ce..5b5a7080e2a5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -173,7 +173,7 @@ static int console_may_schedule; * 67 "g" * 0032 00 00 00 padding to next message header * - * The 'struct log' buffer header must never be directly exported to + * The 'struct printk_log' buffer header must never be directly exported to * userspace, it is a kernel-private implementation detail that might * need to be changed in the future, when the requirements change. * @@ -195,7 +195,7 @@ enum log_flags { LOG_CONT = 8, /* text is a fragment of a continuation line */ }; -struct log { +struct printk_log { u64 ts_nsec; /* timestamp in nanoseconds */ u16 len; /* length of entire record */ u16 text_len; /* length of text buffer */ @@ -243,7 +243,7 @@ static u32 clear_idx; #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) #define LOG_ALIGN 4 #else -#define LOG_ALIGN __alignof__(struct log) +#define LOG_ALIGN __alignof__(struct printk_log) #endif #define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT) static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); @@ -254,35 +254,35 @@ static u32 log_buf_len = __LOG_BUF_LEN; static volatile unsigned int logbuf_cpu = UINT_MAX; /* human readable text of the record */ -static char *log_text(const struct log *msg) +static char *log_text(const struct printk_log *msg) { - return (char *)msg + sizeof(struct log); + return (char *)msg + sizeof(struct printk_log); } /* optional key/value pair dictionary attached to the record */ -static char *log_dict(const struct log *msg) +static char *log_dict(const struct printk_log *msg) { - return (char *)msg + sizeof(struct log) + msg->text_len; + return (char *)msg + sizeof(struct printk_log) + msg->text_len; } /* get record by index; idx must point to valid msg */ -static struct log *log_from_idx(u32 idx) +static struct printk_log *log_from_idx(u32 idx) { - struct log *msg = (struct log *)(log_buf + idx); + struct printk_log *msg = (struct printk_log *)(log_buf + idx); /* * A length == 0 record is the end of buffer marker. Wrap around and * read the message at the start of the buffer. */ if (!msg->len) - return (struct log *)log_buf; + return (struct printk_log *)log_buf; return msg; } /* get next record; idx must point to valid msg */ static u32 log_next(u32 idx) { - struct log *msg = (struct log *)(log_buf + idx); + struct printk_log *msg = (struct printk_log *)(log_buf + idx); /* length == 0 indicates the end of the buffer; wrap */ /* @@ -291,7 +291,7 @@ static u32 log_next(u32 idx) * return the one after that. */ if (!msg->len) { - msg = (struct log *)log_buf; + msg = (struct printk_log *)log_buf; return msg->len; } return idx + msg->len; @@ -303,11 +303,11 @@ static void log_store(int facility, int level, const char *dict, u16 dict_len, const char *text, u16 text_len) { - struct log *msg; + struct printk_log *msg; u32 size, pad_len; /* number of '\0' padding bytes to next message */ - size = sizeof(struct log) + text_len + dict_len; + size = sizeof(struct printk_log) + text_len + dict_len; pad_len = (-size) & (LOG_ALIGN - 1); size += pad_len; @@ -319,7 +319,7 @@ static void log_store(int facility, int level, else free = log_first_idx - log_next_idx; - if (free > size + sizeof(struct log)) + if (free > size + sizeof(struct printk_log)) break; /* drop old messages until we have enough contiuous space */ @@ -327,18 +327,18 @@ static void log_store(int facility, int level, log_first_seq++; } - if (log_next_idx + size + sizeof(struct log) >= log_buf_len) { + if (log_next_idx + size + sizeof(struct printk_log) >= log_buf_len) { /* * This message + an additional empty header does not fit * at the end of the buffer. Add an empty header with len == 0 * to signify a wrap around. */ - memset(log_buf + log_next_idx, 0, sizeof(struct log)); + memset(log_buf + log_next_idx, 0, sizeof(struct printk_log)); log_next_idx = 0; } /* fill message */ - msg = (struct log *)(log_buf + log_next_idx); + msg = (struct printk_log *)(log_buf + log_next_idx); memcpy(log_text(msg), text, text_len); msg->text_len = text_len; memcpy(log_dict(msg), dict, dict_len); @@ -351,7 +351,7 @@ static void log_store(int facility, int level, else msg->ts_nsec = local_clock(); memset(log_dict(msg) + dict_len, 0, pad_len); - msg->len = sizeof(struct log) + text_len + dict_len + pad_len; + msg->len = sizeof(struct printk_log) + text_len + dict_len + pad_len; /* insert message */ log_next_idx += msg->len; @@ -474,7 +474,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct devkmsg_user *user = file->private_data; - struct log *msg; + struct printk_log *msg; u64 ts_usec; size_t i; char cont = '-'; @@ -719,14 +719,14 @@ void log_buf_kexec_setup(void) VMCOREINFO_SYMBOL(log_first_idx); VMCOREINFO_SYMBOL(log_next_idx); /* - * Export struct log size and field offsets. User space tools can + * Export struct printk_log size and field offsets. User space tools can * parse it and detect any changes to structure down the line. */ - VMCOREINFO_STRUCT_SIZE(log); - VMCOREINFO_OFFSET(log, ts_nsec); - VMCOREINFO_OFFSET(log, len); - VMCOREINFO_OFFSET(log, text_len); - VMCOREINFO_OFFSET(log, dict_len); + VMCOREINFO_STRUCT_SIZE(printk_log); + VMCOREINFO_OFFSET(printk_log, ts_nsec); + VMCOREINFO_OFFSET(printk_log, len); + VMCOREINFO_OFFSET(printk_log, text_len); + VMCOREINFO_OFFSET(printk_log, dict_len); } #endif @@ -879,7 +879,7 @@ static size_t print_time(u64 ts, char *buf) (unsigned long)ts, rem_nsec / 1000); } -static size_t print_prefix(const struct log *msg, bool syslog, char *buf) +static size_t print_prefix(const struct printk_log *msg, bool syslog, char *buf) { size_t len = 0; unsigned int prefix = (msg->facility << 3) | msg->level; @@ -902,7 +902,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf) return len; } -static size_t msg_print_text(const struct log *msg, enum log_flags prev, +static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { const char *text = log_text(msg); @@ -964,7 +964,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev, static int syslog_print(char __user *buf, int size) { char *text; - struct log *msg; + struct printk_log *msg; int len = 0; text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL); @@ -1055,7 +1055,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = clear_idx; prev = 0; while (seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); len += msg_print_text(msg, prev, true, NULL, 0); prev = msg->flags; @@ -1068,7 +1068,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = clear_idx; prev = 0; while (len > size && seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); len -= msg_print_text(msg, prev, true, NULL, 0); prev = msg->flags; @@ -1082,7 +1082,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) len = 0; prev = 0; while (len >= 0 && seq < next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); int textlen; textlen = msg_print_text(msg, prev, true, text, @@ -1228,7 +1228,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file) error = 0; while (seq < log_next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); error += msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -1714,10 +1714,10 @@ static struct cont { u8 level; bool flushed:1; } cont; -static struct log *log_from_idx(u32 idx) { return NULL; } +static struct printk_log *log_from_idx(u32 idx) { return NULL; } static u32 log_next(u32 idx) { return 0; } static void call_console_drivers(int level, const char *text, size_t len) {} -static size_t msg_print_text(const struct log *msg, enum log_flags prev, +static size_t msg_print_text(const struct printk_log *msg, enum log_flags prev, bool syslog, char *buf, size_t size) { return 0; } static size_t cont_print_text(char *text, size_t size) { return 0; } @@ -2029,7 +2029,7 @@ void console_unlock(void) console_cont_flush(text, sizeof(text)); again: for (;;) { - struct log *msg; + struct printk_log *msg; size_t len; int level; @@ -2645,7 +2645,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len) { - struct log *msg; + struct printk_log *msg; size_t l = 0; bool ret = false; @@ -2757,7 +2757,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, idx = dumper->cur_idx; prev = 0; while (seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l += msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -2770,7 +2770,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, idx = dumper->cur_idx; prev = 0; while (l > size && seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l -= msg_print_text(msg, prev, true, NULL, 0); idx = log_next(idx); @@ -2785,7 +2785,7 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, l = 0; prev = 0; while (seq < dumper->next_seq) { - struct log *msg = log_from_idx(idx); + struct printk_log *msg = log_from_idx(idx); l += msg_print_text(msg, prev, syslog, buf + l, size - l); idx = log_next(idx); From 22f2020f84c6da2dd0acb2dce12e39e59ff7c8be Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:48 -0700 Subject: [PATCH 18/20] vmpressure: change vmpressure::sr_lock to spinlock There is nothing that can sleep inside critical sections protected by this lock and those sections are really small so there doesn't make much sense to use mutex for them. Change the log to a spinlock Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Reviewed-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 2 +- mm/vmpressure.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077340ea..2081680e015d 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -12,7 +12,7 @@ struct vmpressure { unsigned long scanned; unsigned long reclaimed; /* The lock is used to keep the scanned/reclaimed above in sync. */ - struct mutex sr_lock; + struct spinlock sr_lock; /* The list of vmpressure_event structs. */ struct list_head events; diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 736a6011c2c8..f4ee6a190a4d 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -180,12 +180,12 @@ static void vmpressure_work_fn(struct work_struct *work) if (!vmpr->scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); scanned = vmpr->scanned; reclaimed = vmpr->reclaimed; vmpr->scanned = 0; vmpr->reclaimed = 0; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); do { if (vmpressure_event(vmpr, scanned, reclaimed)) @@ -240,11 +240,11 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, if (!scanned) return; - mutex_lock(&vmpr->sr_lock); + spin_lock(&vmpr->sr_lock); vmpr->scanned += scanned; vmpr->reclaimed += reclaimed; scanned = vmpr->scanned; - mutex_unlock(&vmpr->sr_lock); + spin_unlock(&vmpr->sr_lock); if (scanned < vmpressure_win || work_pending(&vmpr->work)) return; @@ -367,7 +367,7 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, */ void vmpressure_init(struct vmpressure *vmpr) { - mutex_init(&vmpr->sr_lock); + spin_lock_init(&vmpr->sr_lock); mutex_init(&vmpr->events_lock); INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); From 8e0ed445b3478468372449859c45c6b3032acf2f Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:50 -0700 Subject: [PATCH 19/20] vmpressure: do not check for pending work to prevent from new work because it is racy and it doesn't give us much anyway as schedule_work handles this case already. Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmpressure.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index f4ee6a190a4d..192f9731931d 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -246,7 +246,7 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, scanned = vmpr->scanned; spin_unlock(&vmpr->sr_lock); - if (scanned < vmpressure_win || work_pending(&vmpr->work)) + if (scanned < vmpressure_win) return; schedule_work(&vmpr->work); } From 33cb876e947b9ddda8dca3fb99234b743a597ef9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jul 2013 13:53:51 -0700 Subject: [PATCH 20/20] vmpressure: make sure there are no events queued after memcg is offlined vmpressure is called synchronously from reclaim where the target_memcg is guaranteed to be alive but the eventfd is signaled from the work queue context. This means that memcg (along with vmpressure structure which is embedded into it) might go away while the work item is pending which would result in use-after-release bug. We have two possible ways how to fix this. Either vmpressure pins memcg before it schedules vmpr->work and unpin it in vmpressure_work_fn or explicitely flush the work item from the css_offline context (as suggested by Tejun). This patch implements the later one and it introduces vmpressure_cleanup which flushes the vmpressure work queue item item. It hooks into mem_cgroup_css_offline after the memcg itself is cleaned up. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Reported-by: Tejun Heo Cc: Anton Vorontsov Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Li Zefan Acked-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmpressure.h | 1 + mm/memcontrol.c | 1 + mm/vmpressure.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 2081680e015d..7dc17e2456de 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -30,6 +30,7 @@ extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); extern void vmpressure_init(struct vmpressure *vmpr); +extern void vmpressure_cleanup(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); extern struct vmpressure *css_to_vmpressure(struct cgroup_subsys_state *css); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 00a7a664b9c1..c290a1cf3862 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6335,6 +6335,7 @@ static void mem_cgroup_css_offline(struct cgroup *cont) mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); + vmpressure_cleanup(&memcg->vmpressure); } static void mem_cgroup_css_free(struct cgroup *cont) diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 192f9731931d..0c1e37d829fa 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -372,3 +372,19 @@ void vmpressure_init(struct vmpressure *vmpr) INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); } + +/** + * vmpressure_cleanup() - shuts down vmpressure control structure + * @vmpr: Structure to be cleaned up + * + * This function should be called before the structure in which it is + * embedded is cleaned up. + */ +void vmpressure_cleanup(struct vmpressure *vmpr) +{ + /* + * Make sure there is no pending work before eventfd infrastructure + * goes away. + */ + flush_work(&vmpr->work); +}