From 9b6c2d2e2ba5280649eb043cbc7e3483c77e5d69 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:35:57 -0800 Subject: [PATCH 001/108] lib/bitmap.c: change prototype of bitmap_copy_le Make the prototype of bitmap_copy_le the same as bitmap_copy's. All other bitmap_* functions take unsigned long* parameters; there's no reason this should be special. The only current user is the static inline uwb_mas_bm_copy_le, which already does the void* laundering, so the end users can pass their u8 or __le32 buffers without a cast. Furthermore, this allows us to simply let bitmap_copy_le be an alias for bitmap_copy on little-endian; see next patch. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 2 +- lib/bitmap.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5f5c00de39f0..334fe32d8f0e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,7 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); -extern void bitmap_copy_le(void *dst, const unsigned long *src, int nbits); +extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); diff --git a/lib/bitmap.c b/lib/bitmap.c index ad161a6c82db..e4ac20bec76c 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1191,16 +1191,15 @@ EXPORT_SYMBOL(bitmap_allocate_region); * * Require nbits % BITS_PER_LONG == 0. 
*/ -void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned long *d = dst; - int i; + unsigned int i; for (i = 0; i < nbits/BITS_PER_LONG; i++) { if (BITS_PER_LONG == 64) - d[i] = cpu_to_le64(src[i]); + dst[i] = cpu_to_le64(src[i]); else - d[i] = cpu_to_le32(src[i]); + dst[i] = cpu_to_le32(src[i]); } } EXPORT_SYMBOL(bitmap_copy_le); From e8f24278329dc31b3b8223c83a5465c9df153d9d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:00 -0800 Subject: [PATCH 002/108] lib/bitmap.c: elide bitmap_copy_le on little-endian On little-endian, there's no reason to have an extra, presumably less efficient, way of copying a bitmap. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 4 ++++ lib/bitmap.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 334fe32d8f0e..cffc89c23c02 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -170,7 +170,11 @@ extern void bitmap_fold(unsigned long *dst, const unsigned long *orig, extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order); extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order); extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order); +#ifdef __BIG_ENDIAN extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits); +#else +#define bitmap_copy_le bitmap_copy +#endif extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits); extern int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int nmaskbits); diff --git a/lib/bitmap.c b/lib/bitmap.c index e4ac20bec76c..d2cd50cd4f5d 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1191,6 +1191,7 @@ 
EXPORT_SYMBOL(bitmap_allocate_region); * * Require nbits % BITS_PER_LONG == 0. */ +#ifdef __BIG_ENDIAN void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { unsigned int i; @@ -1203,3 +1204,4 @@ void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int n } } EXPORT_SYMBOL(bitmap_copy_le); +#endif From 2fbad29917c9852fa018d572cd3d43a13465d0f8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:02 -0800 Subject: [PATCH 003/108] lib: bitmap: change bitmap_shift_right to take unsigned parameters I've previously changed the nbits parameter of most bitmap_* functions to unsigned; now it is bitmap_shift_{left,right}'s turn. This alone saves some .text, but while at it I found that there were a few other things one could do. The end result of these seven patches is $ scripts/bloat-o-meter /tmp/bitmap.o.{old,new} add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-328 (-328) function old new delta __bitmap_shift_right 384 226 -158 __bitmap_shift_left 306 136 -170 and less importantly also a smaller stack footprint $ stack-o-meter.pl master bitmap file function old new delta lib/bitmap.o __bitmap_shift_right 24 8 -16 lib/bitmap.o __bitmap_shift_left 24 0 -24 For each pair of 0 <= shift <= nbits <= 256 I've tested the end result with a few randomly filled src buffers (including garbage beyond nbits), in each case verifying that the shift {left,right}-most bits of dst are zero and the remaining nbits-shift bits correspond to src, so I'm fairly confident I didn't screw up. That hasn't stopped me from being wrong before, though. This patch (of 7): gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_right to unsigned. The expressions involving "lim - 1" are still ok, since if lim is 0 the loop is never executed. 
Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ lib/bitmap.c | 10 +++++----- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index cffc89c23c02..c168a807ab9a 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -96,8 +96,8 @@ extern int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); -extern void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, @@ -313,13 +313,13 @@ static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) return __bitmap_weight(src, nbits); } -static inline void bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned int shift, int nbits) { if (small_const_nbits(nbits)) - *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> n; + *dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift; else - __bitmap_shift_right(dst, src, n, nbits); + __bitmap_shift_right(dst, src, shift, nbits); } static inline void bitmap_shift_left(unsigned long *dst, diff --git a/lib/bitmap.c b/lib/bitmap.c index d2cd50cd4f5d..45e7d14ebdfd 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -104,17 +104,17 @@ EXPORT_SYMBOL(__bitmap_complement); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits 
: bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting right (dividing) means moving bits in the MS -> LS bit * direction. Zeros are fed into the vacated MS positions and the * LS bits shifted off the bottom are lost. */ -void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned shift, unsigned nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned k, lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; unsigned long mask = (1UL << left) - 1; for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; From 9d8a6b2a02c5fae53d47bfffaabd5f12bb6ec2c0 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:05 -0800 Subject: [PATCH 004/108] lib: bitmap: eliminate branch in __bitmap_shift_right We can shift the bits from lower and upper into place before assembling dst[k]; moving the shift of upper into the branch where we already know that rem is non-zero allows us to remove a conditional. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 45e7d14ebdfd..a7a8bc02892d 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -129,13 +129,13 @@ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, upper = src[off + k + 1]; if (off + k + 1 == lim - 1 && left) upper &= mask; + upper <<= (BITS_PER_LONG - rem); } lower = src[off + k]; if (left && off + k == lim - 1) lower &= mask; - dst[k] = lower >> rem; - if (rem) - dst[k] |= upper << (BITS_PER_LONG - rem); + lower >>= rem; + dst[k] = lower | upper; if (left && k == lim - 1) dst[k] &= mask; } From 97fb8e940bc56b157517c812c9c6cbfc83d48d78 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:08 -0800 Subject: [PATCH 005/108] lib: bitmap: remove redundant code from __bitmap_shift_right If the condition k==lim-1 is true, we must have off == 0 (otherwise, k could never become that big). But in that case we have upper == 0 and hence dst[k] == (src[k] & mask) >> rem. Since mask consists of a consecutive range of bits starting from the LSB, anding dst[k] with mask is a no-op. 
Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index a7a8bc02892d..761d2d4989ee 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -136,8 +136,6 @@ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, lower &= mask; lower >>= rem; dst[k] = lower | upper; - if (left && k == lim - 1) - dst[k] &= mask; } if (off) memset(&dst[lim - off], 0, off*sizeof(unsigned long)); From cfac1d080a00544e5cecd2d7c9ebe3d6a5cc18e6 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:10 -0800 Subject: [PATCH 006/108] lib: bitmap: yet another simplification in __bitmap_shift_right If left is 0, we can just let mask be ~0UL, so that anding with it is a no-op. Conveniently, BITMAP_LAST_WORD_MASK provides precisely what we need, and we can eliminate left. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 761d2d4989ee..db88512c3451 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -113,9 +113,9 @@ EXPORT_SYMBOL(__bitmap_complement); void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned shift, unsigned nbits) { - unsigned k, lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned k, lim = BITS_TO_LONGS(nbits); unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; - unsigned long mask = (1UL << left) - 1; + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; @@ -127,12 +127,12 @@ void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, upper = 0; else { upper = src[off + k + 1]; - if (off + k + 1 == lim - 1 && left) + if (off + k + 1 == lim - 1) upper &= mask; upper <<= (BITS_PER_LONG - rem); } lower = src[off + k]; - if (left && 
off + k == lim - 1) + if (off + k == lim - 1) lower &= mask; lower >>= rem; dst[k] = lower | upper; From dba94c2553da1928303c2a6c6410247c88cafc1d Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:13 -0800 Subject: [PATCH 007/108] lib: bitmap: change bitmap_shift_left to take unsigned parameters gcc can generate slightly better code for stuff like "nbits % BITS_PER_LONG" when it knows nbits is not negative. Since negative size bitmaps or shift amounts don't make sense, change these parameters of bitmap_shift_left to unsigned. If off >= lim (which requires shift >= nbits), k is initialized with a large positive value, but since I've let k continue to be signed, the loop will never run and dst will be zeroed as expected. Inside the loop, k is guaranteed to be non-negative, so the fact that it is promoted to unsigned in the various expressions it appears in is harmless. Also use "shift" and "nbits" consistently for the parameter names. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 12 ++++++------ lib/bitmap.c | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index c168a807ab9a..5e7f75a6d7d0 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -98,8 +98,8 @@ extern void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int nbits); extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits); -extern void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits); +extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits); extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int nbits); extern void __bitmap_or(unsigned long *dst, const unsigned
long *bitmap1, @@ -322,13 +322,13 @@ static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s __bitmap_shift_right(dst, src, shift, nbits); } -static inline void bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int n, int nbits) +static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { if (small_const_nbits(nbits)) - *dst = (*src << n) & BITMAP_LAST_WORD_MASK(nbits); + *dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits); else - __bitmap_shift_left(dst, src, n, nbits); + __bitmap_shift_left(dst, src, shift, nbits); } static inline int bitmap_parse(const char *buf, unsigned int buflen, diff --git a/lib/bitmap.c b/lib/bitmap.c index db88512c3451..74bdf3601245 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -148,18 +148,19 @@ EXPORT_SYMBOL(__bitmap_shift_right); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting left (multiplying) means moving bits in the LS -> MS * direction. Zeros are fed into the vacated LS bit positions * and those MS bits shifted off the top are lost. 
*/ -void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + int k; + unsigned int lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; From 6d874eca6595629258a5d9af237c5ae53a9544e1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:16 -0800 Subject: [PATCH 008/108] lib: bitmap: eliminate branch in __bitmap_shift_left We can shift the bits from lower and upper into place before assembling dst[k + off]; moving the shift of lower into the branch where we already know that rem is non-zero allows us to remove a conditional. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 74bdf3601245..36e380da00c5 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -169,15 +169,14 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, * word below and make them the bottom rem bits of result. 
*/ if (rem && k > 0) - lower = src[k - 1]; + lower = src[k - 1] >> (BITS_PER_LONG - rem); else lower = 0; upper = src[k]; if (left && k == lim - 1) upper &= (1UL << left) - 1; - dst[k + off] = upper << rem; - if (rem) - dst[k + off] |= lower >> (BITS_PER_LONG - rem); + upper <<= rem; + dst[k + off] = lower | upper; if (left && k + off == lim - 1) dst[k + off] &= (1UL << left) - 1; } From 7f590657937f1c59163ff14a13062439a18e4a37 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:19 -0800 Subject: [PATCH 009/108] lib: bitmap: remove redundant code from __bitmap_shift_left The first of these conditionals is completely redundant: If k == lim-1, we must have off==0, so the second conditional will also trigger and then it wouldn't matter if upper had some high bits set. But the second conditional is in fact also redundant, since it only serves to clear out some high-order "don't care" bits of dst, about which no guarantee is made. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index 36e380da00c5..a13c7f4e325a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -159,7 +159,7 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, unsigned int shift, unsigned int nbits) { int k; - unsigned int lim = BITS_TO_LONGS(nbits), left = nbits % BITS_PER_LONG; + unsigned int lim = BITS_TO_LONGS(nbits); unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; @@ -172,13 +172,8 @@ void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, lower = src[k - 1] >> (BITS_PER_LONG - rem); else lower = 0; - upper = src[k]; - if (left && k == lim - 1) - upper &= (1UL << left) - 1; - upper <<= rem; + upper = src[k] << rem; dst[k + off] = lower | upper; - if (left && k + off == lim - 1) - dst[k + off] &= 
(1UL << left) - 1; } if (off) memset(dst, 0, off*sizeof(unsigned long)); From f5e38b9284e13e28c1ef00e508238f279cf0ac3a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 13 Feb 2015 14:36:21 -0800 Subject: [PATCH 010/108] lib: crc32: constify crc32 lookup table Commit 8f243af42ade ("sections: fix const sections for crc32 table") removed the compile-time generated crc32 tables from the RO sections, because it conflicts with the definition of __cacheline_aligned which puts all such aligned data into .data..cacheline_aligned section optimized for wasting less space, and can cause alignment issues when used in combination with const with some gcc versions like 4.7.0 due to a gcc bug [1]. Given that most gcc versions should have the fix by now, we can just use ____cacheline_aligned, which only aligns the data but doesn't move it into specific sections as opposed to __cacheline_aligned. In case of gcc versions having the mentioned bug, the alignment attribute will have no effect, but the data will still be made RO. 
After patch tables are in RO: $ nm -v lib/crc32.o | grep -1 -E "crc32c?table" 0000000000000000 t arch_local_irq_enable 0000000000000000 r crc32ctable_le 0000000000000000 t crc32_exit -- 0000000000000960 t test_buf 0000000000002000 r crc32table_be 0000000000004000 r crc32table_le 000000001d1056e5 A __crc_crc32_be [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52181 Signed-off-by: Daniel Borkmann Cc: Joe Mario Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/gen_crc32table.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 71fcfcd96410..d83a372fa76f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -109,7 +109,7 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32table_le, LE_TABLE_ROWS, @@ -119,7 +119,7 @@ int main(int argc, char** argv) if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_be[%d][%d] = {", BE_TABLE_ROWS, BE_TABLE_SIZE); output_table(crc32table_be, LE_TABLE_ROWS, @@ -128,7 +128,7 @@ int main(int argc, char** argv) } if (CRC_LE_BITS > 1) { crc32cinit_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32ctable_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32ctable_le, LE_TABLE_ROWS, From a4bb1e43e22d3cade8f942fc6f95920248eb2fd0 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:24 -0800 Subject: [PATCH 011/108] mm/util: add kstrdup_const kstrdup() is often used to duplicate strings where neither source nor destination will ever be modified. In such a case we can just reuse the source instead of duplicating it.
The problem is that we must be sure that the source is non-modifiable and its life-time is long enough. I suspect the good candidates for such strings are strings located in the kernel .rodata section; they cannot be modified because the section is read-only and their life-time is equal to the kernel life-time. This small patchset proposes an alternative version of kstrdup - kstrdup_const, which returns the source string if it is located in .rodata, otherwise it falls back to kstrdup. To verify if the source is in .rodata, the function checks if the address is between the sentinels __start_rodata, __end_rodata. I guess it should work with all architectures. The main patch is accompanied by four patches constifying kstrdup for cases where the situation described above happens frequently. I have tested the patchset on a mobile platform (exynos4210-trats) and it saves 3272 string allocations. Since the minimal allocation is 32 or 64 bytes depending on Kconfig options, the patchset saves respectively about 100KB or 200KB of memory. Stats from the tested platform show that the main offender is sysfs: By caller: 2260 __kernfs_new_node 631 clk_register+0xc8/0x1b8 318 clk_register+0x34/0x1b8 51 kmem_cache_create 12 alloc_vfsmnt By string (with count >= 5): 883 power 876 subsystem 135 parameters 132 device 61 iommu_group ... This patch (of 5): Add an alternative version of kstrdup which returns a pointer to a constant char array. The function checks if the input string is in a persistent and read-only memory section; if yes it returns the input string, otherwise it falls back to kstrdup. kstrdup_const is accompanied by kfree_const performing conditional memory deallocation of the string.
Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 3 +++ mm/util.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index b9bc9a5d9e21..e40099e585c9 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -112,7 +112,10 @@ extern void * memchr(const void *,int,__kernel_size_t); #endif void *memchr_inv(const void *s, int c, size_t n); +extern void kfree_const(const void *x); + extern char *kstrdup(const char *s, gfp_t gfp); +extern const char *kstrdup_const(const char *s, gfp_t gfp); extern char *kstrndup(const char *s, size_t len, gfp_t gfp); extern void *kmemdup(const void *src, size_t len, gfp_t gfp); diff --git a/mm/util.c b/mm/util.c index f3ef639c4857..3981ae9d1b15 100644 --- a/mm/util.c +++ b/mm/util.c @@ -12,10 +12,30 @@ #include #include +#include #include #include "internal.h" +static inline int is_kernel_rodata(unsigned long addr) +{ + return addr >= (unsigned long)__start_rodata && + addr < (unsigned long)__end_rodata; +} + +/** + * kfree_const - conditionally free memory + * @x: pointer to the memory + * + * Function calls kfree only if @x is not in .rodata section. 
+ */ +void kfree_const(const void *x) +{ + if (!is_kernel_rodata((unsigned long)x)) + kfree(x); +} +EXPORT_SYMBOL(kfree_const); + /** * kstrdup - allocate space for and copy an existing string * @s: the string to duplicate @@ -37,6 +57,24 @@ char *kstrdup(const char *s, gfp_t gfp) } EXPORT_SYMBOL(kstrdup); +/** + * kstrdup_const - conditionally duplicate an existing const string + * @s: the string to duplicate + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + * + * Function returns source string if it is in .rodata section otherwise it + * fallbacks to kstrdup. + * Strings allocated by kstrdup_const should be freed by kfree_const. + */ +const char *kstrdup_const(const char *s, gfp_t gfp) +{ + if (is_kernel_rodata((unsigned long)s)) + return s; + + return kstrdup(s, gfp); +} +EXPORT_SYMBOL(kstrdup_const); + /** * kstrndup - allocate space for and copy an existing string * @s: the string to duplicate From 75287a677ba1beab7ca0db948468f44eb23a709f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:27 -0800 Subject: [PATCH 012/108] kernfs: convert node name allocation to kstrdup_const sysfs frequently performs duplication of strings located in read-only memory section. Replacing kstrdup by kstrdup_const allows to avoid such operations. 
Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Acked-by: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/kernfs/dir.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 2d881b381d2b..35e40879860a 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -412,7 +412,7 @@ void kernfs_put(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); if (!(kn->flags & KERNFS_STATIC_NAME)) - kfree(kn->name); + kfree_const(kn->name); if (kn->iattr) { if (kn->iattr->ia_secdata) security_release_secctx(kn->iattr->ia_secdata, @@ -506,12 +506,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, unsigned flags) { - char *dup_name = NULL; + const char *dup_name = NULL; struct kernfs_node *kn; int ret; if (!(flags & KERNFS_STATIC_NAME)) { - name = dup_name = kstrdup(name, GFP_KERNEL); + name = dup_name = kstrdup_const(name, GFP_KERNEL); if (!name) return NULL; } @@ -538,7 +538,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, err_out2: kmem_cache_free(kernfs_node_cache, kn); err_out1: - kfree(dup_name); + kfree_const(dup_name); return NULL; } @@ -1264,7 +1264,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, /* rename kernfs_node */ if (strcmp(kn->name, new_name) != 0) { error = -ENOMEM; - new_name = kstrdup(new_name, GFP_KERNEL); + new_name = kstrdup_const(new_name, GFP_KERNEL); if (!new_name) goto out; } else { @@ -1297,7 +1297,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, kernfs_link_sibling(kn); kernfs_put(old_parent); - kfree(old_name); + kfree_const(old_name); error = 0; out: From dfeb0750b630b72b5d4fb2461bc7179eceb54666 Mon Sep 17 
00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:31 -0800 Subject: [PATCH 013/108] kernfs: remove KERNFS_STATIC_NAME When a new kernfs node is created, KERNFS_STATIC_NAME is used to avoid making a separate copy of its name. It's currently only used for sysfs attributes whose filenames are required to stay accessible and unchanged. There are rare exceptions where these names are allocated and formatted dynamically but for the vast majority of cases they're consts in the rodata section. Now that kernfs is converted to use kstrdup_const() and kfree_const(), there's little point in keeping KERNFS_STATIC_NAME around. Remove it. Signed-off-by: Tejun Heo Cc: Andrzej Hajda Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/kernfs/dir.c | 20 ++++++++------------ fs/kernfs/file.c | 4 ---- fs/sysfs/file.c | 2 +- include/linux/kernfs.h | 7 ++----- kernel/cgroup.c | 2 +- 5 files changed, 12 insertions(+), 23 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 35e40879860a..6acc9648f986 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -411,8 +411,9 @@ void kernfs_put(struct kernfs_node *kn) if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); - if (!(kn->flags & KERNFS_STATIC_NAME)) - kfree_const(kn->name); + + kfree_const(kn->name); + if (kn->iattr) { if (kn->iattr->ia_secdata) security_release_secctx(kn->iattr->ia_secdata, @@ -506,15 +507,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, unsigned flags) { - const char *dup_name = NULL; struct kernfs_node *kn; int ret; - if (!(flags & KERNFS_STATIC_NAME)) { - name = dup_name = kstrdup_const(name, GFP_KERNEL); - if (!name) - return NULL; - } + name = kstrdup_const(name, GFP_KERNEL); + if (!name) + return NULL; kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL); if (!kn) @@ -538,7 +536,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, err_out2: kmem_cache_free(kernfs_node_cache, 
kn); err_out1: - kfree_const(dup_name); + kfree_const(name); return NULL; } @@ -1285,9 +1283,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, kn->ns = new_ns; if (new_name) { - if (!(kn->flags & KERNFS_STATIC_NAME)) - old_name = kn->name; - kn->flags &= ~KERNFS_STATIC_NAME; + old_name = kn->name; kn->name = new_name; } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index ddc9f9612f16..b684e8a132e6 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -901,7 +901,6 @@ const struct file_operations kernfs_file_fops = { * @ops: kernfs operations for the file * @priv: private data for the file * @ns: optional namespace tag of the file - * @name_is_static: don't copy file name * @key: lockdep key for the file's active_ref, %NULL to disable lockdep * * Returns the created node on success, ERR_PTR() value on error. @@ -911,7 +910,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, - bool name_is_static, struct lock_class_key *key) { struct kernfs_node *kn; @@ -919,8 +917,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, int rc; flags = KERNFS_FILE; - if (name_is_static) - flags |= KERNFS_STATIC_NAME; kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG, flags); if (!kn) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index dfe928a9540f..7c2867b44141 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -295,7 +295,7 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif kn = __kernfs_create_file(parent, attr->name, mode & 0777, size, ops, - (void *)attr, ns, true, key); + (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(parent, attr->name); diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index d4e01b358341..71ecdab1671b 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h 
@@ -43,7 +43,6 @@ enum kernfs_node_flag { KERNFS_HAS_SEQ_SHOW = 0x0040, KERNFS_HAS_MMAP = 0x0080, KERNFS_LOCKDEP = 0x0100, - KERNFS_STATIC_NAME = 0x0200, KERNFS_SUICIDAL = 0x0400, KERNFS_SUICIDED = 0x0800, }; @@ -291,7 +290,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, umode_t mode, loff_t size, const struct kernfs_ops *ops, void *priv, const void *ns, - bool name_is_static, struct lock_class_key *key); struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, const char *name, @@ -369,8 +367,7 @@ kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, static inline struct kernfs_node * __kernfs_create_file(struct kernfs_node *parent, const char *name, umode_t mode, loff_t size, const struct kernfs_ops *ops, - void *priv, const void *ns, bool name_is_static, - struct lock_class_key *key) + void *priv, const void *ns, struct lock_class_key *key) { return ERR_PTR(-ENOSYS); } static inline struct kernfs_node * @@ -439,7 +436,7 @@ kernfs_create_file_ns(struct kernfs_node *parent, const char *name, key = (struct lock_class_key *)&ops->lockdep_key; #endif return __kernfs_create_file(parent, name, mode, size, ops, priv, ns, - false, key); + key); } static inline struct kernfs_node * diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d5f6ec251fb2..29a7b2cc593e 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3077,7 +3077,7 @@ static int cgroup_add_file(struct cgroup *cgrp, struct cftype *cft) #endif kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), cgroup_file_mode(cft), 0, cft->kf_ops, cft, - NULL, false, key); + NULL, key); if (IS_ERR(kn)) return PTR_ERR(kn); From 612936f21277d1254dd885de2e383aacdc7ca67f Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:33 -0800 Subject: [PATCH 014/108] clk: convert clock name allocations to kstrdup_const Clock subsystem frequently performs duplication of strings located in read-only memory section. 
Replacing kstrdup by kstrdup_const allows to avoid such operations. Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/clk/clk.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index d48ac71c6c8b..642cf37124d3 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -2048,7 +2048,7 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw) goto fail_out; } - clk->name = kstrdup(hw->init->name, GFP_KERNEL); + clk->name = kstrdup_const(hw->init->name, GFP_KERNEL); if (!clk->name) { pr_err("%s: could not allocate clk->name\n", __func__); ret = -ENOMEM; @@ -2075,7 +2075,7 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw) /* copy each string name in case parent_names is __initdata */ for (i = 0; i < clk->num_parents; i++) { - clk->parent_names[i] = kstrdup(hw->init->parent_names[i], + clk->parent_names[i] = kstrdup_const(hw->init->parent_names[i], GFP_KERNEL); if (!clk->parent_names[i]) { pr_err("%s: could not copy parent_names\n", __func__); @@ -2090,10 +2090,10 @@ struct clk *clk_register(struct device *dev, struct clk_hw *hw) fail_parent_names_copy: while (--i >= 0) - kfree(clk->parent_names[i]); + kfree_const(clk->parent_names[i]); kfree(clk->parent_names); fail_parent_names: - kfree(clk->name); + kfree_const(clk->name); fail_name: kfree(clk); fail_out: @@ -2112,10 +2112,10 @@ static void __clk_release(struct kref *ref) kfree(clk->parents); while (--i >= 0) - kfree(clk->parent_names[i]); + kfree_const(clk->parent_names[i]); kfree(clk->parent_names); - kfree(clk->name); + kfree_const(clk->name); kfree(clk); } From 3dec16ea38afce38ceb49c8938901318797a08c4 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:38 
-0800 Subject: [PATCH 015/108] mm/slab: convert cache name allocations to kstrdup_const slab frequently performs duplication of strings located in read-only memory section. Replacing kstrdup by kstrdup_const allows to avoid such operations. [akpm@linux-foundation.org: make the handling of kmem_cache.name const-correct] Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slab_common.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mm/slab_common.c b/mm/slab_common.c index 1a1cc89acaa3..429a4506b382 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -295,8 +295,8 @@ unsigned long calculate_alignment(unsigned long flags, } static struct kmem_cache * -do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align, - unsigned long flags, void (*ctor)(void *), +do_kmem_cache_create(const char *name, size_t object_size, size_t size, + size_t align, unsigned long flags, void (*ctor)(void *), struct mem_cgroup *memcg, struct kmem_cache *root_cache) { struct kmem_cache *s; @@ -363,7 +363,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; - char *cache_name; + const char *cache_name; int err; get_online_cpus(); @@ -390,7 +390,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, if (s) goto out_unlock; - cache_name = kstrdup(name, GFP_KERNEL); + cache_name = kstrdup_const(name, GFP_KERNEL); if (!cache_name) { err = -ENOMEM; goto out_unlock; @@ -401,7 +401,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, flags, ctor, NULL, NULL); if (IS_ERR(s)) { err = PTR_ERR(s); - kfree(cache_name); + kfree_const(cache_name); } out_unlock: @@ -607,7 +607,7 @@ void 
memcg_destroy_kmem_caches(struct mem_cgroup *memcg) void slab_kmem_cache_release(struct kmem_cache *s) { destroy_memcg_params(s); - kfree(s->name); + kfree_const(s->name); kmem_cache_free(kmem_cache, s); } From fcc139ae227b97bd81352e9102d8e79498d1e930 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Fri, 13 Feb 2015 14:36:41 -0800 Subject: [PATCH 016/108] fs/namespace: convert devname allocation to kstrdup_const VFS frequently performs duplication of strings located in read-only memory section. Replacing kstrdup by kstrdup_const allows to avoid such operations. Signed-off-by: Andrzej Hajda Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Mike Turquette Cc: Alexander Viro Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Tejun Heo Cc: Greg KH Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namespace.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index cd1e9681a0cf..6dae553dd69c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -201,7 +201,7 @@ static struct mount *alloc_vfsmnt(const char *name) goto out_free_cache; if (name) { - mnt->mnt_devname = kstrdup(name, GFP_KERNEL); + mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL); if (!mnt->mnt_devname) goto out_free_id; } @@ -234,7 +234,7 @@ static struct mount *alloc_vfsmnt(const char *name) #ifdef CONFIG_SMP out_free_devname: - kfree(mnt->mnt_devname); + kfree_const(mnt->mnt_devname); #endif out_free_id: mnt_free_id(mnt); @@ -568,7 +568,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) static void free_vfsmnt(struct mount *mnt) { - kfree(mnt->mnt_devname); + kfree_const(mnt->mnt_devname); #ifdef CONFIG_SMP free_percpu(mnt->mnt_pcp); #endif From 8da53d4595a53fb9a3380dd4d1c9bc24c7c9aab8 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 13 Feb 2015 14:36:44 -0800 Subject: [PATCH 017/108] lib/string.c: improve strrchr() Instead of potentially passing over the string twice in 
case c is not found, just keep track of the last occurrence. According to bloat-o-meter, this also cuts the generated code by a third (54 vs 36 bytes). Oh, and we get rid of those 7-space indented lines. Signed-off-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/string.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/string.c b/lib/string.c index 3206d0178296..cdd97f431ae2 100644 --- a/lib/string.c +++ b/lib/string.c @@ -313,12 +313,12 @@ EXPORT_SYMBOL(strchrnul); */ char *strrchr(const char *s, int c) { - const char *p = s + strlen(s); - do { - if (*p == (char)c) - return (char *)p; - } while (--p >= s); - return NULL; + const char *last = NULL; + do { + if (*s == (char)c) + last = s; + } while (*s++); + return (char *)last; } EXPORT_SYMBOL(strrchr); #endif From 310ee9e8f370f8fd7a76856726aea88839bb0f8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 13 Feb 2015 14:36:47 -0800 Subject: [PATCH 018/108] lib/genalloc.c: check result of devres_alloc() devm_gen_pool_create() calls devres_alloc() and dereferences its result without checking whether devres_alloc() succeeded. Check for error and bail out if it happened. Coverity-id 1016493. 
Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/genalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/genalloc.c b/lib/genalloc.c index 0fe1cbe87700..d214866eeea2 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -586,6 +586,8 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, struct gen_pool **ptr, *pool; ptr = devres_alloc(devm_gen_pool_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; pool = gen_pool_create(min_alloc_order, nid); if (pool) { From 513e3d2d11c9f05db1edc70deb18a82555cf9309 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:50 -0800 Subject: [PATCH 019/108] cpumask: always use nr_cpu_ids in formatting and parsing functions bitmap implements two variants of scnprintf functions to format a bitmap into a string and cpumask and nodemask wrap them to provide equivalent interfaces. The scnprintf family of functions require a string buffer as an output target which complicates code paths which just want to print out the mask through printk for informational or debug purposes as they have to worry about how large the buffer should be and whether it's too large to allocate on stack. Neither cpumask nor nodemask provides a guideline on how large the target buffer should be, forcing users to come up with their own solutions - some allocate an arbitrarily sized buffer which is small enough to allocate on stack but may be too short in corner cases, others come up with a custom upper limit calculation considering the output format, some allocate the buffer dynamically while one resorted to using a lock to synchronize access to a static buffer. This is an artificial problem which is being solved repeatedly for no benefit. In a lot of cases, the output area already exists and can be targeted directly making the intermediate buffer unnecessary.
This patchset teaches printf family of functions how to format bitmaps and replace the dedicated formatting functions with it. Pointer formatting is extended to cover bitmap formatting. It uses the field width for the number of bits instead of precision. The format used is '%*pb[l]', with the optional trailing 'l' specifying list format instead of hex masks. For more details, please see 0002. This patch (of 31): Currently, the formatting and parsing functions in cpumask.h use nr_cpumask_bits like other cpumask functions; however, nr_cpumask_bits is either NR_CPUS or nr_cpu_ids depending on CONFIG_CPUMASK_OFFSTACK. This leads to inconsistent behaviors. With CONFIG_NR_CPUS=512 and !CONFIG_CPUMASK_OFFSTACK # cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000 # cat /proc/self/status | grep Cpus_allowed: Cpus_allowed: f With CONFIG_NR_CPUS=1024 and CONFIG_CPUMASK_OFFSTACK (fedora default) # cat /sys/devices/virtual/net/lo/queues/rx-0/rps_cpus 0 # cat /proc/self/status | grep Cpus_allowed: Cpus_allowed: f Note that /proc/self/status is always using nr_cpu_ids regardless of config. This is because seq cpumask formattings functions always use nr_cpu_ids. Given that the same output fields may switch between the two forms, converging on nr_cpu_ids always isn't too likely to surprise userland. This patch updates the formatting and parsing functions in cpumask.h to always use nr_cpu_ids. There's no point in dealing with CPUs which aren't even possible on the machine. Signed-off-by: Tejun Heo Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. 
McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Acked-by: Rusty Russell Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ff9044286d88..ee9acb0ce542 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -550,7 +550,7 @@ static inline void cpumask_copy(struct cpumask *dstp, static inline int cpumask_scnprintf(char *buf, int len, const struct cpumask *srcp) { - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpumask_bits); + return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); } /** @@ -564,7 +564,7 @@ static inline int cpumask_scnprintf(char *buf, int len, static inline int cpumask_parse_user(const char __user *buf, int len, struct cpumask *dstp) { - return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse_user(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -579,7 +579,7 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, struct cpumask *dstp) { return bitmap_parselist_user(buf, len, cpumask_bits(dstp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -595,7 +595,7 @@ static inline int cpulist_scnprintf(char *buf, int len, const struct cpumask *srcp) { return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpumask_bits); + nr_cpu_ids); } /** @@ -610,7 +610,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) char *nl = strchr(buf, '\n'); unsigned int len = nl ? 
(unsigned int)(nl - buf) : strlen(buf); - return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parse(buf, len, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -622,7 +622,7 @@ static inline int cpumask_parse(const char *buf, struct cpumask *dstp) */ static inline int cpulist_parse(const char *buf, struct cpumask *dstp) { - return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpumask_bits); + return bitmap_parselist(buf, cpumask_bits(dstp), nr_cpu_ids); } /** @@ -817,7 +817,7 @@ static inline ssize_t cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask) { return bitmap_print_to_pagebuf(list, buf, cpumask_bits(mask), - nr_cpumask_bits); + nr_cpu_ids); } /* From dbc760bcc150cc27160f0131b15db76350df4334 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:53 -0800 Subject: [PATCH 020/108] lib/vsprintf: implement bitmap printing through '%*pb[l]' bitmap and its derivatives such as cpumask and nodemask currently only provide formatting functions which put the output string into the provided buffer; however, how long this buffer should be isn't defined anywhere and, given that some of these bitmaps can be too large to be formatted into an on-stack buffer, users are sometimes unnecessarily forced to come up with creative solutions and compromises for the buffer just to printk these bitmaps. There have been a couple of different attempts at making this easier. 1. Way back, PeterZ tried printk '%pb' extension with the precision for bit width - '%.*pb'. This was intuitive and made sense but unfortunately triggered a compile warning about using precision for a pointer. http://lkml.kernel.org/g/1336577562.2527.58.camel@twins 2. I implemented bitmap_pr_cont[_list]() and its wrappers for cpumask and nodemask. This works but PeterZ pointed out that pr_cont's tendency to produce broken lines when multiple CPUs are printing is bothersome considering the usages.
http://lkml.kernel.org/g/1418226774-30215-3-git-send-email-tj@kernel.org So, this patch is another attempt at teaching printk and friends how to print bitmaps. It's almost identical to what PeterZ tried with precision but it uses the field width for the number of bits instead of precision. The format used is '%*pb[l]', with the optional trailing 'l' specifying list format instead of hex masks. This is a valid format string and doesn't trigger compiler warnings; however, it does make it impossible to specify output field width when printing bitmaps. I think this is an acceptable trade-off given how much easier it makes printing bitmaps and that we don't have any in-kernel user which is using the field width specification. If any future user wants to use field width with a bitmap, it'd have to format the bitmap into a string buffer and then print that buffer with width spec, which isn't different from how it should be done now. This patch implements bitmap[_list]_string() which are called from the vsprintf pointer() formatting function. The implementation is mostly identical to bitmap_scn[list]printf() except that the output is performed in the vsprintf way. These functions handle formatting into too small buffers and sprintf() family of functions report the correct overrun output length. bitmap_scn[list]printf() are now thin wrappers around scnprintf(). Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra (Intel) Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. 
McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Russell King Cc: Rusty Russell Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 61 ++------------------------------ lib/vsprintf.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 59 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index a13c7f4e325a..e85040ba1f22 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -383,28 +383,7 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area_off); int bitmap_scnprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) { - int i, word, bit, len = 0; - unsigned long val; - const char *sep = ""; - int chunksz; - u32 chunkmask; - - chunksz = nmaskbits & (CHUNKSZ - 1); - if (chunksz == 0) - chunksz = CHUNKSZ; - - i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; - for (; i >= 0; i -= CHUNKSZ) { - chunkmask = ((1ULL << chunksz) - 1); - word = i / BITS_PER_LONG; - bit = i % BITS_PER_LONG; - val = (maskp[word] >> bit) & chunkmask; - len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep, - (chunksz+3)/4, val); - chunksz = CHUNKSZ; - sep = ","; - } - return len; + return scnprintf(buf, buflen, "%*pb", nmaskbits, maskp); } EXPORT_SYMBOL(bitmap_scnprintf); @@ -521,25 +500,6 @@ int bitmap_parse_user(const char __user *ubuf, } EXPORT_SYMBOL(bitmap_parse_user); -/* - * bscnl_emit(buf, buflen, rbot, rtop, bp) - * - * Helper routine for bitmap_scnlistprintf(). Write decimal number - * or range to buf, suppressing output past buf+buflen, with optional - * comma-prefix. Return len of what was written to *buf, excluding the - * trailing \0. 
- */ -static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) -{ - if (len > 0) - len += scnprintf(buf + len, buflen - len, ","); - if (rbot == rtop) - len += scnprintf(buf + len, buflen - len, "%d", rbot); - else - len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop); - return len; -} - /** * bitmap_scnlistprintf - convert bitmap to list format ASCII string * @buf: byte buffer into which string is placed @@ -559,24 +519,7 @@ static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) int bitmap_scnlistprintf(char *buf, unsigned int buflen, const unsigned long *maskp, int nmaskbits) { - int len = 0; - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; - - if (buflen == 0) - return 0; - buf[0] = 0; - - rbot = cur = find_first_bit(maskp, nmaskbits); - while (cur < nmaskbits) { - rtop = cur; - cur = find_next_bit(maskp, nmaskbits, cur+1); - if (cur >= nmaskbits || cur > rtop + 1) { - len = bscnl_emit(buf, buflen, rbot, rtop, len); - rbot = cur; - } - } - return len; + return scnprintf(buf, buflen, "%*pbl", nmaskbits, maskp); } EXPORT_SYMBOL(bitmap_scnlistprintf); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 602d2081e713..b235c96167d3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -793,6 +793,87 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, return buf; } +static noinline_for_stack +char *bitmap_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + const int CHUNKSZ = 32; + int nr_bits = max_t(int, spec.field_width, 0); + int i, chunksz; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; + + chunksz = nr_bits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nr_bits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + u32 chunkmask, val; + int word, bit; + + chunkmask = ((1ULL << chunksz) 
- 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (bitmap[word] >> bit) & chunkmask; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; + + spec.field_width = DIV_ROUND_UP(chunksz, 4); + buf = number(buf, end, val, spec); + + chunksz = CHUNKSZ; + } + return buf; +} + +static noinline_for_stack +char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + int nr_bits = max_t(int, spec.field_width, 0); + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .base = 10 }; + + rbot = cur = find_first_bit(bitmap, nr_bits); + while (cur < nr_bits) { + rtop = cur; + cur = find_next_bit(bitmap, nr_bits, cur + 1); + if (cur < nr_bits && cur <= rtop + 1) + continue; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; + + buf = number(buf, end, rbot, spec); + if (rbot < rtop) { + if (buf < end) + *buf = '-'; + buf++; + + buf = number(buf, end, rtop, spec); + } + + rbot = cur; + } + return buf; +} + static noinline_for_stack char *mac_address_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) @@ -1258,6 +1339,10 @@ int kptr_restrict __read_mostly; * - 'B' For backtraced symbolic direct pointers with offset * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref] * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201] + * - 'b[l]' For a bitmap, the number of bits is determined by the field + * width which must be explicitly specified either as part of the + * format string '%32pb[l]' or through '%*pb[l]', [l] selects + * range-list format instead of hex format * - 'M' For a 6-byte MAC address, it prints the address in the * usual colon-separated hex notation * - 'm' For a 6-byte MAC address, it prints the hex address without colons @@ -1354,6 +1439,13 @@ char *pointer(const
char *fmt, char *buf, char *end, void *ptr, return resource_string(buf, end, ptr, spec, fmt); case 'h': return hex_string(buf, end, ptr, spec, fmt); + case 'b': + switch (fmt[1]) { + case 'l': + return bitmap_list_string(buf, end, ptr, spec, fmt); + default: + return bitmap_string(buf, end, ptr, spec, fmt); + } case 'M': /* Colon separated: 00:01:02:03:04:05 */ case 'm': /* Contiguous: 000102030405 */ /* [mM]F (FDDI) */ @@ -1689,6 +1781,8 @@ qualifier: * %pB output the name of a backtrace symbol with its offset * %pR output the address range in a struct resource with decoded flags * %pr output the address range in a struct resource with raw flags + * %pb output the bitmap with field width as the number of bits + * %pbl output the bitmap as range list with field width as the number of bits * %pM output a 6-byte MAC address with colons * %pMR output a 6-byte MAC address with colons in reversed order * %pMF output a 6-byte MAC address with dashes From f1bbc032e45106400905ebb47550983af4690b0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:36:57 -0800 Subject: [PATCH 021/108] cpumask, nodemask: implement cpumask/nodemask_pr_args() printf family of functions can now format bitmaps using '%*pb[l]' and all cpumask and nodemask formatting will be converted to use it. To ease printing these masks with '%*pb[l]' which require two params - the number of bits and the actual bitmap, this patch implement cpumask_pr_args() and nodemask_pr_args() which can be used to provide arguments for '%*pb[l]' Signed-off-by: Tejun Heo Cc: Rusty Russell Cc: "David S. Miller" Cc: "James E.J. Bottomley" Cc: "John W. Linville" Cc: "Paul E. 
McKenney" Cc: Benjamin Herrenschmidt Cc: Chris Metcalf Cc: Chris Zankel Cc: Christoph Lameter Cc: Dmitry Torokhov Cc: Fenghua Yu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Li Zefan Cc: Max Filippov Cc: Mike Travis Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Russell King Cc: Steffen Klassert Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 8 ++++++++ include/linux/nodemask.h | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index ee9acb0ce542..a9b3d00915a0 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -22,6 +22,14 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_bits(maskp) ((maskp)->bits) +/** + * cpumask_pr_args - printf args to output a cpumask + * @maskp: cpumask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a cpumask. + */ +#define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) + #if NR_CPUS == 1 #define nr_cpu_ids 1 #else diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 21cef483dc1b..10f8e556ba07 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -98,6 +98,14 @@ typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; extern nodemask_t _unused_nodemask_arg_; +/** + * nodemask_pr_args - printf args to output a nodemask + * @maskp: nodemask to be printed + * + * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. + */ +#define nodemask_pr_args(maskp) MAX_NUMNODES, (maskp)->bits + /* * The inline keyword gives the compiler room to decide to inline, or * not inline a function as it sees best. 
However, as these functions From 4a0792b0e7a6f0f7c49628fb1ae29b2643d4eff3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:00 -0800 Subject: [PATCH 022/108] bitmap: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/bitmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/bitmap.c b/lib/bitmap.c index e85040ba1f22..088adbdcbad9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -541,8 +541,8 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int n = 0; if (len > 1) { - n = list ? bitmap_scnlistprintf(buf, len, maskp, nmaskbits) : - bitmap_scnprintf(buf, len, maskp, nmaskbits); + n = list ? scnprintf(buf, len, "%*pbl", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb", nmaskbits, maskp); buf[n++] = '\n'; buf[n] = '\0'; } From 729d8e093c3ec5c662965c97accd882a75b0f93a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:03 -0800 Subject: [PATCH 023/108] mips: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/netlogic/common/smp.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/mips/netlogic/common/smp.c b/arch/mips/netlogic/common/smp.c index 4fde7ac76cc9..e743bdd6e20c 100644 --- a/arch/mips/netlogic/common/smp.c +++ b/arch/mips/netlogic/common/smp.c @@ -162,7 +162,6 @@ void __init nlm_smp_setup(void) unsigned int boot_cpu; int num_cpus, i, ncore, node; volatile u32 *cpu_ready = nlm_get_boot_data(BOOT_CPU_READY); - char buf[64]; boot_cpu = hard_smp_processor_id(); cpumask_clear(&phys_cpu_present_mask); @@ -189,10 +188,10 @@ void __init nlm_smp_setup(void) } } - cpumask_scnprintf(buf, ARRAY_SIZE(buf), &phys_cpu_present_mask); - pr_info("Physical CPU mask: %s\n", buf); - cpumask_scnprintf(buf, ARRAY_SIZE(buf), cpu_possible_mask); - pr_info("Possible CPU mask: %s\n", buf); + pr_info("Physical CPU mask: %*pb\n", + cpumask_pr_args(&phys_cpu_present_mask)); + pr_info("Possible CPU mask: %*pb\n", + cpumask_pr_args(cpu_possible_mask)); /* check with the cores we have woken up */ for (ncore = 0, i = 0; i < NLM_NR_NODES; i++) @@ -209,7 +208,6 @@ static int nlm_parse_cpumask(cpumask_t *wakeup_mask) { uint32_t core0_thr_mask, core_thr_mask; int threadmode, i, j; - char buf[64]; core0_thr_mask = 0; for (i = 0; i < NLM_THREADS_PER_CORE; i++) @@ -244,8 +242,7 @@ static int nlm_parse_cpumask(cpumask_t *wakeup_mask) return threadmode; unsupp: - cpumask_scnprintf(buf, ARRAY_SIZE(buf), wakeup_mask); - panic("Unsupported CPU mask %s", buf); + panic("Unsupported CPU mask %*pb", cpumask_pr_args(wakeup_mask)); return 0; } From 0c118b7bd09a1d11731ba80421a34ea1105c5b21 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:06 -0800 Subject: [PATCH 024/108] powerpc: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. 
cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * Spurious if (len > 1) test dropped from shared_cpu_map_show(). Signed-off-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/powerpc/kernel/cacheinfo.c | 15 ++++++--------- arch/powerpc/sysdev/xics/ics-opal.c | 6 ++---- arch/powerpc/sysdev/xics/ics-rtas.c | 7 ++----- 3 files changed, 10 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 40198d50b4c2..ae77b7e59889 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -607,19 +607,16 @@ static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *att { struct cache_index_dir *index; struct cache *cache; - int len; - int n = 0; + int ret; index = kobj_to_cache_index_dir(k); cache = index->cache; - len = PAGE_SIZE - 2; - if (len > 1) { - n = cpumask_scnprintf(buf, len, &cache->shared_cpu_map); - buf[n++] = '\n'; - buf[n] = '\0'; - } - return n; + ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n", + cpumask_pr_args(&cache->shared_cpu_map)); + buf[ret++] = '\n'; + buf[ret] = '\0'; + return ret; } static struct kobj_attribute cache_shared_cpu_map_attr = diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 4ba554ec8eaf..68c7e5cc98e0 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -131,10 +131,8 @@ static int ics_opal_set_affinity(struct irq_data *d, wanted_server = xics_get_irq_server(d->irq, cpumask, 1); if (wanted_server < 0) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - pr_warning("%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); + pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, 
cpumask_pr_args(cpumask), d->irq); return -1; } server = ics_opal_mangle_server(wanted_server); diff --git a/arch/powerpc/sysdev/xics/ics-rtas.c b/arch/powerpc/sysdev/xics/ics-rtas.c index bc81335b2cbc..0af97deb83f3 100644 --- a/arch/powerpc/sysdev/xics/ics-rtas.c +++ b/arch/powerpc/sysdev/xics/ics-rtas.c @@ -140,11 +140,8 @@ static int ics_rtas_set_affinity(struct irq_data *d, irq_server = xics_get_irq_server(d->irq, cpumask, 1); if (irq_server == -1) { - char cpulist[128]; - cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); - printk(KERN_WARNING - "%s: No online cpus in the mask %s for irq %d\n", - __func__, cpulist, d->irq); + pr_warning("%s: No online cpus in the mask %*pb for irq %d\n", + __func__, cpumask_pr_args(cpumask), d->irq); return -1; } From 839b268033c5d1316b2f8cf49184984e6f335fee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:09 -0800 Subject: [PATCH 025/108] tile: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Chris Metcalf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/tile/kernel/hardwall.c | 5 +---- arch/tile/kernel/proc.c | 5 ++--- arch/tile/kernel/setup.c | 13 +++++-------- arch/tile/mm/homecache.c | 12 +++++------- arch/tile/mm/init.c | 18 +++++++----------- drivers/net/ethernet/tile/tilegx.c | 5 ++--- drivers/net/ethernet/tile/tilepro.c | 5 ++--- 7 files changed, 24 insertions(+), 39 deletions(-) diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index c4646bb99342..2fd1694ac1d0 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -909,11 +909,8 @@ static void hardwall_destroy(struct hardwall_info *info) static int hardwall_proc_show(struct seq_file *sf, void *v) { struct hardwall_info *info = sf->private; - char buf[256]; - int rc = cpulist_scnprintf(buf, sizeof(buf), &info->cpumask); - buf[rc++] = '\n'; - seq_write(sf, buf, rc); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(&info->cpumask)); return 0; } diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 6829a9508649..7983e9868df6 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -45,10 +45,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) int n = ptr_to_cpu(v); if (n == 0) { - char buf[NR_CPUS*5]; - cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask); seq_printf(m, "cpu count\t: %d\n", num_online_cpus()); - seq_printf(m, "cpu list\t: %s\n", buf); + seq_printf(m, "cpu list\t: %*pbl\n", + cpumask_pr_args(cpu_online_mask)); seq_printf(m, "model name\t: %s\n", chip_model); seq_printf(m, "flags\t\t:\n"); /* nothing for now */ seq_printf(m, "cpu MHz\t\t: %llu.%06llu\n", diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 864eea69556d..f1f579914952 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -215,12 +215,11 @@ early_param("mem", setup_mem); /* compatibility with x86 */ static int __init setup_isolnodes(char *str) { - char 
buf[MAX_NUMNODES * 5]; if (str == NULL || nodelist_parse(str, isolnodes) != 0) return -EINVAL; - nodelist_scnprintf(buf, sizeof(buf), isolnodes); - pr_info("Set isolnodes value to '%s'\n", buf); + pr_info("Set isolnodes value to '%*pbl'\n", + nodemask_pr_args(&isolnodes)); return 0; } early_param("isolnodes", setup_isolnodes); @@ -1315,11 +1314,9 @@ early_param("disabled_cpus", disabled_cpus); void __init print_disabled_cpus(void) { - if (!cpumask_empty(&disabled_map)) { - char buf[100]; - cpulist_scnprintf(buf, sizeof(buf), &disabled_map); - pr_info("CPUs not available for Linux: %s\n", buf); - } + if (!cpumask_empty(&disabled_map)) + pr_info("CPUs not available for Linux: %*pbl\n", + cpumask_pr_args(&disabled_map)); } static void __init setup_cpu_maps(void) diff --git a/arch/tile/mm/homecache.c b/arch/tile/mm/homecache.c index 0029b3fb651b..40ca30a9fee3 100644 --- a/arch/tile/mm/homecache.c +++ b/arch/tile/mm/homecache.c @@ -115,7 +115,6 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, struct cpumask cache_cpumask_copy, tlb_cpumask_copy; struct cpumask *cache_cpumask, *tlb_cpumask; HV_PhysAddr cache_pa; - char cache_buf[NR_CPUS*5], tlb_buf[NR_CPUS*5]; mb(); /* provided just to simplify "magic hypervisor" mode */ @@ -149,13 +148,12 @@ void flush_remote(unsigned long cache_pfn, unsigned long cache_control, asids, asidcount); if (rc == 0) return; - cpumask_scnprintf(cache_buf, sizeof(cache_buf), &cache_cpumask_copy); - cpumask_scnprintf(tlb_buf, sizeof(tlb_buf), &tlb_cpumask_copy); - pr_err("hv_flush_remote(%#llx, %#lx, %p [%s], %#lx, %#lx, %#lx, %p [%s], %p, %d) = %d\n", - cache_pa, cache_control, cache_cpumask, cache_buf, - (unsigned long)tlb_va, tlb_length, tlb_pgsize, - tlb_cpumask, tlb_buf, asids, asidcount, rc); + pr_err("hv_flush_remote(%#llx, %#lx, %p [%*pb], %#lx, %#lx, %#lx, %p [%*pb], %p, %d) = %d\n", + cache_pa, cache_control, cache_cpumask, + cpumask_pr_args(&cache_cpumask_copy), + (unsigned long)tlb_va, tlb_length, tlb_pgsize, 
tlb_cpumask, + cpumask_pr_args(&tlb_cpumask_copy), asids, asidcount, rc); panic("Unsafe to continue."); } diff --git a/arch/tile/mm/init.c b/arch/tile/mm/init.c index be240cc4978d..ace32d7d3864 100644 --- a/arch/tile/mm/init.c +++ b/arch/tile/mm/init.c @@ -353,15 +353,13 @@ static int __init setup_ktext(char *str) /* Neighborhood ktext pages on specified mask */ else if (cpulist_parse(str, &ktext_mask) == 0) { - char buf[NR_CPUS * 5]; - cpulist_scnprintf(buf, sizeof(buf), &ktext_mask); if (cpumask_weight(&ktext_mask) > 1) { ktext_small = 1; - pr_info("ktext: using caching neighborhood %s with small pages\n", - buf); + pr_info("ktext: using caching neighborhood %*pbl with small pages\n", + cpumask_pr_args(&ktext_mask)); } else { - pr_info("ktext: caching on cpu %s with one huge page\n", - buf); + pr_info("ktext: caching on cpu %*pbl with one huge page\n", + cpumask_pr_args(&ktext_mask)); } } @@ -492,11 +490,9 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) struct cpumask bad; cpumask_andnot(&bad, &ktext_mask, cpu_possible_mask); cpumask_and(&ktext_mask, &ktext_mask, cpu_possible_mask); - if (!cpumask_empty(&bad)) { - char buf[NR_CPUS * 5]; - cpulist_scnprintf(buf, sizeof(buf), &bad); - pr_info("ktext: not using unavailable cpus %s\n", buf); - } + if (!cpumask_empty(&bad)) + pr_info("ktext: not using unavailable cpus %*pbl\n", + cpumask_pr_args(&bad)); if (cpumask_empty(&ktext_mask)) { pr_warn("ktext: no valid cpus; caching on %d\n", smp_processor_id()); diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 049747f558c9..bea8cd2bb56c 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -292,7 +292,6 @@ static inline int mpipe_instance(struct net_device *dev) */ static bool network_cpus_init(void) { - char buf[1024]; int rc; if (network_cpus_string == NULL) @@ -314,8 +313,8 @@ static bool network_cpus_init(void) return false; } - cpulist_scnprintf(buf, sizeof(buf), 
&network_cpus_map); - pr_info("Linux network CPUs: %s\n", buf); + pr_info("Linux network CPUs: %*pbl\n", + cpumask_pr_args(&network_cpus_map)); return true; } diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index fb12d31cfcf6..3d8f60d9643e 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -2410,9 +2410,8 @@ static int __init network_cpus_setup(char *str) if (cpumask_empty(&network_cpus_map)) { pr_warn("Ignoring network_cpus='%s'\n", str); } else { - char buf[1024]; - cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map); - pr_info("Linux network CPUs: %s\n", buf); + pr_info("Linux network CPUs: %*pbl\n", + cpumask_pr_args(&network_cpus_map)); network_cpus_used = true; } } From bf58b4879c33b3475a33740562ebf6583f531d4a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:12 -0800 Subject: [PATCH 026/108] x86: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * Unnecessary buffer size calculation and condition on the length removed from intel_cacheinfo.c::show_shared_cpu_map_func(). * uv_nmi_nr_cpus_pr() got overly smart and implemented "..." abbreviation if the output stretched over the predefined 1024 byte buffer. Replaced with plain printk.
Signed-off-by: Tejun Heo Cc: Mike Travis Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/intel_cacheinfo.c | 24 +++++++++++------------- arch/x86/mm/numa.c | 6 ++---- arch/x86/platform/uv/uv_nmi.c | 25 +++++++------------------ 3 files changed, 20 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index c7035073dfc1..659643376dbf 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -952,20 +952,18 @@ static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf, static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, int type, char *buf) { - ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; - int n = 0; + const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); + int ret; - if (len > 1) { - const struct cpumask *mask; - - mask = to_cpumask(this_leaf->shared_cpu_map); - n = type ? - cpulist_scnprintf(buf, len-2, mask) : - cpumask_scnprintf(buf, len-2, mask); - buf[n++] = '\n'; - buf[n] = '\0'; - } - return n; + if (type) + ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + cpumask_pr_args(mask)); + else + ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb", + cpumask_pr_args(mask)); + buf[ret++] = '\n'; + buf[ret] = '\0'; + return ret; } static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf, diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 1a883705a12a..cd4785bbacb9 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -794,7 +794,6 @@ int early_cpu_to_node(int cpu) void debug_cpumask_set_cpu(int cpu, int node, bool enable) { struct cpumask *mask; - char buf[64]; if (node == NUMA_NO_NODE) { /* early_cpu_to_node() already emits a warning and trace */ @@ -812,10 +811,9 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable) else cpumask_clear_cpu(cpu, mask); - cpulist_scnprintf(buf, sizeof(buf), mask); - printk(KERN_DEBUG "%s cpu %d node 
%d: mask now %s\n", + printk(KERN_DEBUG "%s cpu %d node %d: mask now %*pbl\n", enable ? "numa_add_cpu" : "numa_remove_cpu", - cpu, node, buf); + cpu, node, cpumask_pr_args(mask)); return; } diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c6b146e67116..7488cafab955 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -273,20 +273,6 @@ static inline void uv_clear_nmi(int cpu) } } -/* Print non-responding cpus */ -static void uv_nmi_nr_cpus_pr(char *fmt) -{ - static char cpu_list[1024]; - int len = sizeof(cpu_list); - int c = cpumask_weight(uv_nmi_cpu_mask); - int n = cpulist_scnprintf(cpu_list, len, uv_nmi_cpu_mask); - - if (n >= len-1) - strcpy(&cpu_list[len - 6], "...\n"); - - printk(fmt, c, cpu_list); -} - /* Ping non-responding cpus attemping to force them into the NMI handler */ static void uv_nmi_nr_cpus_ping(void) { @@ -371,16 +357,19 @@ static void uv_nmi_wait(int master) break; /* if not all made it in, send IPI NMI to them */ - uv_nmi_nr_cpus_pr(KERN_ALERT - "UV: Sending NMI IPI to %d non-responding CPUs: %s\n"); + pr_alert("UV: Sending NMI IPI to %d non-responding CPUs: %*pbl\n", + cpumask_weight(uv_nmi_cpu_mask), + cpumask_pr_args(uv_nmi_cpu_mask)); + uv_nmi_nr_cpus_ping(); /* if all cpus are in, then done */ if (!uv_nmi_wait_cpus(0)) break; - uv_nmi_nr_cpus_pr(KERN_ALERT - "UV: %d CPUs not in NMI loop: %s\n"); + pr_alert("UV: %d CPUs not in NMI loop: %*pbl\n", + cpumask_weight(uv_nmi_cpu_mask), + cpumask_pr_args(uv_nmi_cpu_mask)); } while (0); pr_alert("UV: %d of %d CPUs in NMI\n", From 90b586c026fee779b0d675c69f8de9111065f7a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:14 -0800 Subject: [PATCH 027/108] ia64: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. 
cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Cc: Tony Luck Cc: Fenghua Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/topology.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index f295f9abba4b..965ab42fabb0 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -217,14 +217,12 @@ static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) { - ssize_t len; cpumask_t shared_cpu_map; cpumask_and(&shared_cpu_map, &this_leaf->shared_cpu_map, cpu_online_mask); - len = cpumask_scnprintf(buf, NR_CPUS+1, &shared_cpu_map); - len += sprintf(buf+len, "\n"); - return len; + return scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(&shared_cpu_map)); } static ssize_t show_type(struct cache_info *this_leaf, char *buf) From 62518994dd4c7c9a0a634441f012281406887258 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:17 -0800 Subject: [PATCH 028/108] xtensa: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Chris Zankel Cc: Max Filippov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/xtensa/kernel/setup.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 06370ccea9e9..28fc57ef5b86 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -574,12 +574,9 @@ void machine_power_off(void) static int c_show(struct seq_file *f, void *slot) { - char buf[NR_CPUS * 5]; - - cpulist_scnprintf(buf, sizeof(buf), cpu_online_mask); /* high-level stuff */ seq_printf(f, "CPU count\t: %u\n" - "CPU list\t: %s\n" + "CPU list\t: %*pbl\n" "vendor_id\t: Tensilica\n" "model\t\t: Xtensa " XCHAL_HW_VERSION_NAME "\n" "core ID\t\t: " XCHAL_CORE_ID "\n" @@ -588,7 +585,7 @@ c_show(struct seq_file *f, void *slot) "cpu MHz\t\t: %lu.%02lu\n" "bogomips\t: %lu.%02lu\n", num_online_cpus(), - buf, + cpumask_pr_args(cpu_online_mask), XCHAL_BUILD_UNIQUE_ID, XCHAL_HAVE_BE ? "big" : "little", ccount_freq/1000000, From 660e5ec02d5391305604887f33a844adeaa6220c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:20 -0800 Subject: [PATCH 029/108] arm: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * Line termination only requires one extra space at the end of the buffer. Use PAGE_SIZE - 1 instead of PAGE_SIZE - 2 when formatting. 
Signed-off-by: Tejun Heo Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/bus/arm-cci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/bus/arm-cci.c b/drivers/bus/arm-cci.c index 0ce5e2d65a06..84fd66057dad 100644 --- a/drivers/bus/arm-cci.c +++ b/drivers/bus/arm-cci.c @@ -806,8 +806,8 @@ static int cci_pmu_event_init(struct perf_event *event) static ssize_t pmu_attr_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { - int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &pmu->cpus); - + int n = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + cpumask_pr_args(&pmu->cpus)); buf[n++] = '\n'; buf[n] = '\0'; return n; From e8e6d97c9bf734c42322dbed0f882ef11bfe7b58 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:23 -0800 Subject: [PATCH 030/108] cpuset: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * kernel/cpuset.c::cpuset_print_task_mems_allowed() used a static buffer which is protected by a dedicated spinlock. Removed. 
Signed-off-by: Tejun Heo Cc: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/cpuset.c | 42 +++++++++--------------------------------- 1 file changed, 9 insertions(+), 33 deletions(-) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index c54a1dae6c11..1d1fe9361d29 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1707,40 +1707,27 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) { struct cpuset *cs = css_cs(seq_css(sf)); cpuset_filetype_t type = seq_cft(sf)->private; - ssize_t count; - char *buf, *s; int ret = 0; - count = seq_get_buf(sf, &buf); - s = buf; - spin_lock_irq(&callback_lock); switch (type) { case FILE_CPULIST: - s += cpulist_scnprintf(s, count, cs->cpus_allowed); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_allowed)); break; case FILE_MEMLIST: - s += nodelist_scnprintf(s, count, cs->mems_allowed); + seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed)); break; case FILE_EFFECTIVE_CPULIST: - s += cpulist_scnprintf(s, count, cs->effective_cpus); + seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus)); break; case FILE_EFFECTIVE_MEMLIST: - s += nodelist_scnprintf(s, count, cs->effective_mems); + seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems)); break; default: ret = -EINVAL; - goto out_unlock; } - if (s < buf + count - 1) { - *s++ = '\n'; - seq_commit(sf, s - buf); - } else { - seq_commit(sf, -1); - } -out_unlock: spin_unlock_irq(&callback_lock); return ret; } @@ -2610,8 +2597,6 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed); } -#define CPUSET_NODELIST_LEN (256) - /** * cpuset_print_task_mems_allowed - prints task's cpuset and mems_allowed * @tsk: pointer to task_struct of some task. 
@@ -2621,23 +2606,16 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1, */ void cpuset_print_task_mems_allowed(struct task_struct *tsk) { - /* Statically allocated to prevent using excess stack. */ - static char cpuset_nodelist[CPUSET_NODELIST_LEN]; - static DEFINE_SPINLOCK(cpuset_buffer_lock); struct cgroup *cgrp; - spin_lock(&cpuset_buffer_lock); rcu_read_lock(); cgrp = task_cs(tsk)->css.cgroup; - nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN, - tsk->mems_allowed); pr_info("%s cpuset=", tsk->comm); pr_cont_cgroup_name(cgrp); - pr_cont(" mems_allowed=%s\n", cpuset_nodelist); + pr_cont(" mems_allowed=%*pbl\n", nodemask_pr_args(&tsk->mems_allowed)); rcu_read_unlock(); - spin_unlock(&cpuset_buffer_lock); } /* @@ -2715,10 +2693,8 @@ out: /* Display task mems_allowed in /proc//status file. */ void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task) { - seq_puts(m, "Mems_allowed:\t"); - seq_nodemask(m, &task->mems_allowed); - seq_puts(m, "\n"); - seq_puts(m, "Mems_allowed_list:\t"); - seq_nodemask_list(m, &task->mems_allowed); - seq_puts(m, "\n"); + seq_printf(m, "Mems_allowed:\t%*pb\n", + nodemask_pr_args(&task->mems_allowed)); + seq_printf(m, "Mems_allowed_list:\t%*pbl\n", + nodemask_pr_args(&task->mems_allowed)); } From ad853b48cb4650285e8544eebbba5bbd9274ee15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:25 -0800 Subject: [PATCH 031/108] rcu: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Cc: "Paul E. 
McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/rcu/tree_plugin.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 2e850a51bb8f..0d7bbe3095ad 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -49,7 +49,6 @@ DEFINE_PER_CPU(char, rcu_cpu_has_work); static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */ static bool have_rcu_nocb_mask; /* Was rcu_nocb_mask allocated? */ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ -static char __initdata nocb_buf[NR_CPUS * 5]; #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ /* @@ -2386,8 +2385,8 @@ void __init rcu_init_nohz(void) cpumask_and(rcu_nocb_mask, cpu_possible_mask, rcu_nocb_mask); } - cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask); - pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf); + pr_info("\tOffload RCU callbacks from CPUs: %*pbl.\n", + cpumask_pr_args(rcu_nocb_mask)); if (rcu_nocb_poll) pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); From 333470ee46b6851477dc9392eb71ba8ffa4f3387 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:28 -0800 Subject: [PATCH 032/108] sched: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/sched/core.c | 10 ++++------ kernel/sched/stats.c | 11 ++--------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1f37fe7f77a4..13049aac05a6 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5462,9 +5462,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct cpumask *groupmask) { struct sched_group *group = sd->groups; - char str[256]; - cpulist_scnprintf(str, sizeof(str), sched_domain_span(sd)); cpumask_clear(groupmask); printk(KERN_DEBUG "%*s domain %d: ", level, "", level); @@ -5477,7 +5475,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, return -1; } - printk(KERN_CONT "span %s level %s\n", str, sd->name); + printk(KERN_CONT "span %*pbl level %s\n", + cpumask_pr_args(sched_domain_span(sd)), sd->name); if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { printk(KERN_ERR "ERROR: domain->span does not contain " @@ -5522,9 +5521,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, cpumask_or(groupmask, groupmask, sched_group_cpus(group)); - cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); - - printk(KERN_CONT " %s", str); + printk(KERN_CONT " %*pbl", + cpumask_pr_args(sched_group_cpus(group))); if (group->sgc->capacity != SCHED_CAPACITY_SCALE) { printk(KERN_CONT " (cpu_capacity = %d)", group->sgc->capacity); diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index a476bea17fbc..87e2c9f0c33e 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -15,11 +15,6 @@ static int show_schedstat(struct seq_file *seq, void *v) { int cpu; - int mask_len = DIV_ROUND_UP(NR_CPUS, 32) * 9; - char *mask_str = kmalloc(mask_len, GFP_KERNEL); - - if (mask_str == NULL) - return -ENOMEM; if (v == (void *)1) { seq_printf(seq, "version %d\n", 
SCHEDSTAT_VERSION); @@ -50,9 +45,8 @@ static int show_schedstat(struct seq_file *seq, void *v) for_each_domain(cpu, sd) { enum cpu_idle_type itype; - cpumask_scnprintf(mask_str, mask_len, - sched_domain_span(sd)); - seq_printf(seq, "domain%d %s", dcount++, mask_str); + seq_printf(seq, "domain%d %*pb", dcount++, + cpumask_pr_args(sched_domain_span(sd))); for (itype = CPU_IDLE; itype < CPU_MAX_IDLE_TYPES; itype++) { seq_printf(seq, " %u %u %u %u %u %u %u %u", @@ -76,7 +70,6 @@ static int show_schedstat(struct seq_file *seq, void *v) rcu_read_unlock(); #endif } - kfree(mask_str); return 0; } From ffda22c1f316ff9c12f5911ac7a18ec3a49c9d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:31 -0800 Subject: [PATCH 033/108] time: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/time/tick-sched.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 1363d58f07e9..a4c4edac4528 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -326,13 +326,6 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb, return NOTIFY_OK; } -/* - * Worst case string length in chunks of CPU range seems 2 steps - * separations: 0,2,4,6,... 
- * This is NR_CPUS + sizeof('\0') - */ -static char __initdata nohz_full_buf[NR_CPUS + 1]; - static int tick_nohz_init_all(void) { int err = -1; @@ -393,8 +386,8 @@ void __init tick_nohz_init(void) context_tracking_cpu_set(cpu); cpu_notifier(tick_nohz_cpu_down_callback, 0); - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask); - pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); + pr_info("NO_HZ: Full dynticks CPUs: %*pbl.\n", + cpumask_pr_args(tick_nohz_full_mask)); } #endif From 807de073bb5c92e6e19f2c3b7075b51e9d5f6a93 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:34 -0800 Subject: [PATCH 034/108] percpu: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/percpu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index d39e2f4e335c..73c97a5f4495 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1528,7 +1528,6 @@ static void pcpu_dump_alloc_info(const char *lvl, int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, void *base_addr) { - static char cpus_buf[4096] __initdata; static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata; static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata; size_t dyn_size = ai->dyn_size; @@ -1541,12 +1540,11 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, int *unit_map; int group, unit, i; - cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask); - #define PCPU_SETUP_BUG_ON(cond) do { \ if (unlikely(cond)) { \ pr_emerg("PERCPU: failed to initialize, %s", #cond); \ - pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf); \ 
+ pr_emerg("PERCPU: cpu_possible_mask=%*pb\n", \ + cpumask_pr_args(cpu_possible_mask)); \ pcpu_dump_alloc_info(KERN_EMERG, ai); \ BUG(); \ } \ From dfbcbf42dd07b649bb02b1558e2c7150c035b4dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:37 -0800 Subject: [PATCH 035/108] workqueue: use %*pb[l] to format bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/workqueue.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index beeeac9e0e3e..f28849394791 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3083,10 +3083,9 @@ static ssize_t wq_cpumask_show(struct device *dev, int written; mutex_lock(&wq->mutex); - written = cpumask_scnprintf(buf, PAGE_SIZE, wq->unbound_attrs->cpumask); + written = scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(wq->unbound_attrs->cpumask)); mutex_unlock(&wq->mutex); - - written += scnprintf(buf + written, PAGE_SIZE - written, "\n"); return written; } From 1a40243bae6fa0cc09cee40d51e258c725d897e6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:39 -0800 Subject: [PATCH 036/108] tracing: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/trace/trace.c | 6 +++--- kernel/trace/trace_seq.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 77b8dc528006..62c6506d663f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3353,12 +3353,12 @@ tracing_cpumask_read(struct file *filp, char __user *ubuf, mutex_lock(&tracing_cpumask_update_lock); - len = cpumask_scnprintf(mask_str, count, tr->tracing_cpumask); - if (count - len < 2) { + len = snprintf(mask_str, count, "%*pb\n", + cpumask_pr_args(tr->tracing_cpumask)); + if (len >= count) { count = -EINVAL; goto out_err; } - len += sprintf(mask_str + len, "\n"); count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1); out_err: diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index f8b45d8792f9..e694c9f9efa4 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -120,7 +120,7 @@ void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, __trace_seq_init(s); - seq_buf_bitmask(&s->seq, maskp, nmaskbits); + seq_buf_printf(&s->seq, "%*pb", nmaskbits, maskp); if (unlikely(seq_buf_has_overflowed(&s->seq))) { s->seq.len = save_len; From f09068276c5cbe2dd76679b2c9fcc91e12eb7ebe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:42 -0800 Subject: [PATCH 037/108] net: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Acked-by: David S. 
Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/net-sysfs.c | 28 +++++++--------------------- net/core/sysctl_net_core.c | 2 +- 2 files changed, 8 insertions(+), 22 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 999341244434..f2aa73bfb0e4 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -614,8 +614,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, { struct rps_map *map; cpumask_var_t mask; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -626,17 +625,11 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, for (i = 0; i < map->len; i++) cpumask_set_cpu(map->cpus[i], mask); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - rcu_read_unlock(); - free_cpumask_var(mask); - return -EINVAL; - } + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); rcu_read_unlock(); - free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + + return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t store_rps_map(struct netdev_rx_queue *queue, @@ -1090,8 +1083,7 @@ static ssize_t show_xps_map(struct netdev_queue *queue, struct xps_dev_maps *dev_maps; cpumask_var_t mask; unsigned long index; - size_t len = 0; - int i; + int i, len; if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; @@ -1117,15 +1109,9 @@ static ssize_t show_xps_map(struct netdev_queue *queue, } rcu_read_unlock(); - len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask); - if (PAGE_SIZE - len < 3) { - free_cpumask_var(mask); - return -EINVAL; - } - + len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); - len += sprintf(buf + len, "\n"); - return len; + return len < PAGE_SIZE ? 
len : -EINVAL; } static ssize_t store_xps_map(struct netdev_queue *queue, diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa51ddf2368..433424804284 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -155,7 +155,7 @@ write_unlock: rcu_read_unlock(); len = min(sizeof(kbuf) - 1, *lenp); - len = cpumask_scnprintf(kbuf, len, mask); + len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask)); if (!len) { *lenp = 0; goto done; From 898600380ccdc5de24e3a1fea2537df41e43fa87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:45 -0800 Subject: [PATCH 038/108] wireless: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../net/wireless/ath/ath9k/htc_drv_debug.c | 23 +++++------------- drivers/net/wireless/ath/carl9170/debug.c | 24 +++++-------------- 2 files changed, 12 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c index 8cef1edcc621..dc79afd7e151 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_debug.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_debug.c @@ -291,26 +291,15 @@ static ssize_t read_file_slot(struct file *file, char __user *user_buf, { struct ath9k_htc_priv *priv = file->private_data; char buf[512]; - unsigned int len = 0; + unsigned int len; spin_lock_bh(&priv->tx.tx_lock); - - len += scnprintf(buf + len, sizeof(buf) - len, "TX slot bitmap : "); - - len += bitmap_scnprintf(buf + len, sizeof(buf) - len, - priv->tx.tx_slot, MAX_TX_BUF_NUM); - - len += scnprintf(buf + len, sizeof(buf) - len, "\n"); - - len += scnprintf(buf + len, 
sizeof(buf) - len, - "Used slots : %d\n", - bitmap_weight(priv->tx.tx_slot, MAX_TX_BUF_NUM)); - + len = scnprintf(buf, sizeof(buf), + "TX slot bitmap : %*pb\n" + "Used slots : %d\n", + MAX_TX_BUF_NUM, priv->tx.tx_slot, + bitmap_weight(priv->tx.tx_slot, MAX_TX_BUF_NUM)); spin_unlock_bh(&priv->tx.tx_lock); - - if (len > sizeof(buf)) - len = sizeof(buf); - return simple_read_from_buffer(user_buf, count, ppos, buf, len); } diff --git a/drivers/net/wireless/ath/carl9170/debug.c b/drivers/net/wireless/ath/carl9170/debug.c index 1c0af9cd9a85..6808db433283 100644 --- a/drivers/net/wireless/ath/carl9170/debug.c +++ b/drivers/net/wireless/ath/carl9170/debug.c @@ -214,14 +214,10 @@ DEBUGFS_DECLARE_RO_FILE(name, _read_bufsize) static char *carl9170_debugfs_mem_usage_read(struct ar9170 *ar, char *buf, size_t bufsize, ssize_t *len) { - ADD(buf, *len, bufsize, "jar: ["); - spin_lock_bh(&ar->mem_lock); - *len += bitmap_scnprintf(&buf[*len], bufsize - *len, - ar->mem_bitmap, ar->fw.mem_blocks); - - ADD(buf, *len, bufsize, "]\n"); + ADD(buf, *len, bufsize, "jar: [%*pb]\n", + ar->fw.mem_blocks, ar->mem_bitmap); ADD(buf, *len, bufsize, "cookies: used:%3d / total:%3d, allocs:%d\n", bitmap_weight(ar->mem_bitmap, ar->fw.mem_blocks), @@ -316,17 +312,13 @@ static char *carl9170_debugfs_ampdu_state_read(struct ar9170 *ar, char *buf, cnt, iter->tid, iter->bsn, iter->snx, iter->hsn, iter->max, iter->state, iter->counter); - ADD(buf, *len, bufsize, "\tWindow: ["); - - *len += bitmap_scnprintf(&buf[*len], bufsize - *len, - iter->bitmap, CARL9170_BAW_BITS); + ADD(buf, *len, bufsize, "\tWindow: [%*pb,W]\n", + CARL9170_BAW_BITS, iter->bitmap); #define BM_STR_OFF(offset) \ ((CARL9170_BAW_BITS - (offset) - 1) / 4 + \ (CARL9170_BAW_BITS - (offset) - 1) / 32 + 1) - ADD(buf, *len, bufsize, ",W]\n"); - offset = BM_STR_OFF(0); ADD(buf, *len, bufsize, "\tBase Seq: %*s\n", offset, "T"); @@ -448,12 +440,8 @@ static char *carl9170_debugfs_vif_dump_read(struct ar9170 *ar, char *buf, ADD(buf, *len, bufsize, 
"registered VIFs:%d \\ %d\n", ar->vifs, ar->fw.vif_num); - ADD(buf, *len, bufsize, "VIF bitmap: ["); - - *len += bitmap_scnprintf(&buf[*len], bufsize - *len, - &ar->vif_bitmap, ar->fw.vif_num); - - ADD(buf, *len, bufsize, "]\n"); + ADD(buf, *len, bufsize, "VIF bitmap: [%*pb]\n", + ar->fw.vif_num, &ar->vif_bitmap); rcu_read_lock(); list_for_each_entry_rcu(iter, &ar->vif_list, list) { From 0b480037e8eb7928bdb760717693485bb3b728e4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:48 -0800 Subject: [PATCH 039/108] input: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * Line termination only requires one extra space at the end of the buffer. Use PAGE_SIZE - 1 instead of PAGE_SIZE - 2 when formatting. Signed-off-by: Tejun Heo Cc: Dmitry Torokhov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/input/keyboard/atkbd.c | 4 ++-- drivers/input/keyboard/gpio_keys.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index e27a25892db4..387c51f4b4e4 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1399,8 +1399,8 @@ static ssize_t atkbd_set_extra(struct atkbd *atkbd, const char *buf, size_t coun static ssize_t atkbd_show_force_release(struct atkbd *atkbd, char *buf) { - size_t len = bitmap_scnlistprintf(buf, PAGE_SIZE - 2, - atkbd->force_release_mask, ATKBD_KEYMAP_SIZE); + size_t len = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + ATKBD_KEYMAP_SIZE, atkbd->force_release_mask); buf[len++] = '\n'; buf[len] = '\0'; diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 883d6aed5b9a..ddf4045de084 100644 --- 
a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -190,7 +190,7 @@ static ssize_t gpio_keys_attr_show_helper(struct gpio_keys_drvdata *ddata, __set_bit(bdata->button->code, bits); } - ret = bitmap_scnlistprintf(buf, PAGE_SIZE - 2, bits, n_events); + ret = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", n_events, bits); buf[ret++] = '\n'; buf[ret] = '\0'; From c7badc90178b89c49e3852a002024d26cef5f070 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:51 -0800 Subject: [PATCH 040/108] scsi: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * map_show()'s return value is too high by one and the function could modify beyond the end of the buffer when the formatted text is long enough. Signed-off-by: Tejun Heo Cc: "James E.J. 
Bottomley" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/scsi_debug.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 113232135d27..1f8e2dc9c616 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -4658,10 +4658,10 @@ static ssize_t map_show(struct device_driver *ddp, char *buf) return scnprintf(buf, PAGE_SIZE, "0-%u\n", sdebug_store_sectors); - count = bitmap_scnlistprintf(buf, PAGE_SIZE, map_storep, map_size); - + count = scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + (int)map_size, map_storep); buf[count++] = '\n'; - buf[count++] = 0; + buf[count] = '\0'; return count; } From 125918dbd836d19b5dccc2690388b29e575f91a4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:53 -0800 Subject: [PATCH 041/108] usb: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * drivers/uwb/drp.c::uwb_drp_handle_alien_drp() was formatting mas.bm into a buffer but never used it. Removed. 
Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/host/whci/debug.c | 7 ++----- drivers/usb/wusbcore/reservation.c | 5 ++--- drivers/usb/wusbcore/wa-rpipe.c | 5 ++--- drivers/usb/wusbcore/wusbhc.c | 7 ++----- drivers/uwb/drp.c | 2 -- drivers/uwb/uwb-debug.c | 14 ++++---------- 6 files changed, 12 insertions(+), 28 deletions(-) diff --git a/drivers/usb/host/whci/debug.c b/drivers/usb/host/whci/debug.c index ba61dae9e4d2..774b89d28fae 100644 --- a/drivers/usb/host/whci/debug.c +++ b/drivers/usb/host/whci/debug.c @@ -86,17 +86,14 @@ static void qset_print(struct seq_file *s, struct whc_qset *qset) static int di_print(struct seq_file *s, void *p) { struct whc *whc = s->private; - char buf[72]; int d; for (d = 0; d < whc->n_devices; d++) { struct di_buf_entry *di = &whc->di_buf[d]; - bitmap_scnprintf(buf, sizeof(buf), - (unsigned long *)di->availability_info, UWB_NUM_MAS); - seq_printf(s, "DI[%d]\n", d); - seq_printf(s, " availability: %s\n", buf); + seq_printf(s, " availability: %*pb\n", + UWB_NUM_MAS, (unsigned long *)di->availability_info); seq_printf(s, " %c%c key idx: %d dev addr: %d\n", (di->addr_sec_info & WHC_DI_SECURE) ? 'S' : ' ', (di->addr_sec_info & WHC_DI_DISABLE) ? 
'D' : ' ', diff --git a/drivers/usb/wusbcore/reservation.c b/drivers/usb/wusbcore/reservation.c index d5efd0f07d2b..7b1b2e2fb673 100644 --- a/drivers/usb/wusbcore/reservation.c +++ b/drivers/usb/wusbcore/reservation.c @@ -49,14 +49,13 @@ static void wusbhc_rsv_complete_cb(struct uwb_rsv *rsv) struct wusbhc *wusbhc = rsv->pal_priv; struct device *dev = wusbhc->dev; struct uwb_mas_bm mas; - char buf[72]; dev_dbg(dev, "%s: state = %d\n", __func__, rsv->state); switch (rsv->state) { case UWB_RSV_STATE_O_ESTABLISHED: uwb_rsv_get_usable_mas(rsv, &mas); - bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS); - dev_dbg(dev, "established reservation: %s\n", buf); + dev_dbg(dev, "established reservation: %*pb\n", + UWB_NUM_MAS, mas.bm); wusbhc_bwa_set(wusbhc, rsv->stream, &mas); break; case UWB_RSV_STATE_NONE: diff --git a/drivers/usb/wusbcore/wa-rpipe.c b/drivers/usb/wusbcore/wa-rpipe.c index a80c5d284b59..c7ecdbe19a32 100644 --- a/drivers/usb/wusbcore/wa-rpipe.c +++ b/drivers/usb/wusbcore/wa-rpipe.c @@ -496,10 +496,9 @@ void wa_rpipes_destroy(struct wahc *wa) struct device *dev = &wa->usb_iface->dev; if (!bitmap_empty(wa->rpipe_bm, wa->rpipes)) { - char buf[256]; WARN_ON(1); - bitmap_scnprintf(buf, sizeof(buf), wa->rpipe_bm, wa->rpipes); - dev_err(dev, "BUG: pipes not released on exit: %s\n", buf); + dev_err(dev, "BUG: pipes not released on exit: %*pb\n", + wa->rpipes, wa->rpipe_bm); } kfree(wa->rpipe_bm); } diff --git a/drivers/usb/wusbcore/wusbhc.c b/drivers/usb/wusbcore/wusbhc.c index 3e1ba51d1a43..94f401ab859f 100644 --- a/drivers/usb/wusbcore/wusbhc.c +++ b/drivers/usb/wusbcore/wusbhc.c @@ -496,11 +496,8 @@ static void __exit wusbcore_exit(void) { clear_bit(0, wusb_cluster_id_table); if (!bitmap_empty(wusb_cluster_id_table, CLUSTER_IDS)) { - char buf[256]; - bitmap_scnprintf(buf, sizeof(buf), wusb_cluster_id_table, - CLUSTER_IDS); - printk(KERN_ERR "BUG: WUSB Cluster IDs not released " - "on exit: %s\n", buf); + printk(KERN_ERR "BUG: WUSB Cluster IDs not released 
on exit: %*pb\n", + CLUSTER_IDS, wusb_cluster_id_table); WARN_ON(1); } usb_unregister_notify(&wusb_usb_notifier); diff --git a/drivers/uwb/drp.c b/drivers/uwb/drp.c index 05b7bd762254..8fc1b787dced 100644 --- a/drivers/uwb/drp.c +++ b/drivers/uwb/drp.c @@ -619,11 +619,9 @@ static void uwb_drp_handle_alien_drp(struct uwb_rc *rc, struct uwb_ie_drp *drp_i struct device *dev = &rc->uwb_dev.dev; struct uwb_mas_bm mas; struct uwb_cnflt_alien *cnflt; - char buf[72]; unsigned long delay_us = UWB_MAS_LENGTH_US * UWB_MAS_PER_ZONE; uwb_drp_ie_to_bm(&mas, drp_ie); - bitmap_scnprintf(buf, sizeof(buf), mas.bm, UWB_NUM_MAS); list_for_each_entry(cnflt, &rc->cnflt_alien_list, rc_node) { if (bitmap_equal(cnflt->mas.bm, mas.bm, UWB_NUM_MAS)) { diff --git a/drivers/uwb/uwb-debug.c b/drivers/uwb/uwb-debug.c index 6ec45beb7af5..0b1e5a9449b5 100644 --- a/drivers/uwb/uwb-debug.c +++ b/drivers/uwb/uwb-debug.c @@ -217,7 +217,6 @@ static int reservations_print(struct seq_file *s, void *p) struct uwb_dev_addr devaddr; char owner[UWB_ADDR_STRSIZE], target[UWB_ADDR_STRSIZE]; bool is_owner; - char buf[72]; uwb_dev_addr_print(owner, sizeof(owner), &rsv->owner->dev_addr); if (rsv->target.type == UWB_RSV_TARGET_DEV) { @@ -234,8 +233,7 @@ static int reservations_print(struct seq_file *s, void *p) owner, target, uwb_rsv_state_str(rsv->state)); seq_printf(s, " stream: %d type: %s\n", rsv->stream, uwb_rsv_type_str(rsv->type)); - bitmap_scnprintf(buf, sizeof(buf), rsv->mas.bm, UWB_NUM_MAS); - seq_printf(s, " %s\n", buf); + seq_printf(s, " %*pb\n", UWB_NUM_MAS, rsv->mas.bm); } mutex_unlock(&rc->rsvs_mutex); @@ -259,14 +257,10 @@ static const struct file_operations reservations_fops = { static int drp_avail_print(struct seq_file *s, void *p) { struct uwb_rc *rc = s->private; - char buf[72]; - bitmap_scnprintf(buf, sizeof(buf), rc->drp_avail.global, UWB_NUM_MAS); - seq_printf(s, "global: %s\n", buf); - bitmap_scnprintf(buf, sizeof(buf), rc->drp_avail.local, UWB_NUM_MAS); - seq_printf(s, "local: %s\n", 
buf); - bitmap_scnprintf(buf, sizeof(buf), rc->drp_avail.pending, UWB_NUM_MAS); - seq_printf(s, "pending: %s\n", buf); + seq_printf(s, "global: %*pb\n", UWB_NUM_MAS, rc->drp_avail.global); + seq_printf(s, "local: %*pb\n", UWB_NUM_MAS, rc->drp_avail.local); + seq_printf(s, "pending: %*pb\n", UWB_NUM_MAS, rc->drp_avail.pending); return 0; } From f799b1a7fbd24cf0dc4fa33818c172bd70571bd1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:56 -0800 Subject: [PATCH 042/108] drivers/base: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * Line termination only requires one extra space at the end of the buffer. Use PAGE_SIZE - 1 instead of PAGE_SIZE - 2 when formatting. Signed-off-by: Tejun Heo Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/cpu.c | 2 +- drivers/base/node.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c index f829a4c71749..f160ea44a86d 100644 --- a/drivers/base/cpu.c +++ b/drivers/base/cpu.c @@ -245,7 +245,7 @@ static ssize_t print_cpus_offline(struct device *dev, if (!alloc_cpumask_var(&offline, GFP_KERNEL)) return -ENOMEM; cpumask_andnot(offline, cpu_possible_mask, cpu_online_mask); - n = cpulist_scnprintf(buf, len, offline); + n = scnprintf(buf, len, "%*pbl", cpumask_pr_args(offline)); free_cpumask_var(offline); /* display offline cpus >= nr_cpu_ids */ diff --git a/drivers/base/node.c b/drivers/base/node.c index a3b82e9c7f20..36fabe43cd44 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -605,7 +605,8 @@ static ssize_t print_nodes_state(enum node_states state, char *buf) { int n; - n = nodelist_scnprintf(buf, PAGE_SIZE-2, node_states[state]); + n = 
scnprintf(buf, PAGE_SIZE - 1, "%*pbl", + nodemask_pr_args(&node_states[state])); buf[n++] = '\n'; buf[n] = '\0'; return n; From 5024c1d71b988ca5da94e52461626e9930015681 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:37:59 -0800 Subject: [PATCH 043/108] slub: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. * This is an equivalent conversion but the whole function should be converted to use scnprintf family of functions rather than performing custom output length predictions in multiple places. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slub.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 06cdb1829dc9..783505ba2052 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4108,20 +4108,16 @@ static int list_locations(struct kmem_cache *s, char *buf, if (num_online_cpus() > 1 && !cpumask_empty(to_cpumask(l->cpus)) && - len < PAGE_SIZE - 60) { - len += sprintf(buf + len, " cpus="); - len += cpulist_scnprintf(buf + len, - PAGE_SIZE - len - 50, - to_cpumask(l->cpus)); - } + len < PAGE_SIZE - 60) + len += scnprintf(buf + len, PAGE_SIZE - len - 50, + " cpus=%*pbl", + cpumask_pr_args(to_cpumask(l->cpus))); if (nr_online_nodes > 1 && !nodes_empty(l->nodes) && - len < PAGE_SIZE - 60) { - len += sprintf(buf + len, " nodes="); - len += nodelist_scnprintf(buf + len, - PAGE_SIZE - len - 50, - l->nodes); - } + len < PAGE_SIZE - 60) + len += scnprintf(buf + len, PAGE_SIZE - len - 50, + " nodes=%*pbl", + nodemask_pr_args(&l->nodes)); len += sprintf(buf + len, "\n"); } From 9e763e0f4f94d5f9ac8928867a4399ec3b3c7e33 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date:
Fri, 13 Feb 2015 14:38:02 -0800 Subject: [PATCH 044/108] mm: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c75f4dcec808..4721046a134a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2817,8 +2817,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol) p += snprintf(p, buffer + maxlen - p, "relative"); } - if (!nodes_empty(nodes)) { - p += snprintf(p, buffer + maxlen - p, ":"); - p += nodelist_scnprintf(p, buffer + maxlen - p, nodes); - } + if (!nodes_empty(nodes)) + p += scnprintf(p, buffer + maxlen - p, ":%*pbl", + nodemask_pr_args(&nodes)); } From 4497da6f950951b8819cd827bbebb8f214e8ecbe Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:05 -0800 Subject: [PATCH 045/108] padata: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Steffen Klassert Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/padata.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/kernel/padata.c b/kernel/padata.c index 161402f0b517..b38bea9c466a 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -917,15 +917,10 @@ static ssize_t show_cpumask(struct padata_instance *pinst, else cpumask = pinst->cpumask.pcpu; - len = bitmap_scnprintf(buf, PAGE_SIZE, cpumask_bits(cpumask), - nr_cpu_ids); - if (PAGE_SIZE - len < 2) - len = -EINVAL; - else - len += sprintf(buf + len, "\n"); - + len = snprintf(buf, PAGE_SIZE, "%*pb\n", + nr_cpu_ids, cpumask_bits(cpumask)); mutex_unlock(&pinst->lock); - return len; + return len < PAGE_SIZE ? len : -EINVAL; } static ssize_t store_cpumask(struct padata_instance *pinst, From a0c2e07d6d4fe6f67b057d0f1c961e70ff581eda Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:07 -0800 Subject: [PATCH 046/108] proc: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index a3ccf4c4ce70..1295a00ca316 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -316,12 +316,10 @@ static inline void task_context_switch_counts(struct seq_file *m, static void task_cpus_allowed(struct seq_file *m, struct task_struct *task) { - seq_puts(m, "Cpus_allowed:\t"); - seq_cpumask(m, &task->cpus_allowed); - seq_putc(m, '\n'); - seq_puts(m, "Cpus_allowed_list:\t"); - seq_cpumask_list(m, &task->cpus_allowed); - seq_putc(m, '\n'); + seq_printf(m, "Cpus_allowed:\t%*pb\n", + cpumask_pr_args(&task->cpus_allowed)); + seq_printf(m, "Cpus_allowed_list:\t%*pbl\n", + cpumask_pr_args(&task->cpus_allowed)); } int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, From c1d7f03fdd0ed600b161a7f3309e45a20af89796 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:10 -0800 Subject: [PATCH 047/108] irq: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/irq/proc.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 9dc9bfd8a678..df2f4642d1e7 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -46,10 +46,9 @@ static int show_irq_affinity(int type, struct seq_file *m, void *v) mask = desc->pending_mask; #endif if (type) - seq_cpumask_list(m, mask); + seq_printf(m, "%*pbl\n", cpumask_pr_args(mask)); else - seq_cpumask(m, mask); - seq_putc(m, '\n'); + seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); return 0; } @@ -67,8 +66,7 @@ static int irq_affinity_hint_proc_show(struct seq_file *m, void *v) cpumask_copy(mask, desc->affinity_hint); raw_spin_unlock_irqrestore(&desc->lock, flags); - seq_cpumask(m, mask); - seq_putc(m, '\n'); + seq_printf(m, "%*pb\n", cpumask_pr_args(mask)); free_cpumask_var(mask); return 0; @@ -186,8 +184,7 @@ static const struct file_operations irq_affinity_list_proc_fops = { static int default_affinity_show(struct seq_file *m, void *v) { - seq_cpumask(m, irq_default_affinity); - seq_putc(m, '\n'); + seq_printf(m, "%*pb\n", cpumask_pr_args(irq_default_affinity)); return 0; } From ccbd59c1c104d6e42e949e2588563bfe25d9d98f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:13 -0800 Subject: [PATCH 048/108] profile: use %*pb[l] to print bitmaps including cpumasks and nodemasks printk and friends can now format bitmaps using '%*pb[l]'. cpumask and nodemask also provide cpumask_pr_args() and nodemask_pr_args() respectively which can be used to generate the two printf arguments necessary to format the specified cpu/nodemask. 
Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/profile.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/profile.c b/kernel/profile.c index 54bf5ba26420..a7bcd28d6e9f 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -422,8 +422,7 @@ void profile_tick(int type) static int prof_cpu_mask_proc_show(struct seq_file *m, void *v) { - seq_cpumask(m, prof_cpu_mask); - seq_putc(m, '\n'); + seq_printf(m, "%*pb\n", cpumask_pr_args(prof_cpu_mask)); return 0; } From 46385326cc1577587ed3e7432c2425cf6d3e4308 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Feb 2015 14:38:15 -0800 Subject: [PATCH 049/108] bitmap, cpumask, nodemask: remove dedicated formatting functions Now that all bitmap formatting usages have been converted to '%*pb[l]', the separate formatting functions are unnecessary. The following functions are removed. * bitmap_scn[list]printf() * cpumask_scnprintf(), cpulist_scnprintf() * [__]nodemask_scnprintf(), [__]nodelist_scnprintf() * seq_bitmap[_list](), seq_cpumask[_list](), seq_nodemask[_list]() * seq_buf_bitmask() Signed-off-by: Tejun Heo Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/seq_file.c | 32 ------------------------------- include/linux/bitmap.h | 7 ------- include/linux/cpumask.h | 31 ------------------------------ include/linux/nodemask.h | 33 +++++++------------------------- include/linux/seq_buf.h | 3 --- include/linux/seq_file.h | 25 ------------------------ lib/bitmap.c | 41 ---------------------------------------- lib/seq_buf.c | 36 ----------------------------------- 8 files changed, 7 insertions(+), 201 deletions(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index dbf3a59c86bb..555f82155be8 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -539,38 +539,6 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) return res; } -int seq_bitmap(struct seq_file *m, const unsigned long *bits, - unsigned int 
nr_bits) -{ - if (m->count < m->size) { - int len = bitmap_scnprintf(m->buf + m->count, - m->size - m->count, bits, nr_bits); - if (m->count + len < m->size) { - m->count += len; - return 0; - } - } - seq_set_overflow(m); - return -1; -} -EXPORT_SYMBOL(seq_bitmap); - -int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits) -{ - if (m->count < m->size) { - int len = bitmap_scnlistprintf(m->buf + m->count, - m->size - m->count, bits, nr_bits); - if (m->count + len < m->size) { - m->count += len; - return 0; - } - } - seq_set_overflow(m); - return -1; -} -EXPORT_SYMBOL(seq_bitmap_list); - static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5e7f75a6d7d0..dbfbf4990005 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -52,16 +52,13 @@ * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz - * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf - * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region * bitmap_release_region(bitmap, pos, order) Free specified bit region * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_print_to_pagebuf(list, buf, mask, nbits) Print bitmap src as list/hex */ /* @@ -147,14 +144,10 @@ bitmap_find_next_zero_area(unsigned long *map, align_mask, 0); } -extern int bitmap_scnprintf(char *buf, unsigned int len, - const 
unsigned long *src, int nbits); extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, unsigned long *dst, int nbits); extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); -extern int bitmap_scnlistprintf(char *buf, unsigned int len, - const unsigned long *src, int nbits); extern int bitmap_parselist(const char *buf, unsigned long *maskp, int nmaskbits); extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index a9b3d00915a0..086549a665e2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -546,21 +546,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_of(cpu) (get_cpu_mask(cpu)) -/** - * cpumask_scnprintf - print a cpumask into a string as comma-separated hex - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. - */ -static inline int cpumask_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnprintf(buf, len, cpumask_bits(srcp), nr_cpu_ids); -} - /** * cpumask_parse_user - extract a cpumask from a user string * @buf: the buffer to extract from @@ -590,22 +575,6 @@ static inline int cpumask_parselist_user(const char __user *buf, int len, nr_cpu_ids); } -/** - * cpulist_scnprintf - print a cpumask into a string as comma-separated list - * @buf: the buffer to sprintf into - * @len: the length of the buffer - * @srcp: the cpumask to print - * - * If len is zero, returns zero. Otherwise returns the length of the - * (nul-terminated) @buf string. 
- */ -static inline int cpulist_scnprintf(char *buf, int len, - const struct cpumask *srcp) -{ - return bitmap_scnlistprintf(buf, len, cpumask_bits(srcp), - nr_cpu_ids); -} - /** * cpumask_parse - extract a cpumask from from a string * @buf: the buffer to extract from diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 10f8e556ba07..6e85889cf9ab 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -8,14 +8,13 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse_user(), - * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. - * For details of nodelist_scnprintf() and nodelist_parse(), see - * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. - * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. - * For details of nodes_remap(), see bitmap_remap in lib/bitmap.c. - * For details of nodes_onto(), see bitmap_onto in lib/bitmap.c. - * For details of nodes_fold(), see bitmap_fold in lib/bitmap.c. + * For details of nodemask_parse_user(), see bitmap_parse_user() in + * lib/bitmap.c. For details of nodelist_parse(), see bitmap_parselist(), + * also in bitmap.c. For details of node_remap(), see bitmap_bitremap in + * lib/bitmap.c. For details of nodes_remap(), see bitmap_remap in + * lib/bitmap.c. For details of nodes_onto(), see bitmap_onto in + * lib/bitmap.c. For details of nodes_fold(), see bitmap_fold in + * lib/bitmap.c. 
* * The available nodemask operations are: * @@ -52,9 +51,7 @@ * NODE_MASK_NONE Initializer - no bits set * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * - * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask - * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) * void nodes_remap(dst, src, old, new) *dst = map(old, new)(src) @@ -312,14 +309,6 @@ static inline int __first_unset_node(const nodemask_t *maskp) #define nodes_addr(src) ((src).bits) -#define nodemask_scnprintf(buf, len, src) \ - __nodemask_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodemask_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnprintf(buf, len, srcp->bits, nbits); -} - #define nodemask_parse_user(ubuf, ulen, dst) \ __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) static inline int __nodemask_parse_user(const char __user *buf, int len, @@ -328,14 +317,6 @@ static inline int __nodemask_parse_user(const char __user *buf, int len, return bitmap_parse_user(buf, len, dstp->bits, nbits); } -#define nodelist_scnprintf(buf, len, src) \ - __nodelist_scnprintf((buf), (len), &(src), MAX_NUMNODES) -static inline int __nodelist_scnprintf(char *buf, int len, - const nodemask_t *srcp, int nbits) -{ - return bitmap_scnlistprintf(buf, len, srcp->bits, nbits); -} - #define nodelist_parse(buf, dst) __nodelist_parse((buf), &(dst), MAX_NUMNODES) static inline int __nodelist_parse(const char *buf, nodemask_t *dstp, int nbits) { diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9aafe0e24c68..fb7eb9ccb1cd 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -125,9 +125,6 @@ extern int seq_buf_putmem_hex(struct seq_buf *s, const void *mem, unsigned 
int len); extern int seq_buf_path(struct seq_buf *s, const struct path *path, const char *esc); -extern int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits); - #ifdef CONFIG_BINARY_PRINTF extern int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index cf6a9daaaf6d..afbb1fd77c77 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -126,31 +126,6 @@ int seq_path(struct seq_file *, const struct path *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); -int seq_bitmap(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); -static inline int seq_cpumask(struct seq_file *m, const struct cpumask *mask) -{ - return seq_bitmap(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap(m, mask->bits, MAX_NUMNODES); -} - -int seq_bitmap_list(struct seq_file *m, const unsigned long *bits, - unsigned int nr_bits); - -static inline int seq_cpumask_list(struct seq_file *m, - const struct cpumask *mask) -{ - return seq_bitmap_list(m, cpumask_bits(mask), nr_cpu_ids); -} - -static inline int seq_nodemask_list(struct seq_file *m, nodemask_t *mask) -{ - return seq_bitmap_list(m, mask->bits, MAX_NUMNODES); -} int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); diff --git a/lib/bitmap.c b/lib/bitmap.c index 088adbdcbad9..d456f4c15a9f 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -369,24 +369,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area_off); #define nbits_to_hold_value(val) fls(val) #define BASEDEC 10 /* fancier cpuset lists input in decimal */ -/** - * bitmap_scnprintf - convert bitmap to an ASCII hex string. 
- * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. Returns the number of - * characters which were written to *buf, excluding the trailing \0. - */ -int bitmap_scnprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - return scnprintf(buf, buflen, "%*pb", nmaskbits, maskp); -} -EXPORT_SYMBOL(bitmap_scnprintf); - /** * __bitmap_parse - convert an ASCII hex string into a bitmap. * @buf: pointer to buffer containing string. @@ -500,29 +482,6 @@ int bitmap_parse_user(const char __user *ubuf, } EXPORT_SYMBOL(bitmap_parse_user); -/** - * bitmap_scnlistprintf - convert bitmap to list format ASCII string - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Output format is a comma-separated list of decimal numbers and - * ranges. Consecutively set bits are shown as two hyphen-separated - * decimal numbers, the smallest and largest bit numbers set in - * the range. Output format is compatible with the format - * accepted as input by bitmap_parselist(). - * - * The return value is the number of characters which were written to *buf - * excluding the trailing '\0', as per ISO C99's scnprintf. 
- */ -int bitmap_scnlistprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - return scnprintf(buf, buflen, "%*pbl", nmaskbits, maskp); -} -EXPORT_SYMBOL(bitmap_scnlistprintf); - /** * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string * @list: indicates whether the bitmap must be list diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 4eedfedb9e31..88c0854bd752 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -91,42 +91,6 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) return ret; } -/** - * seq_buf_bitmask - write a bitmask array in its ASCII representation - * @s: seq_buf descriptor - * @maskp: points to an array of unsigned longs that represent a bitmask - * @nmaskbits: The number of bits that are valid in @maskp - * - * Writes a ASCII representation of a bitmask string into @s. - * - * Returns zero on success, -1 on overflow. - */ -int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits) -{ - unsigned int len = seq_buf_buffer_left(s); - int ret; - - WARN_ON(s->size == 0); - - /* - * Note, because bitmap_scnprintf() only returns the number of bytes - * written and not the number that would be written, we use the last - * byte of the buffer to let us know if we overflowed. There's a small - * chance that the bitmap could have fit exactly inside the buffer, but - * it's not that critical if that does happen. 
- */ - if (len > 1) { - ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits); - if (ret < len) { - s->len += ret; - return 0; - } - } - seq_buf_set_overflow(s); - return -1; -} - #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments From 62ec818f55ccc3e1a04a6634f8e541596778af5d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Feb 2015 14:38:18 -0800 Subject: [PATCH 050/108] checkpatch: emit an error when using predefined timestamp macros Since commit fe7c36c7bde1 ("Makefile: Build with -Werror=date-time if the compiler supports it"), use of __DATE__, __TIME__, and __TIMESTAMP__ has not been allowed. As this test is gcc version specific (> 4.9), it hasn't prevented a few new uses from creeping into the kernel sources. Make checkpatch complain about them. Signed-off-by: Joe Perches Original-patch-by: Rasmus Villemoes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f0bb6d60c07b..501c286369c9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5089,6 +5089,12 @@ sub process { } } +# check for uses of __DATE__, __TIME__, __TIMESTAMP__ + while ($line =~ /\b(__(?:DATE|TIME|TIMESTAMP)__)\b/g) { + ERROR("DATE_TIME", + "Use of the '$1' macro makes the build non-deterministic\n" . $herecurr); + } + # check for use of yield() if ($line =~ /\byield\s*\(\s*\)/) { WARN("YIELD", From c0a5c89858337653d7b52ecbbcd9975a52359570 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Feb 2015 14:38:21 -0800 Subject: [PATCH 051/108] checkpatch: improve octal permissions tests Add world writable permissions tests for the various functions like debugfs etc... Add $String type for $FuncArg so that string constants are matched. 
Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 501c286369c9..2b77d96db735 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -298,6 +298,7 @@ our $Binary = qr{(?i)0b[01]+$Int_type?}; our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?}; our $Int = qr{[0-9]+$Int_type?}; our $Octal = qr{0[0-7]+$Int_type?}; +our $String = qr{"[X\t]*"}; our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?}; our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?}; our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?}; @@ -517,7 +518,7 @@ our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*}; our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/; our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*}; -our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)}; +our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant|$String)}; our $declaration_macros = qr{(?x: (?:$Storage\s+)?(?:[A-Z_][A-Z0-9]*_){0,2}(?:DEFINE|DECLARE)(?:_[A-Z0-9]+){1,2}\s*\(| @@ -5261,6 +5262,9 @@ sub process { length($val) ne 4)) { ERROR("NON_OCTAL_PERMISSIONS", "Use 4 digit octal (0777) not decimal permissions\n" . $herecurr); + } elsif ($val =~ /^$Octal$/ && (oct($val) & 02)) { + ERROR("EXPORTED_WORLD_WRITABLE", + "Exporting writable files is usually an error. Consider more restrictive permissions.\n" . 
$herecurr); } } } From e23ef1f3340c24d5ff130b574546566349897258 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Feb 2015 14:38:24 -0800 Subject: [PATCH 052/108] checkpatch: ignore __pure $Attribute Just like "__cold", ignore the __pure gcc attribute macro so pointer warnings aren't generated for uses like "int * __pure fn(...)" Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 2b77d96db735..221a2b245690 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -278,6 +278,7 @@ our $Attribute = qr{ __noreturn| __used| __cold| + __pure| __noclone| __deprecated| __read_mostly| From dcaf112365369c3b55bfc37edb634dba32dde57e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Feb 2015 14:38:26 -0800 Subject: [PATCH 053/108] checkpatch: fix UNNECESSARY_KERN_LEVEL false positive KERN_ is never redundant with printk_ratelimited or printk_once. (Except perhaps in the sense that you could use e.g. pr_err_ratelimited or pr_err_once, but that would apply to printk as well). 
Signed-off-by: Paolo Bonzini Cc: Andy Whitcroft Acked-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 221a2b245690..f130c93a5656 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4547,7 +4547,7 @@ sub process { } # check for logging functions with KERN_ - if ($line !~ /printk\s*\(/ && + if ($line !~ /printk(?:_ratelimited|_once)?\s*\(/ && $line =~ /\b$logFunctions\s*\(.*\b(KERN_[A-Z]+)\b/) { my $level = $1; if (WARN("UNNECESSARY_KERN_LEVEL", From 327953e9af6c59ad111b28359e59e3ec0cbd71b6 Mon Sep 17 00:00:00 2001 From: Christoph Jaeger Date: Fri, 13 Feb 2015 14:38:29 -0800 Subject: [PATCH 054/108] checkpatch: add check for keyword 'boolean' in Kconfig definitions Discourage the use of keyword 'boolean' for type definition attributes of config options as support for it will be dropped later on. See http://lkml.kernel.org/r/cover.1418003065.git.cj@linux.com Signed-off-by: Christoph Jaeger Suggested-by: Daniel Borkmann Cc: Joe Perches Acked-by: Paul Bolle Tested-by: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index f130c93a5656..6afc24ba77a6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2357,6 +2357,13 @@ sub process { "Use of CONFIG_EXPERIMENTAL is deprecated. For alternatives, see https://lkml.org/lkml/2012/10/23/580\n"); } +# discourage the use of boolean for type definition attributes of Kconfig options + if ($realfile =~ /Kconfig/ && + $line =~ /^\+\s*\bboolean\b/) { + WARN("CONFIG_TYPE_BOOLEAN", + "Use of boolean is deprecated, please use bool instead.\n" . 
$herecurr); + } + if (($realfile =~ /Makefile.*/ || $realfile =~ /Kbuild.*/) && ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) { my $flag = $1; From 1b36b201c081ddb6ced1983c5ebf75fa635b6e4b Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Feb 2015 14:38:32 -0800 Subject: [PATCH 055/108] checkpatch: Allow comments in macros tested for single statements Convert all the comments to spaces before testing for single statement macros. Reported-by: Valdis Kletnieks Signed-off-by: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6afc24ba77a6..6ac355edd0dc 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4260,6 +4260,7 @@ sub process { $ctx = $dstat; $dstat =~ s/\\\n.//g; + $dstat =~ s/$;/ /g; if ($dstat =~ /^\+\s*#\s*define\s+$Ident\s*${balanced_parens}\s*do\s*{(.*)\s*}\s*while\s*\(\s*0\s*\)\s*([;\s]*)\s*$/) { my $stmts = $2; From 0d7835fcaa67bb21e5ffd50afaa65c81c53f8856 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 13 Feb 2015 14:38:35 -0800 Subject: [PATCH 056/108] checkpatch: update git commit message The git commit message can be confusing, Try to clarify the message a bit to reduce the confusion when emitted. 
Show the correct form using Please use git commit description style 'commit <12+ chars of sha1> ("<title line>")' and if the git commit sha1 is unique, show the right sha1 to use with the actual title Signed-off-by: Joe Perches <joe@perches.com> Original-patch-by: Prarit Bhargava <prarit@redhat.com> Tested-by: Chris Rorvick <chris@rorvick.com> Acked-by: Prarit Bhargava <prarit@redhat.com> Cc: Daniel Baluta <daniel.baluta@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 45 ++++++++++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6ac355edd0dc..786017d4d057 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -634,6 +634,8 @@ sub git_commit_info { $output =~ s/^\s*//gm; my @lines = split("\n", $output); + return ($id, $desc) if ($#lines < 0); + if ($lines[0] =~ /^error: short SHA1 $commit is ambiguous\./) { # Maybe one day convert this block of bash into something that returns # all matching commit ids, but it's very slow... @@ -2173,21 +2175,38 @@ sub process { "Remove Gerrit Change-Id's before submitting upstream.\n" . 
$herecurr); } -# Check for improperly formed commit descriptions - if ($in_commit_log && - $line =~ /\bcommit\s+[0-9a-f]{5,}/i && - !($line =~ /\b[Cc]ommit [0-9a-f]{12,40} \("/ || - ($line =~ /\b[Cc]ommit [0-9a-f]{12,40}\s*$/ && - defined $rawlines[$linenr] && - $rawlines[$linenr] =~ /^\s*\("/))) { - $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i; +# Check for git id commit length and improperly formed commit descriptions + if ($in_commit_log && $line =~ /\b(c)ommit\s+([0-9a-f]{5,})/i) { my $init_char = $1; my $orig_commit = lc($2); - my $id = '01234567890ab'; - my $desc = 'commit description'; - ($id, $desc) = git_commit_info($orig_commit, $id, $desc); - ERROR("GIT_COMMIT_ID", - "Please use 12 or more chars for the git commit ID like: '${init_char}ommit $id (\"$desc\")'\n" . $herecurr); + my $short = 1; + my $long = 0; + my $case = 1; + my $space = 1; + my $hasdesc = 0; + my $id = '0123456789ab'; + my $orig_desc = "commit description"; + my $description = ""; + + $short = 0 if ($line =~ /\bcommit\s+[0-9a-f]{12,40}/i); + $long = 1 if ($line =~ /\bcommit\s+[0-9a-f]{41,}/i); + $space = 0 if ($line =~ /\bcommit [0-9a-f]/i); + $case = 0 if ($line =~ /\b[Cc]ommit\s+[0-9a-f]{5,40}[^A-F]/); + if ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)"\)/i) { + $orig_desc = $1; + } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s*$/i && + defined $rawlines[$linenr] && + $rawlines[$linenr] =~ /^\s*\("([^"]+)"\)/) { + $orig_desc = $1; + } + + ($id, $description) = git_commit_info($orig_commit, + $id, $orig_desc); + + if ($short || $long || $space || $case || ($orig_desc ne $description)) { + ERROR("GIT_COMMIT_ID", + "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . 
$herecurr); + } } # Check for added, moved or deleted files From acd9362c248e33187629c950badb834b73e20e41 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:38 -0800 Subject: [PATCH 057/108] checkpatch: add likely/unlikely comparison misuse test Add a test for probably likely/unlikely misuses where the comparison is likely misplaced if (likely(foo) > 0) vs if (likely(foo > 0)) Signed-off-by: Joe Perches <joe@perches.com> Cc: Christoph Jaeger <cj@linux.com> Cc: Julia Lawall <julia.lawall@lip6.fr> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 786017d4d057..41223c946808 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5239,6 +5239,13 @@ sub process { "#define of '$1' is wrong - use Kconfig variables or standard guards instead\n" . $herecurr); } +# likely/unlikely comparisons similar to "(likely(foo) > 0)" + if ($^V && $^V ge 5.10.0 && + $line =~ /\b((?:un)?likely)\s*\(\s*$FuncArg\s*\)\s*$Compare/) { + WARN("LIKELY_MISUSE", + "Using $1 should generally have parentheses around the comparison\n" . $herecurr); + } + # whine mightly about in_atomic if ($line =~ /\bin_atomic\s*\(/) { if ($realfile =~ m@^drivers/@) { From b671fde0572af42495ce3183969b0cafa98fd9ec Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:41 -0800 Subject: [PATCH 058/108] checkpatch: add ability to coalesce commit descriptions on multiple lines If a git commit description is split on consecutive lines, coalesce it before testing. 
This allows: commit <foo> ("some long description") Signed-off-by: Joe Perches <joe@perches.com> Reported-by: Paul Bolle <pebolle@tiscali.nl> Tested-by: Paul Bolle <pebolle@tiscali.nl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 41223c946808..6a3baa0bdde8 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2198,6 +2198,13 @@ sub process { defined $rawlines[$linenr] && $rawlines[$linenr] =~ /^\s*\("([^"]+)"\)/) { $orig_desc = $1; + } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("[^"]+$/i && + defined $rawlines[$linenr] && + $rawlines[$linenr] =~ /^\s*[^"]+"\)/) { + $line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)$/i; + $orig_desc = $1; + $rawlines[$linenr] =~ /^\s*([^"]+)"\)/; + $orig_desc .= " " . $1; } ($id, $description) = git_commit_info($orig_commit, From 021158b4c7bd8d0ec4dc8d09c013288429da7ee2 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:43 -0800 Subject: [PATCH 059/108] checkpatch: add types for other OS typedefs bsd and sysv use different typedefs for unsigned types. These are in types.h but not in checkpatch, so add them to checkpatch's ability to know types. This can avoid false positives for code like: void foo(void) { int x; uint y; [...]; } where checkpatch incorrectly emits a warning for "missing a blank line after declarations". 
Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6a3baa0bdde8..6705576198a6 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -339,6 +339,11 @@ our $UTF8 = qr{ | $NON_ASCII_UTF8 }x; +our $typeOtherOSTypedefs = qr{(?x: + u_(?:char|short|int|long) | # bsd + u(?:nchar|short|int|long) # sysv +)}; + our $typeTypedefs = qr{(?x: (?:__)?(?:u|s|be|le)(?:8|16|32|64)| atomic_t @@ -475,6 +480,7 @@ sub build_types { (?:$Modifier\s+|const\s+)* (?: (?:typeof|__typeof__)\s*\([^\)]*\)| + (?:$typeOtherOSTypedefs\b)| (?:$typeTypedefs\b)| (?:${all}\b) ) @@ -492,6 +498,7 @@ sub build_types { (?: (?:typeof|__typeof__)\s*\([^\)]*\)| (?:$typeTypedefs\b)| + (?:$typeOtherOSTypedefs\b)| (?:${allWithAttr}\b) ) (?:\s+$Modifier|\s+const)* @@ -3159,6 +3166,7 @@ sub process { $line !~ /\btypedef\s+$Type\s*\(\s*\*?$Ident\s*\)\s*\(/ && $line !~ /\btypedef\s+$Type\s+$Ident\s*\(/ && $line !~ /\b$typeTypedefs\b/ && + $line !~ /\b$typeOtherOSTypedefs\b/ && $line !~ /\b__bitwise(?:__|)\b/) { WARN("NEW_TYPEDEFS", "do not add new typedefs\n" . $herecurr); From f8e58219dc58cce633697fe15e6c5597bedb296b Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:46 -0800 Subject: [PATCH 060/108] checkpatch: add ability to --fix unnecessary blank lines around braces There's a --strict test for these blank lines. Add the ability to automatically remove them with --fix. 
Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 6705576198a6..059c032d8882 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4461,12 +4461,18 @@ sub process { # check for unnecessary blank lines around braces if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) { - CHK("BRACES", - "Blank lines aren't necessary before a close brace '}'\n" . $hereprev); + if (CHK("BRACES", + "Blank lines aren't necessary before a close brace '}'\n" . $hereprev) && + $fix && $prevrawline =~ /^\+/) { + fix_delete_line($fixlinenr - 1, $prevrawline); + } } if (($rawline =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) { - CHK("BRACES", - "Blank lines aren't necessary after an open brace '{'\n" . $hereprev); + if (CHK("BRACES", + "Blank lines aren't necessary after an open brace '{'\n" . $hereprev) && + $fix) { + fix_delete_line($fixlinenr, $rawline); + } } # no volatiles please From caac1d5fddf2d55e1e1fd6d86f6fc178c801e286 Mon Sep 17 00:00:00 2001 From: Heba Aamer <heba93aamer@gmail.com> Date: Fri, 13 Feb 2015 14:38:49 -0800 Subject: [PATCH 061/108] checkpatch: improve seq_print->seq_puts suggestion Improve the format specifier test by removing any %% before looking for any remaining % format specifier. 
Signed-off-by: Heba Aamer <heba93aamer@gmail.com> Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 059c032d8882..7f1804e052f2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4854,7 +4854,8 @@ sub process { # check for seq_printf uses that could be seq_puts if ($sline =~ /\bseq_printf\s*\(.*"\s*\)\s*;\s*$/) { my $fmt = get_quoted_string($line, $rawline); - if ($fmt ne "" && $fmt !~ /[^\\]\%/) { + $fmt =~ s/%%//g; + if ($fmt !~ /%/) { if (WARN("PREFER_SEQ_PUTS", "Prefer seq_puts to seq_printf\n" . $herecurr) && $fix) { From 43f7fe52a82ec9f7aa6420af430745111674c7b9 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:52 -0800 Subject: [PATCH 062/108] checkpatch: improve "no space necessary after cast" test Code like: if (a < sizeof(<type>) && and { .len = sizeof(<type>) }, incorrectly emits that warning, so add more exceptions to avoid the warning. Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7f1804e052f2..a9baaabfae36 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2541,7 +2541,7 @@ sub process { } } - if ($line =~ /^\+.*(\w+\s*)?\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|[,;\({\[\<\>])/ && + if ($line =~ /^\+.*(\w+\s*)?\(\s*$Type\s*\)[ \t]+(?!$Assignment|$Arithmetic|[,;:\?\(\{\}\[\<\>]|&&|\|\||\\$)/ && (!defined($1) || $1 !~ /sizeof\s*/)) { if (CHK("SPACING", "No space is necessary after a cast\n" . 
$herecurr) && From 101ee6802a77d3a8f42538360a5e9c3f17d5d5b5 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:54 -0800 Subject: [PATCH 063/108] checkpatch: neaten printk_ratelimited message position Just neatening... Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index a9baaabfae36..bf232642cbb3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3243,7 +3243,7 @@ sub process { # check for uses of printk_ratelimit if ($line =~ /\bprintk_ratelimit\s*\(/) { WARN("PRINTK_RATELIMITED", -"Prefer printk_ratelimited or pr_<level>_ratelimited to printk_ratelimit\n" . $herecurr); + "Prefer printk_ratelimited or pr_<level>_ratelimited to printk_ratelimit\n" . $herecurr); } # printk should use KERN_* levels. 
Note that follow on printk's on the From d2e025f364369dbe0a7dee1b15f198f5718f246a Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:38:57 -0800 Subject: [PATCH 064/108] checkpatch: add --strict test for spaces around arithmetic Some prefer code to have spaces around arithmetic so instead of: a = b*c+d; suggest a = b * c + d; Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index bf232642cbb3..32bd31c1345c 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -3689,7 +3689,22 @@ sub process { $op eq '*' or $op eq '/' or $op eq '%') { - if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { + if ($check) { + if (defined $fix_elements[$n + 2] && $ctx !~ /[EW]x[EW]/) { + if (CHK("SPACING", + "spaces preferred around that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " "; + $fix_elements[$n + 2] =~ s/^\s+//; + $line_fixed = 1; + } + } elsif (!defined $fix_elements[$n + 2] && $ctx !~ /Wx[OE]/) { + if (CHK("SPACING", + "space preferred before that '$op' $at\n" . $hereptr)) { + $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]); + $line_fixed = 1; + } + } + } elsif ($ctx =~ /Wx[^WCE]|[^WCE]xW/) { if (ERROR("SPACING", "need consistent spacing around '$op' $at\n" . $hereptr)) { $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . 
" "; From 19c146a64c5e14c5dd910e930565edf74637a423 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:39:00 -0800 Subject: [PATCH 065/108] checkpatch: make sure a commit reference description uses parentheses The preferred style for a commit reference in a commit log is: commit <foo> ("<title line>") A recent commit removed this check for parentheses. Add it back. Signed-off-by: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 32bd31c1345c..3642b0d5ad6a 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2191,6 +2191,7 @@ sub process { my $case = 1; my $space = 1; my $hasdesc = 0; + my $hasparens = 0; my $id = '0123456789ab'; my $orig_desc = "commit description"; my $description = ""; @@ -2201,10 +2202,12 @@ sub process { $case = 0 if ($line =~ /\b[Cc]ommit\s+[0-9a-f]{5,40}[^A-F]/); if ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("([^"]+)"\)/i) { $orig_desc = $1; + $hasparens = 1; } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s*$/i && defined $rawlines[$linenr] && $rawlines[$linenr] =~ /^\s*\("([^"]+)"\)/) { $orig_desc = $1; + $hasparens = 1; } elsif ($line =~ /\bcommit\s+[0-9a-f]{5,}\s+\("[^"]+$/i && defined $rawlines[$linenr] && $rawlines[$linenr] =~ /^\s*[^"]+"\)/) { @@ -2212,12 +2215,13 @@ sub process { $orig_desc = $1; $rawlines[$linenr] =~ /^\s*([^"]+)"\)/; $orig_desc .= " " . 
$1; + $hasparens = 1; } ($id, $description) = git_commit_info($orig_commit, $id, $orig_desc); - if ($short || $long || $space || $case || ($orig_desc ne $description)) { + if ($short || $long || $space || $case || ($orig_desc ne $description) || !$hasparens) { ERROR("GIT_COMMIT_ID", "Please use git commit description style 'commit <12+ chars of sha1> (\"<title line>\")' - ie: '${init_char}ommit $id (\"$description\")'\n" . $herecurr); } From a2fe16b9d878a101b67678872e5cd1410c057ec0 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:39:02 -0800 Subject: [PATCH 066/108] checkpatch: try to avoid poor patch subject lines Naming the tool that found an issue in the subject line isn't very useful. Emit a warning when a common tool (currently checkpatch, sparse or smatch) is in the subject line. Signed-off-by: Joe Perches <joe@perches.com> Suggested-by: Al Viro <viro@ZenIV.linux.org.uk> Acked-by: Dan Carpenter <dan.carpenter@oracle.com> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 3642b0d5ad6a..9c720e1261e9 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2170,6 +2170,13 @@ sub process { } } +# Check email subject for common tools that don't need to be mentioned + if ($in_header_lines && + $line =~ /^Subject:.*\b(?:checkpatch|sparse|smatch)\b[^:]/i) { + WARN("EMAIL_SUBJECT", + "A patch subject line should describe the change not the tool that found it\n" . 
$herecurr); + } + # Check for old stable address if ($line =~ /^\s*cc:\s*.*<?\bstable\@kernel\.org\b>?.*$/i) { ERROR("STABLE_ADDRESS", From 0f3c5aab5e00527eb3167aa9d1725cca9320e01e Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Fri, 13 Feb 2015 14:39:05 -0800 Subject: [PATCH 067/108] checkpatch: add of_device_id to structs that should be const Uses of struct of_device_id are most commonly const. Suggest using it as such. Signed-off-by: Joe Perches <joe@perches.com> Acked-by: Rob Herring <robh@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/checkpatch.pl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 9c720e1261e9..d12435992dea 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5223,8 +5223,9 @@ sub process { "please use device_initcall() or more appropriate function instead of __initcall() (see include/linux/init.h)\n" . $herecurr); } -# check for various ops structs, ensure they are const. - my $struct_ops = qr{acpi_dock_ops| +# check for various structs that are normally const (ops, kgdb, device_tree) + my $const_structs = qr{ + acpi_dock_ops| address_space_operations| backlight_ops| block_device_operations| @@ -5247,6 +5248,7 @@ sub process { mtrr_ops| neigh_ops| nlmsvc_binding| + of_device_id| pci_raw_ops| pipe_buf_operations| platform_hibernation_ops| @@ -5262,7 +5264,7 @@ sub process { usb_mon_operations| wd_ops}x; if ($line !~ /\bconst\b/ && - $line =~ /\bstruct\s+($struct_ops)\b/) { + $line =~ /\bstruct\s+($const_structs)\b/) { WARN("CONST_STRUCT", "struct $1 should normally be const\n" . 
$herecurr); From 4d5755b147665912c938504033d958f1115b68ff Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso <dave@stgolabs.net> Date: Fri, 13 Feb 2015 14:39:08 -0800 Subject: [PATCH 068/108] epoll: optimize setting task running after blocking After waking up a task waiting for an event, we explicitly mark it as TASK_RUNNING (which is necessary as we do the checks for wakeups as TASK_INTERRUPTIBLE). Once running and dealing with actually delivering the events, we're obviously not planning on calling schedule, thus we can relax the implied barrier and simply update the state with __set_current_state(). Signed-off-by: Davidlohr Bueso <dbueso@suse.de> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/eventpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index d77f94491352..1e009cad8d5c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1639,9 +1639,9 @@ fetch_events: spin_lock_irqsave(&ep->lock, flags); } - __remove_wait_queue(&ep->wq, &wait); - set_current_state(TASK_RUNNING); + __remove_wait_queue(&ep->wq, &wait); + __set_current_state(TASK_RUNNING); } check_events: /* Is it worth to try to dig for events ? */ From 0f989f749b51ec1fd94bb5a42f8ad10c8b9f73cb Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@linux-foundation.org> Date: Fri, 13 Feb 2015 14:39:11 -0800 Subject: [PATCH 069/108] MODULE_DEVICE_TABLE: fix some callsites The patch "module: fix types of device tables aliases" newly requires that invocations of MODULE_DEVICE_TABLE(type, name); come *after* the definition of `name'. That is reasonable, but some drivers weren't doing this. Fix them. 
Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: David Miller <davem@davemloft.net> Cc: Hans Verkuil <hverkuil@xs4all.nl> Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/video4linux/v4l2-pci-skeleton.c | 2 +- drivers/net/ethernet/emulex/benet/be_main.c | 1 - drivers/scsi/be2iscsi/be_main.c | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/Documentation/video4linux/v4l2-pci-skeleton.c b/Documentation/video4linux/v4l2-pci-skeleton.c index 006721e43b2a..7bd1b975bfd2 100644 --- a/Documentation/video4linux/v4l2-pci-skeleton.c +++ b/Documentation/video4linux/v4l2-pci-skeleton.c @@ -42,7 +42,6 @@ MODULE_DESCRIPTION("V4L2 PCI Skeleton Driver"); MODULE_AUTHOR("Hans Verkuil"); MODULE_LICENSE("GPL v2"); -MODULE_DEVICE_TABLE(pci, skeleton_pci_tbl); /** * struct skeleton - All internal data for one instance of device @@ -95,6 +94,7 @@ static const struct pci_device_id skeleton_pci_tbl[] = { /* { PCI_DEVICE(PCI_VENDOR_ID_, PCI_DEVICE_ID_) }, */ { 0, } }; +MODULE_DEVICE_TABLE(pci, skeleton_pci_tbl); /* * HDTV: this structure has the capabilities of the HDTV receiver. 
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 932b93a14965..0a816859aca5 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -26,7 +26,6 @@ #include <net/vxlan.h> MODULE_VERSION(DRV_VER); -MODULE_DEVICE_TABLE(pci, be_dev_ids); MODULE_DESCRIPTION(DRV_DESC " " DRV_VER); MODULE_AUTHOR("Emulex Corporation"); MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index f3193406776c..96241b20fd2c 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -48,7 +48,6 @@ static unsigned int be_iopoll_budget = 10; static unsigned int be_max_phys_size = 64; static unsigned int enable_msix = 1; -MODULE_DEVICE_TABLE(pci, beiscsi_pci_id_table); MODULE_DESCRIPTION(DRV_DESC " " BUILD_STR); MODULE_VERSION(BUILD_STR); MODULE_AUTHOR("Emulex Corporation"); From cb4188ac8e5779f66b9f55888ac2c75b391cde44 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:14 -0800 Subject: [PATCH 070/108] compiler: introduce __alias(symbol) shortcut To be consistent with other compiler attributes introduce __alias(symbol) macro expanding into __attribute__((alias(#symbol))) Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/compiler-gcc.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02ae99e8e6d3..cdf13ca7cac3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,6 +66,7 @@ #define __deprecated __attribute__((deprecated)) #define __packed __attribute__((packed)) #define __weak __attribute__((weak)) +#define __alias(symbol) __attribute__((alias(#symbol))) /* * it doesn't make sense on ARM (currently the only user of __naked) to trace From 0b24becc810dc3be6e3f94103a866f214c282394 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:17 -0800 Subject: [PATCH 071/108] kasan: add kernel address sanitizer infrastructure Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASAN uses compile-time instrumentation for checking every memory access, therefore GCC > v4.9.2 required. v4.9.2 almost works, but has issues with putting symbol aliases into the wrong section, which breaks kasan instrumentation of globals. This patch only adds infrastructure for kernel address sanitizer. It's not available for use yet. The idea and some code was borrowed from [1]. Basic idea: The main idea of KASAN is to use shadow memory to record whether each byte of memory is safe to access or not, and use compiler's instrumentation to check the shadow memory on each memory access. Address sanitizer uses 1/8 of the memory addressable in kernel for shadow memory and uses direct mapping with a scale and offset to translate a memory address to its corresponding shadow address. 
Here is the function to translate an address to its corresponding shadow address: unsigned long kasan_mem_to_shadow(unsigned long addr) { return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } where KASAN_SHADOW_SCALE_SHIFT = 3. So for every 8 bytes there is one corresponding byte of shadow memory. The following encoding is used for each shadow byte: 0 means that all 8 bytes of the corresponding memory region are valid for access; k (1 <= k <= 7) means that the first k bytes are valid for access, and other (8 - k) bytes are not; any negative value indicates that the entire 8-byte word is inaccessible. Different negative values are used to distinguish between different kinds of inaccessible memory (redzones, freed memory) (see mm/kasan/kasan.h). To be able to detect accesses to bad memory we need a special compiler. Such a compiler inserts specific function calls (__asan_load*(addr), __asan_store*(addr)) before each memory access of size 1, 2, 4, 8 or 16. These functions check whether the memory region is valid to access or not by checking the corresponding shadow memory. If the access is not valid, an error is printed. Historical background of the address sanitizer from Dmitry Vyukov: "We've developed the set of tools, AddressSanitizer (Asan), ThreadSanitizer and MemorySanitizer, for user space. We actively use them for testing inside of Google (continuous testing, fuzzing, running prod services). To date the tools have found more than 10'000 scary bugs in Chromium, Google internal codebase and various open-source projects (Firefox, OpenSSL, gcc, clang, ffmpeg, MySQL and lots of others): [2] [3] [4]. The tools are part of both gcc and clang compilers. We have not yet done massive testing under the Kernel AddressSanitizer (it's kind of chicken and egg problem, you need it to be upstream to start applying it extensively). To date it has found about 50 bugs. Bugs that we've found in upstream kernel are listed in [5]. We've also found ~20 bugs in our internal version of the kernel.
Also people from Samsung and Oracle have found some. [...] As others noted, the main feature of AddressSanitizer is its performance due to inline compiler instrumentation and simple linear shadow memory. User-space Asan has ~2x slowdown on computational programs and ~2x memory consumption increase. Taking into account that kernel usually consumes only small fraction of CPU and memory when running real user-space programs, I would expect that kernel Asan will have ~10-30% slowdown and similar memory consumption increase (when we finish all tuning). I agree that Asan can well replace kmemcheck. We have plans to start working on Kernel MemorySanitizer that finds uses of uninitialized memory. Asan+Msan will provide feature-parity with kmemcheck. As others noted, Asan will unlikely replace debug slab and pagealloc that can be enabled at runtime. Asan uses compiler instrumentation, so even if it is disabled, it still incurs visible overheads. Asan technology is easily portable to other architectures. Compiler instrumentation is fully portable. Runtime has some arch-dependent parts like shadow mapping and atomic operation interception. They are relatively easy to port." Comparison with other debugging features: ======================================== KMEMCHECK: - KASan can do almost everything that kmemcheck can. KASan uses compile-time instrumentation, which makes it significantly faster than kmemcheck. The only advantage of kmemcheck over KASan is detection of uninitialized memory reads. Some brief performance testing showed that kasan could be x500-x600 times faster than kmemcheck: $ netperf -l 30 MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to localhost (127.0.0.1) port 0 AF_INET Recv Send Send Socket Socket Message Elapsed Size Size Size Time Throughput bytes bytes bytes secs.
10^6bits/sec no debug: 87380 16384 16384 30.00 41624.72 kasan inline: 87380 16384 16384 30.00 12870.54 kasan outline: 87380 16384 16384 30.00 10586.39 kmemcheck: 87380 16384 16384 30.03 20.23 - Also kmemcheck couldn't work on several CPUs. It always sets number of CPUs to 1. KASan doesn't have such limitation. DEBUG_PAGEALLOC: - KASan is slower than DEBUG_PAGEALLOC, but KASan works on sub-page granularity level, so it able to find more bugs. SLUB_DEBUG (poisoning, redzones): - SLUB_DEBUG has lower overhead than KASan. - SLUB_DEBUG in most cases are not able to detect bad reads, KASan able to detect both reads and writes. - In some cases (e.g. redzone overwritten) SLUB_DEBUG detect bugs only on allocation/freeing of object. KASan catch bugs right before it will happen, so we always know exact place of first bad read/write. [1] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel [2] https://code.google.com/p/address-sanitizer/wiki/FoundBugs [3] https://code.google.com/p/thread-sanitizer/wiki/FoundBugs [4] https://code.google.com/p/memory-sanitizer/wiki/FoundBugs [5] https://code.google.com/p/address-sanitizer/wiki/AddressSanitizerForKernel#Trophies Based on work by Andrey Konovalov. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Acked-by: Michal Marek <mmarek@suse.cz> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/kasan.txt | 170 +++++++++++++++ Makefile | 3 +- drivers/firmware/efi/libstub/Makefile | 1 + include/linux/kasan.h | 46 ++++ include/linux/sched.h | 3 + lib/Kconfig.debug | 2 + lib/Kconfig.kasan | 43 ++++ mm/Makefile | 1 + mm/kasan/Makefile | 8 + mm/kasan/kasan.c | 302 ++++++++++++++++++++++++++ mm/kasan/kasan.h | 34 +++ mm/kasan/report.c | 209 ++++++++++++++++++ scripts/Makefile.kasan | 24 ++ scripts/Makefile.lib | 10 + 14 files changed, 855 insertions(+), 1 deletion(-) create mode 100644 Documentation/kasan.txt create mode 100644 include/linux/kasan.h create mode 100644 lib/Kconfig.kasan create mode 100644 mm/kasan/Makefile create mode 100644 mm/kasan/kasan.c create mode 100644 mm/kasan/kasan.h create mode 100644 mm/kasan/report.c create mode 100644 scripts/Makefile.kasan diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt new file mode 100644 index 000000000000..f0645a8a992f --- /dev/null +++ b/Documentation/kasan.txt @@ -0,0 +1,170 @@ +Kernel address sanitizer +================ + +0. Overview +=========== + +Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides +a fast and comprehensive solution for finding use-after-free and out-of-bounds +bugs. + +KASan uses compile-time instrumentation for checking every memory access, +therefore you will need a certain version of GCC >= 4.9.2 + +Currently KASan is supported only for x86_64 architecture and requires that the +kernel be built with the SLUB allocator. + +1. Usage +========= + +To enable KASAN configure kernel with: + + CONFIG_KASAN = y + +and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline +is compiler instrumentation types. 
The former produces smaller binary, the +latter is 1.1 - 2 times faster. Inline instrumentation requires GCC 5.0 or +later. + +Currently KASAN works only with the SLUB memory allocator. +For better bug detection and nicer report, enable CONFIG_STACKTRACE and put +at least 'slub_debug=U' in the boot cmdline. + +To disable instrumentation for specific files or directories, add a line +similar to the following to the respective kernel Makefile: + + For a single file (e.g. main.o): + KASAN_SANITIZE_main.o := n + + For all files in one directory: + KASAN_SANITIZE := n + +1.1 Error reports +========== + +A typical out of bounds access report looks like this: + +================================================================== +BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3 +Write of size 1 by task modprobe/1689 +============================================================================= +BUG kmalloc-128 (Not tainted): kasan error +----------------------------------------------------------------------------- + +Disabling lock debugging due to kernel taint +INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689 + __slab_alloc+0x4b4/0x4f0 + kmem_cache_alloc_trace+0x10b/0x190 + kmalloc_oob_right+0x3d/0x75 [test_kasan] + init_module+0x9/0x47 [test_kasan] + do_one_initcall+0x99/0x200 + load_module+0x2cb3/0x3b20 + SyS_finit_module+0x76/0x80 + system_call_fastpath+0x12/0x17 +INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080 +INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720 + +Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ +Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
kkkkkkkkkkkkkkkk +Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk +Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk. +Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........ +Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ +CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014 + ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78 + ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8 + ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558 +Call Trace: + [<ffffffff81cc68ae>] dump_stack+0x46/0x58 + [<ffffffff811fd848>] print_trailer+0xf8/0x160 + [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [<ffffffff811ff0f5>] object_err+0x35/0x40 + [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0 + [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40 + [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40 + [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40 + [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan] + [<ffffffff8120a995>] __asan_store1+0x75/0xb0 + [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan] + [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan] + [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan] + [<ffffffff810002d9>] do_one_initcall+0x99/0x200 + [<ffffffff811e4e5c>] ? 
__vunmap+0xec/0x160 + [<ffffffff81114f63>] load_module+0x2cb3/0x3b20 + [<ffffffff8110fd70>] ? m_show+0x240/0x240 + [<ffffffff81115f06>] SyS_finit_module+0x76/0x80 + [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17 +Memory state around the buggy address: + ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc + ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00 +>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc + ^ + ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +First sections describe slub object where bad access happened. +See 'SLUB Debug output' section in Documentation/vm/slub.txt for details. + +In the last section the report shows memory state around the accessed address. +Reading this part requires some more understanding of how KASAN works. + +Each 8 bytes of memory are encoded in one shadow byte as accessible, +partially accessible, freed or they can be part of a redzone. +We use the following encoding for each shadow byte: 0 means that all 8 bytes +of the corresponding memory region are accessible; number N (1 <= N <= 7) means +that the first N bytes are accessible, and other (8 - N) bytes are not; +any negative value indicates that the entire 8-byte word is inaccessible. +We use different negative values to distinguish between different kinds of +inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h). 
+ +In the report above the arrows point to the shadow byte 03, which means that +the accessed address is partially accessible. + + +2. Implementation details +======================== + +From a high level, our approach to memory error detection is similar to that +of kmemcheck: use shadow memory to record whether each byte of memory is safe +to access, and use compile-time instrumentation to check shadow memory on each +memory access. + +AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory +(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and +offset to translate a memory address to its corresponding shadow address. + +Here is the function which translates an address to its corresponding shadow +address: + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +where KASAN_SHADOW_SCALE_SHIFT = 3. + +Compile-time instrumentation is used for checking memory accesses. Compiler inserts +function calls (__asan_load*(addr), __asan_store*(addr)) before each memory +access of size 1, 2, 4, 8 or 16. These functions check whether memory access is +valid or not by checking corresponding shadow memory. + +GCC 5.0 has possibility to perform inline instrumentation. Instead of making +function calls GCC directly inserts the code to check the shadow memory. +This option significantly enlarges kernel but it gives x1.1-x2 performance +boost over outline instrumented kernel.
diff --git a/Makefile b/Makefile index 5fa2e3035509..33cb15efd257 100644 --- a/Makefile +++ b/Makefile @@ -423,7 +423,7 @@ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS -export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV +export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE CFLAGS_GCOV CFLAGS_KASAN export KBUILD_AFLAGS AFLAGS_KERNEL AFLAGS_MODULE export KBUILD_AFLAGS_MODULE KBUILD_CFLAGS_MODULE KBUILD_LDFLAGS_MODULE export KBUILD_AFLAGS_KERNEL KBUILD_CFLAGS_KERNEL @@ -781,6 +781,7 @@ ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO endif +include $(srctree)/scripts/Makefile.kasan include $(srctree)/scripts/Makefile.extrawarn # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 8902f52e0998..280bc0a63365 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -19,6 +19,7 @@ KBUILD_CFLAGS := $(cflags-y) \ $(call cc-option,-fno-stack-protector) GCOV_PROFILE := n +KASAN_SANITIZE := n lib-y := efi-stub-helper.o lib-$(CONFIG_EFI_ARMSTUB) += arm-stub.o fdt.o diff --git a/include/linux/kasan.h b/include/linux/kasan.h new file mode 100644 index 000000000000..9102fda60def --- /dev/null +++ b/include/linux/kasan.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_KASAN_H +#define _LINUX_KASAN_H + +#include <linux/types.h> + +struct kmem_cache; +struct page; + +#ifdef CONFIG_KASAN + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) + +#include <asm/kasan.h> +#include <linux/sched.h> + +static inline void *kasan_mem_to_shadow(const void *addr) +{ + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + + KASAN_SHADOW_OFFSET; +} + +/* Enable reporting bugs after 
kasan_disable_current() */ +static inline void kasan_enable_current(void) +{ + current->kasan_depth++; +} + +/* Disable reporting bugs for current task */ +static inline void kasan_disable_current(void) +{ + current->kasan_depth--; +} + +void kasan_unpoison_shadow(const void *address, size_t size); + +#else /* CONFIG_KASAN */ + +static inline void kasan_unpoison_shadow(const void *address, size_t size) {} + +static inline void kasan_enable_current(void) {} +static inline void kasan_disable_current(void) {} + +#endif /* CONFIG_KASAN */ + +#endif /* LINUX_KASAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 048b91b983ed..41c60e5302d7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1664,6 +1664,9 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; +#ifdef CONFIG_KASAN + unsigned int kasan_depth; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 79a9bb67aeaf..ecb3516f6546 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -651,6 +651,8 @@ config DEBUG_STACKOVERFLOW source "lib/Kconfig.kmemcheck" +source "lib/Kconfig.kasan" + endmenu # "Memory Debugging" config DEBUG_SHIRQ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan new file mode 100644 index 000000000000..e5b3fbe5560f --- /dev/null +++ b/lib/Kconfig.kasan @@ -0,0 +1,43 @@ +config HAVE_ARCH_KASAN + bool + +if HAVE_ARCH_KASAN + +config KASAN + bool "KASan: runtime memory debugger" + help + Enables kernel address sanitizer - runtime memory debugger, + designed to find out-of-bounds accesses and use-after-free bugs. + This is strictly debugging feature. It consumes about 1/8 + of available memory and brings about ~x3 performance slowdown. + For better error detection enable CONFIG_STACKTRACE, + and add slub_debug=U to boot cmdline. 
+ +config KASAN_SHADOW_OFFSET + hex + +choice + prompt "Instrumentation type" + depends on KASAN + default KASAN_OUTLINE + +config KASAN_OUTLINE + bool "Outline instrumentation" + help + Before every memory access compiler insert function call + __asan_load*/__asan_store*. These functions performs check + of shadow memory. This is slower than inline instrumentation, + however it doesn't bloat size of kernel's .text section so + much as inline does. + +config KASAN_INLINE + bool "Inline instrumentation" + help + Compiler directly inserts code checking shadow memory before + memory accesses. This is faster than outline (in some workloads + it gives about x2 boost over outline instrumentation), but + make kernel's .text size much bigger. + +endchoice + +endif diff --git a/mm/Makefile b/mm/Makefile index 3548460ab7b6..930b52df4aca 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,6 +49,7 @@ obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_KMEMCHECK) += kmemcheck.o +obj-$(CONFIG_KASAN) += kasan/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile new file mode 100644 index 000000000000..bd837b8c2f41 --- /dev/null +++ b/mm/kasan/Makefile @@ -0,0 +1,8 @@ +KASAN_SANITIZE := n + +CFLAGS_REMOVE_kasan.o = -pg +# Function splitter causes unnecessary splits in __asan_load1/__asan_store1 +# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533 +CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector) + +obj-y := kasan.o report.o diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c new file mode 100644 index 000000000000..6dc1aa7cefcc --- /dev/null +++ b/mm/kasan/kasan.c @@ -0,0 +1,302 @@ +/* + * This file contains shadow memory manipulation code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. 
+ * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov <adech.fo@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#define DISABLE_BRANCH_PROFILING + +#include <linux/export.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kasan.h> + +#include "kasan.h" + +/* + * Poisons the shadow memory for 'size' bytes starting from 'addr'. + * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE. + */ +static void kasan_poison_shadow(const void *address, size_t size, u8 value) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(address); + shadow_end = kasan_mem_to_shadow(address + size); + + memset(shadow_start, value, shadow_end - shadow_start); +} + +void kasan_unpoison_shadow(const void *address, size_t size) +{ + kasan_poison_shadow(address, size, 0); + + if (size & KASAN_SHADOW_MASK) { + u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); + *shadow = size & KASAN_SHADOW_MASK; + } +} + + +/* + * All functions below always inlined so compiler could + * perform better optimizations in each of __asan_loadX/__assn_storeX + * depending on memory access size X. 
+ */ + +static __always_inline bool memory_is_poisoned_1(unsigned long addr) +{ + s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(shadow_value)) { + s8 last_accessible_byte = addr & KASAN_SHADOW_MASK; + return unlikely(last_accessible_byte >= shadow_value); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_2(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 1)) + return true; + + if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_4(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 3)) + return true; + + if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_8(unsigned long addr) +{ + u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + if (memory_is_poisoned_1(addr + 7)) + return true; + + if (likely(((addr + 7) & KASAN_SHADOW_MASK) >= 7)) + return false; + + return unlikely(*(u8 *)shadow_addr); + } + + return false; +} + +static __always_inline bool memory_is_poisoned_16(unsigned long addr) +{ + u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr); + + if (unlikely(*shadow_addr)) { + u16 shadow_first_bytes = *(u16 *)shadow_addr; + s8 last_byte = (addr + 15) & KASAN_SHADOW_MASK; + + if (unlikely(shadow_first_bytes)) + return true; + + if (likely(!last_byte)) + return false; + + return memory_is_poisoned_1(addr + 15); + } + + return false; +} + +static __always_inline unsigned long bytes_is_zero(const u8 *start, + size_t size) +{ + while (size) { + if (unlikely(*start)) + return (unsigned long)start; + 
start++; + size--; + } + + return 0; +} + +static __always_inline unsigned long memory_is_zero(const void *start, + const void *end) +{ + unsigned int words; + unsigned long ret; + unsigned int prefix = (unsigned long)start % 8; + + if (end - start <= 16) + return bytes_is_zero(start, end - start); + + if (prefix) { + prefix = 8 - prefix; + ret = bytes_is_zero(start, prefix); + if (unlikely(ret)) + return ret; + start += prefix; + } + + words = (end - start) / 8; + while (words) { + if (unlikely(*(u64 *)start)) + return bytes_is_zero(start, 8); + start += 8; + words--; + } + + return bytes_is_zero(start, (end - start) % 8); +} + +static __always_inline bool memory_is_poisoned_n(unsigned long addr, + size_t size) +{ + unsigned long ret; + + ret = memory_is_zero(kasan_mem_to_shadow((void *)addr), + kasan_mem_to_shadow((void *)addr + size - 1) + 1); + + if (unlikely(ret)) { + unsigned long last_byte = addr + size - 1; + s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte); + + if (unlikely(ret != (unsigned long)last_shadow || + ((last_byte & KASAN_SHADOW_MASK) >= *last_shadow))) + return true; + } + return false; +} + +static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) +{ + if (__builtin_constant_p(size)) { + switch (size) { + case 1: + return memory_is_poisoned_1(addr); + case 2: + return memory_is_poisoned_2(addr); + case 4: + return memory_is_poisoned_4(addr); + case 8: + return memory_is_poisoned_8(addr); + case 16: + return memory_is_poisoned_16(addr); + default: + BUILD_BUG(); + } + } + + return memory_is_poisoned_n(addr, size); +} + + +static __always_inline void check_memory_region(unsigned long addr, + size_t size, bool write) +{ + struct kasan_access_info info; + + if (unlikely(size == 0)) + return; + + if (unlikely((void *)addr < + kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) { + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = write; + info.ip = _RET_IP_; + 
kasan_report_user_access(&info); + return; + } + + if (likely(!memory_is_poisoned(addr, size))) + return; + + kasan_report(addr, size, write, _RET_IP_); +} + +#define DEFINE_ASAN_LOAD_STORE(size) \ + void __asan_load##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, false); \ + } \ + EXPORT_SYMBOL(__asan_load##size); \ + __alias(__asan_load##size) \ + void __asan_load##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_load##size##_noabort); \ + void __asan_store##size(unsigned long addr) \ + { \ + check_memory_region(addr, size, true); \ + } \ + EXPORT_SYMBOL(__asan_store##size); \ + __alias(__asan_store##size) \ + void __asan_store##size##_noabort(unsigned long); \ + EXPORT_SYMBOL(__asan_store##size##_noabort) + +DEFINE_ASAN_LOAD_STORE(1); +DEFINE_ASAN_LOAD_STORE(2); +DEFINE_ASAN_LOAD_STORE(4); +DEFINE_ASAN_LOAD_STORE(8); +DEFINE_ASAN_LOAD_STORE(16); + +void __asan_loadN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, false); +} +EXPORT_SYMBOL(__asan_loadN); + +__alias(__asan_loadN) +void __asan_loadN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_loadN_noabort); + +void __asan_storeN(unsigned long addr, size_t size) +{ + check_memory_region(addr, size, true); +} +EXPORT_SYMBOL(__asan_storeN); + +__alias(__asan_storeN) +void __asan_storeN_noabort(unsigned long, size_t); +EXPORT_SYMBOL(__asan_storeN_noabort); + +/* to shut up compiler complaints */ +void __asan_handle_no_return(void) {} +EXPORT_SYMBOL(__asan_handle_no_return); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h new file mode 100644 index 000000000000..648b9c006f3f --- /dev/null +++ b/mm/kasan/kasan.h @@ -0,0 +1,34 @@ +#ifndef __MM_KASAN_KASAN_H +#define __MM_KASAN_KASAN_H + +#include <linux/kasan.h> + +#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) +#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) + +struct kasan_access_info { + const void *access_addr; + const void *first_bad_addr; + size_t access_size; + bool is_write; + 
unsigned long ip; +}; + +void kasan_report_error(struct kasan_access_info *info); +void kasan_report_user_access(struct kasan_access_info *info); + +static inline const void *kasan_shadow_to_mem(const void *shadow_addr) +{ + return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET) + << KASAN_SHADOW_SCALE_SHIFT); +} + +static inline bool kasan_enabled(void) +{ + return !current->kasan_depth; +} + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip); + +#endif diff --git a/mm/kasan/report.c b/mm/kasan/report.c new file mode 100644 index 000000000000..5835d69563f5 --- /dev/null +++ b/mm/kasan/report.c @@ -0,0 +1,209 @@ +/* + * This file contains error reporting code. + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + * + * Some of code borrowed from https://github.com/xairy/linux by + * Andrey Konovalov <adech.fo@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/stacktrace.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/kasan.h> + +#include "kasan.h" + +/* Shadow layout customization. 
*/ +#define SHADOW_BYTES_PER_BLOCK 1 +#define SHADOW_BLOCKS_PER_ROW 16 +#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK) +#define SHADOW_ROWS_AROUND_ADDR 2 + +static const void *find_first_bad_addr(const void *addr, size_t size) +{ + u8 shadow_val = *(u8 *)kasan_mem_to_shadow(addr); + const void *first_bad_addr = addr; + + while (!shadow_val && first_bad_addr < addr + size) { + first_bad_addr += KASAN_SHADOW_SCALE_SIZE; + shadow_val = *(u8 *)kasan_mem_to_shadow(first_bad_addr); + } + return first_bad_addr; +} + +static void print_error_description(struct kasan_access_info *info) +{ + const char *bug_type = "unknown crash"; + u8 shadow_val; + + info->first_bad_addr = find_first_bad_addr(info->access_addr, + info->access_size); + + shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); + + switch (shadow_val) { + case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: + bug_type = "out of bounds access"; + break; + } + + pr_err("BUG: KASan: %s in %pS at addr %p\n", + bug_type, (void *)info->ip, + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? "Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); +} + +static void print_address_description(struct kasan_access_info *info) +{ + dump_stack(); +} + +static bool row_is_guilty(const void *row, const void *guilty) +{ + return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW); +} + +static int shadow_pointer_offset(const void *row, const void *shadow) +{ + /* The length of ">ff00ff00ff00ff00: " is + * 3 + (BITS_PER_LONG/8)*2 chars. 
+ */ + return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 + + (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1; +} + +static void print_shadow_for_address(const void *addr) +{ + int i; + const void *shadow = kasan_mem_to_shadow(addr); + const void *shadow_row; + + shadow_row = (void *)round_down((unsigned long)shadow, + SHADOW_BYTES_PER_ROW) + - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW; + + pr_err("Memory state around the buggy address:\n"); + + for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) { + const void *kaddr = kasan_shadow_to_mem(shadow_row); + char buffer[4 + (BITS_PER_LONG/8)*2]; + + snprintf(buffer, sizeof(buffer), + (i == 0) ? ">%p: " : " %p: ", kaddr); + + kasan_disable_current(); + print_hex_dump(KERN_ERR, buffer, + DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1, + shadow_row, SHADOW_BYTES_PER_ROW, 0); + kasan_enable_current(); + + if (row_is_guilty(shadow_row, shadow)) + pr_err("%*c\n", + shadow_pointer_offset(shadow_row, shadow), + '^'); + + shadow_row += SHADOW_BYTES_PER_ROW; + } +} + +static DEFINE_SPINLOCK(report_lock); + +void kasan_report_error(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + print_error_description(info); + print_address_description(info); + print_shadow_for_address(info->first_bad_addr); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report_user_access(struct kasan_access_info *info) +{ + unsigned long flags; + + spin_lock_irqsave(&report_lock, flags); + pr_err("=================================" + "=================================\n"); + pr_err("BUG: KASan: user-memory-access on address %p\n", + info->access_addr); + pr_err("%s of size %zu by task %s/%d\n", + info->is_write ? 
"Write" : "Read", + info->access_size, current->comm, task_pid_nr(current)); + dump_stack(); + pr_err("=================================" + "=================================\n"); + spin_unlock_irqrestore(&report_lock, flags); +} + +void kasan_report(unsigned long addr, size_t size, + bool is_write, unsigned long ip) +{ + struct kasan_access_info info; + + if (likely(!kasan_enabled())) + return; + + info.access_addr = (void *)addr; + info.access_size = size; + info.is_write = is_write; + info.ip = ip; + kasan_report_error(&info); +} + + +#define DEFINE_ASAN_REPORT_LOAD(size) \ +void __asan_report_load##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, false, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_load##size##_noabort) + +#define DEFINE_ASAN_REPORT_STORE(size) \ +void __asan_report_store##size##_noabort(unsigned long addr) \ +{ \ + kasan_report(addr, size, true, _RET_IP_); \ +} \ +EXPORT_SYMBOL(__asan_report_store##size##_noabort) + +DEFINE_ASAN_REPORT_LOAD(1); +DEFINE_ASAN_REPORT_LOAD(2); +DEFINE_ASAN_REPORT_LOAD(4); +DEFINE_ASAN_REPORT_LOAD(8); +DEFINE_ASAN_REPORT_LOAD(16); +DEFINE_ASAN_REPORT_STORE(1); +DEFINE_ASAN_REPORT_STORE(2); +DEFINE_ASAN_REPORT_STORE(4); +DEFINE_ASAN_REPORT_STORE(8); +DEFINE_ASAN_REPORT_STORE(16); + +void __asan_report_load_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, false, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_load_n_noabort); + +void __asan_report_store_n_noabort(unsigned long addr, size_t size) +{ + kasan_report(addr, size, true, _RET_IP_); +} +EXPORT_SYMBOL(__asan_report_store_n_noabort); diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan new file mode 100644 index 000000000000..7acd6faa0335 --- /dev/null +++ b/scripts/Makefile.kasan @@ -0,0 +1,24 @@ +ifdef CONFIG_KASAN +ifdef CONFIG_KASAN_INLINE + call_threshold := 10000 +else + call_threshold := 0 +endif + +CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address + +CFLAGS_KASAN := $(call cc-option, 
-fsanitize=kernel-address \ + -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-instrumentation-with-call-threshold=$(call_threshold)) + +ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) + $(warning Cannot use CONFIG_KASAN: \ + -fsanitize=kernel-address is not supported by compiler) +else + ifeq ($(CFLAGS_KASAN),) + $(warning CONFIG_KASAN: compiler does not support all options.\ + Trying minimal configuration) + CFLAGS_KASAN := $(CFLAGS_KASAN_MINIMAL) + endif +endif +endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 511755200634..044eb4f89a91 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -119,6 +119,16 @@ _c_flags += $(if $(patsubst n%,, \ $(CFLAGS_GCOV)) endif +# +# Enable address sanitizer flags for kernel except some files or directories +# we don't want to check (depends on variables KASAN_SANITIZE_obj.o, KASAN_SANITIZE) +# +ifeq ($(CONFIG_KASAN),y) +_c_flags += $(if $(patsubst n%,, \ + $(KASAN_SANITIZE_$(basetarget).o)$(KASAN_SANITIZE)y), \ + $(CFLAGS_KASAN)) +endif + # If building the kernel in a separate objtree expand all occurrences # of -Idir to -I$(srctree)/dir except for absolute paths (starting with '/'). From 786a8959912eb94fc2381c2ae487a96ce55dabca Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:21 -0800 Subject: [PATCH 072/108] kasan: disable memory hotplug Currently memory hotplug won't work with KASan. As we don't have shadow for hotplugged memory, kernel will crash on the first access to it. To make this work we will need to allocate shadow for new memory. At some future point proper memory hotplug support will be implemented. Until then, print a warning at startup and disable memory hot-add. 
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/kasan/kasan.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 6dc1aa7cefcc..def81104772f 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -20,6 +20,7 @@ #include <linux/init.h> #include <linux/kernel.h> #include <linux/memblock.h> +#include <linux/memory.h> #include <linux/mm.h> #include <linux/printk.h> #include <linux/sched.h> @@ -300,3 +301,23 @@ EXPORT_SYMBOL(__asan_storeN_noabort); /* to shut up compiler complaints */ void __asan_handle_no_return(void) {} EXPORT_SYMBOL(__asan_handle_no_return); + +#ifdef CONFIG_MEMORY_HOTPLUG +static int kasan_mem_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + return (action == MEM_GOING_ONLINE) ? 
NOTIFY_BAD : NOTIFY_OK; +} + +static int __init kasan_memhotplug_init(void) +{ + pr_err("WARNING: KASan doesn't support memory hot-add\n"); + pr_err("Memory hot-add will be disabled\n"); + + hotplug_memory_notifier(kasan_mem_notifier, 0); + + return 0; +} + +module_init(kasan_memhotplug_init); +#endif From ef7f0d6a6ca8c9e4b27d78895af86c2fbfaeedb2 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:25 -0800 Subject: [PATCH 073/108] x86_64: add KASan support This patch adds arch specific code for kernel address sanitizer. 16TB of virtual address space is used for shadow memory. It's located in range [ffffec0000000000 - fffffc0000000000] between vmemmap and %esp fixup stacks. At an early stage we map the whole shadow region with the zero page. Later, after pages are mapped to the direct mapping address range, we unmap zero pages from the corresponding shadow (see kasan_map_shadow()) and allocate and map real shadow memory, reusing the vmemmap_populate() function. Also replace __pa with __pa_nodebug before the shadow is initialized. __pa with CONFIG_DEBUG_VIRTUAL=y makes an external function call (__phys_addr). __phys_addr is instrumented, so __asan_load could be called before the shadow area is initialized. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Cc: Jim Davis <jim.epost@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/x86/x86_64/mm.txt | 2 + arch/x86/Kconfig | 1 + arch/x86/boot/Makefile | 2 + arch/x86/boot/compressed/Makefile | 2 + arch/x86/include/asm/kasan.h | 31 +++++ arch/x86/kernel/Makefile | 2 + arch/x86/kernel/dumpstack.c | 5 +- arch/x86/kernel/head64.c | 9 +- arch/x86/kernel/head_64.S | 30 +++++ arch/x86/kernel/setup.c | 3 + arch/x86/mm/Makefile | 3 + arch/x86/mm/kasan_init_64.c | 199 ++++++++++++++++++++++++++++++ arch/x86/realmode/Makefile | 2 +- arch/x86/realmode/rm/Makefile | 1 + arch/x86/vdso/Makefile | 1 + lib/Kconfig.kasan | 1 + 16 files changed, 290 insertions(+), 4 deletions(-) create mode 100644 arch/x86/include/asm/kasan.h create mode 100644 arch/x86/mm/kasan_init_64.c diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt index 052ee643a32e..05712ac83e38 100644 --- a/Documentation/x86/x86_64/mm.txt +++ b/Documentation/x86/x86_64/mm.txt @@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB) ... unused hole ... +ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB) +... unused hole ... ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks ... unused hole ... 
ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0 diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 33ce9a344e38..eb1cf898ed3c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -85,6 +85,7 @@ config X86 select HAVE_CMPXCHG_LOCAL select HAVE_CMPXCHG_DOUBLE select HAVE_ARCH_KMEMCHECK + select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP select HAVE_USER_RETURN_NOTIFIER select ARCH_BINFMT_ELF_RANDOMIZE_PIE select HAVE_ARCH_JUMP_LABEL diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index 3db07f30636f..57bbf2fb21f6 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -14,6 +14,8 @@ # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. # The number is the same as you would ordinarily press at bootup. +KASAN_SANITIZE := n + SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets := vmlinux.bin setup.bin setup.elf bzImage diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index ad754b4411f7..843feb3eb20b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -16,6 +16,8 @@ # (see scripts/Makefile.lib size_append) # compressed vmlinux.bin.all + u32 size of vmlinux.bin.all +KASAN_SANITIZE := n + targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h new file mode 100644 index 000000000000..8b22422fbad8 --- /dev/null +++ b/arch/x86/include/asm/kasan.h @@ -0,0 +1,31 @@ +#ifndef _ASM_X86_KASAN_H +#define _ASM_X86_KASAN_H + +/* + * Compiler uses shadow offset assuming that addresses start + * from 0. 
Kernel addresses don't start from 0, so shadow + * for kernel really starts from compiler's shadow offset + + * 'kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT + */ +#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \ + (0xffff800000000000ULL >> 3)) +/* 47 bits for kernel address -> (47 - 3) bits for shadow */ +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1ULL << (47 - 3))) + +#ifndef __ASSEMBLY__ + +extern pte_t kasan_zero_pte[]; +extern pte_t kasan_zero_pmd[]; +extern pte_t kasan_zero_pud[]; + +#ifdef CONFIG_KASAN +void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_init(void); +#else +static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_init(void) { } +#endif + +#endif + +#endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 732223496968..b13b70634124 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -16,6 +16,8 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif +KASAN_SANITIZE_head$(BITS).o := n + CFLAGS_irq.o := -I$(src)/../include/asm/trace obj-y := process_$(BITS).o signal.o entry_$(BITS).o diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index b74ebc7c4402..cf3df1d8d039 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -265,7 +265,10 @@ int __die(const char *str, struct pt_regs *regs, long err) printk("SMP "); #endif #ifdef CONFIG_DEBUG_PAGEALLOC - printk("DEBUG_PAGEALLOC"); + printk("DEBUG_PAGEALLOC "); +#endif +#ifdef CONFIG_KASAN + printk("KASAN"); #endif printk("\n"); if (notify_die(DIE_OOPS, str, regs, err, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index eda1a865641e..efcddfaf05f9 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,6 +27,7 @@ #include <asm/bios_ebda.h> #include <asm/bootparam_utils.h> #include <asm/microcode.h> +#include <asm/kasan.h> /* * Manage page tables very early on. 
@@ -46,7 +47,7 @@ static void __init reset_early_page_tables(void) next_early_pgt = 0; - write_cr3(__pa(early_level4_pgt)); + write_cr3(__pa_nodebug(early_level4_pgt)); } /* Create a new PMD entry */ @@ -59,7 +60,7 @@ int __init early_make_pgtable(unsigned long address) pmdval_t pmd, *pmd_p; /* Invalid address or early pgt is done ? */ - if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) + if (physaddr >= MAXMEM || read_cr3() != __pa_nodebug(early_level4_pgt)) return -1; again: @@ -158,6 +159,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); + kasan_map_early_shadow(early_level4_pgt); + /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); @@ -179,6 +182,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; + kasan_map_early_shadow(init_level4_pgt); + x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index a468c0a65c42..6fd514d9f69a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -514,8 +514,38 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 +#ifdef CONFIG_KASAN +#define FILL(VAL, COUNT) \ + .rept (COUNT) ; \ + .quad (VAL) ; \ + .endr + +NEXT_PAGE(kasan_zero_pte) + FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) +NEXT_PAGE(kasan_zero_pmd) + FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) +NEXT_PAGE(kasan_zero_pud) + FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) + +#undef FILL +#endif + + #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE + +#ifdef CONFIG_KASAN +/* + * This page used as early shadow. 
We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +NEXT_PAGE(kasan_zero_page) + .skip PAGE_SIZE +#endif diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c4648adadd7d..27d200929864 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -89,6 +89,7 @@ #include <asm/cacheflush.h> #include <asm/processor.h> #include <asm/bugs.h> +#include <asm/kasan.h> #include <asm/vsyscall.h> #include <asm/cpu.h> @@ -1174,6 +1175,8 @@ void __init setup_arch(char **cmdline_p) x86_init.paging.pagetable_init(); + kasan_init(); + if (boot_cpu_data.cpuid_level >= 0) { /* A CPU has %cr4 if and only if it has CPUID */ mmu_cr4_features = read_cr4(); diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index ecfdc46a024a..c4cc74006c61 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -20,6 +20,9 @@ obj-$(CONFIG_HIGHMEM) += highmem_32.o obj-$(CONFIG_KMEMCHECK) += kmemcheck/ +KASAN_SANITIZE_kasan_init_$(BITS).o := n +obj-$(CONFIG_KASAN) += kasan_init_$(BITS).o + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o mmiotrace-y := kmmio.o pf_in.o mmio-mod.o obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c new file mode 100644 index 000000000000..3e4d9a1a39fa --- /dev/null +++ b/arch/x86/mm/kasan_init_64.c @@ -0,0 +1,199 @@ +#include <linux/bootmem.h> +#include <linux/kasan.h> +#include <linux/kdebug.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/vmalloc.h> + +#include <asm/tlbflush.h> +#include <asm/sections.h> + +extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern struct range pfn_mapped[E820_X_MAX]; + +extern unsigned char kasan_zero_page[PAGE_SIZE]; + +static int __init map_range(struct range *range) +{ + unsigned long start; + unsigned long end; + 
+ start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start)); + end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end)); + + /* + * end + 1 here is intentional. We check several shadow bytes in advance + * to slightly speed up fastpath. In some rare cases we could cross + * boundary of mapped shadow, so we just map some more here. + */ + return vmemmap_populate(start, end + 1, NUMA_NO_NODE); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start < end; start += PGDIR_SIZE) + pgd_clear(pgd_offset_k(start)); +} + +void __init kasan_map_early_shadow(pgd_t *pgd) +{ + int i; + unsigned long start = KASAN_SHADOW_START; + unsigned long end = KASAN_SHADOW_END; + + for (i = pgd_index(start); start < end; i++) { + pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) + | _KERNPG_TABLE); + start += PGDIR_SIZE; + } +} + +static int __init zero_pte_populate(pmd_t *pmd, unsigned long addr, + unsigned long end) +{ + pte_t *pte = pte_offset_kernel(pmd, addr); + + while (addr + PAGE_SIZE <= end) { + WARN_ON(!pte_none(*pte)); + set_pte(pte, __pte(__pa_nodebug(kasan_zero_page) + | __PAGE_KERNEL_RO)); + addr += PAGE_SIZE; + pte = pte_offset_kernel(pmd, addr); + } + return 0; +} + +static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pmd_t *pmd = pmd_offset(pud, addr); + + while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { + WARN_ON(!pmd_none(*pmd)); + set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) + | __PAGE_KERNEL_RO)); + addr += PMD_SIZE; + pmd = pmd_offset(pud, addr); + } + if (addr < end) { + if (pmd_none(*pmd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pte_populate(pmd, addr, end); + } + return ret; +} + + +static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, + unsigned long end) +{ + int ret = 0; + pud_t *pud = pud_offset(pgd, 
addr); + + while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { + WARN_ON(!pud_none(*pud)); + set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) + | __PAGE_KERNEL_RO)); + addr += PUD_SIZE; + pud = pud_offset(pgd, addr); + } + + if (addr < end) { + if (pud_none(*pud)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pud(pud, __pud(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pmd_populate(pud, addr, end); + } + return ret; +} + +static int __init zero_pgd_populate(unsigned long addr, unsigned long end) +{ + int ret = 0; + pgd_t *pgd = pgd_offset_k(addr); + + while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { + WARN_ON(!pgd_none(*pgd)); + set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) + | __PAGE_KERNEL_RO)); + addr += PGDIR_SIZE; + pgd = pgd_offset_k(addr); + } + + if (addr < end) { + if (pgd_none(*pgd)) { + void *p = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE); + if (!p) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa_nodebug(p) | _KERNPG_TABLE)); + } + ret = zero_pud_populate(pgd, addr, end); + } + return ret; +} + + +static void __init populate_zero_shadow(const void *start, const void *end) +{ + if (zero_pgd_populate((unsigned long)start, (unsigned long)end)) + panic("kasan: unable to map zero shadow!"); +} + + +#ifdef CONFIG_KASAN_INLINE +static int kasan_die_handler(struct notifier_block *self, + unsigned long val, + void *data) +{ + if (val == DIE_GPF) { + pr_emerg("CONFIG_KASAN_INLINE enabled"); + pr_emerg("GPF could be caused by NULL-ptr deref or user memory access"); + } + return NOTIFY_OK; +} + +static struct notifier_block kasan_die_notifier = { + .notifier_call = kasan_die_handler, +}; +#endif + +void __init kasan_init(void) +{ + int i; + +#ifdef CONFIG_KASAN_INLINE + register_die_notifier(&kasan_die_notifier); +#endif + + memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); + load_cr3(early_level4_pgt); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + 
populate_zero_shadow((void *)KASAN_SHADOW_START, + kasan_mem_to_shadow((void *)PAGE_OFFSET)); + + for (i = 0; i < E820_X_MAX; i++) { + if (pfn_mapped[i].end == 0) + break; + + if (map_range(&pfn_mapped[i])) + panic("kasan: unable to allocate shadow!"); + } + + populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + (void *)KASAN_SHADOW_END); + + memset(kasan_zero_page, 0, PAGE_SIZE); + + load_cr3(init_level4_pgt); +} diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 94f7fbe97b08..e02c2c6c56a5 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -6,7 +6,7 @@ # for more details. # # - +KASAN_SANITIZE := n subdir- := rm obj-y += init.o diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 7c0d7be176a5..2730d775ef9a 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile @@ -6,6 +6,7 @@ # for more details. # # +KASAN_SANITIZE := n always := realmode.bin realmode.relocs diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 09297c8e1fcd..7b9be9822724 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -3,6 +3,7 @@ # KBUILD_CFLAGS += $(DISABLE_LTO) +KASAN_SANITIZE := n VDSO64-$(CONFIG_X86_64) := y VDSOX32-$(CONFIG_X86_X32_ABI) := y diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index e5b3fbe5560f..0052b1b9aadd 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -15,6 +15,7 @@ config KASAN config KASAN_SHADOW_OFFSET hex + default 0xdffffc0000000000 if X86_64 choice prompt "Instrumentation type" From b8c73fc2493d42517be95cf2c89659fc6c6f4d02 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:28 -0800 Subject: [PATCH 074/108] mm: page_alloc: add kasan hooks on alloc and free paths Add kernel address sanitizer hooks to mark allocated page's addresses as accessible in corresponding shadow region. Mark freed pages as inaccessible. 
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/kasan.h | 6 ++++++ mm/compaction.c | 2 ++ mm/kasan/kasan.c | 14 ++++++++++++++ mm/kasan/kasan.h | 2 ++ mm/kasan/report.c | 11 +++++++++++ mm/page_alloc.c | 3 +++ 6 files changed, 38 insertions(+) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 9102fda60def..f00c15c41235 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -34,6 +34,9 @@ static inline void kasan_disable_current(void) void kasan_unpoison_shadow(const void *address, size_t size); +void kasan_alloc_pages(struct page *page, unsigned int order); +void kasan_free_pages(struct page *page, unsigned int order); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -41,6 +44,9 @@ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} static inline void kasan_disable_current(void) {} +static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} +static inline void kasan_free_pages(struct page *page, unsigned int order) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff 
--git a/mm/compaction.c b/mm/compaction.c index d50d6de6f1b6..8c0d9459b54a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -16,6 +16,7 @@ #include <linux/sysfs.h> #include <linux/balloon_compaction.h> #include <linux/page-isolation.h> +#include <linux/kasan.h> #include "internal.h" #ifdef CONFIG_COMPACTION @@ -72,6 +73,7 @@ static void map_pages(struct list_head *list) list_for_each_entry(page, list, lru) { arch_alloc_page(page, 0); kernel_map_pages(page, 1, 1); + kasan_alloc_pages(page, 0); } } diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index def81104772f..b516eb8632b9 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -254,6 +254,20 @@ static __always_inline void check_memory_region(unsigned long addr, kasan_report(addr, size, write, _RET_IP_); } +void kasan_alloc_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order); +} + +void kasan_free_pages(struct page *page, unsigned int order) +{ + if (likely(!PageHighMem(page))) + kasan_poison_shadow(page_address(page), + PAGE_SIZE << order, + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 648b9c006f3f..d3c90d5dd97a 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -6,6 +6,8 @@ #define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) +#define KASAN_FREE_PAGE 0xFF /* page was freed */ + struct kasan_access_info { const void *access_addr; const void *first_bad_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 5835d69563f5..fab8e7882ff1 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -54,6 +54,9 @@ static void print_error_description(struct kasan_access_info *info) shadow_val = *(u8 *)kasan_mem_to_shadow(info->first_bad_addr); switch (shadow_val) { + case KASAN_FREE_PAGE: + bug_type = "use after free"; + 
break; case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -69,6 +72,14 @@ static void print_error_description(struct kasan_access_info *info) static void print_address_description(struct kasan_access_info *info) { + const void *addr = info->access_addr; + + if ((addr >= (void *)PAGE_OFFSET) && + (addr < high_memory)) { + struct page *page = virt_to_head_page(addr); + dump_page(page, "kasan: bad access detected"); + } + dump_stack(); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb4758263f6b..a47f0b229a1a 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -25,6 +25,7 @@ #include <linux/compiler.h> #include <linux/kernel.h> #include <linux/kmemcheck.h> +#include <linux/kasan.h> #include <linux/module.h> #include <linux/suspend.h> #include <linux/pagevec.h> @@ -787,6 +788,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order) trace_mm_page_free(page, order); kmemcheck_free_shadow(page, order); + kasan_free_pages(page, order); if (PageAnon(page)) page->mapping = NULL; @@ -970,6 +972,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, arch_alloc_page(page, order); kernel_map_pages(page, 1 << order, 1); + kasan_alloc_pages(page, order); if (gfp_flags & __GFP_ZERO) prep_zero_page(page, order, gfp_flags); From 912f5fbf1d3060f25d6994aed0265c55b974b2e9 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:31 -0800 Subject: [PATCH 075/108] mm: slub: introduce virt_to_obj function virt_to_obj takes kmem_cache address, address of slab page, address x pointing somewhere inside slab object, and returns address of the beginning of object. 
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Acked-by: Christoph Lameter <cl@linux.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/slub_def.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 9abf04ed0999..db7d5de00c5f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -110,4 +110,20 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) } #endif + +/** + * virt_to_obj - returns address of the beginning of object. + * @s: object's kmem_cache + * @slab_page: address of slab page + * @x: address within object memory range + * + * Returns address of the beginning of object + */ +static inline void *virt_to_obj(struct kmem_cache *s, + const void *slab_page, + const void *x) +{ + return (void *)x - ((x - slab_page) % s->size); +} + #endif /* _LINUX_SLUB_DEF_H */ From 75c66def8d815201aa0386ecc7c66a5c8dbca1ee Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:35 -0800 Subject: [PATCH 076/108] mm: slub: share object_err function Remove static and add function declarations to linux/slub_def.h so it could be used by kernel address sanitizer. 
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/slub_def.h | 3 +++ mm/slub.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index db7d5de00c5f..33885118523c 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -126,4 +126,7 @@ static inline void *virt_to_obj(struct kmem_cache *s, return (void *)x - ((x - slab_page) % s->size); } +void object_err(struct kmem_cache *s, struct page *page, + u8 *object, char *reason); + #endif /* _LINUX_SLUB_DEF_H */ diff --git a/mm/slub.c b/mm/slub.c index 783505ba2052..6833b73ef6b3 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -629,7 +629,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) dump_stack(); } -static void object_err(struct kmem_cache *s, struct page *page, +void object_err(struct kmem_cache *s, struct page *page, u8 *object, char *reason) { slab_bug(s, "%s", reason); From a79316c6178ca419e35feef47d47f50b4e0ee9f2 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:38 -0800 Subject: [PATCH 077/108] mm: slub: introduce 
metadata_access_enable()/metadata_access_disable() It's ok for slub to access memory that is marked by kasan as inaccessible (object's metadata). Kasan shouldn't print a report in that case because these accesses are valid. Disabling instrumentation of slub.c code is not enough to achieve this because slub passes a pointer to object's metadata into external functions like memchr_inv(). We don't want to disable instrumentation for memchr_inv() because this is quite a generic function, and we don't want to miss bugs. metadata_access_enable/metadata_access_disable are used to tell KASan where accesses to metadata start/end, so we can temporarily disable KASan reports. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H.
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/slub.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/mm/slub.c b/mm/slub.c index 6833b73ef6b3..37555ad8894d 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -20,6 +20,7 @@ #include <linux/proc_fs.h> #include <linux/notifier.h> #include <linux/seq_file.h> +#include <linux/kasan.h> #include <linux/kmemcheck.h> #include <linux/cpu.h> #include <linux/cpuset.h> @@ -467,13 +468,31 @@ static int slub_debug; static char *slub_debug_slabs; static int disable_higher_order_debug; +/* + * slub is about to manipulate internal object metadata. This memory lies + * outside the range of the allocated object, so accessing it would normally + * be reported by kasan as a bounds error. metadata_access_enable() is used + * to tell kasan that these accesses are OK. 
+ */ +static inline void metadata_access_enable(void) +{ + kasan_disable_current(); +} + +static inline void metadata_access_disable(void) +{ + kasan_enable_current(); +} + /* * Object debugging */ static void print_section(char *text, u8 *addr, unsigned int length) { + metadata_access_enable(); print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr, length, 1); + metadata_access_disable(); } static struct track *get_track(struct kmem_cache *s, void *object, @@ -503,7 +522,9 @@ static void set_track(struct kmem_cache *s, void *object, trace.max_entries = TRACK_ADDRS_COUNT; trace.entries = p->addrs; trace.skip = 3; + metadata_access_enable(); save_stack_trace(&trace); + metadata_access_disable(); /* See rant in lockdep.c */ if (trace.nr_entries != 0 && @@ -677,7 +698,9 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, u8 *fault; u8 *end; + metadata_access_enable(); fault = memchr_inv(start, value, bytes); + metadata_access_disable(); if (!fault) return 1; @@ -770,7 +793,9 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page) if (!remainder) return 1; + metadata_access_enable(); fault = memchr_inv(end - remainder, POISON_INUSE, remainder); + metadata_access_disable(); if (!fault) return 1; while (end > fault && end[-1] == POISON_INUSE) From 0316bec22ec95ea2faca6406437b0b5950553b7c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:42 -0800 Subject: [PATCH 078/108] mm: slub: add kernel address sanitizer support for slub allocator With this patch kasan will be able to catch bugs in memory allocated by slub. Initially all objects in newly allocated slab page, marked as redzone. Later, when allocation of slub object happens, requested by caller number of bytes marked as accessible, and the rest of the object (including slub's metadata) marked as redzone (inaccessible). We also mark object as accessible if ksize was called for this object. 
There are some places in the kernel where the ksize function is called to inquire the size of the really allocated area. Such callers could validly access the whole allocated memory, so it should be marked as accessible. Code in slub.c and slab_common.c files could validly access object's metadata, so instrumentation for these files is disabled. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Signed-off-by: Dmitry Chernenkov <dmitryc@google.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/kasan.h | 27 ++++++++++++ include/linux/slab.h | 11 ++++- lib/Kconfig.kasan | 1 + mm/Makefile | 3 ++ mm/kasan/kasan.c | 98 +++++++++++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 5 +++ mm/kasan/report.c | 21 ++++++++++ mm/slab_common.c | 5 ++- mm/slub.c | 31 +++++++++++++- 9 files changed, 197 insertions(+), 5 deletions(-) diff --git a/include/linux/kasan.h b/include/linux/kasan.h index f00c15c41235..d5310eef3e38 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -37,6 +37,18 @@ void kasan_unpoison_shadow(const void *address, size_t size); void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); +void kasan_poison_slab(struct page *page); +void kasan_unpoison_object_data(struct
kmem_cache *cache, void *object); +void kasan_poison_object_data(struct kmem_cache *cache, void *object); + +void kasan_kmalloc_large(const void *ptr, size_t size); +void kasan_kfree_large(const void *ptr); +void kasan_kmalloc(struct kmem_cache *s, const void *object, size_t size); +void kasan_krealloc(const void *object, size_t new_size); + +void kasan_slab_alloc(struct kmem_cache *s, void *object); +void kasan_slab_free(struct kmem_cache *s, void *object); + #else /* CONFIG_KASAN */ static inline void kasan_unpoison_shadow(const void *address, size_t size) {} @@ -47,6 +59,21 @@ static inline void kasan_disable_current(void) {} static inline void kasan_alloc_pages(struct page *page, unsigned int order) {} static inline void kasan_free_pages(struct page *page, unsigned int order) {} +static inline void kasan_poison_slab(struct page *page) {} +static inline void kasan_unpoison_object_data(struct kmem_cache *cache, + void *object) {} +static inline void kasan_poison_object_data(struct kmem_cache *cache, + void *object) {} + +static inline void kasan_kmalloc_large(void *ptr, size_t size) {} +static inline void kasan_kfree_large(const void *ptr) {} +static inline void kasan_kmalloc(struct kmem_cache *s, const void *object, + size_t size) {} +static inline void kasan_krealloc(const void *object, size_t new_size) {} + +static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} +static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h index ed2ffaab59ea..76f1feeabd38 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -104,6 +104,7 @@ (unsigned long)ZERO_SIZE_PTR) #include <linux/kmemleak.h> +#include <linux/kasan.h> struct mem_cgroup; /* @@ -325,7 +326,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t flags, size_t size) { - return 
kmem_cache_alloc(s, flags); + void *ret = kmem_cache_alloc(s, flags); + + kasan_kmalloc(s, ret, size); + return ret; } static __always_inline void * @@ -333,7 +337,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, int node, size_t size) { - return kmem_cache_alloc_node(s, gfpflags, node); + void *ret = kmem_cache_alloc_node(s, gfpflags, node); + + kasan_kmalloc(s, ret, size); + return ret; } #endif /* CONFIG_TRACING */ diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 0052b1b9aadd..a11ac0234452 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,6 +5,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" + depends on SLUB_DEBUG help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/Makefile b/mm/Makefile index 930b52df4aca..088c68e9ec35 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -2,6 +2,9 @@ # Makefile for the linux memory manager. # +KASAN_SANITIZE_slab_common.o := n +KASAN_SANITIZE_slub.o := n + mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \ diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index b516eb8632b9..dc83f070edb6 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -31,6 +31,7 @@ #include <linux/kasan.h> #include "kasan.h" +#include "../slab.h" /* * Poisons the shadow memory for 'size' bytes starting from 'addr'. 
@@ -268,6 +269,103 @@ void kasan_free_pages(struct page *page, unsigned int order) KASAN_FREE_PAGE); } +void kasan_poison_slab(struct page *page) +{ + kasan_poison_shadow(page_address(page), + PAGE_SIZE << compound_order(page), + KASAN_KMALLOC_REDZONE); +} + +void kasan_unpoison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_unpoison_shadow(object, cache->object_size); +} + +void kasan_poison_object_data(struct kmem_cache *cache, void *object) +{ + kasan_poison_shadow(object, + round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE), + KASAN_KMALLOC_REDZONE); +} + +void kasan_slab_alloc(struct kmem_cache *cache, void *object) +{ + kasan_kmalloc(cache, object, cache->object_size); +} + +void kasan_slab_free(struct kmem_cache *cache, void *object) +{ + unsigned long size = cache->object_size; + unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); + + /* RCU slabs could be legally used after free within the RCU period */ + if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) + return; + + kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); +} + +void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size) +{ + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(object == NULL)) + return; + + redzone_start = round_up((unsigned long)(object + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = round_up((unsigned long)object + cache->object_size, + KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(object, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_KMALLOC_REDZONE); +} +EXPORT_SYMBOL(kasan_kmalloc); + +void kasan_kmalloc_large(const void *ptr, size_t size) +{ + struct page *page; + unsigned long redzone_start; + unsigned long redzone_end; + + if (unlikely(ptr == NULL)) + return; + + page = virt_to_page(ptr); + redzone_start = round_up((unsigned long)(ptr + size), + KASAN_SHADOW_SCALE_SIZE); + redzone_end = (unsigned long)ptr + (PAGE_SIZE << 
compound_order(page)); + + kasan_unpoison_shadow(ptr, size); + kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start, + KASAN_PAGE_REDZONE); +} + +void kasan_krealloc(const void *object, size_t size) +{ + struct page *page; + + if (unlikely(object == ZERO_SIZE_PTR)) + return; + + page = virt_to_head_page(object); + + if (unlikely(!PageSlab(page))) + kasan_kmalloc_large(object, size); + else + kasan_kmalloc(page->slab_cache, object, size); +} + +void kasan_kfree_large(const void *ptr) +{ + struct page *page = virt_to_page(ptr); + + kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page), + KASAN_FREE_PAGE); +} + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index d3c90d5dd97a..5b052ab40cf9 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -7,6 +7,11 @@ #define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1) #define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_FREE_PAGE 0xFF /* page was freed */ +#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ +#define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ +#define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ + struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index fab8e7882ff1..2760edb4d0a8 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -24,6 +24,7 @@ #include <linux/kasan.h> #include "kasan.h" +#include "../slab.h" /* Shadow layout customization. */ #define SHADOW_BYTES_PER_BLOCK 1 @@ -55,8 +56,11 @@ static void print_error_description(struct kasan_access_info *info) switch (shadow_val) { case KASAN_FREE_PAGE: + case KASAN_KMALLOC_FREE: bug_type = "use after free"; break; + case KASAN_PAGE_REDZONE: + case KASAN_KMALLOC_REDZONE: case 0 ... 
KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -77,6 +81,23 @@ static void print_address_description(struct kasan_access_info *info) if ((addr >= (void *)PAGE_OFFSET) && (addr < high_memory)) { struct page *page = virt_to_head_page(addr); + + if (PageSlab(page)) { + void *object; + struct kmem_cache *cache = page->slab_cache; + void *last_object; + + object = virt_to_obj(cache, page_address(page), addr); + last_object = page_address(page) + + page->objects * cache->size; + + if (unlikely(object > last_object)) + object = last_object; /* we hit into padding */ + + object_err(cache, page, object, + "kasan: bad access detected"); + return; + } dump_page(page, "kasan: bad access detected"); } diff --git a/mm/slab_common.c b/mm/slab_common.c index 429a4506b382..999bb3424d44 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -898,6 +898,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_kmem_pages(flags, order); ret = page ? page_address(page) : NULL; kmemleak_alloc(ret, size, 1, flags); + kasan_kmalloc_large(ret, size); return ret; } EXPORT_SYMBOL(kmalloc_order); @@ -1077,8 +1078,10 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size, if (p) ks = ksize(p); - if (ks >= new_size) + if (ks >= new_size) { + kasan_krealloc((void *)p, new_size); return (void *)p; + } ret = kmalloc_track_caller(new_size, flags); if (ret && p) diff --git a/mm/slub.c b/mm/slub.c index 37555ad8894d..6832c4eab104 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1251,11 +1251,13 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node, static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags) { kmemleak_alloc(ptr, size, 1, flags); + kasan_kmalloc_large(ptr, size); } static inline void kfree_hook(const void *x) { kmemleak_free(x); + kasan_kfree_large(x); } static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, @@ -1278,6 +1280,7 @@ static inline void 
slab_post_alloc_hook(struct kmem_cache *s, kmemcheck_slab_alloc(s, flags, object, slab_ksize(s)); kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags); memcg_kmem_put_cache(s); + kasan_slab_alloc(s, object); } static inline void slab_free_hook(struct kmem_cache *s, void *x) @@ -1301,6 +1304,8 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) #endif if (!(s->flags & SLAB_DEBUG_OBJECTS)) debug_check_no_obj_freed(x, s->object_size); + + kasan_slab_free(s, x); } /* @@ -1395,8 +1400,11 @@ static void setup_object(struct kmem_cache *s, struct page *page, void *object) { setup_object_debug(s, page, object); - if (unlikely(s->ctor)) + if (unlikely(s->ctor)) { + kasan_unpoison_object_data(s, object); s->ctor(object); + kasan_poison_object_data(s, object); + } } static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1429,6 +1437,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) if (unlikely(s->flags & SLAB_POISON)) memset(start, POISON_INUSE, PAGE_SIZE << order); + kasan_poison_slab(page); + for_each_object_idx(p, idx, s, start, page->objects) { setup_object(s, page, p); if (likely(idx < page->objects)) @@ -2522,6 +2532,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc(s, gfpflags, _RET_IP_); trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags); + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_trace); @@ -2548,6 +2559,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s, trace_kmalloc_node(_RET_IP_, ret, size, s->size, gfpflags, node); + + kasan_kmalloc(s, ret, size); return ret; } EXPORT_SYMBOL(kmem_cache_alloc_node_trace); @@ -2933,6 +2946,7 @@ static void early_kmem_cache_node_alloc(int node) init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); init_tracking(kmem_cache_node, n); #endif + kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node)); init_kmem_cache_node(n); 
inc_slabs_node(kmem_cache_node, node, page->objects); @@ -3305,6 +3319,8 @@ void *__kmalloc(size_t size, gfp_t flags) trace_kmalloc(_RET_IP_, ret, size, s->size, flags); + kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc); @@ -3348,12 +3364,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node) trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); + kasan_kmalloc(s, ret, size); + return ret; } EXPORT_SYMBOL(__kmalloc_node); #endif -size_t ksize(const void *object) +static size_t __ksize(const void *object) { struct page *page; @@ -3369,6 +3387,15 @@ size_t ksize(const void *object) return slab_ksize(page->slab_cache); } + +size_t ksize(const void *object) +{ + size_t size = __ksize(object); + /* We assume that ksize callers could use whole allocated area, + so we need unpoison this area. */ + kasan_krealloc(object, size); + return size; +} EXPORT_SYMBOL(ksize); void kfree(const void *x) From df4c0e36f1b1782b0611a77c52cc240e5c4752dd Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:45 -0800 Subject: [PATCH 079/108] fs: dcache: manually unpoison dname after allocation to shut up kasan's reports We need to manually unpoison the rounded up allocation size for dname to avoid kasan's reports in dentry_string_cmp(). When CONFIG_DCACHE_WORD_ACCESS=y dentry_string_cmp may access a few bytes beyond the size requested in kmalloc(). dentry_string_cmp() relies on the fact that dentry is allocated using kmalloc and kmalloc internally rounds up the allocation size. So this is not a bug, but it makes kasan complain about such accesses. To avoid such reports we mark the rounded up allocation size in shadow as accessible.
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Reported-by: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/dcache.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/dcache.c b/fs/dcache.c index d04be762b216..7d34f04ec7aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -38,6 +38,8 @@ #include <linux/prefetch.h> #include <linux/ratelimit.h> #include <linux/list_lru.h> +#include <linux/kasan.h> + #include "internal.h" #include "mount.h" @@ -1429,6 +1431,9 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) } atomic_set(&p->u.count, 1); dname = p->name; + if (IS_ENABLED(CONFIG_DCACHE_WORD_ACCESS)) + kasan_unpoison_shadow(dname, + round_up(name->len + 1, sizeof(unsigned long))); } else { dname = dentry->d_iname; } From e79ed2f13faab8fc9d4ad76d5f5a241724e45836 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:49 -0800 Subject: [PATCH 080/108] kmemleak: disable kasan instrumentation for kmemleak kmalloc internally rounds up the allocation size, and kmemleak uses the rounded up size as the object's size. This makes kasan complain while kmemleak scans memory or calculates an object's checksum. The simplest solution here is to disable kasan.
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/kmemleak.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 3cda50c1e394..5405aff5a590 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -98,6 +98,7 @@ #include <asm/processor.h> #include <linux/atomic.h> +#include <linux/kasan.h> #include <linux/kmemcheck.h> #include <linux/kmemleak.h> #include <linux/memory_hotplug.h> @@ -1113,7 +1114,10 @@ static bool update_checksum(struct kmemleak_object *object) if (!kmemcheck_is_obj_initialized(object->pointer, object->size)) return false; + kasan_disable_current(); object->checksum = crc32(0, (void *)object->pointer, object->size); + kasan_enable_current(); + return object->checksum != old_csum; } @@ -1164,7 +1168,9 @@ static void scan_block(void *_start, void *_end, BYTES_PER_POINTER)) continue; + kasan_disable_current(); pointer = *ptr; + kasan_enable_current(); object = find_and_get_object(pointer, 1); if (!object) From 3f15801cdc2379ca4bf507f48bffd788f9e508ae Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:53 -0800 Subject: 
[PATCH 081/108] lib: add kasan test module This is a test module doing various nasty things like out of bounds accesses, use after free. It is useful for testing kernel debugging features like kernel address sanitizer. It mostly concentrates on testing of slab allocator, but we might want to add more different stuff here in future (like stack/global variables out of bounds accesses and so on). Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- lib/Kconfig.kasan | 8 ++ lib/Makefile | 7 +- lib/test_kasan.c | 277 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 lib/test_kasan.c diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index a11ac0234452..4d47d874335c 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -42,4 +42,12 @@ config KASAN_INLINE endchoice +config TEST_KASAN + tristate "Module for testing kasan for bug detection" + depends on m && KASAN + help + This is a test module doing various nasty things like + out of bounds accesses, use after free. It is useful for testing + kernel debugging features like kernel address sanitizer. 
+ endif diff --git a/lib/Makefile b/lib/Makefile index e456defd1021..87eb3bffc283 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -32,12 +32,13 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o obj-y += kstrtox.o -obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o -obj-$(CONFIG_TEST_LKM) += test_module.o -obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_KASAN) += test_kasan.o +obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_kasan.c b/lib/test_kasan.c new file mode 100644 index 000000000000..098c08eddfab --- /dev/null +++ b/lib/test_kasan.c @@ -0,0 +1,277 @@ +/* + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/module.h> + +static noinline void __init kmalloc_oob_right(void) +{ + char *ptr; + size_t size = 123; + + pr_info("out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 'x'; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_left(void) +{ + char *ptr; + size_t size = 15; + + pr_info("out-of-bounds to left\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + *ptr = *(ptr - 1); + kfree(ptr); +} + +static noinline void __init kmalloc_node_oob_right(void) +{ + char *ptr; + size_t size = 4096; + + pr_info("kmalloc_node(): out-of-bounds to right\n"); + ptr = kmalloc_node(size, GFP_KERNEL, 0); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_large_oob_rigth(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc large allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_krealloc_more(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 19; + + pr_info("out-of-bounds after krealloc more\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + + ptr2[size2] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_krealloc_less(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 15; + + pr_info("out-of-bounds after krealloc less\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); 
+ if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + ptr2[size1] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_16(void) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + pr_info("kmalloc out-of-bounds for 16-bytes access\n"); + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + kfree(ptr2); + return; + } + *ptr1 = *ptr2; + kfree(ptr1); + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_in_memset(void) +{ + char *ptr; + size_t size = 666; + + pr_info("out-of-bounds in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr, 0, size+5); + kfree(ptr); +} + +static noinline void __init kmalloc_uaf(void) +{ + char *ptr; + size_t size = 10; + + pr_info("use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + *(ptr + 8) = 'x'; +} + +static noinline void __init kmalloc_uaf_memset(void) +{ + char *ptr; + size_t size = 33; + + pr_info("use-after-free in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + memset(ptr, 0, size); +} + +static noinline void __init kmalloc_uaf2(void) +{ + char *ptr1, *ptr2; + size_t size = 43; + + pr_info("use-after-free after another kmalloc\n"); + ptr1 = kmalloc(size, GFP_KERNEL); + if (!ptr1) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr1); + ptr2 = kmalloc(size, GFP_KERNEL); + if (!ptr2) { + pr_err("Allocation failed\n"); + return; + } + + ptr1[40] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmem_cache_oob(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache = kmem_cache_create("test_cache", + size, 0, + 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + 
pr_info("out-of-bounds in kmem_cache_alloc\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + *p = p[size]; + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static char global_array[10]; + +static noinline void __init kasan_global_oob(void) +{ + volatile int i = 3; + char *p = &global_array[ARRAY_SIZE(global_array) + i]; + + pr_info("out-of-bounds global variable\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_stack_oob(void) +{ + char stack_array[10]; + volatile int i = 0; + char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + + pr_info("out-of-bounds on stack\n"); + *(volatile char *)p; +} + +static int __init kmalloc_tests_init(void) +{ + kmalloc_oob_right(); + kmalloc_oob_left(); + kmalloc_node_oob_right(); + kmalloc_large_oob_rigth(); + kmalloc_oob_krealloc_more(); + kmalloc_oob_krealloc_less(); + kmalloc_oob_16(); + kmalloc_oob_in_memset(); + kmalloc_uaf(); + kmalloc_uaf_memset(); + kmalloc_uaf2(); + kmem_cache_oob(); + kasan_stack_oob(); + kasan_global_oob(); + return -EAGAIN; +} + +module_init(kmalloc_tests_init); +MODULE_LICENSE("GPL"); From 393f203f5fd54421fddb1e2a263f64d3876eeadb Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:56 -0800 Subject: [PATCH 082/108] x86_64: kasan: add interceptors for memset/memmove/memcpy functions Recently instrumentation of builtin function calls was removed from GCC 5.0. To check the memory accessed by such functions, userspace asan always uses interceptors for them. So now we should do this as well. This patch declares memset/memmove/memcpy as weak symbols. In mm/kasan/kasan.c we have our own implementation of those functions which checks memory before accessing it. Default memset/memmove/memcpy now always have aliases with '__' prefix. For files that are built without kasan instrumentation (e.g.
mm/slub.c) original mem* replaced (via #define) with prefixed variants, cause we don't want to check memory accesses there. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/boot/compressed/eboot.c | 3 +-- arch/x86/boot/compressed/misc.h | 1 + arch/x86/include/asm/string_64.h | 18 +++++++++++++++- arch/x86/kernel/x8664_ksyms_64.c | 10 +++++++-- arch/x86/lib/memcpy_64.S | 6 ++++-- arch/x86/lib/memmove_64.S | 4 ++++ arch/x86/lib/memset_64.S | 10 +++++---- drivers/firmware/efi/libstub/efistub.h | 4 ++++ mm/kasan/kasan.c | 29 ++++++++++++++++++++++++++ 9 files changed, 74 insertions(+), 11 deletions(-) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 92b9a5f2aed6..ef17683484e9 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -13,8 +13,7 @@ #include <asm/setup.h> #include <asm/desc.h> -#undef memcpy /* Use memcpy from misc.c */ - +#include "../string.h" #include "eboot.h" static efi_system_table_t *sys_table; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 24e3e569a13c..04477d68403f 100644 --- a/arch/x86/boot/compressed/misc.h +++ 
b/arch/x86/boot/compressed/misc.h @@ -7,6 +7,7 @@ * we just keep it from happening */ #undef CONFIG_PARAVIRT +#undef CONFIG_KASAN #ifdef CONFIG_X86_32 #define _ASM_X86_DESC_H 1 #endif diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 19e2c468fc2c..e4661196994e 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -27,11 +27,12 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t function. */ #define __HAVE_ARCH_MEMCPY 1 +extern void *__memcpy(void *to, const void *from, size_t len); + #ifndef CONFIG_KMEMCHECK #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 extern void *memcpy(void *to, const void *from, size_t len); #else -extern void *__memcpy(void *to, const void *from, size_t len); #define memcpy(dst, src, len) \ ({ \ size_t __len = (len); \ @@ -53,9 +54,11 @@ extern void *__memcpy(void *to, const void *from, size_t len); #define __HAVE_ARCH_MEMSET void *memset(void *s, int c, size_t n); +void *__memset(void *s, int c, size_t n); #define __HAVE_ARCH_MEMMOVE void *memmove(void *dest, const void *src, size_t count); +void *__memmove(void *dest, const void *src, size_t count); int memcmp(const void *cs, const void *ct, size_t count); size_t strlen(const char *s); @@ -63,6 +66,19 @@ char *strcpy(char *dest, const char *src); char *strcat(char *dest, const char *src); int strcmp(const char *cs, const char *ct); +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. 
+ */ + +#undef memcpy +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_STRING_64_H */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 040681928e9d..37d8fa4438f0 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -50,13 +50,19 @@ EXPORT_SYMBOL(csum_partial); #undef memset #undef memmove +extern void *__memset(void *, int, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); extern void *memset(void *, int, __kernel_size_t); extern void *memcpy(void *, const void *, __kernel_size_t); -extern void *__memcpy(void *, const void *, __kernel_size_t); +extern void *memmove(void *, const void *, __kernel_size_t); + +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); -EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(memmove); #ifndef CONFIG_DEBUG_VIRTUAL diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 56313a326188..89b53c9968e7 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -53,6 +53,8 @@ .Lmemcpy_e_e: .previous +.weak memcpy + ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -199,8 +201,8 @@ ENDPROC(__memcpy) * only outcome... 
*/ .section .altinstructions, "a" - altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ + altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c - altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ + altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e .previous diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 65268a6104f4..9c4b530575da 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,10 @@ * Output: * rax: dest */ +.weak memmove + ENTRY(memmove) +ENTRY(__memmove) CFI_STARTPROC /* Handle more 32 bytes in loop */ @@ -220,4 +223,5 @@ ENTRY(memmove) .Lmemmove_end_forward-.Lmemmove_begin_forward, \ .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs .previous +ENDPROC(__memmove) ENDPROC(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2dcb3808cbda..6f44935c6a60 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -56,6 +56,8 @@ .Lmemset_e_e: .previous +.weak memset + ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -147,8 +149,8 @@ ENDPROC(__memset) * feature to implement the right patch order. 
*/ .section .altinstructions,"a" - altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ - .Lfinal-memset,.Lmemset_e-.Lmemset_c - altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ - .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e + altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ + .Lfinal-__memset,.Lmemset_e-.Lmemset_c + altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ + .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e .previous diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 2be10984a67a..47437b16b186 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -5,6 +5,10 @@ /* error code which can't be mistaken for valid address */ #define EFI_ERROR (~0UL) +#undef memcpy +#undef memset +#undef memmove + void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index dc83f070edb6..799c52b9826c 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -255,6 +255,35 @@ static __always_inline void check_memory_region(unsigned long addr, kasan_report(addr, size, write, _RET_IP_); } +void __asan_loadN(unsigned long addr, size_t size); +void __asan_storeN(unsigned long addr, size_t size); + +#undef memset +void *memset(void *addr, int c, size_t len) +{ + __asan_storeN((unsigned long)addr, len); + + return __memset(addr, c, len); +} + +#undef memmove +void *memmove(void *dest, const void *src, size_t len) +{ + __asan_loadN((unsigned long)src, len); + __asan_storeN((unsigned long)dest, len); + + return __memmove(dest, src, len); +} + +#undef memcpy +void *memcpy(void *dest, const void *src, size_t len) +{ + __asan_loadN((unsigned long)src, len); + __asan_storeN((unsigned long)dest, len); + + return __memcpy(dest, src, len); +} + void kasan_alloc_pages(struct page *page, unsigned int order) { if 
(likely(!PageHighMem(page))) From c420f167db8c799d69fe43a801c58a7f02e9d57c Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:39:59 -0800 Subject: [PATCH 083/108] kasan: enable stack instrumentation Stack instrumentation allows to detect out of bounds memory accesses for variables allocated on stack. Compiler adds redzones around every variable on stack and poisons redzones in function's prologue. Such approach significantly increases stack usage, so all in-kernel stacks size were doubled. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/x86/include/asm/page_64_types.h | 12 +++++++++--- arch/x86/kernel/Makefile | 2 ++ arch/x86/mm/kasan_init_64.c | 11 +++++++++-- include/linux/init_task.h | 8 ++++++++ mm/kasan/kasan.h | 9 +++++++++ mm/kasan/report.c | 6 ++++++ scripts/Makefile.kasan | 1 + 7 files changed, 44 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 75450b2c7be4..4edd53b79a81 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -1,17 +1,23 @@ #ifndef _ASM_X86_PAGE_64_DEFS_H #define _ASM_X86_PAGE_64_DEFS_H -#define THREAD_SIZE_ORDER 2 +#ifdef CONFIG_KASAN +#define KASAN_STACK_ORDER 1 +#else +#define KASAN_STACK_ORDER 0 +#endif + +#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define CURRENT_MASK (~(THREAD_SIZE - 1)) -#define EXCEPTION_STACK_ORDER 0 +#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) #define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) -#define IRQ_STACK_ORDER 2 +#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) #define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) #define DOUBLEFAULT_STACK 1 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b13b70634124..cdb1b70ddad0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -17,6 +17,8 @@ CFLAGS_REMOVE_early_printk.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n +KASAN_SANITIZE_dumpstack.o := n +KASAN_SANITIZE_dumpstack_$(BITS).o := n CFLAGS_irq.o := -I$(src)/../include/asm/trace diff --git a/arch/x86/mm/kasan_init_64.c 
b/arch/x86/mm/kasan_init_64.c index 3e4d9a1a39fa..53508708b7aa 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -189,11 +189,18 @@ void __init kasan_init(void) if (map_range(&pfn_mapped[i])) panic("kasan: unable to allocate shadow!"); } - populate_zero_shadow(kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), - (void *)KASAN_SHADOW_END); + kasan_mem_to_shadow((void *)__START_KERNEL_map)); + + vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext), + (unsigned long)kasan_mem_to_shadow(_end), + NUMA_NO_NODE); + + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + (void *)KASAN_SHADOW_END); memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + init_task.kasan_depth = 0; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d3d43ecf148c..696d22312b31 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -175,6 +175,13 @@ extern struct task_group root_task_group; # define INIT_NUMA_BALANCING(tsk) #endif +#ifdef CONFIG_KASAN +# define INIT_KASAN(tsk) \ + .kasan_depth = 1, +#else +# define INIT_KASAN(tsk) +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. 
Base=0, limit=0x1fffff (=2MB) @@ -250,6 +257,7 @@ extern struct task_group root_task_group; INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ + INIT_KASAN(tsk) \ } diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 5b052ab40cf9..1fcc1d81a9cf 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -12,6 +12,15 @@ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +/* + * Stack redzone shadow values + * (Those are compiler's ABI, don't change them) + */ +#define KASAN_STACK_LEFT 0xF1 +#define KASAN_STACK_MID 0xF2 +#define KASAN_STACK_RIGHT 0xF3 +#define KASAN_STACK_PARTIAL 0xF4 + struct kasan_access_info { const void *access_addr; diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 2760edb4d0a8..866732ef3db3 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -64,6 +64,12 @@ static void print_error_description(struct kasan_access_info *info) case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; + case KASAN_STACK_LEFT: + case KASAN_STACK_MID: + case KASAN_STACK_RIGHT: + case KASAN_STACK_PARTIAL: + bug_type = "out of bounds on stack"; + break; } pr_err("BUG: KASan: %s in %pS at addr %p\n", diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 7acd6faa0335..2163b8cc446e 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,6 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ + --param asan-stack=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold)) ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) From 71394fe50146202f2c8d92cf50f5ebc761acf254 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:40:03 -0800 Subject: [PATCH 084/108] mm: vmalloc: add flag preventing guard hole allocation For 
instrumenting global variables KASan will need shadow memory backing the memory used for modules. So on module loading we will need to allocate memory for the shadow and map it at the address in shadow that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except it puts a guard hole after the allocated area. A guard hole in shadow memory would be a problem because at some future point we might need to have shadow memory at the address occupied by the guard hole. So we could fail to allocate shadow for module_alloc(). Add a new vm_struct flag 'VM_NO_GUARD' indicating that vm area doesn't have a guard hole. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H.
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/vmalloc.h | 9 +++++++-- mm/vmalloc.c | 6 ++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b87696fdf06a..1526fe712ca0 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ /* bits [20..32] reserved for arch specific ioremap internals */ /* @@ -96,8 +97,12 @@ void vmalloc_sync_all(void); static inline size_t get_vm_area_size(const struct vm_struct *area) { - /* return actual size without guard page */ - return area->size - PAGE_SIZE; + if (!(area->flags & VM_NO_GUARD)) + /* return actual size without guard page */ + return area->size - PAGE_SIZE; + else + return area->size; + } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 39c338896416..2e74e99d4cfe 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1324,10 +1324,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, if (unlikely(!area)) return NULL; - /* - * We always allocate a guard page. 
- */ - size += PAGE_SIZE; + if (!(flags & VM_NO_GUARD)) + size += PAGE_SIZE; va = alloc_vmap_area(size, align, start, end, node, gfp_mask); if (IS_ERR(va)) { From cb9e3c292d0115499c660028ad35ac5501d722b5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:40:07 -0800 Subject: [PATCH 085/108] mm: vmalloc: pass additional vm_flags to __vmalloc_node_range() For instrumenting global variables KASan will need shadow memory backing the memory used for modules. So on module loading we will need to allocate memory for the shadow and map it at the address in shadow that corresponds to the address allocated in module_alloc(). __vmalloc_node_range() could be used for this purpose, except it puts a guard hole after the allocated area. A guard hole in shadow memory would be a problem because at some future point we might need to have shadow memory at the address occupied by the guard hole. So we could fail to allocate shadow for module_alloc(). Now we have the VM_NO_GUARD flag disabling the guard page, so we need to pass it into __vmalloc_node_range(). Add a new parameter 'vm_flags' to the __vmalloc_node_range() function. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H.
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/arm/kernel/module.c | 2 +- arch/arm64/kernel/module.c | 4 ++-- arch/mips/kernel/module.c | 2 +- arch/parisc/kernel/module.c | 2 +- arch/s390/kernel/module.c | 2 +- arch/sparc/kernel/module.c | 2 +- arch/unicore32/kernel/module.c | 2 +- arch/x86/kernel/module.c | 2 +- include/linux/vmalloc.h | 4 +++- mm/vmalloc.c | 10 ++++++---- 10 files changed, 18 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index bea7db9e5b80..2e11961f65ae 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -41,7 +41,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 9b6f71db2709..67bf4107f6ef 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -35,8 +35,8 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, - __builtin_return_address(0)); + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, + NUMA_NO_NODE, __builtin_return_address(0)); } enum aarch64_reloc_op { diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 2a52568dbcd6..1833f5171ccd 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -47,7 +47,7 @@ static DEFINE_SPINLOCK(dbe_lock); void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULE_START, MODULE_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, 
__builtin_return_address(0)); } #endif diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 5822e8e200e6..3c63a820fcda 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -219,7 +219,7 @@ void *module_alloc(unsigned long size) * init_data correctly */ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_RWX, NUMA_NO_NODE, + PAGE_KERNEL_RWX, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 409d152585be..36154a2f1814 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -50,7 +50,7 @@ void *module_alloc(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 97655e0fd243..192a617a32f3 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -29,7 +29,7 @@ static void *module_map(unsigned long size) if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #else diff --git a/arch/unicore32/kernel/module.c b/arch/unicore32/kernel/module.c index dc41f6dfedb6..e191b3448bd3 100644 --- a/arch/unicore32/kernel/module.c +++ b/arch/unicore32/kernel/module.c @@ -25,7 +25,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e69f9882bf95..e830e61aae05 100644 --- 
a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -88,7 +88,7 @@ void *module_alloc(unsigned long size) return __vmalloc_node_range(size, 1, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, - PAGE_KERNEL_EXEC, NUMA_NO_NODE, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 1526fe712ca0..7d7acb35603d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -76,7 +76,9 @@ extern void *vmalloc_32_user(unsigned long size); extern void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot); extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller); + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller); + extern void vfree(const void *addr); extern void *vmap(struct page **pages, unsigned int count, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 2e74e99d4cfe..35b25e1340ca 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1619,6 +1619,7 @@ fail: * @end: vm area range end * @gfp_mask: flags for the page level allocator * @prot: protection mask for the allocated pages + * @vm_flags: additional vm area flags (e.g. 
%VM_NO_GUARD) * @node: node to use for allocation or NUMA_NO_NODE * @caller: caller's return address * @@ -1628,7 +1629,8 @@ fail: */ void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, - pgprot_t prot, int node, const void *caller) + pgprot_t prot, unsigned long vm_flags, int node, + const void *caller) { struct vm_struct *area; void *addr; @@ -1638,8 +1640,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, if (!size || (size >> PAGE_SHIFT) > totalram_pages) goto fail; - area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED, - start, end, node, gfp_mask, caller); + area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED | + vm_flags, start, end, node, gfp_mask, caller); if (!area) goto fail; @@ -1688,7 +1690,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align, int node, const void *caller) { return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END, - gfp_mask, prot, node, caller); + gfp_mask, prot, 0, node, caller); } void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) From 9ddf82521c86ae07af79dbe5a93c52890f2bab23 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:40:10 -0800 Subject: [PATCH 086/108] kernel: add support for .init_array.* constructors KASan uses constructors for initializing redzones for global variables. Globals instrumentation in GCC 4.9.2 produces constructors with priority (.init_array.00099) Currently kernel ignores such constructors. Only constructors with default priority supported (.init_array) This patch adds support for constructors with priorities. For kernel image we put pointers to constructors between __ctors_start/__ctors_end and do_ctors() will call them on start up. For modules we merge .init_array.* sections into resulting .init_array. Module code properly handles constructors in .init_array section. 
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/asm-generic/vmlinux.lds.h | 1 + scripts/module-common.lds | 3 +++ 2 files changed, 4 insertions(+) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index bee5d683074d..ac78910d7416 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -478,6 +478,7 @@ #define KERNEL_CTORS() . = ALIGN(8); \ VMLINUX_SYMBOL(__ctors_start) = .; \ *(.ctors) \ + *(SORT(.init_array.*)) \ *(.init_array) \ VMLINUX_SYMBOL(__ctors_end) = .; #else diff --git a/scripts/module-common.lds b/scripts/module-common.lds index bec15f908fc6..73a2c7da0e55 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -16,4 +16,7 @@ SECTIONS { __kcrctab_unused 0 : { *(SORT(___kcrctab_unused+*)) } __kcrctab_unused_gpl 0 : { *(SORT(___kcrctab_unused_gpl+*)) } __kcrctab_gpl_future 0 : { *(SORT(___kcrctab_gpl_future+*)) } + + . 
= ALIGN(8); + .init_array 0 : { *(SORT(.init_array.*)) *(.init_array) } } From 6301939d97d079f0d3dbe71e750f4daf5d39fc33 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:40:13 -0800 Subject: [PATCH 087/108] module: fix types of device tables aliases The MODULE_DEVICE_TABLE() macro is used to create aliases to device tables. Normally an alias should have the same type as the aliased symbol. Device tables are arrays, so they have 'struct type##_device_id[x]' types. An alias created by MODULE_DEVICE_TABLE() will have a non-array type - 'struct type##_device_id'. This inconsistency confuses the compiler: it can make a wrong assumption about the variable's size, which leads KASan to produce a false positive report about an out-of-bounds access. For every global variable the compiler calls __asan_register_globals(), passing information about the global variable (address, size, size with redzone, name ...). __asan_register_globals() poisons the symbol's redzone to detect possible out-of-bounds accesses. When a symbol has an alias, __asan_register_globals() will be called both for the symbol and for the alias. The compiler determines the size of a variable from the size of the variable's type. The alias and the symbol have the same address, so if the alias has the wrong size, part of the memory that actually belongs to the symbol could be poisoned as the redzone of the alias. By fixing the type of the alias we fix its size, so __asan_register_globals() will not poison valid memory.
Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/module.h b/include/linux/module.h index b653d7c0a05a..42999fe2dbd0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -135,7 +135,7 @@ void trim_init_extable(struct module *m); #ifdef MODULE /* Creates an alias so file2alias.c can find device table. */ #define MODULE_DEVICE_TABLE(type, name) \ - extern const struct type##_device_id __mod_##type##__##name##_device_table \ +extern const typeof(name) __mod_##type##__##name##_device_table \ __attribute__ ((unused, alias(__stringify(name)))) #else /* !MODULE */ #define MODULE_DEVICE_TABLE(type, name) From bebf56a1b176c2e1c9efe44e7e6915532cc682cf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <a.ryabinin@samsung.com> Date: Fri, 13 Feb 2015 14:40:17 -0800 Subject: [PATCH 088/108] kasan: enable instrumentation of global variables This feature let us to detect accesses out of bounds of global variables. This will work as for globals in kernel image, so for globals in modules. Currently this won't work for symbols in user-specified sections (e.g. 
__init, __read_mostly, ...) The idea of this is simple. Compiler increases each global variable by redzone size and add constructors invoking __asan_register_globals() function. Information about global variable (address, size, size with redzone ...) passed to __asan_register_globals() so we could poison variable's redzone. This patch also forces module_alloc() to return 8*PAGE_SIZE aligned address making shadow memory handling ( kasan_module_alloc()/kasan_module_free() ) more simple. Such alignment guarantees that each shadow page backing modules address space correspond to only one module_alloc() allocation. Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com> Cc: Dmitry Vyukov <dvyukov@google.com> Cc: Konstantin Serebryany <kcc@google.com> Cc: Dmitry Chernenkov <dmitryc@google.com> Signed-off-by: Andrey Konovalov <adech.fo@gmail.com> Cc: Yuri Gribov <tetra2005@gmail.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Sasha Levin <sasha.levin@oracle.com> Cc: Christoph Lameter <cl@linux.com> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. 
Peter Anvin" <hpa@zytor.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/kasan.txt | 2 +- arch/x86/kernel/module.c | 12 +++++++- arch/x86/mm/kasan_init_64.c | 2 +- include/linux/compiler-gcc4.h | 4 +++ include/linux/compiler-gcc5.h | 2 ++ include/linux/kasan.h | 10 +++++++ kernel/module.c | 2 ++ lib/Kconfig.kasan | 1 + mm/kasan/kasan.c | 52 +++++++++++++++++++++++++++++++++++ mm/kasan/kasan.h | 25 +++++++++++++++++ mm/kasan/report.c | 22 +++++++++++++++ scripts/Makefile.kasan | 2 +- 12 files changed, 132 insertions(+), 4 deletions(-) diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt index f0645a8a992f..092fc10961fe 100644 --- a/Documentation/kasan.txt +++ b/Documentation/kasan.txt @@ -9,7 +9,7 @@ a fast and comprehensive solution for finding use-after-free and out-of-bounds bugs. KASan uses compile-time instrumentation for checking every memory access, -therefore you will need a certain version of GCC >= 4.9.2 +therefore you will need a certain version of GCC > 4.9.2 Currently KASan is supported only for x86_64 architecture and requires that the kernel be built with the SLUB allocator. 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e830e61aae05..d1ac80b72c72 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -24,6 +24,7 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/kasan.h> #include <linux/bug.h> #include <linux/mm.h> #include <linux/gfp.h> @@ -83,13 +84,22 @@ static unsigned long int get_module_load_offset(void) void *module_alloc(unsigned long size) { + void *p; + if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; - return __vmalloc_node_range(size, 1, + + p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_module_alloc(p, size) < 0)) { + vfree(p); + return NULL; + } + + return p; } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 53508708b7aa..4860906c6b9f 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -196,7 +196,7 @@ void __init kasan_init(void) (unsigned long)kasan_mem_to_shadow(_end), NUMA_NO_NODE); - populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_VADDR), + populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END), (void *)KASAN_SHADOW_END); memset(kasan_zero_page, 0, PAGE_SIZE); diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index d1a558239b1a..769e19864632 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -85,3 +85,7 @@ #define __HAVE_BUILTIN_BSWAP16__ #endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#if GCC_VERSION >= 40902 +#define KASAN_ABI_VERSION 3 +#endif diff --git a/include/linux/compiler-gcc5.h b/include/linux/compiler-gcc5.h index c8c565952548..efee493714eb 100644 --- a/include/linux/compiler-gcc5.h +++ b/include/linux/compiler-gcc5.h @@ -63,3 +63,5 @@ #define __HAVE_BUILTIN_BSWAP64__ #define __HAVE_BUILTIN_BSWAP16__ #endif /* 
CONFIG_ARCH_USE_BUILTIN_BSWAP */ + +#define KASAN_ABI_VERSION 4 diff --git a/include/linux/kasan.h b/include/linux/kasan.h index d5310eef3e38..72ba725ddf9c 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -49,8 +49,15 @@ void kasan_krealloc(const void *object, size_t new_size); void kasan_slab_alloc(struct kmem_cache *s, void *object); void kasan_slab_free(struct kmem_cache *s, void *object); +#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT) + +int kasan_module_alloc(void *addr, size_t size); +void kasan_module_free(void *addr); + #else /* CONFIG_KASAN */ +#define MODULE_ALIGN 1 + static inline void kasan_unpoison_shadow(const void *address, size_t size) {} static inline void kasan_enable_current(void) {} @@ -74,6 +81,9 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {} static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {} static inline void kasan_slab_free(struct kmem_cache *s, void *object) {} +static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } +static inline void kasan_module_free(void *addr) {} + #endif /* CONFIG_KASAN */ #endif /* LINUX_KASAN_H */ diff --git a/kernel/module.c b/kernel/module.c index 82dc1f899e6d..8426ad48362c 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -56,6 +56,7 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> +#include <linux/kasan.h> #include <linux/jump_label.h> #include <linux/pfn.h> #include <linux/bsearch.h> @@ -1813,6 +1814,7 @@ static void unset_module_init_ro_nx(struct module *mod) { } void __weak module_memfree(void *module_region) { vfree(module_region); + kasan_module_free(module_region); } void __weak module_arch_cleanup(struct module *mod) diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 4d47d874335c..4fecaedc80a2 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -6,6 +6,7 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" depends on SLUB_DEBUG + 
select CONSTRUCTORS help Enables kernel address sanitizer - runtime memory debugger, designed to find out-of-bounds accesses and use-after-free bugs. diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index 799c52b9826c..78fee632a7ee 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -22,6 +22,7 @@ #include <linux/memblock.h> #include <linux/memory.h> #include <linux/mm.h> +#include <linux/module.h> #include <linux/printk.h> #include <linux/sched.h> #include <linux/slab.h> @@ -395,6 +396,57 @@ void kasan_kfree_large(const void *ptr) KASAN_FREE_PAGE); } +int kasan_module_alloc(void *addr, size_t size) +{ + void *ret; + size_t shadow_size; + unsigned long shadow_start; + + shadow_start = (unsigned long)kasan_mem_to_shadow(addr); + shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT, + PAGE_SIZE); + + if (WARN_ON(!PAGE_ALIGNED(shadow_start))) + return -EINVAL; + + ret = __vmalloc_node_range(shadow_size, 1, shadow_start, + shadow_start + shadow_size, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE, + __builtin_return_address(0)); + return ret ? 
0 : -ENOMEM; +} + +void kasan_module_free(void *addr) +{ + vfree(kasan_mem_to_shadow(addr)); +} + +static void register_global(struct kasan_global *global) +{ + size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE); + + kasan_unpoison_shadow(global->beg, global->size); + + kasan_poison_shadow(global->beg + aligned_size, + global->size_with_redzone - aligned_size, + KASAN_GLOBAL_REDZONE); +} + +void __asan_register_globals(struct kasan_global *globals, size_t size) +{ + int i; + + for (i = 0; i < size; i++) + register_global(&globals[i]); +} +EXPORT_SYMBOL(__asan_register_globals); + +void __asan_unregister_globals(struct kasan_global *globals, size_t size) +{ +} +EXPORT_SYMBOL(__asan_unregister_globals); + #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 1fcc1d81a9cf..4986b0acab21 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -11,6 +11,7 @@ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ +#define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */ /* * Stack redzone shadow values @@ -21,6 +22,10 @@ #define KASAN_STACK_RIGHT 0xF3 #define KASAN_STACK_PARTIAL 0xF4 +/* Don't break randconfig/all*config builds */ +#ifndef KASAN_ABI_VERSION +#define KASAN_ABI_VERSION 1 +#endif struct kasan_access_info { const void *access_addr; @@ -30,6 +35,26 @@ struct kasan_access_info { unsigned long ip; }; +/* The layout of struct dictated by compiler */ +struct kasan_source_location { + const char *filename; + int line_no; + int column_no; +}; + +/* The layout of struct dictated by compiler */ +struct kasan_global { + const void *beg; /* Address of the beginning of the global variable. */ + size_t size; /* Size of the global variable. 
*/ + size_t size_with_redzone; /* Size of the variable + size of the red zone. 32 bytes aligned */ + const void *name; + const void *module_name; /* Name of the module where the global variable is declared. */ + unsigned long has_dynamic_init; /* This needed for C++ */ +#if KASAN_ABI_VERSION >= 4 + struct kasan_source_location *location; +#endif +}; + void kasan_report_error(struct kasan_access_info *info); void kasan_report_user_access(struct kasan_access_info *info); diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 866732ef3db3..680ceedf810a 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -23,6 +23,8 @@ #include <linux/types.h> #include <linux/kasan.h> +#include <asm/sections.h> + #include "kasan.h" #include "../slab.h" @@ -61,6 +63,7 @@ static void print_error_description(struct kasan_access_info *info) break; case KASAN_PAGE_REDZONE: case KASAN_KMALLOC_REDZONE: + case KASAN_GLOBAL_REDZONE: case 0 ... KASAN_SHADOW_SCALE_SIZE - 1: bug_type = "out of bounds access"; break; @@ -80,6 +83,20 @@ static void print_error_description(struct kasan_access_info *info) info->access_size, current->comm, task_pid_nr(current)); } +static inline bool kernel_or_module_addr(const void *addr) +{ + return (addr >= (void *)_stext && addr < (void *)_end) + || (addr >= (void *)MODULES_VADDR + && addr < (void *)MODULES_END); +} + +static inline bool init_task_stack_addr(const void *addr) +{ + return addr >= (void *)&init_thread_union.stack && + (addr <= (void *)&init_thread_union.stack + + sizeof(init_thread_union.stack)); +} + static void print_address_description(struct kasan_access_info *info) { const void *addr = info->access_addr; @@ -107,6 +124,11 @@ static void print_address_description(struct kasan_access_info *info) dump_page(page, "kasan: bad access detected"); } + if (kernel_or_module_addr(addr)) { + if (!init_task_stack_addr(addr)) + pr_err("Address belongs to variable %pS\n", addr); + } + dump_stack(); } diff --git a/scripts/Makefile.kasan 
b/scripts/Makefile.kasan index 2163b8cc446e..631619b2b118 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -9,7 +9,7 @@ CFLAGS_KASAN_MINIMAL := -fsanitize=kernel-address CFLAGS_KASAN := $(call cc-option, -fsanitize=kernel-address \ -fasan-shadow-offset=$(CONFIG_KASAN_SHADOW_OFFSET) \ - --param asan-stack=1 \ + --param asan-stack=1 --param asan-globals=1 \ --param asan-instrumentation-with-call-threshold=$(call_threshold)) ifeq ($(call cc-option, $(CFLAGS_KASAN_MINIMAL) -Werror),) From 5125991c9a9360fbdb717e22783c970bbd140660 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@amacapital.net> Date: Fri, 13 Feb 2015 14:40:21 -0800 Subject: [PATCH 089/108] init: remove CONFIG_INIT_FALLBACK CONFIG_INIT_FALLBACK adds config bloat without an obvious use case that makes it worth keeping around. Delete it. Signed-off-by: Andy Lutomirski <luto@amacapital.net> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Chuck Ebbert <cebbert.lkml@gmail.com> Cc: Frank Rowand <frowand.list@gmail.com> Reviewed-by: Josh Triplett <josh@joshtriplett.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Rob Landley <rob@landley.net> Cc: Shuah Khan <shuah.kh@samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- init/Kconfig | 16 ---------------- init/main.c | 5 ----- 2 files changed, 21 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 1354ac09b516..058e3671fa11 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1287,22 +1287,6 @@ source "usr/Kconfig" endif -config INIT_FALLBACK - bool "Fall back to defaults if init= parameter is bad" - default y - help - If enabled, the kernel will try the default init binaries if an - explicit request from the init= parameter fails. - - This can have unexpected effects. For example, booting - with init=/sbin/kiosk_app will run /sbin/init or even /bin/sh - if /sbin/kiosk_app cannot be executed. 
- - The default value of Y is consistent with historical behavior. - Selecting N is likely to be more appropriate for most uses, - especially on kiosks and on kernels that are intended to be - run under the control of a script. - config CC_OPTIMIZE_FOR_SIZE bool "Optimize for size" help diff --git a/init/main.c b/init/main.c index 179ada15d08a..6f0f1c5ff8cc 100644 --- a/init/main.c +++ b/init/main.c @@ -953,13 +953,8 @@ static int __ref kernel_init(void *unused) ret = run_init_process(execute_command); if (!ret) return 0; -#ifndef CONFIG_INIT_FALLBACK panic("Requested init %s failed (error %d).", execute_command, ret); -#else - pr_err("Failed to execute %s (error %d). Attempting defaults...\n", - execute_command, ret); -#endif } if (!try_to_run_init_process("/sbin/init") || !try_to_run_init_process("/etc/init") || From 977ad481b66ca91e1f6492b3c5c4748c68fdee9c Mon Sep 17 00:00:00 2001 From: Wang Nan <wangnan0@huawei.com> Date: Fri, 13 Feb 2015 14:40:24 -0800 Subject: [PATCH 090/108] kprobes: set kprobes_all_disarmed earlier to enable re-optimization. In original code, the probed instruction doesn't get optimized after echo 0 > /sys/kernel/debug/kprobes/enabled echo 1 > /sys/kernel/debug/kprobes/enabled This is because original code checks kprobes_all_disarmed in optimize_kprobe(), but this flag is turned off after calling that function. Therefore, optimize_kprobe() will see kprobes_all_disarmed == true and doesn't do the optimization. This patch simply turns off kprobes_all_disarmed earlier to enable optimization. 
Signed-off-by: Wang Nan <wangnan0@huawei.com> Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- kernel/kprobes.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2ca272f8f62e..c39790001854 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2320,6 +2320,12 @@ static void arm_all_kprobes(void) if (!kprobes_all_disarmed) goto already_enabled; + /* + * optimize_kprobe() called by arm_kprobe() checks + * kprobes_all_disarmed, so set kprobes_all_disarmed before + * arm_kprobe. + */ + kprobes_all_disarmed = false; /* Arming kprobes doesn't optimize kprobe itself */ for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; @@ -2328,7 +2334,6 @@ static void arm_all_kprobes(void) arm_kprobe(p); } - kprobes_all_disarmed = false; printk(KERN_INFO "Kprobes globally enabled\n"); already_enabled: From 69d54b916d83872a0a327778a01af2a096923f59 Mon Sep 17 00:00:00 2001 From: Wang Nan <wangnan0@huawei.com> Date: Fri, 13 Feb 2015 14:40:26 -0800 Subject: [PATCH 091/108] kprobes: makes kprobes/enabled works correctly for optimized kprobes. debugfs/kprobes/enabled doesn't work correctly on optimized kprobes. Masami Hiramatsu has a test report on x86_64 platform: https://lkml.org/lkml/2015/1/19/274 This patch forces it to unoptimize kprobe if kprobes_all_disarmed is set. It also checks the flag in unregistering path for skipping unneeded disarming process when kprobes globally disarmed. 
Signed-off-by: Wang Nan <wangnan0@huawei.com> Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Ingo Molnar <mingo@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- kernel/kprobes.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c39790001854..c90e417bb963 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -869,7 +869,8 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt) { struct kprobe *_p; - unoptimize_kprobe(p, false); /* Try to unoptimize */ + /* Try to unoptimize */ + unoptimize_kprobe(p, kprobes_all_disarmed); if (!kprobe_queued(p)) { arch_disarm_kprobe(p); @@ -1571,7 +1572,13 @@ static struct kprobe *__disable_kprobe(struct kprobe *p) /* Try to disarm and disable this/parent probe */ if (p == orig_p || aggr_kprobe_disabled(orig_p)) { - disarm_kprobe(orig_p, true); + /* + * If kprobes_all_disarmed is set, orig_p + * should have already been disarmed, so + * skip unneed disarming process. 
+ */ + if (!kprobes_all_disarmed) + disarm_kprobe(orig_p, true); orig_p->flags |= KPROBE_FLAG_DISABLED; } } From 3fc70077e6a9feec7ec080710677a507bd41322c Mon Sep 17 00:00:00 2001 From: Joshua Clayton <stillcompiling@gmail.com> Date: Fri, 13 Feb 2015 14:40:29 -0800 Subject: [PATCH 092/108] drivers/rtc/rtc-pcf2123.c: add support for devicetree Add compatible string "nxp,rtc-pcf2123" Document the binding Signed-off-by: Joshua Clayton <stillcompiling@gmail.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Grant Likely <grant.likely@linaro.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- .../devicetree/bindings/rtc/nxp,rtc-2123.txt | 16 ++++++++++++++++ drivers/rtc/rtc-pcf2123.c | 10 ++++++++++ 2 files changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt diff --git a/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt new file mode 100644 index 000000000000..5cbc0b145a61 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt @@ -0,0 +1,16 @@ +NXP PCF2123 SPI Real Time Clock + +Required properties: +- compatible: should be: "nxp,rtc-pcf2123" +- reg: should be the SPI slave chipselect address + +Optional properties: +- spi-cs-high: PCF2123 needs chipselect high + +Example: + +rtc: nxp,rtc-pcf2123@3 { + compatible = "nxp,rtc-pcf2123"; + reg = <3>; + spi-cs-high; +}; diff --git a/drivers/rtc/rtc-pcf2123.c b/drivers/rtc/rtc-pcf2123.c index d1953bb244c5..8a7556cbcb7f 100644 --- a/drivers/rtc/rtc-pcf2123.c +++ b/drivers/rtc/rtc-pcf2123.c @@ -38,6 +38,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/of.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/rtc.h> @@ -340,10 +341,19 @@ static int pcf2123_remove(struct spi_device *spi) return 0; } +#ifdef CONFIG_OF +static
const struct of_device_id pcf2123_dt_ids[] = { + { .compatible = "nxp,rtc-pcf2123", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, pcf2123_dt_ids); +#endif + static struct spi_driver pcf2123_driver = { .driver = { .name = "rtc-pcf2123", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(pcf2123_dt_ids), }, .probe = pcf2123_probe, .remove = pcf2123_remove, From fd71493d67977a49e293c96f213006ec9e30c4c9 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:40:32 -0800 Subject: [PATCH 093/108] drivers/rtc/rtc-isl12057.c: add alarm support to Intersil ISL12057 RTC driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds alarm support to Intersil ISL12057 driver. This allows to configure the chip to generate an interrupt when the alarm matches current time value. Alarm can be programmed up to one month in the future and is accurate to the second. The patch was developed to support two different configurations: systems w/ and w/o RTC chip IRQ line connected to the main CPU. The latter is the one found on current 3 kernel users of the chip for which support was initially developed (Netgear ReadyNAS 102, 104 and 2120 NAS). On those devices, the IRQ#2 pin of the chip is not connected to the SoC but to a PMIC. This allows setting an alarm, powering off the device and have it wake up when the alarm rings. To support that configuration the driver does the following: 1. it has the alarm_irq_enable() function return -ENOTTY when no IRQ is passed to the driver. 2. it marks the device as a wakeup source in all cases (whether an IRQ is passed to the driver or not) to have 'wakealarm' sysfs entry created. 3. it marks the device as not supporting UIE mode when no IRQ is passed to the driver (see the commit message of c9f5c7e7a84f) This specific configuration was tested on a ReadyNAS 102 by setting an alarm, powering off the device and seeing it reboot as expected when the alarm rang.
The former configuration was tested on a Netgear ReadyNAS 102 after some soldering of the IRQ#2 pin of the RTC chip to a MPP line of the SoC (the one used usually handles the reset button). The test was performed using a modified .dts file reflecting this change (see below) and the rtc-test.c program available in Documentation/rtc.txt. This test program ran as expected, which validates alarm support, including interrupt support. As a side note, the ISL12057 remains in the list of trivial devices, i.e. no specific DT binding being added by this patch: i2c core automatically handles extraction of IRQ line info from .dts file. For instance, if one wants to reference the interrupt line for the alarm in its .dts file, adding interrupt and interrupt-parent properties works as expected: isl12057: isl12057@68 { compatible = "isil,isl12057"; interrupt-parent = <&gpio0>; interrupts = <6 IRQ_TYPE_EDGE_FALLING>; reg = <0x68>; }; FWIW, if someone is looking for a way to test alarm support on a system on which the chip IRQ line has the ability to boot the system (e.g. ReadyNAS 102, 104, etc): # echo 0 > /sys/class/rtc/rtc0/wakealarm # echo `date '+%s' -d '+ 1 minutes'` > /sys/class/rtc/rtc0/wakealarm # shutdown -h now With the commands above, after a minute, the system comes back to life.
Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Grant Likely <grant.likely@linaro.org> Cc: Uwe Kleine-König <uwe@kleine-koenig.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-isl12057.c | 313 ++++++++++++++++++++++++++++++++++++- 1 file changed, 305 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index 6e1fcfb5d7e6..3ec73ad7f2d8 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -79,8 +79,10 @@ #define ISL12057_MEM_MAP_LEN 0x10 struct isl12057_rtc_data { + struct rtc_device *rtc; struct regmap *regmap; struct mutex lock; + int irq; }; static void isl12057_rtc_regs_to_tm(struct rtc_time *tm, u8 *regs) @@ -160,14 +162,47 @@ static int isl12057_i2c_validate_chip(struct regmap *regmap) return 0; } -static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) +static int _isl12057_rtc_clear_alarm(struct device *dev) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ISL12057_REG_SR, + ISL12057_REG_SR_A1F, 0); + if (ret) + dev_err(dev, "%s: clearing alarm failed (%d)\n", __func__, ret); + + return ret; +} + +static int _isl12057_rtc_update_alarm(struct device *dev, int enable) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ISL12057_REG_INT, + ISL12057_REG_INT_A1IE, + enable ? 
ISL12057_REG_INT_A1IE : 0); + if (ret) + dev_err(dev, "%s: changing alarm interrupt flag failed (%d)\n", + __func__, ret); + + return ret; +} + +/* + * Note: as we only read from device and do not perform any update, there is + * no need for an equivalent function which would try and get driver's main + * lock. Here, it is safe for everyone if we just use regmap internal lock + * on the device when reading. + */ +static int _isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); u8 regs[ISL12057_RTC_SEC_LEN]; unsigned int sr; int ret; - mutex_lock(&data->lock); ret = regmap_read(data->regmap, ISL12057_REG_SR, &sr); if (ret) { dev_err(dev, "%s: unable to read oscillator status flag (%d)\n", @@ -187,8 +222,6 @@ static int isl12057_rtc_read_time(struct device *dev, struct rtc_time *tm) __func__, ret); out: - mutex_unlock(&data->lock); - if (ret) return ret; @@ -197,6 +230,168 @@ out: return rtc_valid_tm(tm); } +static int isl12057_rtc_update_alarm(struct device *dev, int enable) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + int ret; + + mutex_lock(&data->lock); + ret = _isl12057_rtc_update_alarm(dev, enable); + mutex_unlock(&data->lock); + + return ret; +} + +static int isl12057_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time rtc_tm, *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ISL12057_A1_SEC_LEN]; + unsigned int ir; + int ret; + + mutex_lock(&data->lock); + ret = regmap_bulk_read(data->regmap, ISL12057_REG_A1_SC, regs, + ISL12057_A1_SEC_LEN); + if (ret) { + dev_err(dev, "%s: reading alarm section failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + alarm_tm->tm_sec = bcd2bin(regs[0] & 0x7f); + alarm_tm->tm_min = bcd2bin(regs[1] & 0x7f); + alarm_tm->tm_hour = bcd2bin(regs[2] & 0x3f); + alarm_tm->tm_mday = bcd2bin(regs[3] & 0x3f); + alarm_tm->tm_wday = -1; + + 
/* + * The alarm section does not store year/month. We use the ones in rtc + * section as a basis and increment month and then year if needed to get + * alarm after current time. + */ + ret = _isl12057_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err_unlock; + + alarm_tm->tm_year = rtc_tm.tm_year; + alarm_tm->tm_mon = rtc_tm.tm_mon; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err_unlock; + + if (alarm_secs < rtc_secs) { + if (alarm_tm->tm_mon == 11) { + alarm_tm->tm_mon = 0; + alarm_tm->tm_year += 1; + } else { + alarm_tm->tm_mon += 1; + } + } + + ret = regmap_read(data->regmap, ISL12057_REG_INT, &ir); + if (ret) { + dev_err(dev, "%s: reading alarm interrupt flag failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + alarm->enabled = !!(ir & ISL12057_REG_INT_A1IE); + +err_unlock: + mutex_unlock(&data->lock); + + return ret; +} + +static int isl12057_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ISL12057_A1_SEC_LEN]; + struct rtc_time rtc_tm; + int ret, enable = 1; + + mutex_lock(&data->lock); + ret = _isl12057_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err_unlock; + + /* If alarm time is before current time, disable the alarm */ + if (!alarm->enabled || alarm_secs <= rtc_secs) { + enable = 0; + } else { + /* + * Chip only support alarms up to one month in the future. Let's + * return an error if we get something after that limit. + * Comparison is done by incrementing rtc_tm month field by one + * and checking alarm value is still below. 
+ */ + if (rtc_tm.tm_mon == 11) { /* handle year wrapping */ + rtc_tm.tm_mon = 0; + rtc_tm.tm_year += 1; + } else { + rtc_tm.tm_mon += 1; + } + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err_unlock; + + if (alarm_secs > rtc_secs) { + dev_err(dev, "%s: max for alarm is one month (%d)\n", + __func__, ret); + ret = -EINVAL; + goto err_unlock; + } + } + + /* Disable the alarm before modifying it */ + ret = _isl12057_rtc_update_alarm(dev, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to disable the alarm (%d)\n", + __func__, ret); + goto err_unlock; + } + + /* Program alarm registers */ + regs[0] = bin2bcd(alarm_tm->tm_sec) & 0x7f; + regs[1] = bin2bcd(alarm_tm->tm_min) & 0x7f; + regs[2] = bin2bcd(alarm_tm->tm_hour) & 0x3f; + regs[3] = bin2bcd(alarm_tm->tm_mday) & 0x3f; + + ret = regmap_bulk_write(data->regmap, ISL12057_REG_A1_SC, regs, + ISL12057_A1_SEC_LEN); + if (ret < 0) { + dev_err(dev, "%s: writing alarm section failed (%d)\n", + __func__, ret); + goto err_unlock; + } + + /* Enable or disable alarm */ + ret = _isl12057_rtc_update_alarm(dev, enable); + +err_unlock: + mutex_unlock(&data->lock); + + return ret; +} + static int isl12057_rtc_set_time(struct device *dev, struct rtc_time *tm) { struct isl12057_rtc_data *data = dev_get_drvdata(dev); @@ -262,9 +457,48 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap) return 0; } +static int isl12057_rtc_alarm_irq_enable(struct device *dev, + unsigned int enable) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + int ret = -ENOTTY; + + if (rtc_data->irq) + ret = isl12057_rtc_update_alarm(dev, enable); + + return ret; +} + +static irqreturn_t isl12057_rtc_interrupt(int irq, void *data) +{ + struct i2c_client *client = data; + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev); + struct rtc_device *rtc = rtc_data->rtc; + int ret, handled = IRQ_NONE; + unsigned int sr; + + ret = regmap_read(rtc_data->regmap, ISL12057_REG_SR, &sr); + if (!ret && 
(sr & ISL12057_REG_SR_A1F)) { + dev_dbg(&client->dev, "RTC alarm!\n"); + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); + + /* Acknowledge and disable the alarm */ + _isl12057_rtc_clear_alarm(&client->dev); + _isl12057_rtc_update_alarm(&client->dev, 0); + + handled = IRQ_HANDLED; + } + + return handled; +} + static const struct rtc_class_ops rtc_ops = { - .read_time = isl12057_rtc_read_time, + .read_time = _isl12057_rtc_read_time, .set_time = isl12057_rtc_set_time, + .read_alarm = isl12057_rtc_read_alarm, + .set_alarm = isl12057_rtc_set_alarm, + .alarm_irq_enable = isl12057_rtc_alarm_irq_enable, }; static struct regmap_config isl12057_rtc_regmap_config = { @@ -277,7 +511,6 @@ static int isl12057_probe(struct i2c_client *client, { struct device *dev = &client->dev; struct isl12057_rtc_data *data; - struct rtc_device *rtc; struct regmap *regmap; int ret; @@ -310,10 +543,72 @@ static int isl12057_probe(struct i2c_client *client, data->regmap = regmap; dev_set_drvdata(dev, data); - rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, THIS_MODULE); - return PTR_ERR_OR_ZERO(rtc); + if (client->irq > 0) { + ret = devm_request_threaded_irq(dev, client->irq, NULL, + isl12057_rtc_interrupt, + IRQF_SHARED|IRQF_ONESHOT, + DRV_NAME, client); + if (!ret) + data->irq = client->irq; + else + dev_err(dev, "%s: irq %d unavailable (%d)\n", __func__, + client->irq, ret); + } + + device_init_wakeup(dev, !!data->irq); + + data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, + THIS_MODULE); + ret = PTR_ERR_OR_ZERO(data->rtc); + if (ret) { + dev_err(dev, "%s: unable to register RTC device (%d)\n", + __func__, ret); + goto err; + } + + /* We cannot support UIE mode if we do not have an IRQ line */ + if (!data->irq) + data->rtc->uie_unsupported = 1; + +err: + return ret; } +static int isl12057_remove(struct i2c_client *client) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev); + + if (rtc_data->irq) + device_init_wakeup(&client->dev, false); + + return 0; +} 
+ +#ifdef CONFIG_PM_SLEEP +static int isl12057_rtc_suspend(struct device *dev) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return enable_irq_wake(rtc_data->irq); + + return 0; +} + +static int isl12057_rtc_resume(struct device *dev) +{ + struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return disable_irq_wake(rtc_data->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(isl12057_rtc_pm_ops, isl12057_rtc_suspend, + isl12057_rtc_resume); + #ifdef CONFIG_OF static const struct of_device_id isl12057_dt_match[] = { { .compatible = "isl,isl12057" }, @@ -331,9 +626,11 @@ static struct i2c_driver isl12057_driver = { .driver = { .name = DRV_NAME, .owner = THIS_MODULE, + .pm = &isl12057_rtc_pm_ops, .of_match_table = of_match_ptr(isl12057_dt_match), }, .probe = isl12057_probe, + .remove = isl12057_remove, .id_table = isl12057_id, }; module_i2c_driver(isl12057_driver); From 298ff0122ab19d253d3a8fea45a7227491beaf98 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:40:35 -0800 Subject: [PATCH 094/108] rtc: rtc-isl12057: add isil,irq2-can-wakeup-machine property for in-tree users MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current in-tree users of ISL12057 RTC chip (NETGEAR ReadyNAS 102, 104 and 2120) do not have the IRQ#2 pin of the chip (associated w/ the Alarm1 mechanism) connected to their SoC, but to a PMIC (TPS65251 FWIW). This specific hardware configuration allows the NAS to wake up when the alarms rings. Recently introduced alarm support for ISL12057 relies on the provision of an "interrupts" property in system .dts file, which previous three users will never get. For that reason, alarm support on those devices is not function. 
To support this use case, this patch adds a new DT property for ISL12057 (isil,irq2-can-wakeup-machine) to indicate that the chip is capable of waking up the device using its IRQ#2 pin (even though it does not have its IRQ#2 pin connected directly to the SoC). This specific configuration was tested on a ReadyNAS 102 by setting an alarm, powering off the device and see it reboot as expected when the alarm rang w/: # echo `date '+%s' -d '+ 1 minutes'` > /sys/class/rtc/rtc0/wakealarm # shutdown -h now As a side note, the ISL12057 remains in the list of trivial devices, because the property is not per se required by the device to work but can help handle system w/ specific requirements. In exchange, the new feature is described in details in a specific documentation file. Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Darshana Padmadas <darshanapadmadas@gmail.com> Cc: Rob Herring <rob.herring@calxeda.com> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Stephen Warren <swarren@wwwdotorg.org> Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Landley <rob@landley.net> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Cc: Kumar Gala <galak@codeaurora.org> Cc: Uwe Kleine-König <uwe@kleine-koenig.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- .../devicetree/bindings/rtc/isil,isl12057.txt | 78 +++++++++++++++++++ drivers/rtc/rtc-isl12057.c | 45 +++++++++-- 2 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 Documentation/devicetree/bindings/rtc/isil,isl12057.txt diff --git 
a/Documentation/devicetree/bindings/rtc/isil,isl12057.txt b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt new file mode 100644 index 000000000000..501c39ceae79 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt @@ -0,0 +1,78 @@ +Intersil ISL12057 I2C RTC/Alarm chip + +ISL12057 is a trivial I2C device (it has simple device tree bindings, +consisting of a compatible field, an address and possibly an interrupt +line). + +Nonetheless, it also supports an optional boolean property +("isil,irq2-can-wakeup-machine") to handle the specific use-case found +on at least three in-tree users of the chip (NETGEAR ReadyNAS 102, 104 +and 2120 ARM-based NAS); On those devices, the IRQ#2 pin of the chip +(associated with the alarm supported by the driver) is not connected +to the SoC but to a PMIC. It allows the device to be powered up when +RTC alarm rings. In order to mark the device as a wakeup source and +get access to the 'wakealarm' sysfs entry, this specific property can +be set when the IRQ#2 pin of the chip is not connected to the SoC but +can wake up the device. + +Required properties supported by the device: + + - "compatible": must be "isil,isl12057" + - "reg": I2C bus address of the device + +Optional properties: + + - "isil,irq2-can-wakeup-machine": mark the chip as a wakeup source, + independently of the availability of an IRQ line connected to the + SoC. + + - "interrupt-parent", "interrupts": for passing the interrupt line + of the SoC connected to IRQ#2 of the RTC chip. + + +Example isl12057 node without IRQ#2 pin connected (no alarm support): + + isl12057: isl12057@68 { + compatible = "isil,isl12057"; + reg = <0x68>; + }; + + +Example isl12057 node with IRQ#2 pin connected to main SoC via MPP6 (note +that the pinctrl-related properties below are given for completeness and +may not be required or may be different depending on your system or +SoC, and the main function of the MPP used as IRQ line, i.e.
+"interrupt-parent" and "interrupts" are usually sufficient): + + pinctrl { + ... + + rtc_alarm_pin: rtc_alarm_pin { + marvell,pins = "mpp6"; + marvell,function = "gpio"; + }; + + ... + + }; + + ... + + isl12057: isl12057@68 { + compatible = "isil,isl12057"; + reg = <0x68>; + pinctrl-0 = <&rtc_alarm_pin>; + pinctrl-names = "default"; + interrupt-parent = <&gpio0>; + interrupts = <6 IRQ_TYPE_EDGE_FALLING>; + }; + + +Example isl12057 node without IRQ#2 pin connected to the SoC but to a +PMIC, allowing the device to be started based on configured alarm: + + isl12057: isl12057@68 { + compatible = "isil,isl12057"; + reg = <0x68>; + isil,irq2-can-wakeup-machine; + }; diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index 3ec73ad7f2d8..bd76c5e35fa5 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -457,6 +457,40 @@ static int isl12057_check_rtc_status(struct device *dev, struct regmap *regmap) return 0; } +#ifdef CONFIG_OF +/* + * One would expect the device to be marked as a wakeup source only + * when an IRQ pin of the RTC is routed to an interrupt line of the + * CPU. In practice, such an IRQ pin can be connected to a PMIC and + * this allows the device to be powered up when RTC alarm rings. This + * is for instance the case on ReadyNAS 102, 104 and 2120. On those + * devices with no IRQ driectly connected to the SoC, the RTC chip + * can be forced as a wakeup source by stating that explicitly in + * the device's .dts file using the "isil,irq2-can-wakeup-machine" + * boolean property. This will guarantee 'wakealarm' sysfs entry is + * available on the device. + * + * The function below returns 1, i.e. the capability of the chip to + * wakeup the device, based on IRQ availability or if the boolean + * property has been set in the .dts file. Otherwise, it returns 0. 
+ */ + +static bool isl12057_can_wakeup_machine(struct device *dev) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + + return (data->irq || of_property_read_bool(dev->of_node, + "isil,irq2-can-wakeup-machine")); +} +#else +static bool isl12057_can_wakeup_machine(struct device *dev) +{ + struct isl12057_rtc_data *data = dev_get_drvdata(dev); + + return !!data->irq; +} +#endif + static int isl12057_rtc_alarm_irq_enable(struct device *dev, unsigned int enable) { @@ -555,7 +589,8 @@ static int isl12057_probe(struct i2c_client *client, client->irq, ret); } - device_init_wakeup(dev, !!data->irq); + if (isl12057_can_wakeup_machine(dev)) + device_init_wakeup(dev, true); data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, THIS_MODULE); @@ -576,9 +611,7 @@ err: static int isl12057_remove(struct i2c_client *client) { - struct isl12057_rtc_data *rtc_data = dev_get_drvdata(&client->dev); - - if (rtc_data->irq) + if (isl12057_can_wakeup_machine(&client->dev)) device_init_wakeup(&client->dev, false); return 0; @@ -589,7 +622,7 @@ static int isl12057_rtc_suspend(struct device *dev) { struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); - if (device_may_wakeup(dev)) + if (rtc_data->irq && device_may_wakeup(dev)) return enable_irq_wake(rtc_data->irq); return 0; @@ -599,7 +632,7 @@ static int isl12057_rtc_resume(struct device *dev) { struct isl12057_rtc_data *rtc_data = dev_get_drvdata(dev); - if (device_may_wakeup(dev)) + if (rtc_data->irq && device_may_wakeup(dev)) return disable_irq_wake(rtc_data->irq); return 0; From 1a67e256dbd80f47f339c4a88c34deee12489aa0 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:40:39 -0800 Subject: [PATCH 095/108] ARM: mvebu: ISL12057 rtc chip can now wake up RN102, RN104 and RN2120 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that alarm support for ISL12057 chip is available w/ the specific "isil,irq2-can-wakeup-machine" property,
let's use that feature of the driver dedicated to NETGEAR ReadyNAS 102, 104 and 2120 specific routing of RTC Alarm IRQ#2 pin; on those devices, this pin is not connected to the SoC but to a PMIC, which allows the device to be powered up when RTC alarm rings. For that to work, the chip needs to be explicitly marked as a device wakeup source using this "isil,irq2-can-wakeup-machine" boolean property. This makes 'wakealarm' sysfs entry available to configure the alarm. Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Darshana Padmadas <darshanapadmadas@gmail.com> Cc: Rob Herring <rob.herring@calxeda.com> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Stephen Warren <swarren@wwwdotorg.org> Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Landley <rob@landley.net> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Cc: Kumar Gala <galak@codeaurora.org> Cc: Uwe Kleine-König <uwe@kleine-koenig.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/arm/boot/dts/armada-370-netgear-rn102.dts | 1 + arch/arm/boot/dts/armada-370-netgear-rn104.dts | 1 + arch/arm/boot/dts/armada-xp-netgear-rn2120.dts | 1 + 3 files changed, 3 insertions(+) diff --git a/arch/arm/boot/dts/armada-370-netgear-rn102.dts b/arch/arm/boot/dts/armada-370-netgear-rn102.dts index 4e24932c6e30..1c83b7ce0982 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn102.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn102.dts @@ -87,6 +87,7 @@ isl12057: isl12057@68 { compatible = "isil,isl12057"; reg = <0x68>; + 
isil,irq2-can-wakeup-machine; }; g762: g762@3e { diff --git a/arch/arm/boot/dts/armada-370-netgear-rn104.dts b/arch/arm/boot/dts/armada-370-netgear-rn104.dts index 30586e47986a..5fbfe02964dc 100644 --- a/arch/arm/boot/dts/armada-370-netgear-rn104.dts +++ b/arch/arm/boot/dts/armada-370-netgear-rn104.dts @@ -93,6 +93,7 @@ isl12057: isl12057@68 { compatible = "isil,isl12057"; reg = <0x68>; + isil,irq2-can-wakeup-machine; }; g762: g762@3e { diff --git a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts index d81430aa4ab3..fc8bdfcd2348 100644 --- a/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts +++ b/arch/arm/boot/dts/armada-xp-netgear-rn2120.dts @@ -100,6 +100,7 @@ isl12057: isl12057@68 { compatible = "isil,isl12057"; reg = <0x68>; + isil,irq2-can-wakeup-machine; }; /* Controller for rear fan #1 of 3 (Protechnic From 6df17a6577ee1f4dc5d36a1520f8f3bfb38852c8 Mon Sep 17 00:00:00 2001 From: Juergen Borleis <jbe@pengutronix.de> Date: Fri, 13 Feb 2015 14:40:42 -0800 Subject: [PATCH 096/108] drivers/rtc/rtc-imxdi.c: trivial clean up code Signed-off-by: Juergen Borleis <jbe@pengutronix.de> Cc: Alessandro Zummo <a.zummo@towertech.it> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-imxdi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 42f5570f42f8..2b475a2c44ce 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -313,7 +313,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id) dier = __raw_readl(imxdi->ioaddr + DIER); /* handle write complete and write error cases */ - if ((dier & DIER_WCIE)) { + if (dier & DIER_WCIE) { /*If the write wait queue is empty then there is no pending operations. It means the interrupt is for DryIce -Security. 
IRQ must be returned as none.*/ @@ -322,7 +322,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id) /* DSR_WCF clears itself on DSR read */ dsr = __raw_readl(imxdi->ioaddr + DSR); - if ((dsr & (DSR_WCF | DSR_WEF))) { + if (dsr & (DSR_WCF | DSR_WEF)) { /* mask the interrupt */ di_int_disable(imxdi, DIER_WCIE); @@ -335,7 +335,7 @@ static irqreturn_t dryice_norm_irq(int irq, void *dev_id) } /* handle the alarm case */ - if ((dier & DIER_CAIE)) { + if (dier & DIER_CAIE) { /* DSR_WCF clears itself on DSR read */ dsr = __raw_readl(imxdi->ioaddr + DSR); if (dsr & DSR_CAF) { From 46edeffa1f4d9684b8ac444503628fbe309814da Mon Sep 17 00:00:00 2001 From: Juergen Borleis <jbe@pengutronix.de> Date: Fri, 13 Feb 2015 14:40:45 -0800 Subject: [PATCH 097/108] drivers/rtc/rtc-imxdi.c: add more known register bits Intended for monitoring and controlling the security features. These bits are required to bring this unit back to life after a security violation event was detected. The code to bring it back to life will follow after a vendor clearance.
Signed-off-by: Juergen Borleis <jbe@pengutronix.de> Cc: Alessandro Zummo <a.zummo@towertech.it> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-imxdi.c | 44 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c index 2b475a2c44ce..c666eab98273 100644 --- a/drivers/rtc/rtc-imxdi.c +++ b/drivers/rtc/rtc-imxdi.c @@ -50,22 +50,58 @@ #define DCAMR_UNSET 0xFFFFFFFF /* doomsday - 1 sec */ #define DCR 0x10 /* Control Reg */ +#define DCR_TDCHL (1 << 30) /* Tamper-detect configuration hard lock */ +#define DCR_TDCSL (1 << 29) /* Tamper-detect configuration soft lock */ +#define DCR_KSSL (1 << 27) /* Key-select soft lock */ +#define DCR_MCHL (1 << 20) /* Monotonic-counter hard lock */ +#define DCR_MCSL (1 << 19) /* Monotonic-counter soft lock */ +#define DCR_TCHL (1 << 18) /* Timer-counter hard lock */ +#define DCR_TCSL (1 << 17) /* Timer-counter soft lock */ +#define DCR_FSHL (1 << 16) /* Failure state hard lock */ #define DCR_TCE (1 << 3) /* Time Counter Enable */ +#define DCR_MCE (1 << 2) /* Monotonic Counter Enable */ #define DSR 0x14 /* Status Reg */ -#define DSR_WBF (1 << 10) /* Write Busy Flag */ -#define DSR_WNF (1 << 9) /* Write Next Flag */ -#define DSR_WCF (1 << 8) /* Write Complete Flag */ +#define DSR_WTD (1 << 23) /* Wire-mesh tamper detected */ +#define DSR_ETBD (1 << 22) /* External tamper B detected */ +#define DSR_ETAD (1 << 21) /* External tamper A detected */ +#define DSR_EBD (1 << 20) /* External boot detected */ +#define DSR_SAD (1 << 19) /* SCC alarm detected */ +#define DSR_TTD (1 << 18) /* Temperatur tamper detected */ +#define DSR_CTD (1 << 17) /* Clock tamper detected */ +#define DSR_VTD (1 << 16) /* Voltage tamper detected */ +#define DSR_WBF (1 << 10) /* Write Busy Flag (synchronous) */ +#define DSR_WNF (1 << 9) /* Write Next Flag (synchronous) */ +#define 
DSR_WCF (1 << 8) /* Write Complete Flag (synchronous)*/ #define DSR_WEF (1 << 7) /* Write Error Flag */ #define DSR_CAF (1 << 4) /* Clock Alarm Flag */ +#define DSR_MCO (1 << 3) /* monotonic counter overflow */ +#define DSR_TCO (1 << 2) /* time counter overflow */ #define DSR_NVF (1 << 1) /* Non-Valid Flag */ #define DSR_SVF (1 << 0) /* Security Violation Flag */ -#define DIER 0x18 /* Interrupt Enable Reg */ +#define DIER 0x18 /* Interrupt Enable Reg (synchronous) */ #define DIER_WNIE (1 << 9) /* Write Next Interrupt Enable */ #define DIER_WCIE (1 << 8) /* Write Complete Interrupt Enable */ #define DIER_WEIE (1 << 7) /* Write Error Interrupt Enable */ #define DIER_CAIE (1 << 4) /* Clock Alarm Interrupt Enable */ +#define DIER_SVIE (1 << 0) /* Security-violation Interrupt Enable */ + +#define DMCR 0x1c /* DryIce Monotonic Counter Reg */ + +#define DTCR 0x28 /* DryIce Tamper Configuration Reg */ +#define DTCR_MOE (1 << 9) /* monotonic overflow enabled */ +#define DTCR_TOE (1 << 8) /* time overflow enabled */ +#define DTCR_WTE (1 << 7) /* wire-mesh tamper enabled */ +#define DTCR_ETBE (1 << 6) /* external B tamper enabled */ +#define DTCR_ETAE (1 << 5) /* external A tamper enabled */ +#define DTCR_EBE (1 << 4) /* external boot tamper enabled */ +#define DTCR_SAIE (1 << 3) /* SCC enabled */ +#define DTCR_TTE (1 << 2) /* temperature tamper enabled */ +#define DTCR_CTE (1 << 1) /* clock tamper enabled */ +#define DTCR_VTE (1 << 0) /* voltage tamper enabled */ + +#define DGPR 0x3c /* DryIce General Purpose Reg */ /** * struct imxdi_dev - private imxdi rtc data From bddd8ddd9f08c7b38b0b311e89a28d74d449d279 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <k.kozlowski@samsung.com> Date: Fri, 13 Feb 2015 14:40:48 -0800 Subject: [PATCH 098/108] drivers/rtc/rtc-at91sam9.c: constify struct regmap_config The regmap_config struct may be const because it is not modified by the driver and regmap_init() accepts pointer to const. 
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-at91sam9.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-at91sam9.c b/drivers/rtc/rtc-at91sam9.c index 6b9aaf1afc72..2183fd2750ab 100644 --- a/drivers/rtc/rtc-at91sam9.c +++ b/drivers/rtc/rtc-at91sam9.c @@ -313,7 +313,7 @@ static const struct rtc_class_ops at91_rtc_ops = { .alarm_irq_enable = at91_rtc_alarm_irq_enable, }; -static struct regmap_config gpbr_regmap_config = { +static const struct regmap_config gpbr_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, From 1ef2816f26a1d962d6317f7545218fa2ae3643bb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski <k.kozlowski@samsung.com> Date: Fri, 13 Feb 2015 14:40:51 -0800 Subject: [PATCH 099/108] drivers/rtc/rtc-isl12057.c: constify struct regmap_config The regmap_config struct may be const because it is not modified by the driver and regmap_init() accepts pointer to const. 
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-isl12057.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-isl12057.c b/drivers/rtc/rtc-isl12057.c index bd76c5e35fa5..b8f862953f7f 100644 --- a/drivers/rtc/rtc-isl12057.c +++ b/drivers/rtc/rtc-isl12057.c @@ -535,7 +535,7 @@ static const struct rtc_class_ops rtc_ops = { .alarm_irq_enable = isl12057_rtc_alarm_irq_enable, }; -static struct regmap_config isl12057_rtc_regmap_config = { +static const struct regmap_config isl12057_rtc_regmap_config = { .reg_bits = 8, .val_bits = 8, }; From c412c6034aaf38ff718296f3b77cb8e76b541985 Mon Sep 17 00:00:00 2001 From: Chris Zhong <zyw@rock-chips.com> Date: Fri, 13 Feb 2015 14:40:54 -0800 Subject: [PATCH 100/108] drivers/rtc/rtc-rk808.c: fix rtc time reading issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After we set the GET_TIME bit, the rtc time can't be read immediately. We should wait up to 31.25 us, about one cycle of 32khz. Otherwise reading RTC time will return a old time. If we clear the GET_TIME bit after setting, the time of i2c transfer is certainly more than 31.25us. Doug said: : I think we are safe. At 400kHz (the max speed of this part) each bit can : be transferred no faster than 2.5us. In order to do a valid i2c : transaction we need to _at least_ write the address of the device and the : data onto the bus, which is 16 bits. 16 * 2.5us = 40us. 
That's above the : 31.25us [akpm@linux-foundation.org: tweak comment per review discussion] Signed-off-by: Chris Zhong <zyw@rock-chips.com> Reviewed-by: Doug Anderson <dianders@chromium.org> Cc: Sonny Rao <sonnyrao@chromium.org> Cc: Heiko Stübner <heiko@sntech.de> Cc: Alessandro Zummo <a.zummo@towertech.it> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-rk808.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c index df42257668ac..91ca0bc1b484 100644 --- a/drivers/rtc/rtc-rk808.c +++ b/drivers/rtc/rtc-rk808.c @@ -67,15 +67,21 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm) /* Force an update of the shadowed registers right now */ ret = regmap_update_bits(rk808->regmap, RK808_RTC_CTRL_REG, BIT_RTC_CTRL_REG_RTC_GET_TIME, - 0); + BIT_RTC_CTRL_REG_RTC_GET_TIME); if (ret) { dev_err(dev, "Failed to update bits rtc_ctrl: %d\n", ret); return ret; } + /* + * After we set the GET_TIME bit, the rtc time can't be read + * immediately. So we should wait up to 31.25 us, about one cycle of + * 32khz. If we clear the GET_TIME bit here, the time of i2c transfer + * certainly more than 31.25us: 16 * 2.5us at 400kHz bus frequency. + */ ret = regmap_update_bits(rk808->regmap, RK808_RTC_CTRL_REG, BIT_RTC_CTRL_REG_RTC_GET_TIME, - BIT_RTC_CTRL_REG_RTC_GET_TIME); + 0); if (ret) { dev_err(dev, "Failed to update bits rtc_ctrl: %d\n", ret); return ret; From 446810f2dd4101ca4af2c93576e4b7189a398d69 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:40:57 -0800 Subject: [PATCH 101/108] of: add vendor prefix for Abracon Corporation This series adds support for Abracon AB-RTCMC-32.768kHz-B5ZE-S3 I2C RTC chip. Unlike many RTC chips, it includes an internal oscillator which spares room on the PCB. 
It also has some interesting features, like battery low detection (which the driver in this series supports). The only small "limitation" (mainly due to what RTC subsystem expects from RTC chips) is the fact that its alarm is accurate to the minute. This series provides a solution (described below) for that limitation using another mechanism of the chip. I decided to split support between three different patches for this v0: - Patch 1/3: it simply references Abracon Corporation in vendor-prefixes documentation file. As Abracon has no NASDAQ ticker symbol, I have decided to use "abcn" (I initially started my work w/ "ab" but later changed for "abcn" which looked more meaningful) - Patch 2/3: it adds initial support for the chip and provides the ability to read/write time and also read/write alarm. As the alarm the chip provides is accurate to the minute, the support provided by this patch also has this limitation (e.g. UIE mode is not supported). - Patch 3/3: the chip supports a watchdog timer which can be used to extend the alarm mechanism in patch 2/3 in order to provide support for alarms under one minute (e.g. support UIE mode). In practice, the logic I implemented is to use the watchdog timer for alarms which are at most 4 minutes in the future and use the common alarm mechanism for alarms which are set to larger values. With that additional patch the device fully passes the rtctest.c program. I decided to split the driver between two patches (2 and 3 of 3) in order to ease review: patch 2 should be pretty straightforward to read for someone familiar w/ RTC subsystem. Patch 3 only extends what is in patch 2 regarding alarms.
This patch (of 3): Documentation/devicetree/bindings/vendor-prefixes.txt: add vendor prefix for Abracon Corporation Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Rob Herring <robherring2@gmail.com> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Stephen Warren <swarren@wwwdotorg.org> Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Landley <rob@landley.net> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Cc: Kumar Gala <galak@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- Documentation/devicetree/bindings/vendor-prefixes.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index 7075698abd8c..797e6b024241 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -4,6 +4,7 @@ This isn't an exhaustive list, but you should add new prefixes to it before using them to avoid name-space collisions. abilis Abilis Systems +abcn Abracon Corporation active-semi Active-Semi International Inc ad Avionic Design GmbH adapteva Adapteva, Inc. From 0b2f6228b2909a82006f87d28df45a0725a95094 Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:41:00 -0800 Subject: [PATCH 102/108] rtc: add support for Abracon AB-RTCMC-32.768kHz-B5ZE-S3 I2C RTC chip This patch adds support for Abracon AB-RTCMC-32.768kHz-B5ZE-S3 RTC/Calendar module w/ I2C interface. 
This support includes RTC time reading and setting, Alarm (1 minute accuracy) reading and setting, and battery low detection. The device also supports frequency adjustment and two timers but those features are currently not implemented in this driver. Due to alarm accuracy limitation (and current lack of timer support in the driver), UIE mode is not supported. Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Rob Herring <robherring2@gmail.com> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Stephen Warren <swarren@wwwdotorg.org> Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Landley <rob@landley.net> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Cc: Kumar Gala <galak@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- .../bindings/i2c/trivial-devices.txt | 1 + drivers/rtc/Kconfig | 11 + drivers/rtc/Makefile | 1 + drivers/rtc/rtc-ab-b5ze-s3.c | 802 ++++++++++++++++++ 4 files changed, 815 insertions(+) create mode 100644 drivers/rtc/rtc-ab-b5ze-s3.c diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt index 9f41d05be3be..f9463b492f44 100644 --- a/Documentation/devicetree/bindings/i2c/trivial-devices.txt +++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt @@ -9,6 +9,7 @@ document for it just like any other devices. 
Compatible Vendor / Chip ========== ============= +abracon,abb5zes3 AB-RTCMC-32.768kHz-B5ZE-S3: Real Time Clock/Calendar Module with I2C Interface ad,ad7414 SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin ad,adm9240 ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems adi,adt7461 +/-1C TDM Extended Temp Range I.C diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index f15cddfeb897..1b19f327f35f 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -153,6 +153,17 @@ config RTC_DRV_88PM80X This driver can also be built as a module. If so, the module will be called rtc-88pm80x. +config RTC_DRV_ABB5ZES3 + depends on I2C + select REGMAP_I2C + tristate "Abracon AB-RTCMC-32.768kHz-B5ZE-S3" + help + If you say yes here you get support for the Abracon + AB-RTCMC-32.768kHz-B5ZE-S3 I2C RTC chip. + + This driver can also be built as a module. If so, the module + will be called rtc-ab-b5ze-s3. + config RTC_DRV_AS3722 tristate "ams AS3722 RTC driver" depends on MFD_AS3722 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index c8ef3e1e6ccd..855c4e364058 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_RTC_DRV_88PM860X) += rtc-88pm860x.o obj-$(CONFIG_RTC_DRV_88PM80X) += rtc-88pm80x.o obj-$(CONFIG_RTC_DRV_AB3100) += rtc-ab3100.o obj-$(CONFIG_RTC_DRV_AB8500) += rtc-ab8500.o +obj-$(CONFIG_RTC_DRV_ABB5ZES3) += rtc-ab-b5ze-s3.o obj-$(CONFIG_RTC_DRV_AS3722) += rtc-as3722.o obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c new file mode 100644 index 000000000000..bbbf06f55e17 --- /dev/null +++ b/drivers/rtc/rtc-ab-b5ze-s3.c @@ -0,0 +1,802 @@ +/* + * rtc-ab-b5ze-s3 - Driver for Abracon AB-RTCMC-32.768Khz-B5ZE-S3 + * I2C RTC / Alarm chip + * + * Copyright (C) 2014, Arnaud EBALARD <arno@natisbad.org> + * + * Detailed datasheet of the chip is 
available here: + * + * http://www.abracon.com/realtimeclock/AB-RTCMC-32.768kHz-B5ZE-S3-Application-Manual.pdf + * + * This work is based on ISL12057 driver (drivers/rtc/rtc-isl12057.c). + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/rtc.h> +#include <linux/i2c.h> +#include <linux/bcd.h> +#include <linux/of.h> +#include <linux/regmap.h> +#include <linux/interrupt.h> + +#define DRV_NAME "rtc-ab-b5ze-s3" + +/* Control section */ +#define ABB5ZES3_REG_CTRL1 0x00 /* Control 1 register */ +#define ABB5ZES3_REG_CTRL1_CIE BIT(0) /* Pulse interrupt enable */ +#define ABB5ZES3_REG_CTRL1_AIE BIT(1) /* Alarm interrupt enable */ +#define ABB5ZES3_REG_CTRL1_SIE BIT(2) /* Second interrupt enable */ +#define ABB5ZES3_REG_CTRL1_PM BIT(3) /* 24h/12h mode */ +#define ABB5ZES3_REG_CTRL1_SR BIT(4) /* Software reset */ +#define ABB5ZES3_REG_CTRL1_STOP BIT(5) /* RTC circuit enable */ +#define ABB5ZES3_REG_CTRL1_CAP BIT(7) + +#define ABB5ZES3_REG_CTRL2 0x01 /* Control 2 register */ +#define ABB5ZES3_REG_CTRL2_CTBIE BIT(0) /* Countdown timer B int. enable */ +#define ABB5ZES3_REG_CTRL2_CTAIE BIT(1) /* Countdown timer A int. enable */ +#define ABB5ZES3_REG_CTRL2_WTAIE BIT(2) /* Watchdog timer A int. enable */ +#define ABB5ZES3_REG_CTRL2_AF BIT(3) /* Alarm interrupt status */ +#define ABB5ZES3_REG_CTRL2_SF BIT(4) /* Second interrupt status */ +#define ABB5ZES3_REG_CTRL2_CTBF BIT(5) /* Countdown timer B int. 
status */ +#define ABB5ZES3_REG_CTRL2_CTAF BIT(6) /* Countdown timer A int. status */ +#define ABB5ZES3_REG_CTRL2_WTAF BIT(7) /* Watchdog timer A int. status */ + +#define ABB5ZES3_REG_CTRL3 0x02 /* Control 3 register */ +#define ABB5ZES3_REG_CTRL3_PM2 BIT(7) /* Power Management bit 2 */ +#define ABB5ZES3_REG_CTRL3_PM1 BIT(6) /* Power Management bit 1 */ +#define ABB5ZES3_REG_CTRL3_PM0 BIT(5) /* Power Management bit 0 */ +#define ABB5ZES3_REG_CTRL3_BSF BIT(3) /* Battery switchover int. status */ +#define ABB5ZES3_REG_CTRL3_BLF BIT(2) /* Battery low int. status */ +#define ABB5ZES3_REG_CTRL3_BSIE BIT(1) /* Battery switchover int. enable */ +#define ABB5ZES3_REG_CTRL3_BLIE BIT(0) /* Battery low int. enable */ + +#define ABB5ZES3_CTRL_SEC_LEN 3 + +/* RTC section */ +#define ABB5ZES3_REG_RTC_SC 0x03 /* RTC Seconds register */ +#define ABB5ZES3_REG_RTC_SC_OSC BIT(7) /* Clock integrity status */ +#define ABB5ZES3_REG_RTC_MN 0x04 /* RTC Minutes register */ +#define ABB5ZES3_REG_RTC_HR 0x05 /* RTC Hours register */ +#define ABB5ZES3_REG_RTC_HR_PM BIT(5) /* RTC Hours PM bit */ +#define ABB5ZES3_REG_RTC_DT 0x06 /* RTC Date register */ +#define ABB5ZES3_REG_RTC_DW 0x07 /* RTC Day of the week register */ +#define ABB5ZES3_REG_RTC_MO 0x08 /* RTC Month register */ +#define ABB5ZES3_REG_RTC_YR 0x09 /* RTC Year register */ + +#define ABB5ZES3_RTC_SEC_LEN 7 + +/* Alarm section (enable bits are all active low) */ +#define ABB5ZES3_REG_ALRM_MN 0x0A /* Alarm - minute register */ +#define ABB5ZES3_REG_ALRM_MN_AE BIT(7) /* Minute enable */ +#define ABB5ZES3_REG_ALRM_HR 0x0B /* Alarm - hours register */ +#define ABB5ZES3_REG_ALRM_HR_AE BIT(7) /* Hour enable */ +#define ABB5ZES3_REG_ALRM_DT 0x0C /* Alarm - date register */ +#define ABB5ZES3_REG_ALRM_DT_AE BIT(7) /* Date (day of the month) enable */ +#define ABB5ZES3_REG_ALRM_DW 0x0D /* Alarm - day of the week reg. 
*/ +#define ABB5ZES3_REG_ALRM_DW_AE BIT(7) /* Day of the week enable */ + +#define ABB5ZES3_ALRM_SEC_LEN 4 + +/* Frequency offset section */ +#define ABB5ZES3_REG_FREQ_OF 0x0E /* Frequency offset register */ +#define ABB5ZES3_REG_FREQ_OF_MODE 0x0E /* Offset mode: 2 hours / minute */ + +/* CLOCKOUT section */ +#define ABB5ZES3_REG_TIM_CLK 0x0F /* Timer & Clockout register */ +#define ABB5ZES3_REG_TIM_CLK_TAM BIT(7) /* Permanent/pulsed timer A/int. 2 */ +#define ABB5ZES3_REG_TIM_CLK_TBM BIT(6) /* Permanent/pulsed timer B */ +#define ABB5ZES3_REG_TIM_CLK_COF2 BIT(5) /* Clkout Freq bit 2 */ +#define ABB5ZES3_REG_TIM_CLK_COF1 BIT(4) /* Clkout Freq bit 1 */ +#define ABB5ZES3_REG_TIM_CLK_COF0 BIT(3) /* Clkout Freq bit 0 */ +#define ABB5ZES3_REG_TIM_CLK_TAC1 BIT(2) /* Timer A: - 01 : countdown */ +#define ABB5ZES3_REG_TIM_CLK_TAC0 BIT(1) /* - 10 : timer */ +#define ABB5ZES3_REG_TIM_CLK_TBC BIT(0) /* Timer B enable */ + +/* Timer A Section */ +#define ABB5ZES3_REG_TIMA_CLK 0x10 /* Timer A clock register */ +#define ABB5ZES3_REG_TIMA_CLK_TAQ2 BIT(2) /* Freq bit 2 */ +#define ABB5ZES3_REG_TIMA_CLK_TAQ1 BIT(1) /* Freq bit 1 */ +#define ABB5ZES3_REG_TIMA_CLK_TAQ0 BIT(0) /* Freq bit 0 */ +#define ABB5ZES3_REG_TIMA 0x11 /* Timer A register */ + +#define ABB5ZES3_TIMA_SEC_LEN 2 + +/* Timer B Section */ +#define ABB5ZES3_REG_TIMB_CLK 0x12 /* Timer B clock register */ +#define ABB5ZES3_REG_TIMB_CLK_TBW2 BIT(6) +#define ABB5ZES3_REG_TIMB_CLK_TBW1 BIT(5) +#define ABB5ZES3_REG_TIMB_CLK_TBW0 BIT(4) +#define ABB5ZES3_REG_TIMB_CLK_TAQ2 BIT(2) +#define ABB5ZES3_REG_TIMB_CLK_TAQ1 BIT(1) +#define ABB5ZES3_REG_TIMB_CLK_TAQ0 BIT(0) +#define ABB5ZES3_REG_TIMB 0x13 /* Timer B register */ +#define ABB5ZES3_TIMB_SEC_LEN 2 + +#define ABB5ZES3_MEM_MAP_LEN 0x14 + +struct abb5zes3_rtc_data { + struct rtc_device *rtc; + struct regmap *regmap; + struct mutex lock; + + int irq; + + bool battery_low; +}; + +/* + * Try and match register bits w/ fixed null values to see whether we + * are dealing with an 
ABB5ZES3. Note: this function is called early + * during init and hence does need mutex protection. + */ +static int abb5zes3_i2c_validate_chip(struct regmap *regmap) +{ + u8 regs[ABB5ZES3_MEM_MAP_LEN]; + static const u8 mask[ABB5ZES3_MEM_MAP_LEN] = { 0x00, 0x00, 0x10, 0x00, + 0x80, 0xc0, 0xc0, 0xf8, + 0xe0, 0x00, 0x00, 0x40, + 0x40, 0x78, 0x00, 0x00, + 0xf8, 0x00, 0x88, 0x00 }; + int ret, i; + + ret = regmap_bulk_read(regmap, 0, regs, ABB5ZES3_MEM_MAP_LEN); + if (ret) + return ret; + + for (i = 0; i < ABB5ZES3_MEM_MAP_LEN; ++i) { + if (regs[i] & mask[i]) /* check if bits are cleared */ + return -ENODEV; + } + + return 0; +} + +/* Clear alarm status bit. */ +static int _abb5zes3_rtc_clear_alarm(struct device *dev) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ABB5ZES3_REG_CTRL2, + ABB5ZES3_REG_CTRL2_AF, 0); + if (ret) + dev_err(dev, "%s: clearing alarm failed (%d)\n", __func__, ret); + + return ret; +} + +/* Enable or disable alarm (i.e. alarm interrupt generation) */ +static int _abb5zes3_rtc_update_alarm(struct device *dev, bool enable) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ABB5ZES3_REG_CTRL1, + ABB5ZES3_REG_CTRL1_AIE, + enable ? ABB5ZES3_REG_CTRL1_AIE : 0); + if (ret) + dev_err(dev, "%s: writing alarm INT failed (%d)\n", + __func__, ret); + + return ret; +} + +/* + * Note: we only read, so regmap inner lock protection is sufficient, i.e. + * we do not need driver's main lock protection. + */ +static int _abb5zes3_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + u8 regs[ABB5ZES3_REG_RTC_SC + ABB5ZES3_RTC_SEC_LEN]; + int ret; + + /* + * As we need to read CTRL1 register anyway to access 24/12h + * mode bit, we do a single bulk read of both control and RTC + * sections (they are consecutive). This also ease indexing + * of register values after bulk read. 
+ */ + ret = regmap_bulk_read(data->regmap, ABB5ZES3_REG_CTRL1, regs, + sizeof(regs)); + if (ret) { + dev_err(dev, "%s: reading RTC time failed (%d)\n", + __func__, ret); + goto err; + } + + /* If clock integrity is not guaranteed, do not return a time value */ + if (regs[ABB5ZES3_REG_RTC_SC] & ABB5ZES3_REG_RTC_SC_OSC) { + ret = -ENODATA; + goto err; + } + + tm->tm_sec = bcd2bin(regs[ABB5ZES3_REG_RTC_SC] & 0x7F); + tm->tm_min = bcd2bin(regs[ABB5ZES3_REG_RTC_MN]); + + if (regs[ABB5ZES3_REG_CTRL1] & ABB5ZES3_REG_CTRL1_PM) { /* 12hr mode */ + tm->tm_hour = bcd2bin(regs[ABB5ZES3_REG_RTC_HR] & 0x1f); + if (regs[ABB5ZES3_REG_RTC_HR] & ABB5ZES3_REG_RTC_HR_PM) /* PM */ + tm->tm_hour += 12; + } else { /* 24hr mode */ + tm->tm_hour = bcd2bin(regs[ABB5ZES3_REG_RTC_HR]); + } + + tm->tm_mday = bcd2bin(regs[ABB5ZES3_REG_RTC_DT]); + tm->tm_wday = bcd2bin(regs[ABB5ZES3_REG_RTC_DW]); + tm->tm_mon = bcd2bin(regs[ABB5ZES3_REG_RTC_MO]) - 1; /* starts at 1 */ + tm->tm_year = bcd2bin(regs[ABB5ZES3_REG_RTC_YR]) + 100; + + ret = rtc_valid_tm(tm); + +err: + return ret; +} + +static int abb5zes3_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + u8 regs[ABB5ZES3_REG_RTC_SC + ABB5ZES3_RTC_SEC_LEN]; + int ret; + + /* + * Year register is 8-bit wide and bcd-coded, i.e records values + * between 0 and 99. tm_year is an offset from 1900 and we are + * interested in the 2000-2099 range, so any value less than 100 + * is invalid. 
+ */ + if (tm->tm_year < 100) + return -EINVAL; + + regs[ABB5ZES3_REG_RTC_SC] = bin2bcd(tm->tm_sec); /* MSB=0 clears OSC */ + regs[ABB5ZES3_REG_RTC_MN] = bin2bcd(tm->tm_min); + regs[ABB5ZES3_REG_RTC_HR] = bin2bcd(tm->tm_hour); /* 24-hour format */ + regs[ABB5ZES3_REG_RTC_DT] = bin2bcd(tm->tm_mday); + regs[ABB5ZES3_REG_RTC_DW] = bin2bcd(tm->tm_wday); + regs[ABB5ZES3_REG_RTC_MO] = bin2bcd(tm->tm_mon + 1); + regs[ABB5ZES3_REG_RTC_YR] = bin2bcd(tm->tm_year - 100); + + mutex_lock(&data->lock); + ret = regmap_bulk_write(data->regmap, ABB5ZES3_REG_RTC_SC, + regs + ABB5ZES3_REG_RTC_SC, + ABB5ZES3_RTC_SEC_LEN); + mutex_unlock(&data->lock); + + + return ret; +} + +static int abb5zes3_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time rtc_tm, *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ABB5ZES3_ALRM_SEC_LEN]; + unsigned int reg; + int ret; + + mutex_lock(&data->lock); + ret = regmap_bulk_read(data->regmap, ABB5ZES3_REG_ALRM_MN, regs, + ABB5ZES3_ALRM_SEC_LEN); + if (ret) { + dev_err(dev, "%s: reading alarm section failed (%d)\n", + __func__, ret); + goto err; + } + + alarm_tm->tm_sec = 0; + alarm_tm->tm_min = bcd2bin(regs[0] & 0x7f); + alarm_tm->tm_hour = bcd2bin(regs[1] & 0x3f); + alarm_tm->tm_mday = bcd2bin(regs[2] & 0x3f); + alarm_tm->tm_wday = -1; + + /* + * The alarm section does not store year/month. We use the ones in rtc + * section as a basis and increment month and then year if needed to get + * alarm after current time. 
+ */ + ret = _abb5zes3_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err; + + alarm_tm->tm_year = rtc_tm.tm_year; + alarm_tm->tm_mon = rtc_tm.tm_mon; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err; + + if (alarm_secs < rtc_secs) { + if (alarm_tm->tm_mon == 11) { + alarm_tm->tm_mon = 0; + alarm_tm->tm_year += 1; + } else { + alarm_tm->tm_mon += 1; + } + } + + ret = regmap_read(data->regmap, ABB5ZES3_REG_CTRL1, &reg); + if (ret) { + dev_err(dev, "%s: reading ctrl reg failed (%d)\n", + __func__, ret); + goto err; + } + + alarm->enabled = !!(reg & ABB5ZES3_REG_CTRL1_AIE); + +err: + mutex_unlock(&data->lock); + + return ret; +} + +/* ALARM is only accurate to the minute (not the second) */ +static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + u8 regs[ABB5ZES3_ALRM_SEC_LEN]; + struct rtc_time rtc_tm; + int ret, enable = 1; + + mutex_lock(&data->lock); + ret = _abb5zes3_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err; + + /* If alarm time is before current time, disable the alarm */ + if (!alarm->enabled || alarm_secs <= rtc_secs) { + enable = 0; + } else { + /* + * Chip only support alarms up to one month in the future. Let's + * return an error if we get something after that limit. + * Comparison is done by incrementing rtc_tm month field by one + * and checking alarm value is still below.
+ */ + if (rtc_tm.tm_mon == 11) { /* handle year wrapping */ + rtc_tm.tm_mon = 0; + rtc_tm.tm_year += 1; + } else { + rtc_tm.tm_mon += 1; + } + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err; + + if (alarm_secs > rtc_secs) { + dev_err(dev, "%s: alarm maximum is one month in the " + "future (%d)\n", __func__, ret); + ret = -EINVAL; + goto err; + } + } + + /* Disable the alarm before modifying it */ + ret = _abb5zes3_rtc_update_alarm(dev, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to disable the alarm (%d)\n", + __func__, ret); + goto err; + } + + /* Program alarm registers */ + regs[0] = bin2bcd(alarm_tm->tm_min) & 0x7f; /* minute */ + regs[1] = bin2bcd(alarm_tm->tm_hour) & 0x3f; /* hour */ + regs[2] = bin2bcd(alarm_tm->tm_mday) & 0x3f; /* day of the month */ + regs[3] = ABB5ZES3_REG_ALRM_DW_AE; /* do not match day of the week */ + + ret = regmap_bulk_write(data->regmap, ABB5ZES3_REG_ALRM_MN, regs, + ABB5ZES3_ALRM_SEC_LEN); + if (ret < 0) { + dev_err(dev, "%s: writing ALARM section failed (%d)\n", + __func__, ret); + goto err; + } + + /* Enable or disable alarm */ + ret = _abb5zes3_rtc_update_alarm(dev, enable); + +err: + mutex_unlock(&data->lock); + + return ret; +} + + +/* Enable or disable battery low irq generation */ +static inline int _abb5zes3_rtc_battery_low_irq_enable(struct regmap *regmap, + bool enable) +{ + return regmap_update_bits(regmap, ABB5ZES3_REG_CTRL3, + ABB5ZES3_REG_CTRL3_BLIE, + enable ? ABB5ZES3_REG_CTRL3_BLIE : 0); +} + +/* + * Check current RTC status and enable/disable what needs to be. Return 0 if + * everything went ok and a negative value upon error. Note: this function + * is called early during init and hence does need mutex protection. 
+ */ +static int abb5zes3_rtc_check_setup(struct device *dev) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + struct regmap *regmap = data->regmap; + unsigned int reg; + int ret; + u8 mask; + + /* + * By default, the devices generates a 32.768KHz signal on IRQ#1 pin. It + * is disabled here to prevent polluting the interrupt line and + * uselessly triggering the IRQ handler we install for alarm and battery + * low events. Note: this is done before clearing int. status below + * in this function. + * We also disable all timers and set timer interrupt to permanent (not + * pulsed). + */ + mask = (ABB5ZES3_REG_TIM_CLK_TBC | ABB5ZES3_REG_TIM_CLK_TAC0 | + ABB5ZES3_REG_TIM_CLK_TAC1 | ABB5ZES3_REG_TIM_CLK_COF0 | + ABB5ZES3_REG_TIM_CLK_COF1 | ABB5ZES3_REG_TIM_CLK_COF2 | + ABB5ZES3_REG_TIM_CLK_TBM | ABB5ZES3_REG_TIM_CLK_TAM); + ret = regmap_update_bits(regmap, ABB5ZES3_REG_TIM_CLK, mask, + ABB5ZES3_REG_TIM_CLK_COF0 | ABB5ZES3_REG_TIM_CLK_COF1 | + ABB5ZES3_REG_TIM_CLK_COF2); + if (ret < 0) { + dev_err(dev, "%s: unable to initialize clkout register (%d)\n", + __func__, ret); + return ret; + } + + /* + * Each component of the alarm (MN, HR, DT, DW) can be enabled/disabled + * individually by clearing/setting MSB of each associated register. So, + * we set all alarm enable bits to disable current alarm setting. + */ + mask = (ABB5ZES3_REG_ALRM_MN_AE | ABB5ZES3_REG_ALRM_HR_AE | + ABB5ZES3_REG_ALRM_DT_AE | ABB5ZES3_REG_ALRM_DW_AE); + ret = regmap_update_bits(regmap, ABB5ZES3_REG_CTRL2, mask, mask); + if (ret < 0) { + dev_err(dev, "%s: unable to disable alarm setting (%d)\n", + __func__, ret); + return ret; + } + + /* Set Control 1 register (RTC enabled, 24hr mode, all int. 
disabled) */ + mask = (ABB5ZES3_REG_CTRL1_CIE | ABB5ZES3_REG_CTRL1_AIE | + ABB5ZES3_REG_CTRL1_SIE | ABB5ZES3_REG_CTRL1_PM | + ABB5ZES3_REG_CTRL1_CAP | ABB5ZES3_REG_CTRL1_STOP); + ret = regmap_update_bits(regmap, ABB5ZES3_REG_CTRL1, mask, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to initialize CTRL1 register (%d)\n", + __func__, ret); + return ret; + } + + /* + * Set Control 2 register (timer int. disabled, alarm status cleared). + * WTAF is read-only and cleared automatically by reading the register. + */ + mask = (ABB5ZES3_REG_CTRL2_CTBIE | ABB5ZES3_REG_CTRL2_CTAIE | + ABB5ZES3_REG_CTRL2_WTAIE | ABB5ZES3_REG_CTRL2_AF | + ABB5ZES3_REG_CTRL2_SF | ABB5ZES3_REG_CTRL2_CTBF | + ABB5ZES3_REG_CTRL2_CTAF); + ret = regmap_update_bits(regmap, ABB5ZES3_REG_CTRL2, mask, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to initialize CTRL2 register (%d)\n", + __func__, ret); + return ret; + } + + /* + * Enable battery low detection function and battery switchover function + * (standard mode). Disable associated interrupts. Clear battery + * switchover flag but not battery low flag. The latter is checked + * later below. + */ + mask = (ABB5ZES3_REG_CTRL3_PM0 | ABB5ZES3_REG_CTRL3_PM1 | + ABB5ZES3_REG_CTRL3_PM2 | ABB5ZES3_REG_CTRL3_BLIE | + ABB5ZES3_REG_CTRL3_BSIE| ABB5ZES3_REG_CTRL3_BSF); + ret = regmap_update_bits(regmap, ABB5ZES3_REG_CTRL3, mask, 0); + if (ret < 0) { + dev_err(dev, "%s: unable to initialize CTRL3 register (%d)\n", + __func__, ret); + return ret; + } + + /* Check oscillator integrity flag */ + ret = regmap_read(regmap, ABB5ZES3_REG_RTC_SC, &reg); + if (ret < 0) { + dev_err(dev, "%s: unable to read osc. integrity flag (%d)\n", + __func__, ret); + return ret; + } + + if (reg & ABB5ZES3_REG_RTC_SC_OSC) { + dev_err(dev, "clock integrity not guaranteed. Osc. has stopped " + "or has been interrupted.\n"); + dev_err(dev, "change battery (if not already done) and " + "then set time to reset osc.
failure flag.\n"); + } + + /* + * Check battery low flag at startup: this allows reporting battery + * is low at startup when IRQ line is not connected. Note: we record + * current status to avoid reenabling this interrupt later in probe + * function if battery is low. + */ + ret = regmap_read(regmap, ABB5ZES3_REG_CTRL3, &reg); + if (ret < 0) { + dev_err(dev, "%s: unable to read battery low flag (%d)\n", + __func__, ret); + return ret; + } + + data->battery_low = reg & ABB5ZES3_REG_CTRL3_BLF; + if (data->battery_low) { + dev_err(dev, "RTC battery is low; please, consider " + "changing it!\n"); + + ret = _abb5zes3_rtc_battery_low_irq_enable(regmap, false); + if (ret) + dev_err(dev, "%s: disabling battery low interrupt " + "generation failed (%d)\n", __func__, ret); + } + + return ret; +} + +static int abb5zes3_rtc_alarm_irq_enable(struct device *dev, + unsigned int enable) +{ + struct abb5zes3_rtc_data *rtc_data = dev_get_drvdata(dev); + int ret = 0; + + if (rtc_data->irq) { + mutex_lock(&rtc_data->lock); + ret = _abb5zes3_rtc_update_alarm(dev, enable); + mutex_unlock(&rtc_data->lock); + } + + return ret; +} + +static irqreturn_t _abb5zes3_rtc_interrupt(int irq, void *data) +{ + struct i2c_client *client = data; + struct device *dev = &client->dev; + struct abb5zes3_rtc_data *rtc_data = dev_get_drvdata(dev); + struct rtc_device *rtc = rtc_data->rtc; + u8 regs[ABB5ZES3_CTRL_SEC_LEN]; + int ret, handled = IRQ_NONE; + + ret = regmap_bulk_read(rtc_data->regmap, 0, regs, + ABB5ZES3_CTRL_SEC_LEN); + if (ret) { + dev_err(dev, "%s: unable to read control section (%d)!\n", + __func__, ret); + return handled; + } + + /* + * Check battery low detection flag and disable battery low interrupt + * generation if flag is set (interrupt can only be cleared when + * battery is replaced).
+ */ + if (regs[ABB5ZES3_REG_CTRL3] & ABB5ZES3_REG_CTRL3_BLF) { + dev_err(dev, "RTC battery is low; please change it!\n"); + + _abb5zes3_rtc_battery_low_irq_enable(rtc_data->regmap, false); + + handled = IRQ_HANDLED; + } + + /* Check alarm flag */ + if (regs[ABB5ZES3_REG_CTRL2] & ABB5ZES3_REG_CTRL2_AF) { + dev_dbg(dev, "RTC alarm!\n"); + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); + + /* Acknowledge and disable the alarm */ + _abb5zes3_rtc_clear_alarm(dev); + _abb5zes3_rtc_update_alarm(dev, 0); + + handled = IRQ_HANDLED; + } + + return handled; +} + +static const struct rtc_class_ops rtc_ops = { + .read_time = _abb5zes3_rtc_read_time, + .set_time = abb5zes3_rtc_set_time, + .read_alarm = abb5zes3_rtc_read_alarm, + .set_alarm = abb5zes3_rtc_set_alarm, + .alarm_irq_enable = abb5zes3_rtc_alarm_irq_enable, +}; + +static struct regmap_config abb5zes3_rtc_regmap_config = { + .reg_bits = 8, + .val_bits = 8, +}; + +static int abb5zes3_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct abb5zes3_rtc_data *data = NULL; + struct device *dev = &client->dev; + struct regmap *regmap; + int ret; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C | + I2C_FUNC_SMBUS_BYTE_DATA | + I2C_FUNC_SMBUS_I2C_BLOCK)) { + ret = -ENODEV; + goto err; + } + + regmap = devm_regmap_init_i2c(client, &abb5zes3_rtc_regmap_config); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + dev_err(dev, "%s: regmap allocation failed: %d\n", + __func__, ret); + goto err; + } + + ret = abb5zes3_i2c_validate_chip(regmap); + if (ret) + goto err; + + data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!data) { + ret = -ENOMEM; + goto err; + } + + mutex_init(&data->lock); + data->regmap = regmap; + dev_set_drvdata(dev, data); + + ret = abb5zes3_rtc_check_setup(dev); + if (ret) + goto err; + + if (client->irq > 0) { + ret = devm_request_threaded_irq(dev, client->irq, NULL, + _abb5zes3_rtc_interrupt, + IRQF_SHARED|IRQF_ONESHOT, + DRV_NAME, client); + if (!ret) { + 
device_init_wakeup(dev, true); + data->irq = client->irq; + dev_dbg(dev, "%s: irq %d used by RTC\n", __func__, + client->irq); + } else { + dev_err(dev, "%s: irq %d unavailable (%d)\n", + __func__, client->irq, ret); + goto err; + } + } + + data->rtc = devm_rtc_device_register(dev, DRV_NAME, &rtc_ops, + THIS_MODULE); + ret = PTR_ERR_OR_ZERO(data->rtc); + if (ret) { + dev_err(dev, "%s: unable to register RTC device (%d)\n", + __func__, ret); + goto err; + } + + /* + * AB-B5Z5E only supports a coarse granularity alarm (one minute + * resolution up to one month) so we cannot support UIE mode + * using the device's alarm. Note it should be feasible to support + * such a feature using one of the two timers the device provides. + */ + data->rtc->uie_unsupported = 1; + + /* Enable battery low detection interrupt if battery not already low */ + if (!data->battery_low && data->irq) { + ret = _abb5zes3_rtc_battery_low_irq_enable(regmap, true); + if (ret) { + dev_err(dev, "%s: enabling battery low interrupt " + "generation failed (%d)\n", __func__, ret); + goto err; + } + } + +err: + if (ret && data && data->irq) + device_init_wakeup(dev, false); + return ret; +} + +static int abb5zes3_remove(struct i2c_client *client) +{ + struct abb5zes3_rtc_data *rtc_data = dev_get_drvdata(&client->dev); + + if (rtc_data->irq > 0) + device_init_wakeup(&client->dev, false); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int abb5zes3_rtc_suspend(struct device *dev) +{ + struct abb5zes3_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return enable_irq_wake(rtc_data->irq); + + return 0; +} + +static int abb5zes3_rtc_resume(struct device *dev) +{ + struct abb5zes3_rtc_data *rtc_data = dev_get_drvdata(dev); + + if (device_may_wakeup(dev)) + return disable_irq_wake(rtc_data->irq); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(abb5zes3_rtc_pm_ops, abb5zes3_rtc_suspend, + abb5zes3_rtc_resume); + +#ifdef CONFIG_OF +static const struct of_device_id 
abb5zes3_dt_match[] = { + { .compatible = "abracon,abb5zes3" }, + { }, +}; +#endif + +static const struct i2c_device_id abb5zes3_id[] = { + { "abb5zes3", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, abb5zes3_id); + +static struct i2c_driver abb5zes3_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + .pm = &abb5zes3_rtc_pm_ops, + .of_match_table = of_match_ptr(abb5zes3_dt_match), + }, + .probe = abb5zes3_probe, + .remove = abb5zes3_remove, + .id_table = abb5zes3_id, +}; +module_i2c_driver(abb5zes3_driver); + +MODULE_AUTHOR("Arnaud EBALARD <arno@natisbad.org>"); +MODULE_DESCRIPTION("Abracon AB-RTCMC-32.768kHz-B5ZE-S3 RTC/Alarm driver"); +MODULE_LICENSE("GPL"); From c8a1d8a523e1018c3b7d23c7e1c99bf20006bcbf Mon Sep 17 00:00:00 2001 From: Arnaud Ebalard <arno@natisbad.org> Date: Fri, 13 Feb 2015 14:41:04 -0800 Subject: [PATCH 103/108] rtc: rtc-ab-b5ze-s3: add sub-minute alarm support Abracon AB-RTCMC-32.768kHz-B5ZE-S3 alarm is only accurate to the minute. For that reason, UIE mode is currently not supported by the driver. But the device provides a watchdog timer which can be coupled with the alarm mechanism to extend support and provide sub-minute alarm capability. This patch implements that extension. More precisely, it makes use of the watchdog timer for alarms which are less than four minutes in the future (with second accuracy) and uses the standard alarm mechanism for other alarms (with minute accuracy).
Signed-off-by: Arnaud Ebalard <arno@natisbad.org> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Peter Huewe <peter.huewe@infineon.com> Cc: Linus Walleij <linus.walleij@linaro.org> Cc: Thierry Reding <treding@nvidia.com> Cc: Mark Brown <broonie@kernel.org> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Rob Herring <robherring2@gmail.com> Cc: Pawel Moll <pawel.moll@arm.com> Cc: Stephen Warren <swarren@wwwdotorg.org> Cc: Ian Campbell <ijc+devicetree@hellion.org.uk> Cc: Grant Likely <grant.likely@linaro.org> Cc: Rob Landley <rob@landley.net> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Guenter Roeck <linux@roeck-us.net> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com> Cc: Kumar Gala <galak@codeaurora.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/rtc-ab-b5ze-s3.c | 293 +++++++++++++++++++++++++++++++---- 1 file changed, 263 insertions(+), 30 deletions(-) diff --git a/drivers/rtc/rtc-ab-b5ze-s3.c b/drivers/rtc/rtc-ab-b5ze-s3.c index bbbf06f55e17..cfc2ef98d393 100644 --- a/drivers/rtc/rtc-ab-b5ze-s3.c +++ b/drivers/rtc/rtc-ab-b5ze-s3.c @@ -133,6 +133,7 @@ struct abb5zes3_rtc_data { int irq; bool battery_low; + bool timer_alarm; /* current alarm is via timer A */ }; /* @@ -192,6 +193,22 @@ static int _abb5zes3_rtc_update_alarm(struct device *dev, bool enable) return ret; } +/* Enable or disable timer (watchdog timer A interrupt generation) */ +static int _abb5zes3_rtc_update_timer(struct device *dev, bool enable) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + int ret; + + ret = regmap_update_bits(data->regmap, ABB5ZES3_REG_CTRL2, + ABB5ZES3_REG_CTRL2_WTAIE, + enable ? ABB5ZES3_REG_CTRL2_WTAIE : 0); + if (ret) + dev_err(dev, "%s: writing timer INT failed (%d)\n", + __func__, ret); + + return ret; +} + /* * Note: we only read, so regmap inner lock protection is sufficient, i.e. * we do not need driver's main lock protection. 
@@ -277,7 +294,92 @@ static int abb5zes3_rtc_set_time(struct device *dev, struct rtc_time *tm) return ret; } -static int abb5zes3_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +/* + * Set provided TAQ and Timer A registers (TIMA_CLK and TIMA) based on + * given number of seconds. + */ +static inline void sec_to_timer_a(u8 secs, u8 *taq, u8 *timer_a) +{ + *taq = ABB5ZES3_REG_TIMA_CLK_TAQ1; /* 1Hz */ + *timer_a = secs; +} + +/* + * Return current number of seconds in Timer A. As we only use + * timer A with a 1Hz freq, this is what we expect to have. + */ +static inline int sec_from_timer_a(u8 *secs, u8 taq, u8 timer_a) +{ + if (taq != ABB5ZES3_REG_TIMA_CLK_TAQ1) /* 1Hz */ + return -EINVAL; + + *secs = timer_a; + + return 0; +} + +/* + * Read alarm currently configured via a watchdog timer using timer A. This + * is done by reading current RTC time and adding remaining timer time. + */ +static int _abb5zes3_rtc_read_timer(struct device *dev, + struct rtc_wkalrm *alarm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time rtc_tm, *alarm_tm = &alarm->time; + u8 regs[ABB5ZES3_TIMA_SEC_LEN + 1]; + unsigned long rtc_secs; + unsigned int reg; + u8 timer_secs; + int ret; + + /* + * Instead of doing two separate calls, because they are consecutive, + * we grab both clockout register and Timer A section. The latter is + * used to decide if timer A is enabled (as a watchdog timer). + */ + ret = regmap_bulk_read(data->regmap, ABB5ZES3_REG_TIM_CLK, regs, + ABB5ZES3_TIMA_SEC_LEN + 1); + if (ret) { + dev_err(dev, "%s: reading Timer A section failed (%d)\n", + __func__, ret); + goto err; + } + + /* get current time ... */ + ret = _abb5zes3_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err; + + /* ... convert to seconds ... */ + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err; + + /* ... add remaining timer A time ... */ + ret = sec_from_timer_a(&timer_secs, regs[1], regs[2]); + if (ret) + goto err; + + /* ... 
and convert back. */ + rtc_time_to_tm(rtc_secs + timer_secs, alarm_tm); + + ret = regmap_read(data->regmap, ABB5ZES3_REG_CTRL2, &reg); + if (ret) { + dev_err(dev, "%s: reading ctrl reg failed (%d)\n", + __func__, ret); + goto err; + } + + alarm->enabled = !!(reg & ABB5ZES3_REG_CTRL2_WTAIE); + +err: + return ret; +} + +/* Read alarm currently configured via a RTC alarm registers. */ +static int _abb5zes3_rtc_read_alarm(struct device *dev, + struct rtc_wkalrm *alarm) { struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); struct rtc_time rtc_tm, *alarm_tm = &alarm->time; @@ -286,7 +388,6 @@ static int abb5zes3_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) unsigned int reg; int ret; - mutex_lock(&data->lock); ret = regmap_bulk_read(data->regmap, ABB5ZES3_REG_ALRM_MN, regs, ABB5ZES3_ALRM_SEC_LEN); if (ret) { @@ -340,13 +441,39 @@ static int abb5zes3_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) alarm->enabled = !!(reg & ABB5ZES3_REG_CTRL1_AIE); err: + return ret; +} + +/* + * As the Alarm mechanism supported by the chip is only accurate to the + * minute, we use the watchdog timer mechanism provided by timer A + * (up to 256 seconds w/ a second accuracy) for low alarm values (below + * 4 minutes). Otherwise, we use the common alarm mechanism provided + * by the chip. In order for that to work, we keep track of currently + * configured timer type via 'timer_alarm' flag in our private data + * structure. + */ +static int abb5zes3_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + int ret; + + mutex_lock(&data->lock); + if (data->timer_alarm) + ret = _abb5zes3_rtc_read_timer(dev, alarm); + else + ret = _abb5zes3_rtc_read_alarm(dev, alarm); mutex_unlock(&data->lock); return ret; } -/* ALARM is only accurate to the minute (not the second) */ -static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +/* + * Set alarm using chip alarm mechanism. 
It is only accurate to the + * minute (not the second). The function expects alarm interrupt to + * be disabled. + */ +static int _abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) { struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); struct rtc_time *alarm_tm = &alarm->time; @@ -355,7 +482,6 @@ static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) struct rtc_time rtc_tm; int ret, enable = 1; - mutex_lock(&data->lock); ret = _abb5zes3_rtc_read_time(dev, &rtc_tm); if (ret) goto err; @@ -397,18 +523,13 @@ static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) } } - /* Disable the alarm before modifying it */ - ret = _abb5zes3_rtc_update_alarm(dev, 0); - if (ret < 0) { - dev_err(dev, "%s: unable to disable the alarm (%d)\n", - __func__, ret); - goto err; - } - - /* Program alarm registers */ - regs[0] = bin2bcd(alarm_tm->tm_min) & 0x7f; /* minute */ - regs[1] = bin2bcd(alarm_tm->tm_hour) & 0x3f; /* hour */ - regs[2] = bin2bcd(alarm_tm->tm_mday) & 0x3f; /* day of the month */ + /* + * Program all alarm registers but DW one. For each register, setting + * MSB to 0 enables associated alarm. + */ + regs[0] = bin2bcd(alarm_tm->tm_min) & 0x7f; + regs[1] = bin2bcd(alarm_tm->tm_hour) & 0x3f; + regs[2] = bin2bcd(alarm_tm->tm_mday) & 0x3f; regs[3] = ABB5ZES3_REG_ALRM_DW_AE; /* do not match day of the week */ ret = regmap_bulk_write(data->regmap, ABB5ZES3_REG_ALRM_MN, regs, @@ -419,15 +540,115 @@ static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) goto err; } - /* Enable or disable alarm */ + /* Record currently configured alarm is not a timer */ + data->timer_alarm = 0; + + /* Enable or disable alarm interrupt generation */ ret = _abb5zes3_rtc_update_alarm(dev, enable); err: - mutex_unlock(&data->lock); - return ret; } +/* + * Set alarm using timer watchdog (via timer A) mechanism. The function expects + * timer A interrupt to be disabled. 
+ */ +static int _abb5zes3_rtc_set_timer(struct device *dev, struct rtc_wkalrm *alarm, + u8 secs) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + u8 regs[ABB5ZES3_TIMA_SEC_LEN]; + u8 mask = ABB5ZES3_REG_TIM_CLK_TAC0 | ABB5ZES3_REG_TIM_CLK_TAC1; + int ret = 0; + + /* Program given number of seconds to Timer A registers */ + sec_to_timer_a(secs, &regs[0], &regs[1]); + ret = regmap_bulk_write(data->regmap, ABB5ZES3_REG_TIMA_CLK, regs, + ABB5ZES3_TIMA_SEC_LEN); + if (ret < 0) { + dev_err(dev, "%s: writing timer section failed\n", __func__); + goto err; + } + + /* Configure Timer A as a watchdog timer */ + ret = regmap_update_bits(data->regmap, ABB5ZES3_REG_TIM_CLK, + mask, ABB5ZES3_REG_TIM_CLK_TAC1); + if (ret) + dev_err(dev, "%s: failed to update timer\n", __func__); + + /* Record currently configured alarm is a timer */ + data->timer_alarm = 1; + + /* Enable or disable timer interrupt generation */ + ret = _abb5zes3_rtc_update_timer(dev, alarm->enabled); + +err: + return ret; +} + +/* + * The chip has an alarm which is only accurate to the minute. In order to + * handle alarms below that limit, we use the watchdog timer function of + * timer A. More precisely, the timer method is used for alarms below 240 + * seconds. 
+ */ +static int abb5zes3_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm) +{ + struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); + struct rtc_time *alarm_tm = &alarm->time; + unsigned long rtc_secs, alarm_secs; + struct rtc_time rtc_tm; + int ret; + + mutex_lock(&data->lock); + ret = _abb5zes3_rtc_read_time(dev, &rtc_tm); + if (ret) + goto err; + + ret = rtc_tm_to_time(&rtc_tm, &rtc_secs); + if (ret) + goto err; + + ret = rtc_tm_to_time(alarm_tm, &alarm_secs); + if (ret) + goto err; + + /* Let's first disable both the alarm and the timer interrupts */ + ret = _abb5zes3_rtc_update_alarm(dev, false); + if (ret < 0) { + dev_err(dev, "%s: unable to disable alarm (%d)\n", __func__, + ret); + goto err; + } + ret = _abb5zes3_rtc_update_timer(dev, false); + if (ret < 0) { + dev_err(dev, "%s: unable to disable timer (%d)\n", __func__, + ret); + goto err; + } + + data->timer_alarm = 0; + + /* + * Let's now configure the alarm; if we are expected to ring in + * more than 240s, then we setup an alarm. Otherwise, a timer. 
+ */ + if ((alarm_secs > rtc_secs) && ((alarm_secs - rtc_secs) <= 240)) + ret = _abb5zes3_rtc_set_timer(dev, alarm, + alarm_secs - rtc_secs); + else + ret = _abb5zes3_rtc_set_alarm(dev, alarm); + + err: + mutex_unlock(&data->lock); + + if (ret) + dev_err(dev, "%s: unable to configure alarm (%d)\n", __func__, + ret); + + return ret; + } /* Enable or disable battery low irq generation */ static inline int _abb5zes3_rtc_battery_low_irq_enable(struct regmap *regmap, @@ -446,7 +667,7 @@ static inline int _abb5zes3_rtc_battery_low_irq_enable(struct regmap *regmap, static int abb5zes3_rtc_check_setup(struct device *dev) { struct abb5zes3_rtc_data *data = dev_get_drvdata(dev); - struct regmap *regmap = data->regmap; + struct regmap *regmap = data->regmap; unsigned int reg; int ret; u8 mask; @@ -579,7 +800,10 @@ static int abb5zes3_rtc_alarm_irq_enable(struct device *dev, if (rtc_data->irq) { mutex_lock(&rtc_data->lock); - ret = _abb5zes3_rtc_update_alarm(dev, enable); + if (rtc_data->timer_alarm) + ret = _abb5zes3_rtc_update_timer(dev, enable); + else + ret = _abb5zes3_rtc_update_alarm(dev, enable); mutex_unlock(&rtc_data->lock); } @@ -629,6 +853,23 @@ static irqreturn_t _abb5zes3_rtc_interrupt(int irq, void *data) handled = IRQ_HANDLED; } + /* Check watchdog Timer A flag */ + if (regs[ABB5ZES3_REG_CTRL2] & ABB5ZES3_REG_CTRL2_WTAF) { + dev_dbg(dev, "RTC timer!\n"); + + rtc_update_irq(rtc, 1, RTC_IRQF | RTC_AF); + + /* + * Acknowledge and disable the alarm. Note: WTAF + * flag had been cleared when reading CTRL2 + */ + _abb5zes3_rtc_update_timer(dev, 0); + + rtc_data->timer_alarm = 0; + + handled = IRQ_HANDLED; + } + return handled; } @@ -712,14 +953,6 @@ static int abb5zes3_probe(struct i2c_client *client, goto err; } - /* - * AB-B5Z5E only supports a coarse granularity alarm (one minute - * resolution up to one month) so we cannot support UIE mode - * using the device's alarm. 
Note it should be feasible to support - * such a feature using one of the two timers the device provides. - */ - data->rtc->uie_unsupported = 1; - /* Enable battery low detection interrupt if battery not already low */ if (!data->battery_low && data->irq) { ret = _abb5zes3_rtc_battery_low_irq_enable(regmap, true); From bb624047de21993622bc616eceaae94a096d9256 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Fri, 13 Feb 2015 14:41:07 -0800 Subject: [PATCH 104/108] rtc: armada38x: add the device tree binding documentation The Marvell Armada 38x SoCs contain an RTC which differs from the RTC used in the other mvebu SoCs until now. This fourth version of the patch set adds support for this new IP and enables it in the Device Tree of the Armada 38x SoC. This patch (of 5): The Armada 38x SoCs come with a new RTC which differs from the one used in the other mvebu SoCs until now. This patch describes the binding of this RTC. Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Arnaud Ebalard <arno@natisbad.org> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Ezequiel Garcia <ezequiel.garcia@free-electrons.com> Cc: Maxime Ripard <maxime.ripard@free-electrons.com> Cc: Boris BREZILLON <boris.brezillon@free-electrons.com> Cc: Lior Amsalem <alior@marvell.com> Cc: Tawfik Bayouk <tawfik@marvell.com> Cc: Nadav Haklai <nadavh@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- .../bindings/rtc/armada-380-rtc.txt | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 Documentation/devicetree/bindings/rtc/armada-380-rtc.txt diff --git a/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt 
b/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt new file mode 100644 index 000000000000..2eb9d4ee7dc0 --- /dev/null +++ b/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt @@ -0,0 +1,22 @@ +* Real Time Clock of the Armada 38x SoCs + +RTC controller for the Armada 38x SoCs + +Required properties: +- compatible : Should be "marvell,armada-380-rtc" +- reg: a list of base address and size pairs, one for each entry in + reg-names +- reg names: should contain: + * "rtc" for the RTC registers + * "rtc-soc" for the SoC related registers and among them the one + related to the interrupt. +- interrupts: IRQ line for the RTC. + +Example: + +rtc@a3800 { + compatible = "marvell,armada-380-rtc"; + reg = <0xa3800 0x20>, <0x184a0 0x0c>; + reg-names = "rtc", "rtc-soc"; + interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>; +}; From a3a42806920ad12ec831fb2de4f63e251778300f Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Fri, 13 Feb 2015 14:41:11 -0800 Subject: [PATCH 105/108] drivers/rtc/rtc-armada38x: add a new RTC driver for recent mvebu SoCs The new mvebu SoCs come with a new RTC driver. This patch adds the support for this new IP which is currently found in the Armada 38x SoCs. This RTC provides two alarms, but only the first one is used in the driver. The RTC also allows using periodic interrupts. 
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Reviewed-by: Arnaud Ebalard <arno@natisbad.org> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Ezequiel Garcia <ezequiel.garcia@free-electrons.com> Cc: Maxime Ripard <maxime.ripard@free-electrons.com> Cc: Boris BREZILLON <boris.brezillon@free-electrons.com> Cc: Lior Amsalem <alior@marvell.com> Cc: Tawfik Bayouk <tawfik@marvell.com> Cc: Nadav Haklai <nadavh@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/rtc/Kconfig | 10 ++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-armada38x.c | 320 ++++++++++++++++++++++++++++++++++++ 3 files changed, 331 insertions(+) create mode 100644 drivers/rtc/rtc-armada38x.c diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 1b19f327f35f..3bc9ddbe5cf7 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -1280,6 +1280,16 @@ config RTC_DRV_MV This driver can also be built as a module. If so, the module will be called rtc-mv. +config RTC_DRV_ARMADA38X + tristate "Armada 38x Marvell SoC RTC" + depends on ARCH_MVEBU + help + If you say yes here you will get support for the in-chip RTC + that can be found in the Armada 38x Marvell's SoC device + + This driver can also be built as a module. If so, the module + will be called armada38x-rtc. 
+ config RTC_DRV_PS3 tristate "PS3 RTC" depends on PPC_PS3 diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index 855c4e364058..99ded8b75e95 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_RTC_DRV_88PM80X) += rtc-88pm80x.o obj-$(CONFIG_RTC_DRV_AB3100) += rtc-ab3100.o obj-$(CONFIG_RTC_DRV_AB8500) += rtc-ab8500.o obj-$(CONFIG_RTC_DRV_ABB5ZES3) += rtc-ab-b5ze-s3.o +obj-$(CONFIG_RTC_DRV_ARMADA38X) += rtc-armada38x.o obj-$(CONFIG_RTC_DRV_AS3722) += rtc-as3722.o obj-$(CONFIG_RTC_DRV_AT32AP700X)+= rtc-at32ap700x.o obj-$(CONFIG_RTC_DRV_AT91RM9200)+= rtc-at91rm9200.o diff --git a/drivers/rtc/rtc-armada38x.c b/drivers/rtc/rtc-armada38x.c new file mode 100644 index 000000000000..43e04af39e09 --- /dev/null +++ b/drivers/rtc/rtc-armada38x.c @@ -0,0 +1,320 @@ +/* + * RTC driver for the Armada 38x Marvell SoCs + * + * Copyright (C) 2015 Marvell + * + * Gregory Clement <gregory.clement@free-electrons.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. 
+ * + */ + +#include <linux/delay.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/rtc.h> + +#define RTC_STATUS 0x0 +#define RTC_STATUS_ALARM1 BIT(0) +#define RTC_STATUS_ALARM2 BIT(1) +#define RTC_IRQ1_CONF 0x4 +#define RTC_IRQ1_AL_EN BIT(0) +#define RTC_IRQ1_FREQ_EN BIT(1) +#define RTC_IRQ1_FREQ_1HZ BIT(2) +#define RTC_TIME 0xC +#define RTC_ALARM1 0x10 + +#define SOC_RTC_INTERRUPT 0x8 +#define SOC_RTC_ALARM1 BIT(0) +#define SOC_RTC_ALARM2 BIT(1) +#define SOC_RTC_ALARM1_MASK BIT(2) +#define SOC_RTC_ALARM2_MASK BIT(3) + +struct armada38x_rtc { + struct rtc_device *rtc_dev; + void __iomem *regs; + void __iomem *regs_soc; + spinlock_t lock; + int irq; +}; + +/* + * According to the datasheet, the OS should wait 5us after every + * register write to the RTC hard macro so that the required update + * can occur without holding off the system bus + */ +static void rtc_delayed_write(u32 val, struct armada38x_rtc *rtc, int offset) +{ + writel(val, rtc->regs + offset); + udelay(5); +} + +static int armada38x_rtc_read_time(struct device *dev, struct rtc_time *tm) +{ + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + unsigned long time, time_check, flags; + + spin_lock_irqsave(&rtc->lock, flags); + + time = readl(rtc->regs + RTC_TIME); + /* + * WA for failing time set attempts. As stated in HW ERRATA if + * more than one second between two time reads is detected + * then read once again. + */ + time_check = readl(rtc->regs + RTC_TIME); + if ((time_check - time) > 1) + time_check = readl(rtc->regs + RTC_TIME); + + spin_unlock_irqrestore(&rtc->lock, flags); + + rtc_time_to_tm(time_check, tm); + + return 0; +} + +static int armada38x_rtc_set_time(struct device *dev, struct rtc_time *tm) +{ + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + int ret = 0; + unsigned long time, flags; + + ret = rtc_tm_to_time(tm, &time); + + if (ret) + goto out; + /* + * Setting the RTC time not always succeeds. 
According to the + * errata we need to first write on the status register and + * then wait for 100ms before writing to the time register to be + * sure that the data will be taken into account. + */ + spin_lock_irqsave(&rtc->lock, flags); + + rtc_delayed_write(0, rtc, RTC_STATUS); + + spin_unlock_irqrestore(&rtc->lock, flags); + + msleep(100); + + spin_lock_irqsave(&rtc->lock, flags); + + rtc_delayed_write(time, rtc, RTC_TIME); + + spin_unlock_irqrestore(&rtc->lock, flags); +out: + return ret; +} + +static int armada38x_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + unsigned long time, flags; + u32 val; + + spin_lock_irqsave(&rtc->lock, flags); + + time = readl(rtc->regs + RTC_ALARM1); + val = readl(rtc->regs + RTC_IRQ1_CONF) & RTC_IRQ1_AL_EN; + + spin_unlock_irqrestore(&rtc->lock, flags); + + alrm->enabled = val ? 1 : 0; + rtc_time_to_tm(time, &alrm->time); + + return 0; +} + +static int armada38x_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm) +{ + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + unsigned long time, flags; + int ret = 0; + u32 val; + + ret = rtc_tm_to_time(&alrm->time, &time); + + if (ret) + goto out; + + spin_lock_irqsave(&rtc->lock, flags); + + rtc_delayed_write(time, rtc, RTC_ALARM1); + + if (alrm->enabled) { + rtc_delayed_write(RTC_IRQ1_AL_EN, rtc, RTC_IRQ1_CONF); + val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT); + writel(val | SOC_RTC_ALARM1_MASK, + rtc->regs_soc + SOC_RTC_INTERRUPT); + } + + spin_unlock_irqrestore(&rtc->lock, flags); + +out: + return ret; +} + +static int armada38x_rtc_alarm_irq_enable(struct device *dev, + unsigned int enabled) +{ + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + unsigned long flags; + + spin_lock_irqsave(&rtc->lock, flags); + + if (enabled) + rtc_delayed_write(RTC_IRQ1_AL_EN, rtc, RTC_IRQ1_CONF); + else + rtc_delayed_write(0, rtc, RTC_IRQ1_CONF); + + spin_unlock_irqrestore(&rtc->lock, flags); + + return 0; +} + 
+static irqreturn_t armada38x_rtc_alarm_irq(int irq, void *data) +{ + struct armada38x_rtc *rtc = data; + u32 val; + int event = RTC_IRQF | RTC_AF; + + dev_dbg(&rtc->rtc_dev->dev, "%s:irq(%d)\n", __func__, irq); + + spin_lock(&rtc->lock); + + val = readl(rtc->regs_soc + SOC_RTC_INTERRUPT); + + writel(val & ~SOC_RTC_ALARM1, rtc->regs_soc + SOC_RTC_INTERRUPT); + val = readl(rtc->regs + RTC_IRQ1_CONF); + /* disable all the interrupts for alarm 1 */ + rtc_delayed_write(0, rtc, RTC_IRQ1_CONF); + /* Ack the event */ + rtc_delayed_write(RTC_STATUS_ALARM1, rtc, RTC_STATUS); + + spin_unlock(&rtc->lock); + + if (val & RTC_IRQ1_FREQ_EN) { + if (val & RTC_IRQ1_FREQ_1HZ) + event |= RTC_UF; + else + event |= RTC_PF; + } + + rtc_update_irq(rtc->rtc_dev, 1, event); + + return IRQ_HANDLED; +} + +static struct rtc_class_ops armada38x_rtc_ops = { + .read_time = armada38x_rtc_read_time, + .set_time = armada38x_rtc_set_time, + .read_alarm = armada38x_rtc_read_alarm, + .set_alarm = armada38x_rtc_set_alarm, + .alarm_irq_enable = armada38x_rtc_alarm_irq_enable, +}; + +static __init int armada38x_rtc_probe(struct platform_device *pdev) +{ + struct resource *res; + struct armada38x_rtc *rtc; + int ret; + + rtc = devm_kzalloc(&pdev->dev, sizeof(struct armada38x_rtc), + GFP_KERNEL); + if (!rtc) + return -ENOMEM; + + spin_lock_init(&rtc->lock); + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rtc"); + rtc->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rtc->regs)) + return PTR_ERR(rtc->regs); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rtc-soc"); + rtc->regs_soc = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rtc->regs_soc)) + return PTR_ERR(rtc->regs_soc); + + rtc->irq = platform_get_irq(pdev, 0); + + if (rtc->irq < 0) { + dev_err(&pdev->dev, "no irq\n"); + return rtc->irq; + } + if (devm_request_irq(&pdev->dev, rtc->irq, armada38x_rtc_alarm_irq, + 0, pdev->name, rtc) < 0) { + dev_warn(&pdev->dev, "Interrupt not available.\n"); + rtc->irq = -1; 
+ /* + * If there is no interrupt available then we can't + * use the alarm + */ + armada38x_rtc_ops.set_alarm = NULL; + armada38x_rtc_ops.alarm_irq_enable = NULL; + } + platform_set_drvdata(pdev, rtc); + if (rtc->irq != -1) + device_init_wakeup(&pdev->dev, 1); + + rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name, + &armada38x_rtc_ops, THIS_MODULE); + if (IS_ERR(rtc->rtc_dev)) { + ret = PTR_ERR(rtc->rtc_dev); + dev_err(&pdev->dev, "Failed to register RTC device: %d\n", ret); + return ret; + } + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int armada38x_rtc_suspend(struct device *dev) +{ + if (device_may_wakeup(dev)) { + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + + return enable_irq_wake(rtc->irq); + } + + return 0; +} + +static int armada38x_rtc_resume(struct device *dev) +{ + if (device_may_wakeup(dev)) { + struct armada38x_rtc *rtc = dev_get_drvdata(dev); + + return disable_irq_wake(rtc->irq); + } + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(armada38x_rtc_pm_ops, + armada38x_rtc_suspend, armada38x_rtc_resume); + +#ifdef CONFIG_OF +static const struct of_device_id armada38x_rtc_of_match_table[] = { + { .compatible = "marvell,armada-380-rtc", }, + {} +}; +#endif + +static struct platform_driver armada38x_rtc_driver = { + .driver = { + .name = "armada38x-rtc", + .pm = &armada38x_rtc_pm_ops, + .of_match_table = of_match_ptr(armada38x_rtc_of_match_table), + }, +}; + +module_platform_driver_probe(armada38x_rtc_driver, armada38x_rtc_probe); + +MODULE_DESCRIPTION("Marvell Armada 38x RTC driver"); +MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>"); +MODULE_LICENSE("GPL"); From c6a95dbee79321d10f030546cc57a2268f4dd2b7 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Fri, 13 Feb 2015 14:41:14 -0800 Subject: [PATCH 106/108] MAINTAINERS: add the RTC driver for the Armada38x Put it in the mvebu entry. 
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Arnaud Ebalard <arno@natisbad.org> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Ezequiel Garcia <ezequiel.garcia@free-electrons.com> Cc: Maxime Ripard <maxime.ripard@free-electrons.com> Cc: Boris BREZILLON <boris.brezillon@free-electrons.com> Cc: Lior Amsalem <alior@marvell.com> Cc: Tawfik Bayouk <tawfik@marvell.com> Cc: Nadav Haklai <nadavh@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index debe74cde67b..cd8383e26ac8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1173,6 +1173,7 @@ M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-mvebu/ +F: drivers/rtc/armada38x-rtc ARM/Marvell Berlin SoC support M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> From a73c730541f63bca2b8c15403be4e085348ea35c Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Fri, 13 Feb 2015 14:41:18 -0800 Subject: [PATCH 107/108] ARM: mvebu: add Device Tree description of RTC on Armada 38x The Marvell Armada 38x SoCs contains an RTC which differs from the RTC used in the other mvebu SoCs until now. This commit adds the Device Tree description of this interface at the SoC level. 
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Arnaud Ebalard <arno@natisbad.org> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Ezequiel Garcia <ezequiel.garcia@free-electrons.com> Cc: Maxime Ripard <maxime.ripard@free-electrons.com> Cc: Boris BREZILLON <boris.brezillon@free-electrons.com> Cc: Lior Amsalem <alior@marvell.com> Cc: Tawfik Bayouk <tawfik@marvell.com> Cc: Nadav Haklai <nadavh@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/arm/boot/dts/armada-38x.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 74391dace9e7..2a9f4caac643 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -381,6 +381,13 @@ clocks = <&gateclk 4>; }; + rtc@a3800 { + compatible = "marvell,armada-380-rtc"; + reg = <0xa3800 0x20>, <0x184a0 0x0c>; + reg-names = "rtc", "rtc-soc"; + interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>; + }; + sata@a8000 { compatible = "marvell,armada-380-ahci"; reg = <0xa8000 0x2000>; From a3b30e7210c870d79d3c3cedb80f8cfaab0f4e2e Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Fri, 13 Feb 2015 14:41:21 -0800 Subject: [PATCH 108/108] ARM: mvebu: enable Armada 38x RTC driver in mvebu_v7_defconfig Now that the Armada 38x RTC driver has been pushed, let's enable it in mvebu_v7_defconfig. 
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Cc: Alessandro Zummo <a.zummo@towertech.it> Cc: Jason Cooper <jason@lakedaemon.net> Cc: Andrew Lunn <andrew@lunn.ch> Cc: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com> Cc: Arnaud Ebalard <arno@natisbad.org> Cc: Thomas Petazzoni <thomas.petazzoni@free-electrons.com> Cc: Ezequiel Garcia <ezequiel.garcia@free-electrons.com> Cc: Maxime Ripard <maxime.ripard@free-electrons.com> Cc: Boris BREZILLON <boris.brezillon@free-electrons.com> Cc: Lior Amsalem <alior@marvell.com> Cc: Tawfik Bayouk <tawfik@marvell.com> Cc: Nadav Haklai <nadavh@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/arm/configs/mvebu_v7_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index 627accea72fb..2400b9f52403 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -112,6 +112,7 @@ CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S35390A=y CONFIG_RTC_DRV_MV=y +CONFIG_RTC_DRV_ARMADA38X=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y # CONFIG_IOMMU_SUPPORT is not set