Yinghai Lu d97b07c54f initramfs: support initramfs that is bigger than 2GiB
Now with 64bit bzImage and kexec tools, we support ramdisk that size is
bigger than 2g, as we could put it above 4G.

Found compressed initramfs image could not be decompressed properly.  It
turns out that image length is int during decompress detection, and it
will become < 0 when length is more than 2G.  Furthermore, during
decompressing len as int is used for inbuf count, that has problem too.

Change len to long, that should be ok as on 32 bit platform long is
32bits.

Tested with following compressed initramfs image as root with kexec.
	gzip, bzip2, xz, lzma, lzop, lz4.
run time for populate_rootfs():
   size        name       Nehalem-EX  Westmere-EX  Ivybridge-EX
 9034400256 root_img     :   26s           24s          30s
 3561095057 root_img.lz4 :   28s           27s          27s
 3459554629 root_img.lzo :   29s           29s          28s
 3219399480 root_img.gz  :   64s           62s          49s
 2251594592 root_img.xz  :  262s          260s         183s
 2226366598 root_img.lzma:  386s          376s         277s
 2901482513 root_img.bz2 :  635s          599s

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Rashika Kheria <rashika.kheria@gmail.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kyungsik Lee <kyungsik.lee@lge.com>
Cc: P J P <ppandit@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Cc: "Daniel M. Weeks" <dan@danweeks.net>
Cc: Alexandre Courbot <acourbot@nvidia.com>
Cc: Jan Beulich <JBeulich@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2014-08-08 15:57:26 -07:00

362 lines
8.7 KiB
C

/*
* Many of the syscalls used in this file expect some of the arguments
* to be __user pointers not __kernel pointers. To limit the sparse
* noise, turn off sparse checking for this file.
*/
#ifdef __CHECKER__
#undef __CHECKER__
#warning "Sparse checking disabled for this file"
#endif
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/minix_fs.h>
#include <linux/ext2_fs.h>
#include <linux/romfs_fs.h>
#include <uapi/linux/cramfs_fs.h>
#include <linux/initrd.h>
#include <linux/string.h>
#include <linux/slab.h>
#include "do_mounts.h"
#include "../fs/squashfs/squashfs_fs.h"
#include <linux/decompress/generic.h>
int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */
static int __init prompt_ramdisk(char *str)
{
rd_prompt = simple_strtol(str,NULL,0) & 1;
return 1;
}
__setup("prompt_ramdisk=", prompt_ramdisk);
int __initdata rd_image_start; /* starting block # of image */
static int __init ramdisk_start_setup(char *str)
{
rd_image_start = simple_strtol(str,NULL,0);
return 1;
}
__setup("ramdisk_start=", ramdisk_start_setup);
static int __init crd_load(int in_fd, int out_fd, decompress_fn deco);
/*
* This routine tries to find a RAM disk image to load, and returns the
* number of blocks to read for a non-compressed image, 0 if the image
* is a compressed image, and -1 if an image with the right magic
* numbers could not be found.
*
* We currently check for the following magic numbers:
* minix
* ext2
* romfs
* cramfs
* squashfs
* gzip
* bzip2
* lzma
* xz
* lzo
* lz4
*/
static int __init
identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor)
{
const int size = 512;
struct minix_super_block *minixsb;
struct romfs_super_block *romfsb;
struct cramfs_super *cramfsb;
struct squashfs_super_block *squashfsb;
int nblocks = -1;
unsigned char *buf;
const char *compress_name;
unsigned long n;
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
minixsb = (struct minix_super_block *) buf;
romfsb = (struct romfs_super_block *) buf;
cramfsb = (struct cramfs_super *) buf;
squashfsb = (struct squashfs_super_block *) buf;
memset(buf, 0xe5, size);
/*
* Read block 0 to test for compressed kernel
*/
sys_lseek(fd, start_block * BLOCK_SIZE, 0);
sys_read(fd, buf, size);
*decompressor = decompress_method(buf, size, &compress_name);
if (compress_name) {
printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n",
compress_name, start_block);
if (!*decompressor)
printk(KERN_EMERG
"RAMDISK: %s decompressor not configured!\n",
compress_name);
nblocks = 0;
goto done;
}
/* romfs is at block zero too */
if (romfsb->word0 == ROMSB_WORD0 &&
romfsb->word1 == ROMSB_WORD1) {
printk(KERN_NOTICE
"RAMDISK: romfs filesystem found at block %d\n",
start_block);
nblocks = (ntohl(romfsb->size)+BLOCK_SIZE-1)>>BLOCK_SIZE_BITS;
goto done;
}
if (cramfsb->magic == CRAMFS_MAGIC) {
printk(KERN_NOTICE
"RAMDISK: cramfs filesystem found at block %d\n",
start_block);
nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
goto done;
}
/* squashfs is at block zero too */
if (le32_to_cpu(squashfsb->s_magic) == SQUASHFS_MAGIC) {
printk(KERN_NOTICE
"RAMDISK: squashfs filesystem found at block %d\n",
start_block);
nblocks = (le64_to_cpu(squashfsb->bytes_used) + BLOCK_SIZE - 1)
>> BLOCK_SIZE_BITS;
goto done;
}
/*
* Read 512 bytes further to check if cramfs is padded
*/
sys_lseek(fd, start_block * BLOCK_SIZE + 0x200, 0);
sys_read(fd, buf, size);
if (cramfsb->magic == CRAMFS_MAGIC) {
printk(KERN_NOTICE
"RAMDISK: cramfs filesystem found at block %d\n",
start_block);
nblocks = (cramfsb->size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
goto done;
}
/*
* Read block 1 to test for minix and ext2 superblock
*/
sys_lseek(fd, (start_block+1) * BLOCK_SIZE, 0);
sys_read(fd, buf, size);
/* Try minix */
if (minixsb->s_magic == MINIX_SUPER_MAGIC ||
minixsb->s_magic == MINIX_SUPER_MAGIC2) {
printk(KERN_NOTICE
"RAMDISK: Minix filesystem found at block %d\n",
start_block);
nblocks = minixsb->s_nzones << minixsb->s_log_zone_size;
goto done;
}
/* Try ext2 */
n = ext2_image_size(buf);
if (n) {
printk(KERN_NOTICE
"RAMDISK: ext2 filesystem found at block %d\n",
start_block);
nblocks = n;
goto done;
}
printk(KERN_NOTICE
"RAMDISK: Couldn't find valid RAM disk image starting at %d.\n",
start_block);
done:
sys_lseek(fd, start_block * BLOCK_SIZE, 0);
kfree(buf);
return nblocks;
}
int __init rd_load_image(char *from)
{
int res = 0;
int in_fd, out_fd;
unsigned long rd_blocks, devblocks;
int nblocks, i, disk;
char *buf = NULL;
unsigned short rotate = 0;
decompress_fn decompressor = NULL;
#if !defined(CONFIG_S390)
char rotator[4] = { '|' , '/' , '-' , '\\' };
#endif
out_fd = sys_open("/dev/ram", O_RDWR, 0);
if (out_fd < 0)
goto out;
in_fd = sys_open(from, O_RDONLY, 0);
if (in_fd < 0)
goto noclose_input;
nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor);
if (nblocks < 0)
goto done;
if (nblocks == 0) {
if (crd_load(in_fd, out_fd, decompressor) == 0)
goto successful_load;
goto done;
}
/*
* NOTE NOTE: nblocks is not actually blocks but
* the number of kibibytes of data to load into a ramdisk.
* So any ramdisk block size that is a multiple of 1KiB should
* work when the appropriate ramdisk_blocksize is specified
* on the command line.
*
* The default ramdisk_blocksize is 1KiB and it is generally
* silly to use anything else, so make sure to use 1KiB
* blocksize while generating ext2fs ramdisk-images.
*/
if (sys_ioctl(out_fd, BLKGETSIZE, (unsigned long)&rd_blocks) < 0)
rd_blocks = 0;
else
rd_blocks >>= 1;
if (nblocks > rd_blocks) {
printk("RAMDISK: image too big! (%dKiB/%ldKiB)\n",
nblocks, rd_blocks);
goto done;
}
/*
* OK, time to copy in the data
*/
if (sys_ioctl(in_fd, BLKGETSIZE, (unsigned long)&devblocks) < 0)
devblocks = 0;
else
devblocks >>= 1;
if (strcmp(from, "/initrd.image") == 0)
devblocks = nblocks;
if (devblocks == 0) {
printk(KERN_ERR "RAMDISK: could not determine device size\n");
goto done;
}
buf = kmalloc(BLOCK_SIZE, GFP_KERNEL);
if (!buf) {
printk(KERN_ERR "RAMDISK: could not allocate buffer\n");
goto done;
}
printk(KERN_NOTICE "RAMDISK: Loading %dKiB [%ld disk%s] into ram disk... ",
nblocks, ((nblocks-1)/devblocks)+1, nblocks>devblocks ? "s" : "");
for (i = 0, disk = 1; i < nblocks; i++) {
if (i && (i % devblocks == 0)) {
printk("done disk #%d.\n", disk++);
rotate = 0;
if (sys_close(in_fd)) {
printk("Error closing the disk.\n");
goto noclose_input;
}
change_floppy("disk #%d", disk);
in_fd = sys_open(from, O_RDONLY, 0);
if (in_fd < 0) {
printk("Error opening disk.\n");
goto noclose_input;
}
printk("Loading disk #%d... ", disk);
}
sys_read(in_fd, buf, BLOCK_SIZE);
sys_write(out_fd, buf, BLOCK_SIZE);
#if !defined(CONFIG_S390)
if (!(i % 16)) {
printk("%c\b", rotator[rotate & 0x3]);
rotate++;
}
#endif
}
printk("done.\n");
successful_load:
res = 1;
done:
sys_close(in_fd);
noclose_input:
sys_close(out_fd);
out:
kfree(buf);
sys_unlink("/dev/ram");
return res;
}
int __init rd_load_disk(int n)
{
if (rd_prompt)
change_floppy("root floppy disk to be loaded into RAM disk");
create_dev("/dev/root", ROOT_DEV);
create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n));
return rd_load_image("/dev/root");
}
static int exit_code;
static int decompress_error;
static int crd_infd, crd_outfd;
static long __init compr_fill(void *buf, unsigned long len)
{
long r = sys_read(crd_infd, buf, len);
if (r < 0)
printk(KERN_ERR "RAMDISK: error while reading compressed data");
else if (r == 0)
printk(KERN_ERR "RAMDISK: EOF while reading compressed data");
return r;
}
static long __init compr_flush(void *window, unsigned long outcnt)
{
long written = sys_write(crd_outfd, window, outcnt);
if (written != outcnt) {
if (decompress_error == 0)
printk(KERN_ERR
"RAMDISK: incomplete write (%ld != %ld)\n",
written, outcnt);
decompress_error = 1;
return -1;
}
return outcnt;
}
static void __init error(char *x)
{
printk(KERN_ERR "%s\n", x);
exit_code = 1;
decompress_error = 1;
}
static int __init crd_load(int in_fd, int out_fd, decompress_fn deco)
{
int result;
crd_infd = in_fd;
crd_outfd = out_fd;
if (!deco) {
pr_emerg("Invalid ramdisk decompression routine. "
"Select appropriate config option.\n");
panic("Could not decompress initial ramdisk image.");
}
result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error);
if (decompress_error)
result = 1;
return result;
}