From 78b7280cce23293f7570ad52c1ffe1485c6d9669 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2011 17:57:23 +0000 Subject: [PATCH 0001/3380] KEYS: Improve /proc/keys Improve /proc/keys by: (1) Don't attempt to summarise the payload of a negated key. It won't have one. To this end, a helper function - key_is_instantiated() has been added that allows the caller to find out whether the key is positively instantiated (as opposed to being uninstantiated or negatively instantiated). (2) Do show keys that are negative, expired or revoked rather than hiding them. This requires an override flag (no_state_check) to be passed to search_my_process_keyrings() and keyring_search_aux() to suppress this check. Without this, keys that are possessed by the caller, but only grant permissions to the caller if possessed are skipped as the possession check fails. Keys that are visible due to user, group or other checks are visible with or without this patch. Signed-off-by: David Howells Signed-off-by: James Morris --- include/linux/key.h | 13 +++++++++++ net/dns_resolver/dns_key.c | 10 +++++---- security/keys/internal.h | 4 +++- security/keys/keyring.c | 37 +++++++++++++++++++++----------- security/keys/proc.c | 2 +- security/keys/process_keys.c | 12 ++++++----- security/keys/request_key.c | 3 +-- security/keys/request_key_auth.c | 3 ++- security/keys/user_defined.c | 4 ++-- 9 files changed, 59 insertions(+), 29 deletions(-) diff --git a/include/linux/key.h b/include/linux/key.h index b2bb01719561..ef19b99aff98 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -276,6 +276,19 @@ static inline key_serial_t key_serial(struct key *key) return key ? key->serial : 0; } +/** + * key_is_instantiated - Determine if a key has been positively instantiated + * @key: The key to check. + * + * Return true if the specified key has been positively instantiated, false + * otherwise. 
+ */ +static inline bool key_is_instantiated(const struct key *key) +{ + return test_bit(KEY_FLAG_INSTANTIATED, &key->flags) && + !test_bit(KEY_FLAG_NEGATIVE, &key->flags); +} + #define rcu_dereference_key(KEY) \ (rcu_dereference_protected((KEY)->payload.rcudata, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index cfa7a5e1c5c9..fa000d26dc60 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m) int err = key->type_data.x[0]; seq_puts(m, key->description); - if (err) - seq_printf(m, ": %d", err); - else - seq_printf(m, ": %u", key->datalen); + if (key_is_instantiated(key)) { + if (err) + seq_printf(m, ": %d", err); + else + seq_printf(m, ": %u", key->datalen); + } } /* diff --git a/security/keys/internal.h b/security/keys/internal.h index 07a025f81902..f375152a2500 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -109,11 +109,13 @@ extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match); + key_match_func_t match, + bool no_state_check); extern key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred); extern key_ref_t search_process_keyrings(struct key_type *type, const void *description, diff --git a/security/keys/keyring.c b/security/keys/keyring.c index cdd2f3f88c88..a06ffab38568 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -176,13 +176,15 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m) else seq_puts(m, "[anon]"); - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); - else - seq_puts(m, ": empty"); - 
rcu_read_unlock(); + if (key_is_instantiated(keyring)) { + rcu_read_lock(); + klist = rcu_dereference(keyring->payload.subscriptions); + if (klist) + seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + else + seq_puts(m, ": empty"); + rcu_read_unlock(); + } } /* @@ -271,6 +273,7 @@ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, * @type: The type of key to search for. * @description: Parameter for @match. * @match: Function to rule on whether or not a key is the one required. + * @no_state_check: Don't check if a matching key is bad * * Search the supplied keyring tree for a key that matches the criteria given. * The root keyring and any linked keyrings must grant Search permission to the @@ -303,7 +306,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, const struct cred *cred, struct key_type *type, const void *description, - key_match_func_t match) + key_match_func_t match, + bool no_state_check) { struct { struct keyring_list *keylist; @@ -345,6 +349,8 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, kflags = keyring->flags; if (keyring->type == type && match(keyring, description)) { key = keyring; + if (no_state_check) + goto found; /* check it isn't negative and hasn't expired or been * revoked */ @@ -384,11 +390,13 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, continue; /* skip revoked keys and expired keys */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - continue; + if (!no_state_check) { + if (kflags & (1 << KEY_FLAG_REVOKED)) + continue; - if (key->expiry && now.tv_sec >= key->expiry) - continue; + if (key->expiry && now.tv_sec >= key->expiry) + continue; + } /* keys that don't match */ if (!match(key, description)) @@ -399,6 +407,9 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, cred, KEY_SEARCH) < 0) continue; + if (no_state_check) + goto found; + /* we set a different error code if we pass a negative key */ if (kflags & (1 << KEY_FLAG_NEGATIVE)) { err = key->type_data.reject_error; @@ -478,7 +489,7 
@@ key_ref_t keyring_search(key_ref_t keyring, return ERR_PTR(-ENOKEY); return keyring_search_aux(keyring, current->cred, - type, description, type->match); + type, description, type->match, false); } EXPORT_SYMBOL(keyring_search); diff --git a/security/keys/proc.c b/security/keys/proc.c index 525cf8a29cdd..49bbc97943ad 100644 --- a/security/keys/proc.c +++ b/security/keys/proc.c @@ -199,7 +199,7 @@ static int proc_keys_show(struct seq_file *m, void *v) if (key->perm & KEY_POS_VIEW) { skey_ref = search_my_process_keyrings(key->type, key, lookup_user_key_possessed, - cred); + true, cred); if (!IS_ERR(skey_ref)) { key_ref_put(skey_ref); key_ref = make_key_ref(key, 1); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 930634e45149..6c0480db8885 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -331,6 +331,7 @@ void key_fsgid_changed(struct task_struct *tsk) key_ref_t search_my_process_keyrings(struct key_type *type, const void *description, key_match_func_t match, + bool no_state_check, const struct cred *cred) { key_ref_t key_ref, ret, err; @@ -350,7 +351,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->thread_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->thread_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -371,7 +372,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, if (cred->tgcred->process_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->tgcred->process_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -395,7 +396,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, make_key_ref(rcu_dereference( cred->tgcred->session_keyring), 1), - cred, type, description, match); + cred, type, description, match, no_state_check); rcu_read_unlock(); if (!IS_ERR(key_ref)) 
@@ -417,7 +418,7 @@ key_ref_t search_my_process_keyrings(struct key_type *type, else if (cred->user->session_keyring) { key_ref = keyring_search_aux( make_key_ref(cred->user->session_keyring, 1), - cred, type, description, match); + cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -459,7 +460,8 @@ key_ref_t search_process_keyrings(struct key_type *type, might_sleep(); - key_ref = search_my_process_keyrings(type, description, match, cred); + key_ref = search_my_process_keyrings(type, description, match, + false, cred); if (!IS_ERR(key_ref)) goto found; err = key_ref; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index df3c0417ee40..b18a71745901 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -530,8 +530,7 @@ struct key *request_key_and_link(struct key_type *type, dest_keyring, flags); /* search all the process keyrings for a key */ - key_ref = search_process_keyrings(type, description, type->match, - cred); + key_ref = search_process_keyrings(type, description, type->match, cred); if (!IS_ERR(key_ref)) { key = key_ref_to_ptr(key_ref); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 68164031a74e..f6337c9082eb 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -59,7 +59,8 @@ static void request_key_auth_describe(const struct key *key, seq_puts(m, "key:"); seq_puts(m, key->description); - seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); + if (key_is_instantiated(key)) + seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c6ca8662a468..63bb1aaffc0a 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -169,8 +169,8 @@ EXPORT_SYMBOL_GPL(user_destroy); void user_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - - seq_printf(m, ": %u", 
key->datalen); + if (key_is_instantiated(key)) + seq_printf(m, ": %u", key->datalen); } EXPORT_SYMBOL_GPL(user_describe); From 4aab1e896a0a9d57420ff2867caa5a369123d8cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Mar 2011 17:57:33 +0000 Subject: [PATCH 0002/3380] KEYS: Make request_key() and co. return an error for a negative key Make request_key() and co. return an error for a negative or rejected key. If the key was simply negated, then return ENOKEY, otherwise return the error with which it was rejected. Without this patch, the following command returns a key number (with the latest keyutils): [root@andromeda ~]# keyctl request2 user debug:foo rejected @s 586569904 Trying to print the key merely gets you a permission denied error: [root@andromeda ~]# keyctl print 586569904 keyctl_read_alloc: Permission denied Doing another request_key() call does get you the error, as long as it hasn't expired yet: [root@andromeda ~]# keyctl request user debug:foo request_key: Key was rejected by service Signed-off-by: David Howells Signed-off-by: James Morris --- security/keys/keyctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 427fddcaeb19..eca51918c951 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -206,8 +206,14 @@ SYSCALL_DEFINE4(request_key, const char __user *, _type, goto error5; } + /* wait for the key to finish being constructed */ + ret = wait_for_key_construction(key, 1); + if (ret < 0) + goto error6; + ret = key->serial; +error6: key_put(key); error5: key_type_put(ktype); From 5806896019ceaa0a1e808182afb4bba33c948ad6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 14 Mar 2011 19:32:21 -0400 Subject: [PATCH 0003/3380] security: select correct default LSM_MMAP_MIN_ADDR on ARM. 
The default for this is universally set to 64k, but the help says: For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. The text is right, in that we are seeing selinux-enabled ARM targets that fail to launch /sbin/init because selinux blocks a memory map. So select the right value if we know we are building ARM. Signed-off-by: Paul Gortmaker Signed-off-by: James Morris --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index 95accd442d55..e0f08b52e4ab 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -167,6 +167,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX + default 32768 if ARM default 65536 help This is the portion of low virtual memory which should be protected From b299eb5cde1a91706c450804006c6559b0826df8 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Thu, 3 Mar 2011 04:30:13 +0530 Subject: [PATCH 0004/3380] ACPI:Fix goto flows in thermal-sys This patch fixes two minor bugs in thermal_sys: (a) The flow of goto's in thermal_hwmon_add_sysfs. (b) Remove the temp*_crit only if there is a get_crit_temp defined, in thermal_remove_hwmon_sysfs. 
Signed-off-by: Durgadoss R Signed-off-by: Len Brown --- drivers/thermal/thermal_sys.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 713b7ea4a607..0b1c82ad6805 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -499,7 +499,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) dev_set_drvdata(hwmon->device, hwmon); result = device_create_file(hwmon->device, &dev_attr_name); if (result) - goto unregister_hwmon_device; + goto free_mem; register_sys_interface: tz->hwmon = hwmon; @@ -513,7 +513,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) sysfs_attr_init(&tz->temp_input.attr.attr); result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto unregister_hwmon_device; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; @@ -527,7 +527,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) result = device_create_file(hwmon->device, &tz->temp_crit.attr); if (result) - goto unregister_hwmon_device; + goto unregister_input; } } @@ -539,9 +539,9 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return 0; - unregister_hwmon_device: - device_remove_file(hwmon->device, &tz->temp_crit.attr); + unregister_input: device_remove_file(hwmon->device, &tz->temp_input.attr); + unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); @@ -560,7 +560,8 @@ thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz) tz->hwmon = NULL; device_remove_file(hwmon->device, &tz->temp_input.attr); - device_remove_file(hwmon->device, &tz->temp_crit.attr); + if (tz->ops->get_crit_temp) + device_remove_file(hwmon->device, &tz->temp_crit.attr); mutex_lock(&thermal_list_lock); list_del(&tz->hwmon_node); From 0415b00d175e0d8945e6785aad21b5f157976ce0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 24 Mar 2011 
18:50:09 +0100 Subject: [PATCH 0005/3380] percpu: Always align percpu output section to PAGE_SIZE Percpu allocator honors alignment requests up to PAGE_SIZE and both the percpu addresses in the percpu address space and the translated kernel addresses should be aligned accordingly. The calculation of the former depends on the alignment of percpu output section in the kernel image. The linker script macros PERCPU_VADDR() and PERCPU() are used to define this output section and the latter takes @align parameter. Several architectures are using @align smaller than PAGE_SIZE breaking percpu memory alignment. This patch removes @align parameter from PERCPU(), renames it to PERCPU_SECTION() and makes it always align to PAGE_SIZE. While at it, add PCPU_SETUP_BUG_ON() checks such that alignment problems are reliably detected and remove percpu alignment comment recently added in workqueue.c as the condition would trigger BUG way before reaching there. For um, this patch raises the alignment of percpu area. As the area is in .init, there shouldn't be any noticeable difference. This problem was discovered by David Howells while debugging boot failure on mn10300. 
Signed-off-by: Tejun Heo Acked-by: Mike Frysinger Cc: uclinux-dist-devel@blackfin.uclinux.org Cc: David Howells Cc: Jeff Dike Cc: user-mode-linux-devel@lists.sourceforge.net --- arch/alpha/kernel/vmlinux.lds.S | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/blackfin/kernel/vmlinux.lds.S | 2 +- arch/cris/kernel/vmlinux.lds.S | 2 +- arch/frv/kernel/vmlinux.lds.S | 2 +- arch/m32r/kernel/vmlinux.lds.S | 2 +- arch/mips/kernel/vmlinux.lds.S | 2 +- arch/mn10300/kernel/vmlinux.lds.S | 2 +- arch/parisc/kernel/vmlinux.lds.S | 2 +- arch/powerpc/kernel/vmlinux.lds.S | 2 +- arch/s390/kernel/vmlinux.lds.S | 2 +- arch/sh/kernel/vmlinux.lds.S | 2 +- arch/sparc/kernel/vmlinux.lds.S | 2 +- arch/tile/kernel/vmlinux.lds.S | 2 +- arch/um/include/asm/common.lds.S | 2 +- arch/x86/kernel/vmlinux.lds.S | 2 +- arch/xtensa/kernel/vmlinux.lds.S | 2 +- include/asm-generic/vmlinux.lds.h | 17 ++++++++--------- kernel/workqueue.c | 4 +--- mm/percpu.c | 2 ++ 20 files changed, 28 insertions(+), 29 deletions(-) diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 433be2a24f31..8d57948c0aef 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ SECTIONS __init_begin = ALIGN(PAGE_SIZE); INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page needed for the THREAD_SIZE aligned init_task gets freed after init */ . = ALIGN(THREAD_SIZE); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index b4348e62ef06..e5287f21badc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -82,7 +82,7 @@ SECTIONS #endif } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) #ifndef CONFIG_XIP_KERNEL . 
= ALIGN(PAGE_SIZE); diff --git a/arch/blackfin/kernel/vmlinux.lds.S b/arch/blackfin/kernel/vmlinux.lds.S index 854fa49f1c3e..8d85c8c6f857 100644 --- a/arch/blackfin/kernel/vmlinux.lds.S +++ b/arch/blackfin/kernel/vmlinux.lds.S @@ -136,7 +136,7 @@ SECTIONS . = ALIGN(16); INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .exit.data : { diff --git a/arch/cris/kernel/vmlinux.lds.S b/arch/cris/kernel/vmlinux.lds.S index 728bbd9e7d4c..a6990cb0f098 100644 --- a/arch/cris/kernel/vmlinux.lds.S +++ b/arch/cris/kernel/vmlinux.lds.S @@ -102,7 +102,7 @@ SECTIONS #endif __vmlinux_end = .; /* Last address of the physical file. */ #ifdef CONFIG_ETRAX_ARCH_V32 - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) .init.ramfs : { INIT_RAM_FS diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 0daae8af5787..7e958d829ec9 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -37,7 +37,7 @@ SECTIONS _einittext = .; INIT_DATA_SECTION(8) - PERCPU(L1_CACHE_BYTES, 4096) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index c194d64cdbb9..2e7ccf7299a0 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -53,7 +53,7 @@ SECTIONS __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 832afbb87588..8616709452b1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -115,7 +115,7 @@ SECTIONS EXIT_DATA } - PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE) + PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) . 
= ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/mn10300/kernel/vmlinux.lds.S b/arch/mn10300/kernel/vmlinux.lds.S index 968bcd2cb022..6f702a6ab395 100644 --- a/arch/mn10300/kernel/vmlinux.lds.S +++ b/arch/mn10300/kernel/vmlinux.lds.S @@ -70,7 +70,7 @@ SECTIONS .exit.text : { EXIT_TEXT; } .exit.data : { EXIT_DATA; } - PERCPU(32, PAGE_SIZE) + PERCPU_SECTION(32) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 8f1e4efd143e..85b86617fe0b 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -145,7 +145,7 @@ SECTIONS EXIT_DATA } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b9150f07d266..920276c0f6a1 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -160,7 +160,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) . = ALIGN(8); .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 1bc18cdb525b..56fe6bc81fee 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -77,7 +77,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) - PERCPU(0x100, PAGE_SIZE) + PERCPU_SECTION(0x100) . 
= ALIGN(PAGE_SIZE); __init_end = .; /* freed after init ends here */ diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index af4d46187a79..731c10ce67b5 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -66,7 +66,7 @@ SECTIONS __machvec_end = .; } - PERCPU(L1_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L1_CACHE_BYTES) /* * .exit.text is discarded at runtime, not link time, to deal with diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 92b557afe535..c0220759003e 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -108,7 +108,7 @@ SECTIONS __sun4v_2insn_patch_end = .; } - PERCPU(SMP_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(SMP_CACHE_BYTES) . = ALIGN(PAGE_SIZE); __init_end = .; diff --git a/arch/tile/kernel/vmlinux.lds.S b/arch/tile/kernel/vmlinux.lds.S index 38f64fafdc10..631f10de12fe 100644 --- a/arch/tile/kernel/vmlinux.lds.S +++ b/arch/tile/kernel/vmlinux.lds.S @@ -60,7 +60,7 @@ SECTIONS . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_sinitdata) = .; INIT_DATA_SECTION(16) :data =0 - PERCPU(L2_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(L2_CACHE_BYTES) . = ALIGN(PAGE_SIZE); VMLINUX_SYMBOL(_einitdata) = .; diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 34bede8aad4a..4938de5512d2 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -42,7 +42,7 @@ INIT_SETUP(0) } - PERCPU(32, 32) + PERCPU_SECTION(32) .initcall.init : { INIT_CALLS diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 624a2016198e..3e9fb5d54f96 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -319,7 +319,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(INTERNODE_CACHE_BYTES, PAGE_SIZE) + PERCPU_SECTION(INTERNODE_CACHE_BYTES) #endif . 
= ALIGN(PAGE_SIZE); diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index a2820065927e..88ecea3facb4 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -155,7 +155,7 @@ SECTIONS INIT_RAM_FS } - PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE) + PERCPU_SECTION(XCHAL_ICACHE_LINESIZE) /* We need this dummy segment here */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..f301cea5ca2d 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -15,7 +15,7 @@ * HEAD_TEXT_SECTION * INIT_TEXT_SECTION(PAGE_SIZE) * INIT_DATA_SECTION(...) - * PERCPU(CACHELINE_SIZE, PAGE_SIZE) + * PERCPU_SECTION(CACHELINE_SIZE) * __init_end = .; * * _stext = .; @@ -709,7 +709,7 @@ * * Note that this macros defines __per_cpu_load as an absolute symbol. * If there is no need to put the percpu section at a predetermined - * address, use PERCPU(). + * address, use PERCPU_SECTION. */ #define PERCPU_VADDR(cacheline, vaddr, phdr) \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ @@ -729,20 +729,19 @@ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); /** - * PERCPU - define output section for percpu area, simple version + * PERCPU_SECTION - define output section for percpu area, simple version * @cacheline: cacheline size - * @align: required alignment * - * Align to @align and outputs output section for percpu area. This macro - * doesn't manipulate @vaddr or @phdr and __per_cpu_load and + * Align to PAGE_SIZE and outputs output section for percpu area. This + * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and * __per_cpu_start will be identical. * - * This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,) + * This macro is equivalent to ALIGN(PAGE_SIZE); PERCPU_VADDR(@cacheline,,) * except that __per_cpu_load is defined as a relative symbol against * .data..percpu which is required for relocatable x86_32 configuration. 
*/ -#define PERCPU(cacheline, align) \ - . = ALIGN(align); \ +#define PERCPU_SECTION(cacheline) \ + . = ALIGN(PAGE_SIZE); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ VMLINUX_SYMBOL(__per_cpu_start) = .; \ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 04ef830690ec..d30a502e8c6d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2860,9 +2860,7 @@ static int alloc_cwqs(struct workqueue_struct *wq) } } - /* just in case, make sure it's actually aligned - * - this is affected by PERCPU() alignment in vmlinux.lds.S - */ + /* just in case, make sure it's actually aligned */ BUG_ON(!IS_ALIGNED(wq->cpu_wq.v, align)); return wq->cpu_wq.v ? 0 : -ENOMEM; } diff --git a/mm/percpu.c b/mm/percpu.c index 3f930018aa60..c5feb79f5995 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1216,8 +1216,10 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, PCPU_SETUP_BUG_ON(ai->nr_groups <= 0); #ifdef CONFIG_SMP PCPU_SETUP_BUG_ON(!ai->static_size); + PCPU_SETUP_BUG_ON((unsigned long)__per_cpu_start & ~PAGE_MASK); #endif PCPU_SETUP_BUG_ON(!base_addr); + PCPU_SETUP_BUG_ON((unsigned long)base_addr & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < size_sum); PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK); PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE); From 9f63b88bd7a1ac1afbb4358772a39abaeddbdd13 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:34 +0800 Subject: [PATCH 0006/3380] ACPI: osl, add acpi_os_create_lock interface Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/osl.c | 33 +++++++++++++++++++++++++-------- include/acpi/acpiosxf.h | 3 +++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c90c76aa7f8b..cf750a7a9523 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -883,14 +883,6 @@ void acpi_os_wait_events_complete(void *context) EXPORT_SYMBOL(acpi_os_wait_events_complete); -/* - * 
Deallocate the memory for a spinlock. - */ -void acpi_os_delete_lock(acpi_spinlock handle) -{ - return; -} - acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { @@ -1321,6 +1313,31 @@ int acpi_resources_are_enforced(void) } EXPORT_SYMBOL(acpi_resources_are_enforced); +/* + * Create and initialize a spinlock. + */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle) +{ + spinlock_t *lock; + + lock = ACPI_ALLOCATE(sizeof(spinlock_t)); + if (!lock) + return AE_NO_MEMORY; + spin_lock_init(lock); + *out_handle = lock; + + return AE_OK; +} + +/* + * Deallocate the memory for a spinlock. + */ +void acpi_os_delete_lock(acpi_spinlock handle) +{ + ACPI_FREE(handle); +} + /* * Acquire a spinlock. * diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a3252a5ead66..a756bc8d866d 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -98,6 +98,9 @@ acpi_os_table_override(struct acpi_table_header *existing_table, /* * Spinlock primitives */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle); + void acpi_os_delete_lock(acpi_spinlock handle); acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle); From 3854c8e32f46ffa6ee0bf2eb01137f5a48b2754f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:35 +0800 Subject: [PATCH 0007/3380] ACPICA: Use acpi_os_create_lock interface Replace spin_lock_init with acpi_os_create_lock. 
Signed-off-by: Lin Ming Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 9 +++------ drivers/acpi/acpica/utmutex.c | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index d69750b83b36..6d512fcabdbe 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -226,12 +226,9 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; * Spinlocks are used for interfaces that can be possibly called at * interrupt level */ -ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */ -#define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock -#define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock -#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock +ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ +ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index a946c689f03b..519d4ee9b45a 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -83,9 +83,20 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ - spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); - spin_lock_init(acpi_ev_global_lock_pending_lock); + status = acpi_os_create_lock (&acpi_gbl_gpe_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); 
+ } + + status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); From 749c27639b95c5c4a8185e02a4efb189034944ed Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 23 Mar 2011 17:26:36 +0800 Subject: [PATCH 0008/3380] ACPICA: Fix code divergence of global lock handling Commit 9cd0314(ACPI / ACPICA: Fix global lock acquisition) was backported into ACPICA code base, and some divergence was introduced. This patch fixed it, - rename acpi_ev_global_lock_pending/acpi_ev_global_lock_pending_lock to acpi_gbl_global_lock_pending/acpi_gbl_global_lock_pending_lock. - move the initialization of acpi_gbl_global_lock_pending_lock from acpi_ut_mutex_initialize to acpi_ev_init_global_lock_handler. Signed-off-by: Lin Ming Reviewed-by: Rafael J. Wysocki Signed-off-by: Len Brown --- drivers/acpi/acpica/acglobal.h | 6 ++- drivers/acpi/acpica/evmisc.c | 74 ++++++++++++++++++---------------- drivers/acpi/acpica/utmutex.c | 5 --- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 6d512fcabdbe..73863d86f022 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -214,13 +214,16 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[ACPI_NUM_MUTEX]; /* * Global lock mutex is an actual AML mutex object - * Global lock semaphore works in conjunction with the HW global lock + * Global lock semaphore works in conjunction with the actual global lock + * Global lock spinlock is used for "pending" handshake */ ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex; ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore; +ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock; ACPI_EXTERN u16 acpi_gbl_global_lock_handle; ACPI_EXTERN u8 acpi_gbl_global_lock_acquired; ACPI_EXTERN u8 acpi_gbl_global_lock_present; 
+ACPI_EXTERN u8 acpi_gbl_global_lock_pending; /* * Spinlocks are used for interfaces that can be possibly called at @@ -228,7 +231,6 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_present; */ ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN acpi_spinlock acpi_ev_global_lock_pending_lock; /* For global lock */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 7dc80946f7bd..69a3b4aa862b 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -284,39 +284,41 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) * RETURN: ACPI_INTERRUPT_HANDLED * * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there's a thread waiting for - * the global lock, signal it. - * - * NOTE: Assumes that the semaphore can be signaled from interrupt level. If - * this is not possible for some reason, a separate thread will have to be - * scheduled to do this. + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. * ******************************************************************************/ -static u8 acpi_ev_global_lock_pending; static u32 acpi_ev_global_lock_handler(void *context) { acpi_status status; acpi_cpu_flags flags; - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); - if (!acpi_ev_global_lock_pending) { - goto out; + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. 
+ */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; } - /* Send a unit to the semaphore */ - + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. + */ status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); if (ACPI_FAILURE(status)) { ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); } - acpi_ev_global_lock_pending = FALSE; + acpi_gbl_global_lock_pending = FALSE; - out: - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); +cleanup_and_exit: + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return (ACPI_INTERRUPT_HANDLED); } @@ -350,14 +352,20 @@ acpi_status acpi_ev_init_global_lock_handler(void) * Map to AE_OK, but mark global lock as not present. Any attempt to * actually use the global lock will be flagged with an error. */ + acpi_gbl_global_lock_present = FALSE; if (status == AE_NO_HARDWARE_RESPONSE) { ACPI_ERROR((AE_INFO, "No response from Global Lock hardware, disabling lock")); - acpi_gbl_global_lock_present = FALSE; return_ACPI_STATUS(AE_OK); } + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; acpi_gbl_global_lock_present = TRUE; return_ACPI_STATUS(status); } @@ -414,7 +422,7 @@ static int acpi_ev_global_lock_acquired; acpi_status acpi_ev_acquire_global_lock(u16 timeout) { acpi_cpu_flags flags; - acpi_status status = AE_OK; + acpi_status status; u8 acquired = FALSE; ACPI_FUNCTION_TRACE(ev_acquire_global_lock); @@ -458,15 +466,15 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) } /* - * Make sure that a global lock actually exists. If not, just treat the - * lock as a standard mutex. + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. 
*/ if (!acpi_gbl_global_lock_present) { acpi_gbl_global_lock_acquired = TRUE; return_ACPI_STATUS(AE_OK); } - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); do { @@ -475,20 +483,19 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); if (acquired) { acpi_gbl_global_lock_acquired = TRUE; - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Acquired hardware Global Lock\n")); break; } - acpi_ev_global_lock_pending = TRUE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - /* - * Did not get the lock. The pending bit was set above, and we - * must wait until we get the global lock released interrupt. + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Waiting for hardware Global Lock\n")); @@ -496,17 +503,16 @@ acpi_status acpi_ev_acquire_global_lock(u16 timeout) * Wait for handshake with the global lock interrupt handler. * This interface releases the interpreter if we must wait. 
*/ - status = acpi_ex_system_wait_semaphore( - acpi_gbl_global_lock_semaphore, - ACPI_WAIT_FOREVER); + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); } while (ACPI_SUCCESS(status)); - acpi_ev_global_lock_pending = FALSE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); return_ACPI_STATUS(status); } diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index 519d4ee9b45a..7d797e2baecd 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -93,11 +93,6 @@ acpi_status acpi_ut_mutex_initialize(void) return_ACPI_STATUS (status); } - status = acpi_os_create_lock (&acpi_ev_global_lock_pending_lock); - if (ACPI_FAILURE (status)) { - return_ACPI_STATUS (status); - } - /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); if (ACPI_FAILURE(status)) { From 4bcad6c1ef53a9a0224f4654ceb3b9030d0769ec Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Thu, 24 Mar 2011 13:56:47 +0000 Subject: [PATCH 0009/3380] dlm: Remove superfluous call to recalc_sigpending() recalc_sigpending() is called within sigprocmask(), so there is no need to call it again after sigprocmask() has returned.
Signed-off-by: Matt Fleming Signed-off-by: David Teigland --- fs/dlm/user.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/dlm/user.c b/fs/dlm/user.c index d5ab3fe7c198..e96bf3e9be88 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf, out_sig: sigprocmask(SIG_SETMASK, &tmpsig, NULL); - recalc_sigpending(); out_free: kfree(kbuf); return error; From 787e5b06a80e7fc9dc02d9b53a9d8d2ac63b7ace Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 23 Mar 2011 08:23:52 +0100 Subject: [PATCH 0010/3380] percpu: Cast away printk format warning On 32-bit systems which don't happen to implicitly define or cast VMALLOC_START and/or VMALLOC_END to long in their arch headers, the printk in the percpu code will cause a warning to be emitted: mm/percpu.c: In function 'pcpu_embed_first_chunk': mm/percpu.c:1648: warning: format '%lx' expects type 'long unsigned int', but argument 3 has type 'unsigned int' So add an explicit cast to unsigned long here. 
Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo --- mm/percpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 3f930018aa60..8a11cd2e976d 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1646,8 +1646,8 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, /* warn if maximum distance is further than 75% of vmalloc space */ if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) { pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc " - "space 0x%lx\n", - max_distance, VMALLOC_END - VMALLOC_START); + "space 0x%lx\n", max_distance, + (unsigned long)(VMALLOC_END - VMALLOC_START)); #ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK /* and fail if we have fallback */ rc = -EINVAL; From 5f55924deaa62d6df687c131fb92aebe071ec787 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 28 Mar 2011 18:06:58 +0200 Subject: [PATCH 0011/3380] percpu: Avoid extra NOP in percpu_cmpxchg16b_double percpu_cmpxchg16b_double() uses alternative_io() and looks like : e8 .. .. .. .. call this_cpu_cmpxchg16b_emu X bytes NOPX or, once patched (if cpu supports native instruction) on SMP build : 65 48 0f c7 0e cmpxchg16b %gs:(%rsi) 0f 94 c0 sete %al on !SMP build : 48 0f c7 0e cmpxchg16b (%rsi) 0f 94 c0 sete %al Therefore, NOPX should be : P6_NOP3 on SMP P6_NOP2 on !SMP Signed-off-by: Eric Dumazet Acked-by: Christoph Lameter Cc: Ingo Molnar Cc: Pekka Enberg Signed-off-by: Tejun Heo --- arch/x86/include/asm/percpu.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index d475b4398d8b..d68fca61ad91 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -509,6 +509,11 @@ do { \ * it in software. The address used in the cmpxchg16 instruction must be * aligned to a 16 byte boundary. 
*/ +#ifdef CONFIG_SMP +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP3 +#else +#define CMPXCHG16B_EMU_CALL "call this_cpu_cmpxchg16b_emu\n\t" P6_NOP2 +#endif #define percpu_cmpxchg16b_double(pcp1, o1, o2, n1, n2) \ ({ \ char __ret; \ @@ -517,7 +522,7 @@ do { \ typeof(o2) __o2 = o2; \ typeof(o2) __n2 = n2; \ typeof(o2) __dummy; \ - alternative_io("call this_cpu_cmpxchg16b_emu\n\t" P6_NOP4, \ + alternative_io(CMPXCHG16B_EMU_CALL, \ "cmpxchg16b " __percpu_prefix "(%%rsi)\n\tsetz %0\n\t", \ X86_FEATURE_CX16, \ ASM_OUTPUT2("=a"(__ret), "=d"(__dummy)), \ From 8023976cf4627d9f1d82ad468ec40e32eb87d211 Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:51:56 +0800 Subject: [PATCH 0012/3380] SELinux: Add class support to the role_trans structure If kernel policy version is >= 26, then the binary representation of the role_trans structure supports specifying the class for the current subject or the newly created object. If kernel policy version is < 26, then the class field would be default to the process class. 
Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris --- security/selinux/include/security.h | 3 ++- security/selinux/ss/policydb.c | 14 ++++++++++++++ security/selinux/ss/policydb.h | 3 ++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 348eb00cb668..bfc5218d5840 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -30,13 +30,14 @@ #define POLICYDB_VERSION_PERMISSIVE 23 #define POLICYDB_VERSION_BOUNDARY 24 #define POLICYDB_VERSION_FILENAME_TRANS 25 +#define POLICYDB_VERSION_ROLETRANS 26 /* Range of policy versions we understand*/ #define POLICYDB_VERSION_MIN POLICYDB_VERSION_BASE #ifdef CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX #define POLICYDB_VERSION_MAX CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX_VALUE #else -#define POLICYDB_VERSION_MAX POLICYDB_VERSION_FILENAME_TRANS +#define POLICYDB_VERSION_MAX POLICYDB_VERSION_ROLETRANS #endif /* Mask for just the mount related flags */ diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850ad57ee..fd62c50d6e7d 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -128,6 +128,11 @@ static struct policydb_compat_info policydb_compat[] = { .sym_num = SYM_NUM, .ocon_num = OCON_NUM, }, + { + .version = POLICYDB_VERSION_ROLETRANS, + .sym_num = SYM_NUM, + .ocon_num = OCON_NUM, + }, }; static struct policydb_compat_info *policydb_lookup_compat(int version) @@ -2302,8 +2307,17 @@ int policydb_read(struct policydb *p, void *fp) tr->role = le32_to_cpu(buf[0]); tr->type = le32_to_cpu(buf[1]); tr->new_role = le32_to_cpu(buf[2]); + if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) { + rc = next_entry(buf, fp, sizeof(u32)); + if (rc) + goto bad; + tr->tclass = le32_to_cpu(buf[0]); + } else + tr->tclass = p->process_class; + if (!policydb_role_isvalid(p, tr->role) || !policydb_type_isvalid(p, tr->type) || + 
!policydb_class_isvalid(p, tr->tclass) || !policydb_role_isvalid(p, tr->new_role)) goto bad; ltr = tr; diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index 732ea4a68682..801175f79cf9 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -72,7 +72,8 @@ struct role_datum { struct role_trans { u32 role; /* current role */ - u32 type; /* program executable type */ + u32 type; /* program executable type, or new object type */ + u32 tclass; /* process class, or new object class */ u32 new_role; /* new role */ struct role_trans *next; }; From 63a312ca55d09a3f6526919df495fff1073c88f4 Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:51:58 +0800 Subject: [PATCH 0013/3380] SELinux: Compute role in newcontext for all classes Apply role_transition rules for all kinds of classes. Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris --- security/selinux/ss/services.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 3e7544d2a07b..03f7a4748ee8 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1484,17 +1484,15 @@ static int security_compute_sid(u32 ssid, tcontext->type, tclass, qstr); /* Check for class-specific changes. */ - if (tclass == policydb.process_class) { - if (specified & AVTAB_TRANSITION) { - /* Look for a role transition rule. */ - for (roletr = policydb.role_tr; roletr; - roletr = roletr->next) { - if (roletr->role == scontext->role && - roletr->type == tcontext->type) { - /* Use the role transition rule. */ - newcontext.role = roletr->new_role; - break; - } + if (specified & AVTAB_TRANSITION) { + /* Look for a role transition rule. 
*/ + for (roletr = policydb.role_tr; roletr; roletr = roletr->next) { + if ((roletr->role == scontext->role) && + (roletr->type == tcontext->type) && + (roletr->tclass == tclass)) { + /* Use the role transition rule. */ + newcontext.role = roletr->new_role; + break; } } } From c900ff323d761753a56d8d6a67b034ceee277b6e Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Fri, 25 Mar 2011 13:52:00 +0800 Subject: [PATCH 0014/3380] SELinux: Write class field in role_trans_write. If kernel policy version is >= 26, then write the class field of the role_trans structure into the binary representation. Signed-off-by: Harry Ciao Acked-by: Stephen Smalley Signed-off-by: Eric Paris --- security/selinux/ss/policydb.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index fd62c50d6e7d..a493eae24e0a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2535,8 +2535,9 @@ static int cat_write(void *vkey, void *datum, void *ptr) return 0; } -static int role_trans_write(struct role_trans *r, void *fp) +static int role_trans_write(struct policydb *p, void *fp) { + struct role_trans *r = p->role_tr; struct role_trans *tr; u32 buf[3]; size_t nel; @@ -2556,6 +2557,12 @@ static int role_trans_write(struct role_trans *r, void *fp) rc = put_entry(buf, sizeof(u32), 3, fp); if (rc) return rc; + if (p->policyvers >= POLICYDB_VERSION_ROLETRANS) { + buf[0] = cpu_to_le32(tr->tclass); + rc = put_entry(buf, sizeof(u32), 1, fp); + if (rc) + return rc; + } } return 0; @@ -3267,7 +3274,7 @@ int policydb_write(struct policydb *p, void *fp) if (rc) return rc; - rc = role_trans_write(p->role_tr, fp); + rc = role_trans_write(p, fp); if (rc) return rc; From efb3bb4fad062f8e9b8c9c945d499597e14007e7 Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 30 Mar 2011 08:50:41 +1100 Subject: [PATCH 0015/3380] Merge branch 'master'; commit 'v2.6.39-rc1' into next From
cfc64fd91fabed099a4c3df58559f4b7efe9bcce Mon Sep 17 00:00:00 2001 From: Xiaochen Wang Date: Thu, 31 Mar 2011 00:27:32 +0900 Subject: [PATCH 0016/3380] tomoyo: fix memory leak in tomoyo_commit_ok() When memory used for policy exceeds the quota, tomoyo_memory_ok() return false. In this case, tomoyo_commit_ok() must call kfree() before returning NULL. This bug exists since 2.6.35. Signed-off-by: Xiaochen Wang Acked-by: Tetsuo Handa Signed-off-by: James Morris --- security/tomoyo/memory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/security/tomoyo/memory.c b/security/tomoyo/memory.c index 297612669c74..42a7b1ba8cbf 100644 --- a/security/tomoyo/memory.c +++ b/security/tomoyo/memory.c @@ -75,6 +75,7 @@ void *tomoyo_commit_ok(void *data, const unsigned int size) memset(data, 0, size); return ptr; } + kfree(ptr); return NULL; } From de6efe0a966cf86b3c4039a610b2d4157db707f2 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Wed, 30 Mar 2011 23:09:52 +0800 Subject: [PATCH 0017/3380] spi/dw_spi: unify the low level read/write routines The original version has many duplicated codes for null/u8/u16 case, so unify them to make it cleaner Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 105 +++++++++++-------------------------------- drivers/spi/dw_spi.h | 2 - 2 files changed, 27 insertions(+), 80 deletions(-) diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 9a6196461b27..c4fca3d9d45f 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -58,8 +58,6 @@ struct chip_data { u8 bits_per_word; u16 clk_div; /* baud rate divider */ u32 speed_hz; /* baud rate */ - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); void (*cs_control)(u32 command); }; @@ -185,80 +183,45 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static int null_writer(struct dw_spi *dws) + +static int dw_writer(struct dw_spi *dws) { - u8 n_bytes = dws->n_bytes; + u16 txw = 0; if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) || 
(dws->tx == dws->tx_end)) return 0; - dw_writew(dws, dr, 0); - dws->tx += n_bytes; - wait_till_not_busy(dws); - return 1; -} - -static int null_reader(struct dw_spi *dws) -{ - u8 n_bytes = dws->n_bytes; - - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - dw_readw(dws, dr); - dws->rx += n_bytes; - } - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; -} - -static int u8_writer(struct dw_spi *dws) -{ - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - - dw_writew(dws, dr, *(u8 *)(dws->tx)); - ++dws->tx; - - wait_till_not_busy(dws); - return 1; -} - -static int u8_reader(struct dw_spi *dws) -{ - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { - *(u8 *)(dws->rx) = dw_readw(dws, dr); - ++dws->rx; + /* Set the tx word if the transfer's original "tx" is not null */ + if (dws->tx_end - dws->len) { + if (dws->n_bytes == 1) + txw = *(u8 *)(dws->tx); + else + txw = *(u16 *)(dws->tx); } - wait_till_not_busy(dws); - return dws->rx == dws->rx_end; -} - -static int u16_writer(struct dw_spi *dws) -{ - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - - dw_writew(dws, dr, *(u16 *)(dws->tx)); - dws->tx += 2; + dw_writew(dws, dr, txw); + dws->tx += dws->n_bytes; wait_till_not_busy(dws); return 1; } -static int u16_reader(struct dw_spi *dws) +static int dw_reader(struct dw_spi *dws) { - u16 temp; + u16 rxw; while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) && (dws->rx < dws->rx_end)) { - temp = dw_readw(dws, dr); - *(u16 *)(dws->rx) = temp; - dws->rx += 2; + rxw = dw_readw(dws, dr); + /* Care rx only if the transfer's original "rx" is not null */ + if (dws->rx_end - dws->len) { + if (dws->n_bytes == 1) + *(u8 *)(dws->rx) = rxw; + else + *(u16 *)(dws->rx) = rxw; + } + dws->rx += dws->n_bytes; } wait_till_not_busy(dws); @@ -383,8 +346,8 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) left = (left > int_level) ? 
int_level : left; while (left--) - dws->write(dws); - dws->read(dws); + dw_writer(dws); + dw_reader(dws); /* Re-enable the IRQ if there is still data left to tx */ if (dws->tx_end > dws->tx) @@ -417,13 +380,13 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id) /* Must be called inside pump_transfers() */ static void poll_transfer(struct dw_spi *dws) { - while (dws->write(dws)) - dws->read(dws); + while (dw_writer(dws)) + dw_reader(dws); /* * There is a possibility that the last word of a transaction * will be lost if data is not ready. Re-read to solve this issue. */ - dws->read(dws); + dw_reader(dws); dw_spi_xfer_done(dws); } @@ -483,8 +446,6 @@ static void pump_transfers(unsigned long data) dws->tx_end = dws->tx + transfer->len; dws->rx = transfer->rx_buf; dws->rx_end = dws->rx + transfer->len; - dws->write = dws->tx ? chip->write : null_writer; - dws->read = dws->rx ? chip->read : null_reader; dws->cs_change = transfer->cs_change; dws->len = dws->cur_transfer->len; if (chip != dws->prev_chip) @@ -520,18 +481,10 @@ static void pump_transfers(unsigned long data) case 8: dws->n_bytes = 1; dws->dma_width = 1; - dws->read = (dws->read != null_reader) ? - u8_reader : null_reader; - dws->write = (dws->write != null_writer) ? - u8_writer : null_writer; break; case 16: dws->n_bytes = 2; dws->dma_width = 2; - dws->read = (dws->read != null_reader) ? - u16_reader : null_reader; - dws->write = (dws->write != null_writer) ? 
- u16_writer : null_writer; break; default: printk(KERN_ERR "MRST SPI0: unsupported bits:" @@ -733,13 +686,9 @@ static int dw_spi_setup(struct spi_device *spi) if (spi->bits_per_word <= 8) { chip->n_bytes = 1; chip->dma_width = 1; - chip->read = u8_reader; - chip->write = u8_writer; } else if (spi->bits_per_word <= 16) { chip->n_bytes = 2; chip->dma_width = 2; - chip->read = u16_reader; - chip->write = u16_writer; } else { /* Never take >16b case for MRST SPIC */ dev_err(&spi->dev, "invalid wordsize\n"); diff --git a/drivers/spi/dw_spi.h b/drivers/spi/dw_spi.h index fb0bce564844..d8aac1f4c546 100644 --- a/drivers/spi/dw_spi.h +++ b/drivers/spi/dw_spi.h @@ -137,8 +137,6 @@ struct dw_spi { u8 max_bits_per_word; /* maxim is 16b */ u32 dma_width; int cs_change; - int (*write)(struct dw_spi *dws); - int (*read)(struct dw_spi *dws); irqreturn_t (*transfer_handler)(struct dw_spi *dws); void (*cs_control)(u32 command); From 8a33a373e5ffb6040c58ff41ea48ba21d5f8b5e9 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:53 +0800 Subject: [PATCH 0018/3380] spi/dw_spi: remove the un-necessary flush() The flush() is used to drain all the left data in rx fifo, currently it is always called together with disabling hw. But from spec, disabling hw will also reset all the fifo, so flush() is not needed.
Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index c4fca3d9d45f..d3aaf8db86cc 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -173,17 +173,6 @@ static void wait_till_not_busy(struct dw_spi *dws) "DW SPI: Status keeps busy for 5000us after a read/write!\n"); } -static void flush(struct dw_spi *dws) -{ - while (dw_readw(dws, sr) & SR_RF_NOT_EMPT) { - dw_readw(dws, dr); - cpu_relax(); - } - - wait_till_not_busy(dws); -} - - static int dw_writer(struct dw_spi *dws) { u16 txw = 0; @@ -297,8 +286,7 @@ static void giveback(struct dw_spi *dws) static void int_error_stop(struct dw_spi *dws, const char *msg) { - /* Stop and reset hw */ - flush(dws); + /* Stop the hw */ spi_enable_chip(dws, 0); dev_err(&dws->master->dev, "%s\n", msg); @@ -800,7 +788,6 @@ static void spi_hw_init(struct dw_spi *dws) spi_enable_chip(dws, 0); spi_mask_intr(dws, 0xff); spi_enable_chip(dws, 1); - flush(dws); /* * Try to detect the FIFO depth if not set by interface driver, From 2ff271bf6505038d8c937e73438ea3c80c387439 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:54 +0800 Subject: [PATCH 0019/3380] spi/dw_spi: change poll mode transfer from byte ops to batch ops Current poll transfer will read/write one word, then wait till the hw is non-busy, it's not efficient. This patch will try to read/write as many words as permitted by hardware FIFO depth. 
Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 71 ++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index d3aaf8db86cc..7a2a72268f0a 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -160,6 +160,37 @@ static inline void mrst_spi_debugfs_remove(struct dw_spi *dws) } #endif /* CONFIG_DEBUG_FS */ +/* Return the max entries we can fill into tx fifo */ +static inline u32 tx_max(struct dw_spi *dws) +{ + u32 tx_left, tx_room, rxtx_gap; + + tx_left = (dws->tx_end - dws->tx) / dws->n_bytes; + tx_room = dws->fifo_len - dw_readw(dws, txflr); + + /* + * Another concern is about the tx/rx mismatch, we + * though to use (dws->fifo_len - rxflr - txflr) as + * one maximum value for tx, but it doesn't cover the + * data which is out of tx/rx fifo and inside the + * shift registers. So a control from sw point of + * view is taken. + */ + rxtx_gap = ((dws->rx_end - dws->rx) - (dws->tx_end - dws->tx)) + / dws->n_bytes; + + return min3(tx_left, tx_room, (u32) (dws->fifo_len - rxtx_gap)); +} + +/* Return the max entries we should read out of rx fifo */ +static inline u32 rx_max(struct dw_spi *dws) +{ + u32 rx_left = (dws->rx_end - dws->rx) / dws->n_bytes; + + return min(rx_left, (u32)dw_readw(dws, rxflr)); +} + + static void wait_till_not_busy(struct dw_spi *dws) { unsigned long end = jiffies + 1 + usecs_to_jiffies(5000); @@ -175,33 +206,30 @@ static void wait_till_not_busy(struct dw_spi *dws) static int dw_writer(struct dw_spi *dws) { + u32 max = tx_max(dws); u16 txw = 0; - if (!(dw_readw(dws, sr) & SR_TF_NOT_FULL) - || (dws->tx == dws->tx_end)) - return 0; - - /* Set the tx word if the transfer's original "tx" is not null */ - if (dws->tx_end - dws->len) { - if (dws->n_bytes == 1) - txw = *(u8 *)(dws->tx); - else - txw = *(u16 *)(dws->tx); + while (max--) { + /* Set the tx word if the transfer's original "tx" is not 
null */ + if (dws->tx_end - dws->len) { + if (dws->n_bytes == 1) + txw = *(u8 *)(dws->tx); + else + txw = *(u16 *)(dws->tx); + } + dw_writew(dws, dr, txw); + dws->tx += dws->n_bytes; } - dw_writew(dws, dr, txw); - dws->tx += dws->n_bytes; - - wait_till_not_busy(dws); return 1; } static int dw_reader(struct dw_spi *dws) { + u32 max = rx_max(dws); u16 rxw; - while ((dw_readw(dws, sr) & SR_RF_NOT_EMPT) - && (dws->rx < dws->rx_end)) { + while (max--) { rxw = dw_readw(dws, dr); /* Care rx only if the transfer's original "rx" is not null */ if (dws->rx_end - dws->len) { @@ -213,7 +241,6 @@ static int dw_reader(struct dw_spi *dws) dws->rx += dws->n_bytes; } - wait_till_not_busy(dws); return dws->rx == dws->rx_end; } @@ -368,13 +395,11 @@ static irqreturn_t dw_spi_irq(int irq, void *dev_id) /* Must be called inside pump_transfers() */ static void poll_transfer(struct dw_spi *dws) { - while (dw_writer(dws)) + do { + dw_writer(dws); dw_reader(dws); - /* - * There is a possibility that the last word of a transaction - * will be lost if data is not ready. Re-read to solve this issue. 
- */ - dw_reader(dws); + cpu_relax(); + } while (dws->rx_end > dws->rx); dw_spi_xfer_done(dws); } From 3b8a4dd3ebfcc647260ad5c39ef4f73eb3a6b155 Mon Sep 17 00:00:00 2001 From: Alek Du Date: Wed, 30 Mar 2011 23:09:55 +0800 Subject: [PATCH 0020/3380] spi/dw_spi: improve the interrupt mode with the batch ops leverage the performance gain by change in low level read/write batch operations Signed-off-by: Alek Du Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 63 +++++++++++--------------------------------- 1 file changed, 16 insertions(+), 47 deletions(-) diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 7a2a72268f0a..855ac4ae0f22 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -190,21 +190,7 @@ static inline u32 rx_max(struct dw_spi *dws) return min(rx_left, (u32)dw_readw(dws, rxflr)); } - -static void wait_till_not_busy(struct dw_spi *dws) -{ - unsigned long end = jiffies + 1 + usecs_to_jiffies(5000); - - while (time_before(jiffies, end)) { - if (!(dw_readw(dws, sr) & SR_BUSY)) - return; - cpu_relax(); - } - dev_err(&dws->master->dev, - "DW SPI: Status keeps busy for 5000us after a read/write!\n"); -} - -static int dw_writer(struct dw_spi *dws) +static void dw_writer(struct dw_spi *dws) { u32 max = tx_max(dws); u16 txw = 0; @@ -220,11 +206,9 @@ static int dw_writer(struct dw_spi *dws) dw_writew(dws, dr, txw); dws->tx += dws->n_bytes; } - - return 1; } -static int dw_reader(struct dw_spi *dws) +static void dw_reader(struct dw_spi *dws) { u32 max = rx_max(dws); u16 rxw; @@ -240,8 +224,6 @@ static int dw_reader(struct dw_spi *dws) } dws->rx += dws->n_bytes; } - - return dws->rx == dws->rx_end; } static void *next_transfer(struct dw_spi *dws) @@ -340,35 +322,28 @@ EXPORT_SYMBOL_GPL(dw_spi_xfer_done); static irqreturn_t interrupt_transfer(struct dw_spi *dws) { - u16 irq_status, irq_mask = 0x3f; - u32 int_level = dws->fifo_len / 2; - u32 left; + u16 irq_status = dw_readw(dws, isr); - irq_status = dw_readw(dws, isr) 
& irq_mask; /* Error handling */ if (irq_status & (SPI_INT_TXOI | SPI_INT_RXOI | SPI_INT_RXUI)) { dw_readw(dws, txoicr); dw_readw(dws, rxoicr); dw_readw(dws, rxuicr); - int_error_stop(dws, "interrupt_transfer: fifo overrun"); + int_error_stop(dws, "interrupt_transfer: fifo overrun/underrun"); return IRQ_HANDLED; } + dw_reader(dws); + if (dws->rx_end == dws->rx) { + spi_mask_intr(dws, SPI_INT_TXEI); + dw_spi_xfer_done(dws); + return IRQ_HANDLED; + } if (irq_status & SPI_INT_TXEI) { spi_mask_intr(dws, SPI_INT_TXEI); - - left = (dws->tx_end - dws->tx) / dws->n_bytes; - left = (left > int_level) ? int_level : left; - - while (left--) - dw_writer(dws); - dw_reader(dws); - - /* Re-enable the IRQ if there is still data left to tx */ - if (dws->tx_end > dws->tx) - spi_umask_intr(dws, SPI_INT_TXEI); - else - dw_spi_xfer_done(dws); + dw_writer(dws); + /* Enable TX irq always, it will be disabled when RX finished */ + spi_umask_intr(dws, SPI_INT_TXEI); } return IRQ_HANDLED; @@ -377,15 +352,13 @@ static irqreturn_t interrupt_transfer(struct dw_spi *dws) static irqreturn_t dw_spi_irq(int irq, void *dev_id) { struct dw_spi *dws = dev_id; - u16 irq_status, irq_mask = 0x3f; + u16 irq_status = dw_readw(dws, isr) & 0x3f; - irq_status = dw_readw(dws, isr) & irq_mask; if (!irq_status) return IRQ_NONE; if (!dws->cur_msg) { spi_mask_intr(dws, SPI_INT_TXEI); - /* Never fail */ return IRQ_HANDLED; } @@ -492,12 +465,8 @@ static void pump_transfers(unsigned long data) switch (bits) { case 8: - dws->n_bytes = 1; - dws->dma_width = 1; - break; case 16: - dws->n_bytes = 2; - dws->dma_width = 2; + dws->n_bytes = dws->dma_width = bits >> 3; break; default: printk(KERN_ERR "MRST SPI0: unsupported bits:" @@ -541,7 +510,7 @@ static void pump_transfers(unsigned long data) txint_level = dws->fifo_len / 2; txint_level = (templen > txint_level) ? 
txint_level : templen; - imask |= SPI_INT_TXEI; + imask |= SPI_INT_TXEI | SPI_INT_TXOI | SPI_INT_RXUI | SPI_INT_RXOI; dws->transfer_handler = interrupt_transfer; } From 84adccfb8cd2a6b8237da6752668ba25cd90c20b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 24 Mar 2011 11:32:15 +0530 Subject: [PATCH 0021/3380] dmaengine/dw_dmac fix: dwc_scan_descriptors must compare first desc address also with llp dwc_scan_descriptors scans all descriptors from active_list in case transfer is not completed. It compares first_desc->lli.llp, and then all childrens of its tx_list. But it doesn't compare its own address, i.e. first_desc->txd.phys, as this is what we have initially programmed into the controller register. So this causes dma to stop and finish a transfer, which was never started. And thus fail. Signed-off-by: Viresh Kumar Signed-off-by: Vinod Koul --- drivers/dma/dw_dmac.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 9c25c7d099e4..b15c32ca0efa 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -304,6 +304,11 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + /* check first descriptors addr */ + if (desc->txd.phys == llp) + return; + + /* check first descriptors llp */ if (desc->lli.llp == llp) /* This one is currently in progress */ return; From e2ec771a99a5cf231c9dea4da26238bf073e1e9c Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:52 +0800 Subject: [PATCH 0022/3380] dma: use BUG_ON correctly in iop-adma.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/iop-adma.c. 
Cc: Dan Williams Cc: Vinod Koul Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/iop-adma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index c6b01f535b29..e03f811a83dd 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -619,7 +619,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -652,7 +652,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -686,7 +686,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u flags: %lx\n", From 7912d30007d0c958bcf11cd5ce19f77856cf041b Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:53 +0800 Subject: [PATCH 0023/3380] dma: use BUG_ON correctly in mv_xor.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/mv_xor.c Cc: Vinod Koul Cc: Dan Williams Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/mv_xor.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index a25f5f61e0e0..954e334e01bb 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -671,7 +671,7 @@ mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > 
MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memcpy_slot_count(len); @@ -710,7 +710,7 @@ mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memset_slot_count(len); @@ -744,7 +744,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan->device->common.dev, "%s src_cnt: %d len: dest %x %u flags: %ld\n", From 427cdf19b97e509e21e5d347e18d8b0b34723dfc Mon Sep 17 00:00:00 2001 From: Coly Li Date: Sun, 27 Mar 2011 01:26:54 +0800 Subject: [PATCH 0024/3380] dma: use BUG_ON correctly in ppc4xx/adma.c, v4 This patch makes BUG_ON() usage correct in drivers/dma/ppc4xx/adma.c Cc: Vinod Koul Cc: Dan Williams Cc: Grant Likely Cc: Anatolij Gustschin Cc: Sean MacLennan Cc: Joe Perches Signed-off-by: Coly Li Signed-off-by: Vinod Koul --- drivers/dma/ppc4xx/adma.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index cef584533ee8..a2b62e93a143 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -2313,7 +2313,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2354,7 +2354,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2397,7 +2397,7 @@ static
struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( dma_dest, dma_src, src_cnt)); if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(ppc440spe_chan->device->common.dev, "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", @@ -2887,7 +2887,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, dst, src, src_cnt)); BUG_ON(!len); - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); BUG_ON(!src_cnt); if (src_cnt == 1 && dst[1] == src[0]) { From 1cb7b1e0de6a1f8f071f4a146e3d10f3a662f707 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Thu, 31 Mar 2011 13:36:38 +0200 Subject: [PATCH 0025/3380] ACPI EC: remove dead code static void acpi_ec_gpe_query(void *ec_cxt); -> The function is right above this declaration -> not needed. poll_force is also not used, cleaned up in ec.c and its users: compal-laptop and msi-laptop. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- drivers/acpi/ec.c | 6 +----- drivers/platform/x86/compal-laptop.c | 12 ++++++------ drivers/platform/x86/msi-laptop.c | 12 ++++++------ include/linux/acpi.h | 3 +-- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a8..b3f1d6f52a89 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. 
to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index 034572b980c9..f4f43e65475f 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -200,7 +200,7 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); + * ec_transaction(0x4F, &writeData, 1, buffer, 32); * That address is labled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value @@ -286,7 +286,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? 
BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +294,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 7e9bb6df9d39..918a65dd2ab3 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -120,7 +120,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -129,7 +129,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -142,7 +142,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return 
result; @@ -165,7 +165,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -202,7 +202,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e01293..1deb2a73c2da 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) From e2142df7ec7184ed4a77ada686bc1eb41075490f Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Thu, 31 Mar 2011 11:02:43 -0700 Subject: [PATCH 0026/3380] intel_mid_dma: fix runtime pm issues Use the correct api in probe to enable runtime pm for this driver. Additionally, do not just call legacy suspend for runtime_suspend, as this duplicates some work the pci core does for you. 
Signed-off-by: Kristen Carlson Accardi Signed-off-by: Vinod Koul --- drivers/dma/intel_mid_dma.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 798f46a4590d..f153adfcaceb 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1292,8 +1292,7 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, if (err) goto err_dma; - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); return 0; @@ -1322,6 +1321,9 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) { struct middma_device *device = pci_get_drvdata(pdev); + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_forbid(&pdev->dev); middma_shutdown(pdev); pci_dev_put(pdev); kfree(device); @@ -1385,13 +1387,20 @@ int dma_resume(struct pci_dev *pci) static int dma_runtime_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_suspend(pci_dev, PMSG_SUSPEND); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = SUSPENDED; + return 0; } static int dma_runtime_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_resume(pci_dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; } static int dma_runtime_idle(struct device *dev) From c6ff669bac5c409f4cb74366248f51b73f7d6feb Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 28 Mar 2011 14:17:26 -0500 Subject: [PATCH 0027/3380] dlm: delayed reply message warning Add an option (disabled by default) to print a warning message when a lock has been waiting a configurable amount of time for a reply message from another node. This is mainly for debugging. 
Signed-off-by: David Teigland --- fs/dlm/config.c | 9 +++- fs/dlm/config.h | 1 + fs/dlm/dlm_internal.h | 2 + fs/dlm/lock.c | 100 +++++++++++++++++++++++++++++++++++++++--- fs/dlm/lock.h | 1 + fs/dlm/lockspace.c | 6 +-- 6 files changed, 108 insertions(+), 11 deletions(-) diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 0d329ff8ed4c..9b026ea8baa9 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -100,6 +100,7 @@ struct dlm_cluster { unsigned int cl_log_debug; unsigned int cl_protocol; unsigned int cl_timewarn_cs; + unsigned int cl_waitwarn_us; }; enum { @@ -114,6 +115,7 @@ enum { CLUSTER_ATTR_LOG_DEBUG, CLUSTER_ATTR_PROTOCOL, CLUSTER_ATTR_TIMEWARN_CS, + CLUSTER_ATTR_WAITWARN_US, }; struct cluster_attribute { @@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1); CLUSTER_ATTR(log_debug, 0); CLUSTER_ATTR(protocol, 0); CLUSTER_ATTR(timewarn_cs, 1); +CLUSTER_ATTR(waitwarn_us, 0); static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, @@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = { [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr, [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, + [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, NULL, }; @@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g, cl->cl_log_debug = dlm_config.ci_log_debug; cl->cl_protocol = dlm_config.ci_protocol; cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; + cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; space_list = &sps->ss_group; comm_list = &cms->cs_group; @@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num) #define DEFAULT_LOG_DEBUG 0 #define DEFAULT_PROTOCOL 0 #define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ +#define DEFAULT_WAITWARN_US 0 struct dlm_config_info dlm_config = { .ci_tcp_port = DEFAULT_TCP_PORT, @@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = { 
.ci_scan_secs = DEFAULT_SCAN_SECS, .ci_log_debug = DEFAULT_LOG_DEBUG, .ci_protocol = DEFAULT_PROTOCOL, - .ci_timewarn_cs = DEFAULT_TIMEWARN_CS + .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, + .ci_waitwarn_us = DEFAULT_WAITWARN_US }; diff --git a/fs/dlm/config.h b/fs/dlm/config.h index 4f1d6fce58c5..dd0ce24d5a80 100644 --- a/fs/dlm/config.h +++ b/fs/dlm/config.h @@ -28,6 +28,7 @@ struct dlm_config_info { int ci_log_debug; int ci_protocol; int ci_timewarn_cs; + int ci_waitwarn_us; }; extern struct dlm_config_info dlm_config; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index b94204913011..6a92478fe1f1 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -245,6 +245,7 @@ struct dlm_lkb { int8_t lkb_wait_type; /* type of reply waiting for */ int8_t lkb_wait_count; + int lkb_wait_nodeid; /* for debugging */ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */ struct list_head lkb_statequeue; /* rsb g/c/w list */ @@ -254,6 +255,7 @@ struct dlm_lkb { struct list_head lkb_ownqueue; /* list of locks for a process */ struct list_head lkb_time_list; ktime_t lkb_timestamp; + ktime_t lkb_wait_time; unsigned long lkb_timeout_cs; struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 04b8c449303f..e3c864120371 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -799,10 +799,84 @@ static int msg_reply_type(int mstype) return -1; } +static int nodeid_warned(int nodeid, int num_nodes, int *warned) +{ + int i; + + for (i = 0; i < num_nodes; i++) { + if (!warned[i]) { + warned[i] = nodeid; + return 0; + } + if (warned[i] == nodeid) + return 1; + } + return 0; +} + +void dlm_scan_waiters(struct dlm_ls *ls) +{ + struct dlm_lkb *lkb; + ktime_t zero = ktime_set(0, 0); + s64 us; + s64 debug_maxus = 0; + u32 debug_scanned = 0; + u32 debug_expired = 0; + int num_nodes = 0; + int *warned = NULL; + + if (!dlm_config.ci_waitwarn_us) + return; + + mutex_lock(&ls->ls_waiters_mutex); + + list_for_each_entry(lkb, 
&ls->ls_waiters, lkb_wait_reply) { + if (ktime_equal(lkb->lkb_wait_time, zero)) + continue; + + debug_scanned++; + + us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time)); + + if (us < dlm_config.ci_waitwarn_us) + continue; + + lkb->lkb_wait_time = zero; + + debug_expired++; + if (us > debug_maxus) + debug_maxus = us; + + if (!num_nodes) { + num_nodes = ls->ls_num_nodes; + warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); + if (warned) + memset(warned, 0, num_nodes * sizeof(int)); + } + if (!warned) + continue; + if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned)) + continue; + + log_error(ls, "waitwarn %x %lld %d us check connection to " + "node %d", lkb->lkb_id, (long long)us, + dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); + } + mutex_unlock(&ls->ls_waiters_mutex); + + if (warned) + kfree(warned); + + if (debug_expired) + log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", + debug_scanned, debug_expired, + dlm_config.ci_waitwarn_us, (long long)debug_maxus); +} + /* add/remove lkb from global waiters list of lkb's waiting for a reply from a remote node */ -static int add_to_waiters(struct dlm_lkb *lkb, int mstype) +static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid) { struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error = 0; @@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype) lkb->lkb_wait_count++; lkb->lkb_wait_type = mstype; + lkb->lkb_wait_time = ktime_get(); + lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */ hold_lkb(lkb); list_add(&lkb->lkb_wait_reply, &ls->ls_waiters); out: @@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls) list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us); mutex_unlock(&ls->ls_timeout_mutex); + + if (!dlm_config.ci_waitwarn_us) + return; + + mutex_lock(&ls->ls_waiters_mutex); + list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) { + if 
(ktime_to_us(lkb->lkb_wait_time)) + lkb->lkb_wait_time = ktime_get(); + } + mutex_unlock(&ls->ls_waiters_mutex); } /* lkb is master or local copy */ @@ -2844,12 +2930,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype) struct dlm_mhandle *mh; int to_nodeid, error; - error = add_to_waiters(lkb, mstype); + to_nodeid = r->res_nodeid; + + error = add_to_waiters(lkb, mstype, to_nodeid); if (error) return error; - to_nodeid = r->res_nodeid; - error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh); if (error) goto fail; @@ -2951,12 +3037,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb) struct dlm_mhandle *mh; int to_nodeid, error; - error = add_to_waiters(lkb, DLM_MSG_LOOKUP); + to_nodeid = dlm_dir_nodeid(r); + + error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid); if (error) return error; - to_nodeid = dlm_dir_nodeid(r); - error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh); if (error) goto fail; diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h index 88e93c80cc22..265017a7c3e7 100644 --- a/fs/dlm/lock.h +++ b/fs/dlm/lock.h @@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb); void dlm_scan_rsbs(struct dlm_ls *ls); int dlm_lock_recovery_try(struct dlm_ls *ls); void dlm_unlock_recovery(struct dlm_ls *ls); +void dlm_scan_waiters(struct dlm_ls *ls); void dlm_scan_timeout(struct dlm_ls *ls); void dlm_adjust_timeouts(struct dlm_ls *ls); diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index f994a7dfda85..14cbf4099753 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void) static int dlm_scand(void *data) { struct dlm_ls *ls; - int timeout_jiffies = dlm_config.ci_scan_secs * HZ; while (!kthread_should_stop()) { ls = find_ls_to_scan(); @@ -252,13 +251,14 @@ static int dlm_scand(void *data) ls->ls_scan_time = jiffies; dlm_scan_rsbs(ls); dlm_scan_timeout(ls); + dlm_scan_waiters(ls); dlm_unlock_recovery(ls); } else { ls->ls_scan_time += HZ; } - 
} else { - schedule_timeout_interruptible(timeout_jiffies); + continue; } + schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); } return 0; } From 6bde95ce33e1c2ac9b5cb3d814722105131090ec Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:09:41 -0400 Subject: [PATCH 0028/3380] SELinux: update git tree in MAINTAINERS update the git tree in MAINTAINERS Signed-off-by: Eric Paris --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 560ecce38ff5..3297b8f76640 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5542,10 +5542,11 @@ M: James Morris M: Eric Paris L: selinux@tycho.nsa.gov (subscribers-only, general discussion) W: http://selinuxproject.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/security-testing-2.6.git +T: git git://git.infradead.org/users/eparis/selinux.git S: Supported F: include/linux/selinux* F: security/selinux/ +F: scripts/selinux/ APPARMOR SECURITY MODULE M: John Johansen From f50a3ec961f90e38c0311411179d5dfee1412192 Mon Sep 17 00:00:00 2001 From: Kohei Kaigai Date: Fri, 1 Apr 2011 15:39:26 +0100 Subject: [PATCH 0029/3380] selinux: add type_transition with name extension support for selinuxfs The attached patch allows /selinux/create takes optional 4th argument to support TYPE_TRANSITION with name extension for userspace object managers. If 4th argument is not supplied, it shall perform as existing kernel. In fact, the regression test of SE-PostgreSQL works well on the patched kernel. 
Thanks, Signed-off-by: KaiGai Kohei [manually verify fuzz was not an issue, and it wasn't: eparis] Signed-off-by: Eric Paris --- security/selinux/include/security.h | 4 ++-- security/selinux/selinuxfs.c | 18 +++++++++++++++--- security/selinux/ss/services.c | 17 +++++++++-------- 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index bfc5218d5840..2cf670864147 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -112,8 +112,8 @@ void security_compute_av_user(u32 ssid, u32 tsid, int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid); -int security_transition_sid_user(u32 ssid, u32 tsid, - u16 tclass, u32 *out_sid); +int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, + const char *objname, u32 *out_sid); int security_member_sid(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index ea39cb742ae5..973f5a4a6fce 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -753,11 +753,13 @@ static ssize_t sel_write_access(struct file *file, char *buf, size_t size) static ssize_t sel_write_create(struct file *file, char *buf, size_t size) { char *scon = NULL, *tcon = NULL; + char *namebuf = NULL, *objname = NULL; u32 ssid, tsid, newsid; u16 tclass; ssize_t length; char *newcon = NULL; u32 len; + int nargs; length = task_has_security(current, SECURITY__COMPUTE_CREATE); if (length) @@ -773,10 +775,18 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (!tcon) goto out; - length = -EINVAL; - if (sscanf(buf, "%s %s %hu", scon, tcon, &tclass) != 3) + length = -ENOMEM; + namebuf = kzalloc(size + 1, GFP_KERNEL); + if (!namebuf) goto out; + length = -EINVAL; + nargs = sscanf(buf, "%s %s %hu %s", scon, tcon, &tclass, namebuf); + if (nargs < 3 || nargs > 4) + goto out; + if (nargs 
== 4) + objname = namebuf; + length = security_context_to_sid(scon, strlen(scon) + 1, &ssid); if (length) goto out; @@ -785,7 +795,8 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) if (length) goto out; - length = security_transition_sid_user(ssid, tsid, tclass, &newsid); + length = security_transition_sid_user(ssid, tsid, tclass, + objname, &newsid); if (length) goto out; @@ -804,6 +815,7 @@ static ssize_t sel_write_create(struct file *file, char *buf, size_t size) length = len; out: kfree(newcon); + kfree(namebuf); kfree(tcon); kfree(scon); return length; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 03f7a4748ee8..39d732145abe 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -1360,14 +1360,14 @@ static int compute_sid_handle_invalid_context( static void filename_compute_type(struct policydb *p, struct context *newcontext, u32 scon, u32 tcon, u16 tclass, - const struct qstr *qstr) + const char *objname) { struct filename_trans *ft; for (ft = p->filename_trans; ft; ft = ft->next) { if (ft->stype == scon && ft->ttype == tcon && ft->tclass == tclass && - !strcmp(ft->name, qstr->name)) { + !strcmp(ft->name, objname)) { newcontext->type = ft->otype; return; } @@ -1378,7 +1378,7 @@ static int security_compute_sid(u32 ssid, u32 tsid, u16 orig_tclass, u32 specified, - const struct qstr *qstr, + const char *objname, u32 *out_sid, bool kern) { @@ -1479,9 +1479,9 @@ static int security_compute_sid(u32 ssid, } /* if we have a qstr this is a file trans check so check those rules */ - if (qstr) + if (objname) filename_compute_type(&policydb, &newcontext, scontext->type, - tcontext->type, tclass, qstr); + tcontext->type, tclass, objname); /* Check for class-specific changes. 
*/ if (specified & AVTAB_TRANSITION) { @@ -1539,13 +1539,14 @@ int security_transition_sid(u32 ssid, u32 tsid, u16 tclass, const struct qstr *qstr, u32 *out_sid) { return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, - qstr, out_sid, true); + qstr ? qstr->name : NULL, out_sid, true); } -int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, u32 *out_sid) +int security_transition_sid_user(u32 ssid, u32 tsid, u16 tclass, + const char *objname, u32 *out_sid) { return security_compute_sid(ssid, tsid, tclass, AVTAB_TRANSITION, - NULL, out_sid, false); + objname, out_sid, false); } /** From 6ea0c34dac89611126455537552cffe6c7e832ad Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Mon, 4 Apr 2011 01:41:32 +0200 Subject: [PATCH 0030/3380] percpu: Unify input section names The two percpu helper macros have the section names duplicated. So create a new define to merge the two. This also allows arches who need to link things more directly themselves to avoid duplicating the input sections in their linker script. Signed-off-by: Mike Frysinger Signed-off-by: Tejun Heo --- include/asm-generic/vmlinux.lds.h | 44 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 32c45e5fe0ab..bf90fbc6688b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -687,6 +687,28 @@ *(.discard.*) \ } +/** + * PERCPU_INPUT - the percpu input sections + * @cacheline: cacheline size + * + * The core percpu section names and core symbols which do not rely + * directly upon load addresses. + * + * @cacheline is used to align subsections to avoid false cacheline + * sharing between subsections for different purposes. + */ +#define PERCPU_INPUT(cacheline) \ + VMLINUX_SYMBOL(__per_cpu_start) = .; \ + *(.data..percpu..first) \ + . = ALIGN(PAGE_SIZE); \ + *(.data..percpu..page_aligned) \ + . 
= ALIGN(cacheline); \ + *(.data..percpu..readmostly) \ + . = ALIGN(cacheline); \ + *(.data..percpu) \ + *(.data..percpu..shared_aligned) \ + VMLINUX_SYMBOL(__per_cpu_end) = .; + /** * PERCPU_VADDR - define output section for percpu area * @cacheline: cacheline size @@ -715,16 +737,7 @@ VMLINUX_SYMBOL(__per_cpu_load) = .; \ .data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ - LOAD_OFFSET) { \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ } phdr \ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data..percpu); @@ -745,16 +758,7 @@ . = ALIGN(align); \ .data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__per_cpu_load) = .; \ - VMLINUX_SYMBOL(__per_cpu_start) = .; \ - *(.data..percpu..first) \ - . = ALIGN(PAGE_SIZE); \ - *(.data..percpu..page_aligned) \ - . = ALIGN(cacheline); \ - *(.data..percpu..readmostly) \ - . = ALIGN(cacheline); \ - *(.data..percpu) \ - *(.data..percpu..shared_aligned) \ - VMLINUX_SYMBOL(__per_cpu_end) = .; \ + PERCPU_INPUT(cacheline) \ } From 17f60a7da150fdd0cfb9756f86a262daa72c835f Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:07:50 -0400 Subject: [PATCH 0031/3380] capabilities: allow the application of capability limits to usermode helpers There is no way to limit the capabilities of usermodehelpers. This problem reared its head recently when someone complained that any user with cap_net_admin was able to load arbitrary kernel modules, even though the user didn't have cap_sys_module. The reason is because the actual load is done by a usermode helper and those always have the full cap set. This patch adds new sysctls which allow us to bound the permissions of usermode helpers.
/proc/sys/kernel/usermodehelper/bset /proc/sys/kernel/usermodehelper/inheritable You must have CAP_SYS_MODULE and CAP_SETPCAP to change these (changes are &= ONLY). When the kernel launches a usermodehelper it will do so with these as the bset and pI. -v2: make globals static create spinlock to protect globals -v3: require both CAP_SETPCAP and CAP_SYS_MODULE -v4: fix the typo s/CAP_SET_PCAP/CAP_SETPCAP/ because I didn't commit Signed-off-by: Eric Paris No-objection-from: Serge E. Hallyn Acked-by: David Howells Acked-by: Serge E. Hallyn Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/kmod.h | 3 ++ kernel/kmod.c | 100 +++++++++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 6 +++ 3 files changed, 109 insertions(+) diff --git a/include/linux/kmod.h b/include/linux/kmod.h index 6efd7a78de6a..79bb98d71858 100644 --- a/include/linux/kmod.h +++ b/include/linux/kmod.h @@ -24,6 +24,7 @@ #include #include #include +#include #define KMOD_PATH_LEN 256 @@ -109,6 +110,8 @@ call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait) NULL, NULL, NULL); } +extern struct ctl_table usermodehelper_table[]; + extern void usermodehelper_init(void); extern int usermodehelper_disable(void); diff --git a/kernel/kmod.c b/kernel/kmod.c index 9cd0591c96a2..06fdea2819b6 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,13 @@ extern int max_threads; static struct workqueue_struct *khelper_wq; +#define CAP_BSET (void *)1 +#define CAP_PI (void *)2 + +static kernel_cap_t usermodehelper_bset = CAP_FULL_SET; +static kernel_cap_t usermodehelper_inheritable = CAP_FULL_SET; +static DEFINE_SPINLOCK(umh_sysctl_lock); + #ifdef CONFIG_MODULES /* @@ -132,6 +140,7 @@ EXPORT_SYMBOL(__request_module); static int ____call_usermodehelper(void *data) { struct subprocess_info *sub_info = data; + struct cred *new; int retval; spin_lock_irq(&current->sighand->siglock); @@ -153,6
+162,19 @@ static int ____call_usermodehelper(void *data) goto fail; } + retval = -ENOMEM; + new = prepare_kernel_cred(current); + if (!new) + goto fail; + + spin_lock(&umh_sysctl_lock); + new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset); + new->cap_inheritable = cap_intersect(usermodehelper_inheritable, + new->cap_inheritable); + spin_unlock(&umh_sysctl_lock); + + commit_creds(new); + retval = kernel_execve(sub_info->path, (const char *const *)sub_info->argv, (const char *const *)sub_info->envp); @@ -418,6 +440,84 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, } EXPORT_SYMBOL(call_usermodehelper_exec); +static int proc_cap_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table t; + unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; + kernel_cap_t new_cap; + int err, i; + + if (write && (!capable(CAP_SETPCAP) || + !capable(CAP_SYS_MODULE))) + return -EPERM; + + /* + * convert from the global kernel_cap_t to the ulong array to print to + * userspace if this is a read. + */ + spin_lock(&umh_sysctl_lock); + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { + if (table->data == CAP_BSET) + cap_array[i] = usermodehelper_bset.cap[i]; + else if (table->data == CAP_PI) + cap_array[i] = usermodehelper_inheritable.cap[i]; + else + BUG(); + } + spin_unlock(&umh_sysctl_lock); + + t = *table; + t.data = &cap_array; + + /* + * actually read or write and array of ulongs from userspace. 
Remember + * these are least significant 32 bits first + */ + err = proc_doulongvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + + /* + * convert from the sysctl array of ulongs to the kernel_cap_t + * internal representation + */ + for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) + new_cap.cap[i] = cap_array[i]; + + /* + * Drop everything not in the new_cap (but don't add things) + */ + spin_lock(&umh_sysctl_lock); + if (write) { + if (table->data == CAP_BSET) + usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); + if (table->data == CAP_PI) + usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + } + spin_unlock(&umh_sysctl_lock); + + return 0; +} + +struct ctl_table usermodehelper_table[] = { + { + .procname = "bset", + .data = CAP_BSET, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { + .procname = "inheritable", + .data = CAP_PI, + .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .mode = 0600, + .proc_handler = proc_cap_handler, + }, + { } +}; + void __init usermodehelper_init(void) { khelper_wq = create_singlethread_workqueue("khelper"); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0bb32414b17..965134bed6cd 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include @@ -615,6 +616,11 @@ static struct ctl_table kern_table[] = { .mode = 0555, .child = random_table, }, + { + .procname = "usermodehelper", + .mode = 0555, + .child = usermodehelper_table, + }, { .procname = "overflowuid", .data = &overflowuid, From 4bf2ea77dba76a22f49db3c10773896aaeeb8f66 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:28 -0400 Subject: [PATCH 0032/3380] capabilities: do not special case exec of init When the global init task is exec'd we have special case logic to make sure the pE is not reduced. There is no reason for this. 
If init wants to drop its pE it should be allowed to do so. Remove this special logic. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Acked-by: David Howells Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- security/commoncap.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/security/commoncap.c b/security/commoncap.c index f20e984ccfb4..a93b3b733079 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -529,15 +529,10 @@ int cap_bprm_set_creds(struct linux_binprm *bprm) new->suid = new->fsuid = new->euid; new->sgid = new->fsgid = new->egid; - /* For init, we want to retain the capabilities set in the initial - * task. Thus we skip the usual capability rules - */ - if (!is_global_init(current)) { - if (effective) - new->cap_effective = new->cap_permitted; - else - cap_clear(new->cap_effective); - } + if (effective) + new->cap_effective = new->cap_permitted; + else + cap_clear(new->cap_effective); bprm->cap_effective = effective; /* From ffa8e59df047d57e812a04f7d6baf6a25c652c0c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:34 -0400 Subject: [PATCH 0033/3380] capabilities: do not drop CAP_SETPCAP from the initial task In olden days of yore CAP_SETPCAP had special meaning for the init task. We actually have code to make sure that CAP_SETPCAP wasn't in pE of things using the init_cred. But CAP_SETPCAP isn't so special any more and we don't have a reason to special case dropping it for init or kthreads.... Signed-off-by: Eric Paris Acked-by: Andrew G.
Morgan Signed-off-by: James Morris --- include/linux/capability.h | 6 ++++-- kernel/capability.c | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..11d562863e49 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -412,7 +412,6 @@ extern const kernel_cap_t __cap_init_eff_set; # define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) # define CAP_FULL_SET ((kernel_cap_t){{ ~0, ~0 }}) -# define CAP_INIT_EFF_SET ((kernel_cap_t){{ ~CAP_TO_MASK(CAP_SETPCAP), ~0 }}) # define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ CAP_FS_MASK_B1 } }) @@ -423,10 +422,10 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ #define CAP_INIT_INH_SET CAP_EMPTY_SET +#define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) # define cap_set_full(c) do { (c) = __cap_full_set; } while (0) -# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,6 +546,9 @@ extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); +extern const kernel_cap_t __cap_empty_set; +extern const kernel_cap_t __cap_full_set; + /** * nsown_capable - Check superior capability to one's own user_ns * @cap: The capability in question diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c12..2a374d512ead 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -23,11 +23,9 @@ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; const kernel_cap_t __cap_full_set = CAP_FULL_SET; -const kernel_cap_t __cap_init_eff_set = CAP_INIT_EFF_SET; EXPORT_SYMBOL(__cap_empty_set); EXPORT_SYMBOL(__cap_full_set); 
-EXPORT_SYMBOL(__cap_init_eff_set); int file_caps_enabled = 1; From 5163b583a036b103c3cec7171d6731c125773ed6 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:39 -0400 Subject: [PATCH 0034/3380] capabilities: delete unused cap_set_full unused code. Clean it up. Signed-off-by: Eric Paris Acked-by: David Howells Acked-by: Andrew G. Morgan Signed-off-by: James Morris --- include/linux/capability.h | 2 -- kernel/capability.c | 2 -- 2 files changed, 4 deletions(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index 11d562863e49..8d0da30dad23 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -425,7 +425,6 @@ extern const kernel_cap_t __cap_init_eff_set; #define CAP_INIT_EFF_SET CAP_FULL_SET # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) -# define cap_set_full(c) do { (c) = __cap_full_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) #define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) @@ -547,7 +546,6 @@ extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); extern const kernel_cap_t __cap_empty_set; -extern const kernel_cap_t __cap_full_set; /** * nsown_capable - Check superior capability to one's own user_ns diff --git a/kernel/capability.c b/kernel/capability.c index 2a374d512ead..14ea4210a530 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -22,10 +22,8 @@ */ const kernel_cap_t __cap_empty_set = CAP_EMPTY_SET; -const kernel_cap_t __cap_full_set = CAP_FULL_SET; EXPORT_SYMBOL(__cap_empty_set); -EXPORT_SYMBOL(__cap_full_set); int file_caps_enabled = 1; From a3232d2fa2e3cbab3e76d91cdae5890fee8a4034 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 1 Apr 2011 17:08:45 -0400 Subject: [PATCH 0035/3380] capabilities: delete all CAP_INIT macros The CAP_INIT macros of INH, BSET, and EFF made sense at one point in time, but now days they aren't 
helping. Just open code the logic in the init_cred. Signed-off-by: Eric Paris Acked-by: David Howells Signed-off-by: James Morris --- include/linux/capability.h | 3 --- include/linux/init_task.h | 7 ------- kernel/cred.c | 6 +++--- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d0da30dad23..04fed72809de 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -421,9 +421,6 @@ extern const kernel_cap_t __cap_init_eff_set; #endif /* _KERNEL_CAPABILITY_U32S != 2 */ -#define CAP_INIT_INH_SET CAP_EMPTY_SET -#define CAP_INIT_EFF_SET CAP_FULL_SET - # define cap_clear(c) do { (c) = __cap_empty_set; } while (0) #define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index caa151fbebb7..1f277204de34 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -83,13 +83,6 @@ extern struct group_info init_groups; #define INIT_IDS #endif -/* - * Because of the reduced scope of CAP_SETPCAP when filesystem - * capabilities are in effect, it is safe to allow CAP_SETPCAP to - * be available in the default configuration. 
- */ -# define CAP_INIT_BSET CAP_FULL_SET - #ifdef CONFIG_RCU_BOOST #define INIT_TASK_RCU_BOOST() \ .rcu_boost_mutex = NULL, diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048df..b982f0863ae9 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -49,10 +49,10 @@ struct cred init_cred = { .magic = CRED_MAGIC, #endif .securebits = SECUREBITS_DEFAULT, - .cap_inheritable = CAP_INIT_INH_SET, + .cap_inheritable = CAP_EMPTY_SET, .cap_permitted = CAP_FULL_SET, - .cap_effective = CAP_INIT_EFF_SET, - .cap_bset = CAP_INIT_BSET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, .user = INIT_USER, .group_info = &init_groups, #ifdef CONFIG_KEYS From 177525d26e31806d71653f74bbec13574b97892c Mon Sep 17 00:00:00 2001 From: Andreas Mohr Date: Sun, 3 Apr 2011 20:58:28 +0200 Subject: [PATCH 0036/3380] eradicate bashisms in scripts/patch-kernel Silence a remaining annoying (or worse, irritating - "is my entire patched tree broken now!?") bashism-related message that occurs when /bin/sh is configured to instead deploy dash, a POSIX-compliant shell, as is the pretty much standard case on e.g. Debian. Current kernel version is 2.6.38 ( Flesh-Eating Bats with Fangs) ===> linux-2.6.38.patch-kernel_test/scripts/patch-kernel: line 253: [: =: unary operator expected <=== cannot find patch file: patch-2.6.39 Signed-off-by: Andreas Mohr Signed-off-by: Michal Marek --- scripts/patch-kernel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/patch-kernel b/scripts/patch-kernel index 46a59cae3a0a..20fb25c23382 100755 --- a/scripts/patch-kernel +++ b/scripts/patch-kernel @@ -250,7 +250,7 @@ while : # incrementing SUBLEVEL (s in v.p.s) do CURRENTFULLVERSION="$VERSION.$PATCHLEVEL.$SUBLEVEL" EXTRAVER= - if [ $STOPFULLVERSION = $CURRENTFULLVERSION ]; then + if [ x$STOPFULLVERSION = x$CURRENTFULLVERSION ]; then echo "Stopping at $CURRENTFULLVERSION base as requested." 
break fi From 2a7ce0edd661b3144c7b916ecf1eba0967b6d4a5 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Mon, 4 Apr 2011 15:19:59 -0500 Subject: [PATCH 0037/3380] dlm: remove shared message stub for recovery kmalloc a stub message struct during recovery instead of sharing the struct in the lockspace. This leaves the lockspace stub_ms only for faking downconvert replies, where it is never modified and sharing is not a problem. Also improve the debug messages in the same recovery function. Signed-off-by: David Teigland --- fs/dlm/dlm_internal.h | 1 + fs/dlm/lock.c | 82 ++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 33 deletions(-) diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 6a92478fe1f1..0262451eb9c6 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -209,6 +209,7 @@ struct dlm_args { #define DLM_IFL_WATCH_TIMEWARN 0x00400000 #define DLM_IFL_TIMEOUT_CANCEL 0x00800000 #define DLM_IFL_DEADLOCK_CANCEL 0x01000000 +#define DLM_IFL_STUB_MS 0x02000000 /* magic number for m_flags */ #define DLM_IFL_USER 0x00000001 #define DLM_IFL_ORPHAN 0x00000002 diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e3c864120371..81227799d47a 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1037,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms) struct dlm_ls *ls = lkb->lkb_resource->res_ls; int error; - if (ms != &ls->ls_stub_ms) + if (ms->m_flags != DLM_IFL_STUB_MS) mutex_lock(&ls->ls_waiters_mutex); error = _remove_from_waiters(lkb, ms->m_type, ms); - if (ms != &ls->ls_stub_ms) + if (ms->m_flags != DLM_IFL_STUB_MS) mutex_unlock(&ls->ls_waiters_mutex); return error; } @@ -1462,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb) ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become compatible with other granted locks */ -static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms) +static void munge_demoted(struct dlm_lkb 
*lkb) { - if (ms->m_type != DLM_MSG_CONVERT_REPLY) { - log_print("munge_demoted %x invalid reply type %d", - lkb->lkb_id, ms->m_type); - return; - } - if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) { log_print("munge_demoted %x invalid modes gr %d rq %d", lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode); @@ -2966,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb) /* down conversions go without a reply from the master */ if (!error && down_conversion(lkb)) { remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY); + r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS; r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; r->res_ls->ls_stub_ms.m_result = 0; - r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags; __receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms); } @@ -3156,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms) static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms) { + if (ms->m_flags == DLM_IFL_STUB_MS) + return; + lkb->lkb_sbflags = ms->m_sbflags; lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) | (ms->m_flags & 0x0000FFFF); @@ -3698,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, /* convert was queued on remote master */ receive_flags_reply(lkb, ms); if (is_demoted(lkb)) - munge_demoted(lkb, ms); + munge_demoted(lkb); del_lkb(r, lkb); add_lkb(r, lkb, DLM_LKSTS_CONVERT); add_timeout(lkb); @@ -3708,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb, /* convert was granted on remote master */ receive_flags_reply(lkb, ms); if (is_demoted(lkb)) - munge_demoted(lkb, ms); + munge_demoted(lkb); grant_lock_pc(r, lkb, ms); queue_cast(r, lkb, 0); break; @@ -4082,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid) dlm_put_lockspace(ls); } -static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb) +static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb 
*lkb, + struct dlm_message *ms_stub) { if (middle_conversion(lkb)) { hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY; - ls->ls_stub_ms.m_result = -EINPROGRESS; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_convert_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_CONVERT_REPLY; + ms_stub->m_result = -EINPROGRESS; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_convert_reply(lkb, ms_stub); /* Same special case as in receive_rcom_lock_args() */ lkb->lkb_grmode = DLM_LOCK_IV; @@ -4131,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb) void dlm_recover_waiters_pre(struct dlm_ls *ls) { struct dlm_lkb *lkb, *safe; + struct dlm_message *ms_stub; int wait_type, stub_unlock_result, stub_cancel_result; + ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); + if (!ms_stub) { + log_error(ls, "dlm_recover_waiters_pre no mem"); + return; + } + mutex_lock(&ls->ls_waiters_mutex); list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) { - log_debug(ls, "pre recover waiter lkid %x type %d flags %x", - lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags); + + /* exclude debug messages about unlocks because there can be so + many and they aren't very interesting */ + + if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) { + log_debug(ls, "recover_waiter %x nodeid %d " + "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid, + lkb->lkb_wait_type, lkb->lkb_wait_nodeid); + } /* all outstanding lookups, regardless of destination will be resent after recovery is done */ @@ -4183,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) break; case DLM_MSG_CONVERT: - recover_convert_waiter(ls, lkb); + recover_convert_waiter(ls, lkb, ms_stub); break; case DLM_MSG_UNLOCK: hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY; - ls->ls_stub_ms.m_result = 
stub_unlock_result; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_unlock_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_UNLOCK_REPLY; + ms_stub->m_result = stub_unlock_result; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_unlock_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; case DLM_MSG_CANCEL: hold_lkb(lkb); - ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY; - ls->ls_stub_ms.m_result = stub_cancel_result; - ls->ls_stub_ms.m_flags = lkb->lkb_flags; - ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid; - _receive_cancel_reply(lkb, &ls->ls_stub_ms); + memset(ms_stub, 0, sizeof(struct dlm_message)); + ms_stub->m_flags = DLM_IFL_STUB_MS; + ms_stub->m_type = DLM_MSG_CANCEL_REPLY; + ms_stub->m_result = stub_cancel_result; + ms_stub->m_header.h_nodeid = lkb->lkb_nodeid; + _receive_cancel_reply(lkb, ms_stub); dlm_put_lkb(lkb); break; @@ -4213,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls) schedule(); } mutex_unlock(&ls->ls_waiters_mutex); + kfree(ms_stub); } static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls) @@ -4277,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls) ou = is_overlap_unlock(lkb); err = 0; - log_debug(ls, "recover_waiters_post %x type %d flags %x %s", - lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name); + log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d", + lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid); /* At this point we assume that we won't get a reply to any previous op or overlap op on this lock. First, do a big From 364de77831213be20f7f33c39ca1c194593b5c11 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Sat, 2 Apr 2011 14:20:47 +0800 Subject: [PATCH 0038/3380] drivers, pch_dma: Fix uninitialized var before use In the function pdc_desc_get(), var 'i' is not initialized before use. This patch fixes it. 
Signed-off-by: Liu Yuan Signed-off-by: Vinod Koul --- drivers/dma/pch_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 8d8fef1480a9..6eebc6205c65 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -403,7 +403,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) { struct pch_dma_desc *desc, *_d; struct pch_dma_desc *ret = NULL; - int i; + int i = 0; spin_lock(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { From eba71de2cb7c02c5ae4f2ad3656343da71bc4661 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 25 Mar 2011 10:13:43 -0400 Subject: [PATCH 0039/3380] selinux: Fix regression for Xorg Commit 6f5317e730505d5cbc851c435a2dfe3d5a21d343 introduced a bug in the handling of userspace object classes that is causing breakage for Xorg when XSELinux is enabled. Fix the bug by changing map_class() to return SECCLASS_NULL when the class cannot be mapped to a kernel object class. Reported-by: "Justin P. Mattock" Signed-off-by: Stephen Smalley Signed-off-by: James Morris --- security/selinux/ss/services.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 39d732145abe..f3f5dca81006 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -213,7 +213,7 @@ static u16 map_class(u16 pol_value) return i; } - return pol_value; + return SECCLASS_NULL; } static void map_decision(u16 tclass, struct av_decision *avd, From 1214eac73f798bccabc6adb55e7b2d787527c13c Mon Sep 17 00:00:00 2001 From: Harry Ciao Date: Thu, 7 Apr 2011 14:12:57 +0800 Subject: [PATCH 0040/3380] Initialize policydb.process_class earlier. Initialize policydb.process_class once all symtabs are read from policy image, so that it could be used to setup the role_trans.tclass field when a lower version policy.X is loaded.
Signed-off-by: Harry Ciao Signed-off-by: Eric Paris --- security/selinux/ss/policydb.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index a493eae24e0a..82373eb2dc97 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -2275,6 +2275,11 @@ int policydb_read(struct policydb *p, void *fp) p->symtab[i].nprim = nprim; } + rc = -EINVAL; + p->process_class = string_to_security_class(p, "process"); + if (!p->process_class) + goto bad; + rc = avtab_read(&p->te_avtab, fp, p); if (rc) goto bad; @@ -2358,11 +2363,6 @@ int policydb_read(struct policydb *p, void *fp) if (rc) goto bad; - rc = -EINVAL; - p->process_class = string_to_security_class(p, "process"); - if (!p->process_class) - goto bad; - rc = -EINVAL; p->process_trans_perms = string_to_av_perm(p, p->process_class, "transition"); p->process_trans_perms |= string_to_av_perm(p, p->process_class, "dyntransition"); From 0601f793921157603831d00a9541d92e8f5763f6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: [PATCH 0041/3380] SUNRPC: requeue tcp socket less frequently Don't requeue the socket in some cases where we know it's unnecessary. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c3f19e..7a3e4bfd895d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -965,7 +965,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) goto err_again; /* record not complete */ } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; error: @@ -1115,6 +1114,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) From 5ee78d483c5812228e971e145b912e0a7e35e571 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: [PATCH 0042/3380] SUNRPC: svc_tcp_recvfrom cleanup Minor cleanup in preparation for later patches. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 7a3e4bfd895d..733c2f6a1858 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -893,6 +893,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) @@ -915,9 +916,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -1040,8 +1041,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; struct rpc_rqst *req = NULL; + unsigned int vlen; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1072,7 +1074,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) } pnum = 1; - while (vlen < len) { + while (vlen < svsk->sk_reclen - 8) { vec[pnum].iov_base = (req) ? 
page_address(req->rq_private_buf.pages[pnum - 1]) : page_address(rqstp->rq_pages[pnum]); @@ -1083,29 +1085,23 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); if (len < 0) goto err_again; - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; goto out; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; @@ -1123,7 +1119,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + return rqstp->rq_arg.len; err_again: if (len == -EAGAIN) { From 48e6555c7b3bf0d92f8167d8b8b8ecf4a3fdab84 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 14:52:03 -0500 Subject: [PATCH 0043/3380] svcrpc: note network-order types in svc_process_calldir Signed-off-by: J. 
Bruce Fields --- net/sunrpc/svcsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 733c2f6a1858..1955e1a1e390 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -982,9 +982,9 @@ static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, struct rpc_rqst **reqpp, struct kvec *vec) { struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; + __be32 *p; + __be32 xid; + __be32 calldir; int len; len = svc_recvfrom(rqstp, vec, 1, 8); From cc6c2127f2316c2b2ad1e8919b45cde5e03f65aa Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 14 Feb 2011 15:03:35 -0500 Subject: [PATCH 0044/3380] svcrpc: close connection if client sends short packet If the client sends a record too short to contain even the beginning of the rpc header, then just close the connection. The current code drops the record data and continues. I don't see the point. It's a hopeless situation and simpler just to cut off the connection completely. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1955e1a1e390..62ff7c5c09c2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -955,6 +955,9 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. */ + /* Check whether enough data is available */ len = svc_recv_available(svsk); if (len < 0) @@ -1058,20 +1061,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) vec[0] = rqstp->rq_arg.head[0]; vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
- * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. - */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + len = svc_process_calldir(svsk, rqstp, &req, vec); + if (len < 0) + goto err_again; + vlen -= 8; pnum = 1; while (vlen < svsk->sk_reclen - 8) { From 586c52cc61b5b84c70102208b78269ef5924bf49 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 18 May 2009 17:47:56 -0400 Subject: [PATCH 0045/3380] svcrpc: copy cb reply instead of pages It's much simpler just to copy the cb reply data than to play tricks with pages. Callback replies will typically be very small (at least until we implement cb_getattr, in which case files with very long ACLs could pose a problem), so there's no loss in efficiency. Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 120 ++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 65 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 62ff7c5c09c2..40b502b11442 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -981,57 +981,58 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - __be32 *p; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; __be32 xid; __be32 calldir; - int len; - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; - - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = 
svsk->sk_xprt.xpt_bc_xprt; + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } - - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } - out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the forseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. 
*/ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; +} + +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; } /* @@ -1044,8 +1045,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - struct rpc_rqst *req = NULL; - unsigned int vlen; + __be32 *p; + __be32 calldir; int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", @@ -1058,35 +1059,17 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto error; vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); - pnum = 1; - while (vlen < svsk->sk_reclen - 8) { - vec[pnum].iov_base = (req) ? 
- page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen - 8); + len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); if (len < 0) goto err_again; - if (req) { - xprt_complete_rqst(req->rq_task, svsk->sk_reclen); - rqstp->rq_arg.len = 0; - goto out; - } dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; @@ -1099,7 +1082,14 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) { + len = receive_cb_reply(svsk, rqstp); + if (len < 0) + goto err_again; + } + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; From 31d68ef65c7d49def19c1bae4e01b87d66cf5a56 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 24 Feb 2011 11:25:33 -0800 Subject: [PATCH 0046/3380] SUNRPC: Don't wait for full record to receive tcp data Ensure that we immediately read and buffer data from the incoming TCP stream so that we grow the receive window quickly, and don't deadlock on large READ or WRITE requests. Also do some minor exit cleanup. Signed-off-by: Trond Myklebust Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svcsock.h | 1 + net/sunrpc/svcsock.c | 144 +++++++++++++++++++++++++-------- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f2..85c50b40759d 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 40b502b11442..a4fafcbc6ea0 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -884,6 +911,56 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] 
== NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -928,7 +1005,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -958,26 +1035,14 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (svsk->sk_reclen < 8) goto err_delete; /* client is nuts. 
*/ - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; - - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } @@ -1035,6 +1100,7 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) return i; } + /* * Receive data from a TCP socket. */ @@ -1045,6 +1111,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; + unsigned int want, base; __be32 *p; __be32 calldir; int pnum; @@ -1058,6 +1125,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], @@ -1066,11 +1136,18 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, svsk->sk_reclen); - if (len < 0) - goto err_again; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; + } - dprintk("svc: TCP complete record (%d bytes)\n", svsk->sk_reclen); rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { @@ -1087,7 +1164,7 @@ static int svc_tcp_recvfrom(struct 
svc_rqst *rqstp) if (calldir) { len = receive_cb_reply(svsk, rqstp); if (len < 0) - goto err_again; + goto error; } /* Reset TCP read info */ @@ -1102,20 +1179,20 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (serv->sv_stats) serv->sv_stats->nettcpcnt++; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1286,6 +1363,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; @@ -1544,8 +1622,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* From 9660439861aa8dbd5e2b8087f33e20760c2c9afc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 21 Oct 2008 14:13:47 -0400 Subject: [PATCH 0047/3380] svcrpc: take advantage of tcp autotuning Allow the NFSv4 server to make use of TCP autotuning behaviour, which was previously disabled by setting the sk_userlocks variable. Set the receive buffers to be big enough to receive the whole RPC request, and set this for the listening socket, not the accept socket. 
Remove the code that readjusts the receive/send buffer sizes for the accepted socket. Previously this code was used to influence the TCP window management behaviour, which is no longer needed when autotuning is enabled. This can improve IO bandwidth on networks with high bandwidth-delay products, where a large tcp window is required. It also simplifies performance tuning, since getting adequate tcp buffers previously required increasing the number of nfsd threads. Signed-off-by: Olga Kornievskaia Cc: Jim Rees Signed-off-by: J. Bruce Fields --- net/sunrpc/svcsock.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index a4fafcbc6ea0..213dea8b283c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -436,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -973,23 +972,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. 
- */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { @@ -1367,15 +1349,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1439,8 +1412,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. + */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); From 466de9183570fe9fd21ef167951488fc9d513fcb Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sat, 19 Mar 2011 04:26:10 +0000 Subject: [PATCH 0048/3380] kconfig: Avoid buffer underrun in choice input commit 40aee729b350672c2550640622416a855e27938f ('kconfig: fix default value for choice input') fixed some cases where kconfig would select the wrong option from a choice with a single valid option and thus enter an infinite loop. However, this broke the test for user input of the form 'N?', because when kconfig selects the single valid option the input is zero-length and the test will read the byte before the input buffer. If this happens to contain '?' 
(as it will in a mips build on Debian unstable today) then kconfig again enters an infinite loop. Signed-off-by: Ben Hutchings Cc: stable@kernel.org [2.6.17+] Signed-off-by: Michal Marek --- scripts/kconfig/conf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/kconfig/conf.c b/scripts/kconfig/conf.c index 659326c3e895..006ad817cd5f 100644 --- a/scripts/kconfig/conf.c +++ b/scripts/kconfig/conf.c @@ -332,7 +332,7 @@ static int conf_choice(struct menu *menu) } if (!child) continue; - if (line[strlen(line) - 1] == '?') { + if (line[0] && line[strlen(line) - 1] == '?') { print_help(child); continue; } From 8985ef0b8af895c3b85a8c1b7108e0169fcbd20b Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 10:03:10 -0400 Subject: [PATCH 0049/3380] svcrpc: complete svsk processing on cb receive failure Currently when there's some failure to receive a callback (because we couldn't find a matching xid, for example), we exit svc_recv with sk_tcplen still set but without any pages saved with the socket. This will cause a crash later in svc_tcp_restore_pages. Instead, make sure we reset that tcp information whether the callback receive failed or succeeded. Signed-off-by: J.
Bruce Fields --- net/sunrpc/svcsock.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 213dea8b283c..af04f779ce9f 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1143,11 +1143,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) p = (__be32 *)rqstp->rq_arg.head[0].iov_base; calldir = p[1]; - if (calldir) { + if (calldir) len = receive_cb_reply(svsk, rqstp); - if (len < 0) - goto error; - } /* Reset TCP read info */ svsk->sk_reclen = 0; @@ -1156,6 +1153,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) From aea93397db4b39c9d15443a0e7cc9a380ba990c6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sun, 10 Apr 2011 10:35:12 -0400 Subject: [PATCH 0050/3380] nfsd: distinguish functions of NFSD_MAY_* flags Most of the NFSD_MAY_* flags actually request permissions, but over the years we've accreted a few that modify the behavior of the permission or open code in other ways. Distinguish the two cases a little more. In particular, allow the shortcut at the start of nfsd_permission to ignore the non-permission-requesting bits. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/vfs.c | 2 +- fs/nfsd/vfs.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 2e1cebde90df..a76ef7e0b3d0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -2027,7 +2027,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == NFSD_MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5e36b7..1036913e6e86 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -17,6 +17,9 @@ #define NFSD_MAY_SATTR 8 #define NFSD_MAY_TRUNC 16 #define NFSD_MAY_LOCK 32 +#define NFSD_MAY_MASK 63 + +/* extra hints to permission and open routines: */ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 From 204f4ce75434c3453907813f8a819d4cf2a5728f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 8 Apr 2011 16:32:54 -0400 Subject: [PATCH 0051/3380] nfsd4: allow fh_verify caller to skip pseudoflavor checks Signed-off-by: J. Bruce Fields --- fs/nfsd/nfsfh.c | 2 +- fs/nfsd/vfs.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63af0be..90c6aa6d5e0f 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. 
*/ - if (access & NFSD_MAY_LOCK) + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) goto skip_pseudoflavor_check; /* * Clients may expect to be able to use auth_sys during mount, diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 1036913e6e86..4d2509f766d4 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -24,6 +24,7 @@ #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_NOT_BREAK_LEASE 512 +#define NFSD_MAY_BYPASS_GSS 1024 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) From 22b03214962ec2a9748abc9987fc2e66dec4626d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:23:24 -0400 Subject: [PATCH 0052/3380] nfsd4: introduce OPDESC helper Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb1396a7e3..126b8f75b576 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1031,6 +1031,11 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + /* * COMPOUND call. */ @@ -1108,7 +1113,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); if (!cstate->current_fh.fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { From 29a78a3ed7fc9c4ee49962751eb321b038c190a2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 9 Apr 2011 11:28:53 -0400 Subject: [PATCH 0053/3380] nfsd4: make fh_verify responsibility of nfsd_lookup_dentry caller The secinfo caller actually won't want this. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 3 +++ fs/nfsd/vfs.c | 9 +++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 126b8f75b576..8059adae013b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a76ef7e0b3d0..e53313972c30 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. */ - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; From 0d60b281dc7fd2ae7ec6463f916138fd2d182bee Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Apr 2011 07:22:58 +0000 Subject: [PATCH 0054/3380] video: s3c-fb: make runtime pm functions static This patch makes runtime pm functions static. 
Signed-off-by: Jingoo Han Signed-off-by: Paul Mundt --- drivers/video/s3c-fb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 3b6cdcac8f1a..2d075f0684d9 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -1549,7 +1549,7 @@ static int s3c_fb_resume(struct device *dev) return 0; } -int s3c_fb_runtime_suspend(struct device *dev) +static int s3c_fb_runtime_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); @@ -1569,7 +1569,7 @@ int s3c_fb_runtime_suspend(struct device *dev) return 0; } -int s3c_fb_runtime_resume(struct device *dev) +static int s3c_fb_runtime_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct s3c_fb *sfb = platform_get_drvdata(pdev); From b07f3bbee12163a6b48991138a37b87a1126462a Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Mon, 11 Apr 2011 07:25:37 +0000 Subject: [PATCH 0055/3380] video: s3c-fb: add spinlock to interrupt routine The spinlock is added to the interrupt routine to ensure that the driver is protected against multiple accesses. Signed-off-by: Jingoo Han Signed-off-by: Paul Mundt --- drivers/video/s3c-fb.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/video/s3c-fb.c b/drivers/video/s3c-fb.c index 2d075f0684d9..3fa7911ac906 100644 --- a/drivers/video/s3c-fb.c +++ b/drivers/video/s3c-fb.c @@ -182,6 +182,7 @@ struct s3c_fb_vsync { /** * struct s3c_fb - overall hardware state of the hardware + * @slock: The spinlock protection for this data structure. * @dev: The device that we bound to, for printing, etc. * @regs_res: The resource we claimed for the IO registers. * @bus_clk: The clk (hclk) feeding our interface and possibly pixclk. @@ -195,6 +196,7 @@ struct s3c_fb_vsync { * @vsync_info: VSYNC-related information (count, queues...)
*/ struct s3c_fb { + spinlock_t slock; struct device *dev; struct resource *regs_res; struct clk *bus_clk; @@ -947,6 +949,8 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) void __iomem *regs = sfb->regs; u32 irq_sts_reg; + spin_lock(&sfb->slock); + irq_sts_reg = readl(regs + VIDINTCON1); if (irq_sts_reg & VIDINTCON1_INT_FRAME) { @@ -963,6 +967,7 @@ static irqreturn_t s3c_fb_irq(int irq, void *dev_id) */ s3c_fb_disable_irq(sfb); + spin_unlock(&sfb->slock); return IRQ_HANDLED; } @@ -1339,6 +1344,8 @@ static int __devinit s3c_fb_probe(struct platform_device *pdev) sfb->pdata = pd; sfb->variant = fbdrv->variant; + spin_lock_init(&sfb->slock); + sfb->bus_clk = clk_get(dev, "lcd"); if (IS_ERR(sfb->bus_clk)) { dev_err(dev, "failed to get bus clock\n"); From 92a47674f57b4a84a43ce93b0dfdb596c0543749 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 11 Apr 2011 23:34:37 -0700 Subject: [PATCH 0056/3380] Input: gpio_keys - add support for EV_ABS With this patch you can setup a group of GPIOs representing a specific position on an EV_ABS axis. Signed-off-by: Alexander Stein Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 9 +++++++-- include/linux/gpio_keys.h | 7 ++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index eb3006361ee4..73e58a96ab99 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -324,7 +324,12 @@ static void gpio_keys_report_event(struct gpio_button_data *bdata) unsigned int type = button->type ?: EV_KEY; int state = (gpio_get_value_cansleep(button->gpio) ? 
1 : 0) ^ button->active_low; - input_event(input, type, button->code, !!state); + if (type == EV_ABS) { + if (state) + input_event(input, type, button->code, button->value); + } else { + input_event(input, type, button->code, !!state); + } input_sync(input); } @@ -363,7 +368,7 @@ static int __devinit gpio_keys_setup_key(struct platform_device *pdev, struct gpio_button_data *bdata, struct gpio_keys_button *button) { - char *desc = button->desc ? button->desc : "gpio_keys"; + const char *desc = button->desc ? button->desc : "gpio_keys"; struct device *dev = &pdev->dev; unsigned long irqflags; int irq, error; diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index dd1a56fbe924..3204edfe6b19 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -3,14 +3,15 @@ struct gpio_keys_button { /* Configuration parameters */ - int code; /* input event code (KEY_*, SW_*) */ + unsigned int code; /* input event code (KEY_*, SW_*) */ int gpio; int active_low; - char *desc; - int type; /* input event type (EV_KEY, EV_SW) */ + const char *desc; + unsigned int type; /* input event type (EV_KEY, EV_SW, EV_ABS) */ int wakeup; /* configure the button as a wake-up source */ int debounce_interval; /* debounce ticks interval in msecs */ bool can_disable; + int value; /* axis value for EV_ABS */ }; struct gpio_keys_platform_data { From 467112777c462a592c27338eeea5d1a320e82b5f Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 11 Apr 2011 23:34:48 -0700 Subject: [PATCH 0057/3380] Input: gpio-keys - add support for setting device name This patch allows setting a device name, which helps to distinguish several gpio-keys devices.
Signed-off-by: Alexander Stein Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/gpio_keys.c | 2 +- include/linux/gpio_keys.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 73e58a96ab99..6e6145b9a4c1 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -473,7 +473,7 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ddata); input_set_drvdata(input, ddata); - input->name = pdev->name; + input->name = pdata->name ? : pdev->name; input->phys = "gpio-keys/input0"; input->dev.parent = &pdev->dev; input->open = gpio_keys_open; diff --git a/include/linux/gpio_keys.h b/include/linux/gpio_keys.h index 3204edfe6b19..b5ca4b2c08ec 100644 --- a/include/linux/gpio_keys.h +++ b/include/linux/gpio_keys.h @@ -22,6 +22,7 @@ struct gpio_keys_platform_data { unsigned int rep:1; /* enable input subsystem auto repeat */ int (*enable)(struct device *dev); void (*disable)(struct device *dev); + const char *name; /* input device name */ }; #endif From 4203306506ebe4eaaa84a2cbd7c1eb2fc0128faa Mon Sep 17 00:00:00 2001 From: Zhang Jiejing Date: Mon, 11 Apr 2011 23:48:23 -0700 Subject: [PATCH 0058/3380] Input: add driver for Maxim max11801 touchscreen controller Add Maxim max11801 resistive touchscreen controller driver. This driver uses Auto Mode and Aperture Mode. Support for other max1180x devices can be added to this driver as well, as they use almost the same register addresses and codes.
You can find data sheet here: http://www.maxim-ic.com/datasheet/index.mvp/id/5943 Signed-off-by: Zhang Jiejing Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/Kconfig | 12 ++ drivers/input/touchscreen/Makefile | 1 + drivers/input/touchscreen/max11801_ts.c | 272 ++++++++++++++++++++++++ 3 files changed, 285 insertions(+) create mode 100644 drivers/input/touchscreen/max11801_ts.c diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig index 112ec55f2939..6b2d441a7dde 100644 --- a/drivers/input/touchscreen/Kconfig +++ b/drivers/input/touchscreen/Kconfig @@ -248,6 +248,18 @@ config TOUCHSCREEN_LPC32XX To compile this driver as a module, choose M here: the module will be called lpc32xx_ts. +config TOUCHSCREEN_MAX11801 + tristate "MAX11801 based touchscreens" + depends on I2C + help + Say Y here if you have a MAX11801 based touchscreen + controller. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called max11801_ts. 
+ config TOUCHSCREEN_MCS5000 tristate "MELFAS MCS-5000 touchscreen" depends on I2C diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile index ca94098d4c92..282d6f76ae26 100644 --- a/drivers/input/touchscreen/Makefile +++ b/drivers/input/touchscreen/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o obj-$(CONFIG_TOUCHSCREEN_INTEL_MID) += intel-mid-touch.o obj-$(CONFIG_TOUCHSCREEN_LPC32XX) += lpc32xx_ts.o +obj-$(CONFIG_TOUCHSCREEN_MAX11801) += max11801_ts.o obj-$(CONFIG_TOUCHSCREEN_MC13783) += mc13783_ts.o obj-$(CONFIG_TOUCHSCREEN_MCS5000) += mcs5000_ts.o obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c new file mode 100644 index 000000000000..4f2713d92791 --- /dev/null +++ b/drivers/input/touchscreen/max11801_ts.c @@ -0,0 +1,272 @@ +/* + * Driver for MAXI MAX11801 - A Resistive touch screen controller with + * i2c interface + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. + * Author: Zhang Jiejing + * + * Based on mcs5000_ts.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + */ + +/* + * This driver aims to support the series of MAXI touch chips max11801 + * through max11803. The main difference between these 4 chips can be + * found in the table below: + * ----------------------------------------------------- + * | CHIP | AUTO MODE SUPPORT(FIFO) | INTERFACE | + * |----------------------------------------------------| + * | max11800 | YES | SPI | + * | max11801 | YES | I2C | + * | max11802 | NO | SPI | + * | max11803 | NO | I2C | + * ------------------------------------------------------ + * + * Currently, this driver only supports max11801. 
+ * + * Data Sheet: + * http://www.maxim-ic.com/datasheet/index.mvp/id/5943 + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Register Address define */ +#define GENERNAL_STATUS_REG 0x00 +#define GENERNAL_CONF_REG 0x01 +#define MESURE_RES_CONF_REG 0x02 +#define MESURE_AVER_CONF_REG 0x03 +#define ADC_SAMPLE_TIME_CONF_REG 0x04 +#define PANEL_SETUPTIME_CONF_REG 0x05 +#define DELAY_CONVERSION_CONF_REG 0x06 +#define TOUCH_DETECT_PULLUP_CONF_REG 0x07 +#define AUTO_MODE_TIME_CONF_REG 0x08 /* only for max11800/max11801 */ +#define APERTURE_CONF_REG 0x09 /* only for max11800/max11801 */ +#define AUX_MESURE_CONF_REG 0x0a +#define OP_MODE_CONF_REG 0x0b + +/* FIFO is found only in max11800 and max11801 */ +#define FIFO_RD_CMD (0x50 << 1) +#define MAX11801_FIFO_INT (1 << 2) +#define MAX11801_FIFO_OVERFLOW (1 << 3) + +#define XY_BUFSIZE 4 +#define XY_BUF_OFFSET 4 + +#define MAX11801_MAX_X 0xfff +#define MAX11801_MAX_Y 0xfff + +#define MEASURE_TAG_OFFSET 2 +#define MEASURE_TAG_MASK (3 << MEASURE_TAG_OFFSET) +#define EVENT_TAG_OFFSET 0 +#define EVENT_TAG_MASK (3 << EVENT_TAG_OFFSET) +#define MEASURE_X_TAG (0 << MEASURE_TAG_OFFSET) +#define MEASURE_Y_TAG (1 << MEASURE_TAG_OFFSET) + +/* These are the state of touch event state machine */ +enum { + EVENT_INIT, + EVENT_MIDDLE, + EVENT_RELEASE, + EVENT_FIFO_END +}; + +struct max11801_data { + struct i2c_client *client; + struct input_dev *input_dev; +}; + +static u8 read_register(struct i2c_client *client, int addr) +{ + /* XXX: The chip ignores LSB of register address */ + return i2c_smbus_read_byte_data(client, addr << 1); +} + +static int max11801_write_reg(struct i2c_client *client, int addr, int data) +{ + /* XXX: The chip ignores LSB of register address */ + return i2c_smbus_write_byte_data(client, addr << 1, data); +} + +static irqreturn_t max11801_ts_interrupt(int irq, void *dev_id) +{ + struct max11801_data *data = dev_id; + struct i2c_client *client = data->client; + int status, i, ret; + u8 
buf[XY_BUFSIZE]; + int x = -1; + int y = -1; + + status = read_register(data->client, GENERNAL_STATUS_REG); + + if (status & (MAX11801_FIFO_INT | MAX11801_FIFO_OVERFLOW)) { + status = read_register(data->client, GENERNAL_STATUS_REG); + + ret = i2c_smbus_read_i2c_block_data(client, FIFO_RD_CMD, + XY_BUFSIZE, buf); + + /* + * We should get 4 bytes buffer that contains X,Y + * and event tag + */ + if (ret < XY_BUFSIZE) + goto out; + + for (i = 0; i < XY_BUFSIZE; i += XY_BUFSIZE / 2) { + if ((buf[i + 1] & MEASURE_TAG_MASK) == MEASURE_X_TAG) + x = (buf[i] << XY_BUF_OFFSET) + + (buf[i + 1] >> XY_BUF_OFFSET); + else if ((buf[i + 1] & MEASURE_TAG_MASK) == MEASURE_Y_TAG) + y = (buf[i] << XY_BUF_OFFSET) + + (buf[i + 1] >> XY_BUF_OFFSET); + } + + if ((buf[1] & EVENT_TAG_MASK) != (buf[3] & EVENT_TAG_MASK)) + goto out; + + switch (buf[1] & EVENT_TAG_MASK) { + case EVENT_INIT: + /* fall through */ + case EVENT_MIDDLE: + input_report_abs(data->input_dev, ABS_X, x); + input_report_abs(data->input_dev, ABS_Y, y); + input_event(data->input_dev, EV_KEY, BTN_TOUCH, 1); + input_sync(data->input_dev); + break; + + case EVENT_RELEASE: + input_event(data->input_dev, EV_KEY, BTN_TOUCH, 0); + input_sync(data->input_dev); + break; + + case EVENT_FIFO_END: + break; + } + } +out: + return IRQ_HANDLED; +} + +static void __devinit max11801_ts_phy_init(struct max11801_data *data) +{ + struct i2c_client *client = data->client; + + /* Average X,Y, take 16 samples, average eight media sample */ + max11801_write_reg(client, MESURE_AVER_CONF_REG, 0xff); + /* X,Y panel setup time set to 20us */ + max11801_write_reg(client, PANEL_SETUPTIME_CONF_REG, 0x11); + /* Rough pullup time (2uS), Fine pullup time (10us) */ + max11801_write_reg(client, TOUCH_DETECT_PULLUP_CONF_REG, 0x10); + /* Auto mode init period = 5ms , scan period = 5ms*/ + max11801_write_reg(client, AUTO_MODE_TIME_CONF_REG, 0xaa); + /* Aperture X,Y set to +- 4LSB */ + max11801_write_reg(client, APERTURE_CONF_REG, 0x33); + /* Enable Power, 
enable Automode, enable Aperture, enable Average X,Y */ + max11801_write_reg(client, OP_MODE_CONF_REG, 0x36); +} + +static int __devinit max11801_ts_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct max11801_data *data; + struct input_dev *input_dev; + int error; + + data = kzalloc(sizeof(struct max11801_data), GFP_KERNEL); + input_dev = input_allocate_device(); + if (!data || !input_dev) { + dev_err(&client->dev, "Failed to allocate memory\n"); + error = -ENOMEM; + goto err_free_mem; + } + + data->client = client; + data->input_dev = input_dev; + + input_dev->name = "max11801_ts"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &client->dev; + + __set_bit(EV_ABS, input_dev->evbit); + __set_bit(EV_KEY, input_dev->evbit); + __set_bit(BTN_TOUCH, input_dev->keybit); + input_set_abs_params(input_dev, ABS_X, 0, MAX11801_MAX_X, 0, 0); + input_set_abs_params(input_dev, ABS_Y, 0, MAX11801_MAX_Y, 0, 0); + input_set_drvdata(input_dev, data); + + max11801_ts_phy_init(data); + + error = request_threaded_irq(client->irq, NULL, max11801_ts_interrupt, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + "max11801_ts", data); + if (error) { + dev_err(&client->dev, "Failed to register interrupt\n"); + goto err_free_mem; + } + + error = input_register_device(data->input_dev); + if (error) + goto err_free_irq; + + i2c_set_clientdata(client, data); + return 0; + +err_free_irq: + free_irq(client->irq, data); +err_free_mem: + input_free_device(input_dev); + kfree(data); + return error; +} + +static __devexit int max11801_ts_remove(struct i2c_client *client) +{ + struct max11801_data *data = i2c_get_clientdata(client); + + free_irq(client->irq, data); + input_unregister_device(data->input_dev); + kfree(data); + + return 0; +} + +static const struct i2c_device_id max11801_ts_id[] = { + {"max11801", 0}, + { } +}; +MODULE_DEVICE_TABLE(i2c, max11801_ts_id); + +static struct i2c_driver max11801_ts_driver = { + .driver = { + .name = "max11801_ts", + .owner = 
THIS_MODULE, + }, + .id_table = max11801_ts_id, + .probe = max11801_ts_probe, + .remove = __devexit_p(max11801_ts_remove), +}; + +static int __init max11801_ts_init(void) +{ + return i2c_add_driver(&max11801_ts_driver); +} + +static void __exit max11801_ts_exit(void) +{ + i2c_del_driver(&max11801_ts_driver); +} + +module_init(max11801_ts_init); +module_exit(max11801_ts_exit); + +MODULE_AUTHOR("Zhang Jiejing "); +MODULE_DESCRIPTION("Touchscreen driver for MAXI MAX11801 controller"); +MODULE_LICENSE("GPL"); From 910d80513056589d3b12b3aad8598d19e0a0a5bd Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 12 Apr 2011 23:14:38 -0700 Subject: [PATCH 0059/3380] Input: atmel_mxt_ts - support 12bit resolution Atmel touchscreen chip can support 12bit resolution and this patch modifies to get maximum x and y size from platform data. Signed-off-by: Joonyoung Shim Acked-by: Iiro Valkonen Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 53 +++++++++++++++++------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 4012436633b1..a97905a17b72 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -196,9 +196,12 @@ #define MXT_PRESS (1 << 6) #define MXT_DETECT (1 << 7) +/* Touch orient bits */ +#define MXT_XY_SWITCH (1 << 0) +#define MXT_X_INVERT (1 << 1) +#define MXT_Y_INVERT (1 << 2) + /* Touchscreen absolute values */ -#define MXT_MAX_XC 0x3ff -#define MXT_MAX_YC 0x3ff #define MXT_MAX_AREA 0xff #define MXT_MAX_FINGER 10 @@ -246,6 +249,8 @@ struct mxt_data { struct mxt_info info; struct mxt_finger finger[MXT_MAX_FINGER]; unsigned int irq; + unsigned int max_x; + unsigned int max_y; }; static bool mxt_object_readable(unsigned int type) @@ -549,8 +554,13 @@ static void mxt_input_touchevent(struct mxt_data *data, if (!(status & (MXT_PRESS | MXT_MOVE))) return; - x = (message->message[1] << 2) | 
((message->message[3] & ~0x3f) >> 6); - y = (message->message[2] << 2) | ((message->message[3] & ~0xf3) >> 2); + x = (message->message[1] << 4) | ((message->message[3] >> 4) & 0xf); + y = (message->message[2] << 4) | ((message->message[3] & 0xf)); + if (data->max_x < 1024) + x = x >> 2; + if (data->max_y < 1024) + y = y >> 2; + area = message->message[4]; dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id, @@ -845,6 +855,20 @@ static int mxt_initialize(struct mxt_data *data) return 0; } +static void mxt_calc_resolution(struct mxt_data *data) +{ + unsigned int max_x = data->pdata->x_size - 1; + unsigned int max_y = data->pdata->y_size - 1; + + if (data->pdata->orient & MXT_XY_SWITCH) { + data->max_x = max_y; + data->max_y = max_x; + } else { + data->max_x = max_x; + data->max_y = max_y; + } +} + static ssize_t mxt_object_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1052,31 +1076,32 @@ static int __devinit mxt_probe(struct i2c_client *client, input_dev->open = mxt_input_open; input_dev->close = mxt_input_close; + data->client = client; + data->input_dev = input_dev; + data->pdata = pdata; + data->irq = client->irq; + + mxt_calc_resolution(data); + __set_bit(EV_ABS, input_dev->evbit); __set_bit(EV_KEY, input_dev->evbit); __set_bit(BTN_TOUCH, input_dev->keybit); /* For single touch */ input_set_abs_params(input_dev, ABS_X, - 0, MXT_MAX_XC, 0, 0); + 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_Y, - 0, MXT_MAX_YC, 0, 0); + 0, data->max_y, 0, 0); /* For multi touch */ input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, MXT_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, - 0, MXT_MAX_XC, 0, 0); + 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, - 0, MXT_MAX_YC, 0, 0); + 0, data->max_y, 0, 0); input_set_drvdata(input_dev, data); - - data->client = client; - data->input_dev = input_dev; - data->pdata = pdata; - data->irq = client->irq; - i2c_set_clientdata(client, data); error = 
mxt_initialize(data); From 08960a070add74cda8c968b8ace5418a5acf17c3 Mon Sep 17 00:00:00 2001 From: Iiro Valkonen Date: Tue, 12 Apr 2011 23:16:40 -0700 Subject: [PATCH 0060/3380] Input: atmel_mxt_ts - make CHG line high after enabling interrupts Make the CHG line (interrupt line) go high after the interrupts have been enabled to make sure we don't miss the falling edge. Signed-off-by: Iiro Valkonen Acked-by: Joonyoung Shim Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index a97905a17b72..2accf1dffee9 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -814,10 +814,6 @@ static int mxt_initialize(struct mxt_data *data) if (error) return error; - error = mxt_make_highchg(data); - if (error) - return error; - mxt_handle_pdata(data); /* Backup to memory */ @@ -1005,6 +1001,10 @@ static ssize_t mxt_update_fw_store(struct device *dev, enable_irq(data->irq); + error = mxt_make_highchg(data); + if (error) + return error; + return count; } @@ -1115,6 +1115,10 @@ static int __devinit mxt_probe(struct i2c_client *client, goto err_free_object; } + error = mxt_make_highchg(data); + if (error) + goto err_free_irq; + error = input_register_device(input_dev); if (error) goto err_free_irq; From 8b86c1c28f569301aa1a113a060f9ed803300903 Mon Sep 17 00:00:00 2001 From: Joonyoung Shim Date: Tue, 12 Apr 2011 23:18:59 -0700 Subject: [PATCH 0061/3380] Input: atmel_mxt_ts - convert to MT protocol B Atmel touchscreen chips can use MT protocol B because they can assign unique id to ABS_MT_TRACKING_ID from finger id provided by hardware. 
Signed-off-by: Joonyoung Shim Reviewed-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 27 +++++++++++++----------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 2accf1dffee9..1e61387c73ca 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include @@ -504,19 +504,21 @@ static void mxt_input_report(struct mxt_data *data, int single_id) if (!finger[id].status) continue; - input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, - finger[id].status != MXT_RELEASE ? - finger[id].area : 0); - input_report_abs(input_dev, ABS_MT_POSITION_X, - finger[id].x); - input_report_abs(input_dev, ABS_MT_POSITION_Y, - finger[id].y); - input_mt_sync(input_dev); + input_mt_slot(input_dev, id); + input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, + finger[id].status != MXT_RELEASE); - if (finger[id].status == MXT_RELEASE) - finger[id].status = 0; - else + if (finger[id].status != MXT_RELEASE) { finger_num++; + input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, + finger[id].area); + input_report_abs(input_dev, ABS_MT_POSITION_X, + finger[id].x); + input_report_abs(input_dev, ABS_MT_POSITION_Y, + finger[id].y); + } else { + finger[id].status = 0; + } } input_report_key(input_dev, BTN_TOUCH, finger_num > 0); @@ -1094,6 +1096,7 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_y, 0, 0); /* For multi touch */ + input_mt_init_slots(input_dev, MXT_MAX_FINGER); input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, MXT_MAX_AREA, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_X, From e10b376e98332edcc2530aaed384a7e248477052 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 21 Mar 2011 16:50:18 +0200 Subject: [PATCH 0062/3380] UBI: make the control character device non-seekable This patch 
makes the UBI control device (/dev/ubi_ctrl) non-seekable. The seek operation is not applicable to this file, so it is cleaner to explicitly return error (which the added 'no_llseek()' does) than trying to change the position (which the removed 'default_llseek()' does). This is an API break, but the only known user of this interface is mtd-utils which does not need the seeking functionality. And any app which relies on this is broken, but I'm not aware of such apps. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/cdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index af9fb0ff8210..9a1703286411 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -1100,5 +1100,5 @@ const struct file_operations ubi_ctrl_cdev_operations = { .owner = THIS_MODULE, .unlocked_ioctl = ctrl_cdev_ioctl, .compat_ioctl = ctrl_cdev_compat_ioctl, - .llseek = noop_llseek, + .llseek = no_llseek, }; From 6748482f4153fc0e095aa3dc831d5edac5656a80 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 15 Mar 2011 16:25:38 +0200 Subject: [PATCH 0063/3380] UBI: re-name set volume properties ioctl Rename the ioctl which sets volume properties from 'UBI_IOCSETPROP' to 'UBI_IOCSETVOLPROP' to reflect the fact that this ioctl is about volume properties, not device properties. This is also consistent with the other volume ioctl name - 'UBI_IOCVOLUP'. The main motivation for the re-name, however, is that we are going to introduce the per-UBI device "set properties" ioctl, so we need good and logical naming. At the same time, re-name the "set volume properties request" data structure from 'struct ubi_set_prop_req' to 'struct ubi_set_vol_prop_req'. And re-name 'UBI_PROP_DIRECT_WRITE' to 'UBI_VOL_PROP_DIRECT_WRITE'.
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/cdev.c | 8 ++++---- include/mtd/ubi-user.h | 19 ++++++++++--------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 9a1703286411..4119cb857c97 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -561,18 +561,18 @@ static long vol_cdev_ioctl(struct file *file, unsigned int cmd, } /* Set volume property command */ - case UBI_IOCSETPROP: + case UBI_IOCSETVOLPROP: { - struct ubi_set_prop_req req; + struct ubi_set_vol_prop_req req; err = copy_from_user(&req, argp, - sizeof(struct ubi_set_prop_req)); + sizeof(struct ubi_set_vol_prop_req)); if (err) { err = -EFAULT; break; } switch (req.property) { - case UBI_PROP_DIRECT_WRITE: + case UBI_VOL_PROP_DIRECT_WRITE: mutex_lock(&ubi->device_mutex); desc->vol->direct_writes = !!req.value; mutex_unlock(&ubi->device_mutex); diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index c0d47ad4b103..8d8484b1ed46 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -131,7 +131,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~ * * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be - * used. A pointer to a &struct ubi_set_prop_req object is expected to be + * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be * passed. The object describes which property should be set, and to which value * it should be set. 
*/ @@ -186,7 +186,8 @@ /* Check if LEB is mapped command */ #define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, __s32) /* Set an UBI volume property */ -#define UBI_IOCSETPROP _IOW(UBI_VOL_IOC_MAGIC, 6, struct ubi_set_prop_req) +#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \ + struct ubi_set_vol_prop_req) /* Maximum MTD device name length supported by UBI */ #define MAX_UBI_MTD_NAME_LEN 127 @@ -225,11 +226,11 @@ enum { /* * UBI set property ioctl constants * - * @UBI_PROP_DIRECT_WRITE: allow / disallow user to directly write and - * erase individual eraseblocks on dynamic volumes + * @UBI_VOL_PROP_DIRECT_WRITE: allow / disallow user to directly write and + * erase individual eraseblocks on dynamic volumes */ enum { - UBI_PROP_DIRECT_WRITE = 1, + UBI_VOL_PROP_DIRECT_WRITE = 1, }; /** @@ -397,13 +398,13 @@ struct ubi_map_req { /** - * struct ubi_set_prop_req - a data structure used to set an ubi volume - * property. - * @property: property to set (%UBI_PROP_DIRECT_WRITE) + * struct ubi_set_vol_prop_req - a data structure used to set an ubi volume + * property. + * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) * @padding: reserved for future, not used, has to be zeroed * @value: value to set */ -struct ubi_set_prop_req { +struct ubi_set_vol_prop_req { __u8 property; __u8 padding[7]; __u64 value; From e8e088de305d7cc00b2c8b2a857ceb62d5fa68d3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 15 Mar 2011 16:37:57 +0200 Subject: [PATCH 0064/3380] UBI: cleanup comments around volume properties Cleanup and improve commentaries around the "set volume properties" ioctl, make a simple indentation fix as well. 
Signed-off-by: Artem Bityutskiy --- include/mtd/ubi-user.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index 8d8484b1ed46..e70bd347dbbb 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -224,13 +224,14 @@ enum { }; /* - * UBI set property ioctl constants + * UBI set volume property ioctl constants. * - * @UBI_VOL_PROP_DIRECT_WRITE: allow / disallow user to directly write and - * erase individual eraseblocks on dynamic volumes + * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0) + * user to directly write and erase individual + * eraseblocks on dynamic volumes */ enum { - UBI_VOL_PROP_DIRECT_WRITE = 1, + UBI_VOL_PROP_DIRECT_WRITE = 1, }; /** @@ -398,7 +399,7 @@ struct ubi_map_req { /** - * struct ubi_set_vol_prop_req - a data structure used to set an ubi volume + * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume * property. * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) * @padding: reserved for future, not used, has to be zeroed From 3627924acf70a9a26587712e4888ee7144489678 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 28 Mar 2011 10:04:09 +0300 Subject: [PATCH 0065/3380] UBI: use __packed instead of __attribute__((packed)) There was an attempt to standardize various "__attribute__" and other macros in order to have potentially portable and more consistent code, see commit 82ddcb040570411fc2d421d96b3e69711c670328. Note, that commit refers to Robert Love's blog post, but the URL is broken, the valid URL is: http://blog.rlove.org/2005/10/with-little-help-from-your-compiler.html Moreover, nowadays checkpatch.pl warns about using __attribute__((packed)): "WARNING: __packed is preferred over __attribute__((packed))" It is not a big deal for UBI to use __packed, so let's do it.
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/ubi-media.h | 6 +++--- include/mtd/ubi-user.h | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/mtd/ubi/ubi-media.h b/drivers/mtd/ubi/ubi-media.h index 503ea9b27309..6fb8ec2174a5 100644 --- a/drivers/mtd/ubi/ubi-media.h +++ b/drivers/mtd/ubi/ubi-media.h @@ -164,7 +164,7 @@ struct ubi_ec_hdr { __be32 image_seq; __u8 padding2[32]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_vid_hdr - on-flash UBI volume identifier header. @@ -292,7 +292,7 @@ struct ubi_vid_hdr { __be64 sqnum; __u8 padding3[12]; __be32 hdr_crc; -} __attribute__ ((packed)); +} __packed; /* Internal UBI volumes count */ #define UBI_INT_VOL_COUNT 1 @@ -373,6 +373,6 @@ struct ubi_vtbl_record { __u8 flags; __u8 padding[23]; __be32 crc; -} __attribute__ ((packed)); +} __packed; #endif /* !__UBI_MEDIA_H__ */ diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index e70bd347dbbb..a3903423c005 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -310,7 +310,7 @@ struct ubi_mkvol_req { __s16 name_len; __s8 padding2[4]; char name[UBI_MAX_VOLUME_NAME + 1]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_rsvol_req - a data structure used in volume re-size requests. @@ -326,7 +326,7 @@ struct ubi_mkvol_req { struct ubi_rsvol_req { __s64 bytes; __s32 vol_id; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_rnvol_req - volumes re-name request. @@ -368,7 +368,7 @@ struct ubi_rnvol_req { __s8 padding2[2]; char name[UBI_MAX_VOLUME_NAME + 1]; } ents[UBI_MAX_RNVOL]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_leb_change_req - a data structure used in atomic LEB change @@ -383,7 +383,7 @@ struct ubi_leb_change_req { __s32 bytes; __s8 dtype; __s8 padding[7]; -} __attribute__ ((packed)); +} __packed; /** * struct ubi_map_req - a data structure used in map LEB requests. 
@@ -395,7 +395,7 @@ struct ubi_map_req { __s32 lnum; __s8 dtype; __s8 padding[3]; -} __attribute__ ((packed)); +} __packed; /** @@ -409,6 +409,6 @@ struct ubi_set_vol_prop_req { __u8 property; __u8 padding[7]; __u64 value; -} __attribute__ ((packed)); +} __packed; #endif /* __UBI_USER_H__ */ From feddbb34ebd75e9b6bf573b852079e327a88c07a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 28 Mar 2011 10:12:25 +0300 Subject: [PATCH 0066/3380] UBI: fix minor stylistic issues Fix checkpatch.pl errors and warnings: * space before tab * line over 80 characters * include linux/ioctl.h instead of asm/ioctl.h Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/cdev.c | 4 ++-- drivers/mtd/ubi/debug.c | 18 +++++++++--------- drivers/mtd/ubi/io.c | 4 ++-- drivers/mtd/ubi/scan.c | 2 +- drivers/mtd/ubi/ubi.h | 4 ++-- drivers/mtd/ubi/wl.c | 3 ++- include/linux/mtd/ubi.h | 4 ++-- include/mtd/ubi-user.h | 6 +++--- 8 files changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 4119cb857c97..191f3bb3c41a 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -115,7 +115,7 @@ static int vol_cdev_open(struct inode *inode, struct file *file) mode = UBI_READONLY; dbg_gen("open device %d, volume %d, mode %d", - ubi_num, vol_id, mode); + ubi_num, vol_id, mode); desc = ubi_open_volume(ubi_num, vol_id, mode); if (IS_ERR(desc)) @@ -158,7 +158,7 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) loff_t new_offset; if (vol->updating) { - /* Update is in progress, seeking is prohibited */ + /* Update is in progress, seeking is prohibited */ dbg_err("updating"); return -EBUSY; } diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index d4d07e5f138f..0cd5beabe9c9 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -75,15 +75,15 @@ void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { printk(KERN_DEBUG "Volume identifier header dump:\n"); printk(KERN_DEBUG 
"\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); - printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); - printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); - printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); - printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); - printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); - printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); - printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); - printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); - printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); + printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); + printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); + printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); + printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); + printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); + printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); + printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); printk(KERN_DEBUG "\tsqnum %llu\n", (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index e347cc4388ed..d58ceb1ca8fd 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -189,8 +189,8 @@ int ubi_io_read(const struct ubi_device *ubi, void *buf, int pnum, int offset, } if (retries++ < UBI_IO_RETRIES) { - dbg_io("error %d%s while reading %d bytes from PEB %d:%d," - " read only %zd bytes, retry", + dbg_io("error %d%s while reading %d bytes from PEB " + "%d:%d, read only %zd bytes, retry", err, errstr, len, pnum, offset, read); yield(); goto retry; diff --git a/drivers/mtd/ubi/scan.c 
b/drivers/mtd/ubi/scan.c index d2d12ab7def4..2135a53732ff 100644 --- a/drivers/mtd/ubi/scan.c +++ b/drivers/mtd/ubi/scan.c @@ -1103,7 +1103,7 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) * otherwise, only print a warning. */ if (si->corr_peb_count >= max_corr) { - ubi_err("too many corrupted PEBs, refusing this device"); + ubi_err("too many corrupted PEBs, refusing"); return -EINVAL; } } diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h index f1be8b79663c..c6c22295898e 100644 --- a/drivers/mtd/ubi/ubi.h +++ b/drivers/mtd/ubi/ubi.h @@ -341,8 +341,8 @@ struct ubi_wl_entry; * protected from the wear-leveling worker) * @pq_head: protection queue head * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, - * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, - * @erroneous, and @erroneous_peb_count fields + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, + * @erroneous, and @erroneous_peb_count fields * @move_mutex: serializes eraseblock moves * @work_sem: synchronizes the WL worker with use tasks * @wl_scheduled: non-zero if the wear-leveling was scheduled diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index b4cf57db2556..ff2c4956eeff 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1570,7 +1570,8 @@ void ubi_wl_close(struct ubi_device *ubi) * @ec: the erase counter to check * * This function returns zero if the erase counter of physical eraseblock @pnum - * is equivalent to @ec, and a negative error code if not or if an error occurred. + * is equivalent to @ec, and a negative error code if not or if an error + * occurred. 
*/ static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) { diff --git a/include/linux/mtd/ubi.h b/include/linux/mtd/ubi.h index 84854edf4436..15da0e99f48a 100644 --- a/include/linux/mtd/ubi.h +++ b/include/linux/mtd/ubi.h @@ -21,7 +21,7 @@ #ifndef __LINUX_UBI_H__ #define __LINUX_UBI_H__ -#include +#include #include #include @@ -87,7 +87,7 @@ enum { * physical eraseblock size and on how much bytes UBI headers consume. But * because of the volume alignment (@alignment), the usable size of logical * eraseblocks if a volume may be less. The following equation is true: - * @usable_leb_size = LEB size - (LEB size mod @alignment), + * @usable_leb_size = LEB size - (LEB size mod @alignment), * where LEB size is the logical eraseblock size defined by the UBI device. * * The alignment is multiple to the minimal flash input/output unit size or %1 diff --git a/include/mtd/ubi-user.h b/include/mtd/ubi-user.h index a3903423c005..3c4109777aff 100644 --- a/include/mtd/ubi-user.h +++ b/include/mtd/ubi-user.h @@ -406,9 +406,9 @@ struct ubi_map_req { * @value: value to set */ struct ubi_set_vol_prop_req { - __u8 property; - __u8 padding[7]; - __u64 value; + __u8 property; + __u8 padding[7]; + __u64 value; } __packed; #endif /* __UBI_USER_H__ */ From 1426414431a8d37a6e631e0b5e2ad6186b81876a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 14 Apr 2011 11:36:31 +0300 Subject: [PATCH 0067/3380] UBI: fix typo in a message When a PEB passes the torture test, UBI prints "do not mark it a bad", but should print "do not mark it as bad". This patch corrects the typo. 
Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/io.c b/drivers/mtd/ubi/io.c index d58ceb1ca8fd..8c1b1c7bc4a7 100644 --- a/drivers/mtd/ubi/io.c +++ b/drivers/mtd/ubi/io.c @@ -465,7 +465,7 @@ static int torture_peb(struct ubi_device *ubi, int pnum) } err = patt_count; - ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); + ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum); out: mutex_unlock(&ubi->buf_mutex); From 4d05a28db56225bbab5e1321d818f318e92a4657 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 18:25:47 -0400 Subject: [PATCH 0068/3380] xen: add blkback support Signed-off-by: Ian Campbell Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/Makefile --- drivers/xen/Kconfig | 8 + drivers/xen/Makefile | 1 + drivers/xen/blkback/Makefile | 3 + drivers/xen/blkback/blkback.c | 656 ++++++++++++++++++++++++++++++++ drivers/xen/blkback/common.h | 139 +++++++ drivers/xen/blkback/interface.c | 181 +++++++++ drivers/xen/blkback/vbd.c | 118 ++++++ drivers/xen/blkback/xenbus.c | 541 ++++++++++++++++++++++++++ include/xen/blkif.h | 123 ++++++ 9 files changed, 1770 insertions(+) create mode 100644 drivers/xen/blkback/Makefile create mode 100644 drivers/xen/blkback/blkback.c create mode 100644 drivers/xen/blkback/common.h create mode 100644 drivers/xen/blkback/interface.c create mode 100644 drivers/xen/blkback/vbd.c create mode 100644 drivers/xen/blkback/xenbus.c create mode 100644 include/xen/blkif.h diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index a59638b37c1a..fb1af628cbfc 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,6 +37,14 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. 
+config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND && BLOCK + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. + config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index f420f1ff7f13..29c0a416f082 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile new file mode 100644 index 000000000000..8bab63da3b3e --- /dev/null +++ b/drivers/xen/blkback/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o + +blkbk-y := blkback.o xenbus.o interface.o vbd.o diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c new file mode 100644 index 000000000000..5b8d50e344b4 --- /dev/null +++ b/drivers/xen/blkback/blkback.c @@ -0,0 +1,656 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/main.c + * + * Back-end of the driver for virtual block devices. This portion of the + * driver exports a 'unified' block-device interface that can be accessed + * by any operating system that implements a compatible front end. 
A + * reference front-end implementation can be found in: + * arch/xen/drivers/blkif/frontend + * + * Copyright (c) 2003-2004, Keir Fraser & Steve Hand + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include "common.h" + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. 
+ * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +module_param_named(reqs, blkif_reqs, int, 0); +MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); + +/* Run-time switchable: /sys/module/blkback/parameters/ */ +static unsigned int log_stats = 0; +static unsigned int debug_lvl = 0; +module_param(log_stats, int, 0644); +module_param(debug_lvl, int, 0644); + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. + */ +typedef struct { + blkif_t *blkif; + u64 id; + int nr_pages; + atomic_t pendcnt; + unsigned short operation; + int status; + struct list_head free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static struct list_head pending_free; +static DEFINE_SPINLOCK(pending_free_lock); +static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); + +#define BLKBACK_INVALID_HANDLE (~0) + +static struct page **pending_pages; +static grant_handle_t *pending_grant_handles; + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + return (unsigned long)pfn_to_kaddr(pfn); +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + + +static int do_block_io_op(blkif_t *blkif); +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req); +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st); + 
+/****************************************************************** + * misc small helpers + */ +static pending_req_t* alloc_req(void) +{ + pending_req_t *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&pending_free_lock, flags); + if (!list_empty(&pending_free)) { + req = list_entry(pending_free.next, pending_req_t, free_list); + list_del(&req->free_list); + } + spin_unlock_irqrestore(&pending_free_lock, flags); + return req; +} + +static void free_req(pending_req_t *req) +{ + unsigned long flags; + int was_empty; + + spin_lock_irqsave(&pending_free_lock, flags); + was_empty = list_empty(&pending_free); + list_add(&req->free_list, &pending_free); + spin_unlock_irqrestore(&pending_free_lock, flags); + if (was_empty) + wake_up(&pending_free_wq); +} + +static void unplug_queue(blkif_t *blkif) +{ + if (blkif->plug == NULL) + return; + if (blkif->plug->unplug_fn) + blkif->plug->unplug_fn(blkif->plug); + blk_put_queue(blkif->plug); + blkif->plug = NULL; +} + +static void plug_queue(blkif_t *blkif, struct block_device *bdev) +{ + request_queue_t *q = bdev_get_queue(bdev); + + if (q == blkif->plug) + return; + unplug_queue(blkif); + blk_get_queue(q); + blkif->plug = q; +} + +static void fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); +} + +/****************************************************************** + * SCHEDULER FUNCTIONS + */ + +static void print_stats(blkif_t *blkif) +{ + printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", + current->comm, 
blkif->st_oo_req, + blkif->st_rd_req, blkif->st_wr_req, blkif->st_br_req); + blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000); + blkif->st_rd_req = 0; + blkif->st_wr_req = 0; + blkif->st_oo_req = 0; +} + +int blkif_schedule(void *arg) +{ + blkif_t *blkif = arg; + + blkif_get(blkif); + + if (debug_lvl) + printk(KERN_DEBUG "%s: started\n", current->comm); + + while (!kthread_should_stop()) { + if (try_to_freeze()) + continue; + + wait_event_interruptible( + blkif->wq, + blkif->waiting_reqs || kthread_should_stop()); + wait_event_interruptible( + pending_free_wq, + !list_empty(&pending_free) || kthread_should_stop()); + + blkif->waiting_reqs = 0; + smp_mb(); /* clear flag *before* checking for work */ + + if (do_block_io_op(blkif)) + blkif->waiting_reqs = 1; + unplug_queue(blkif); + + if (log_stats && time_after(jiffies, blkif->st_print)) + print_stats(blkif); + } + + if (log_stats) + print_stats(blkif); + if (debug_lvl) + printk(KERN_DEBUG "%s: exiting\n", current->comm); + + blkif->xenblkd = NULL; + blkif_put(blkif); + + return 0; +} + +/****************************************************************** + * COMPLETION CALLBACK -- Called as bh->b_end_io() + */ + +static void __end_block_io_op(pending_req_t *pending_req, int error) +{ + /* An error fails the entire request. 
*/ + if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && + (error == -EOPNOTSUPP)) { + DPRINTK("blkback: write barrier op failed, not supported\n"); + blkback_barrier(XBT_NIL, pending_req->blkif->be, 0); + pending_req->status = BLKIF_RSP_EOPNOTSUPP; + } else if (error) { + DPRINTK("Buffer not up-to-date at end of operation, " + "error=%d\n", error); + pending_req->status = BLKIF_RSP_ERROR; + } + + if (atomic_dec_and_test(&pending_req->pendcnt)) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } +} + +static int end_block_io_op(struct bio *bio, unsigned int done, int error) +{ + if (bio->bi_size != 0) + return 1; + __end_block_io_op(bio->bi_private, error); + bio_put(bio); + return error; +} + + +/****************************************************************************** + * NOTIFICATION FROM GUEST OS. + */ + +static void blkif_notify_work(blkif_t *blkif) +{ + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; +} + + + +/****************************************************************** + * DOWNWARD CALLS -- These interface with the block-device layer proper. + */ + +static int do_block_io_op(blkif_t *blkif) +{ + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + blkif_request_t req; + pending_req_t *pending_req; + RING_IDX rc, rp; + int more_to_do = 0; + + rc = blk_rings->common.req_cons; + rp = blk_rings->common.sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while (rc != rp) { + + if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) + break; + + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; + more_to_do = 1; + break; + } + + if (kthread_should_stop()) { + more_to_do = 1; + break; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(&req, RING_GET_REQUEST(&blk_rings->native, rc), sizeof(req)); + break; + case BLKIF_PROTOCOL_X86_32: + blkif_get_x86_32_req(&req, RING_GET_REQUEST(&blk_rings->x86_32, rc)); + break; + case BLKIF_PROTOCOL_X86_64: + blkif_get_x86_64_req(&req, RING_GET_REQUEST(&blk_rings->x86_64, rc)); + break; + default: + BUG(); + } + blk_rings->common.req_cons = ++rc; /* before make_response() */ + + /* Apply all sanity checks to /private copy/ of request. */ + barrier(); + + switch (req.operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + case BLKIF_OP_WRITE_BARRIER: + blkif->st_br_req++; + /* fall through */ + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, &req, pending_req); + break; + default: + /* A good sign something is wrong: sleep for a while to + * avoid excessive CPU consumption by a bad guest. */ + msleep(1); + DPRINTK("error: unknown block io operation [%d]\n", + req.operation); + make_response(blkif, req.id, req.operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + + /* Yield point for this unbounded loop. 
*/ + cond_resched(); + } + + return more_to_do; +} + +static void dispatch_rw_block_io(blkif_t *blkif, + blkif_request_t *req, + pending_req_t *pending_req) +{ + extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct phys_req preq; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg; + struct bio *bio = NULL; + int ret, i; + int operation; + + switch (req->operation) { + case BLKIF_OP_READ: + operation = READ; + break; + case BLKIF_OP_WRITE: + operation = WRITE; + break; + case BLKIF_OP_WRITE_BARRIER: + operation = WRITE_BARRIER; + break; + default: + operation = 0; /* make gcc happy */ + BUG(); + } + + /* Check that number of segments is sane. */ + nseg = req->nr_segments; + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTK("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + preq.dev = req->handle; + preq.sector_number = req->sector_number; + preq.nr_sects = 0; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + uint32_t flags; + + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->seg[i].last_sect < req->seg[i].first_sect)) + goto fail_response; + preq.nr_sects += seg[i].nsec; + + flags = GNTMAP_host_map; + if (operation != READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->seg[i].gref, blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = 
BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + set_phys_to_machine(__pa(vaddr( + pending_req, i)) >> PAGE_SHIFT, + FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (ret) + goto fail_flush; + + if (vbd_translate(&preq, blkif, operation) != 0) { + DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", + operation == READ ? "read" : "write", + preq.sector_number, + preq.sector_number + preq.nr_sects, preq.dev); + goto fail_flush; + } + + plug_queue(blkif, preq.bdev); + atomic_set(&pending_req->pendcnt, 1); + blkif_get(blkif); + + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_put_bio; + } + + while ((bio == NULL) || + (bio_add_page(bio, + virt_to_page(vaddr(pending_req, i)), + seg[i].nsec << 9, + seg[i].buf & ~PAGE_MASK) == 0)) { + if (bio) { + atomic_inc(&pending_req->pendcnt); + submit_bio(operation, bio); + } + + bio = bio_alloc(GFP_KERNEL, nseg-i); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = preq.sector_number; + } + + preq.sector_number += seg[i].nsec; + } + + if (!bio) { + BUG_ON(operation != WRITE_BARRIER); + bio = bio_alloc(GFP_KERNEL, 0); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bi_bdev = preq.bdev; + bio->bi_private = pending_req; + bio->bi_end_io = end_block_io_op; + bio->bi_sector = -1; + } + + submit_bio(operation, bio); + + if (operation == READ) + blkif->st_rd_sect += preq.nr_sects; + else if (operation == WRITE || operation == WRITE_BARRIER) + blkif->st_wr_sect += preq.nr_sects; + + return; + + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, 
req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); + msleep(1); /* back off a bit */ + return; + + fail_put_bio: + __end_block_io_op(pending_req, -EINVAL); + if (bio) + bio_put(bio); + unplug_queue(blkif); + msleep(1); /* back off a bit */ + return; +} + + + +/****************************************************************** + * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING + */ + + +static void make_response(blkif_t *blkif, u64 id, + unsigned short op, int st) +{ + blkif_response_t resp; + unsigned long flags; + blkif_back_rings_t *blk_rings = &blkif->blk_rings; + int more_to_do = 0; + int notify; + + resp.id = id; + resp.operation = op; + resp.status = st; + + spin_lock_irqsave(&blkif->blk_ring_lock, flags); + /* Place on the response ring for the relevant domain. */ + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + memcpy(RING_GET_RESPONSE(&blk_rings->native, blk_rings->native.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_32: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_32, blk_rings->x86_32.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + case BLKIF_PROTOCOL_X86_64: + memcpy(RING_GET_RESPONSE(&blk_rings->x86_64, blk_rings->x86_64.rsp_prod_pvt), + &resp, sizeof(resp)); + break; + default: + BUG(); + } + blk_rings->common.rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&blk_rings->common, notify); + if (blk_rings->common.rsp_prod_pvt == blk_rings->common.req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(&blk_rings->common, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(&blk_rings->common)) { + more_to_do = 1; + } + + spin_unlock_irqrestore(&blkif->blk_ring_lock, flags); + + if (more_to_do) + blkif_notify_work(blkif); + if (notify) + notify_remote_via_irq(blkif->irq); +} + +/* + * Module initialisation: allocate the pool of pending requests, the + * grant-handle table and the page vector backing them, put every request + * on the free list, then call blkif_interface_init() and blkif_xenbus_init(). + * + * Returns 0 on success, -ENODEV when not running on Xen and -ENOMEM when + * any of the three allocations fails. + */ +static int __init blkif_init(void) +{ + int i, mmap_pages; + + if (!is_running_on_xen()) + return -ENODEV; + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + + pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkif_reqs, GFP_KERNEL); + pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + + if (!pending_reqs || !pending_grant_handles || !pending_pages) + goto out_of_memory; + + for (i = 0; i < mmap_pages; i++) + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + + blkif_interface_init(); + + /* + * Clear the whole array: pending_reqs is a pointer, so + * sizeof(pending_reqs) would only cover pointer-size bytes. + */ + memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs); + INIT_LIST_HEAD(&pending_free); + + for (i = 0; i < blkif_reqs; i++) + list_add_tail(&pending_reqs[i].free_list, &pending_free); + + blkif_xenbus_init(); + + return 0; + + out_of_memory: + kfree(pending_reqs); + kfree(pending_grant_handles); + free_empty_pages_and_pagevec(pending_pages, mmap_pages); + printk("%s: out of memory\n", __FUNCTION__); + return -ENOMEM; +} + +module_init(blkif_init); + +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h new file mode 100644 index 000000000000..422e935528be --- /dev/null +++ b/drivers/xen/blkback/common.h @@ -0,0 +1,139 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of
charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __BLKIF__BACKEND__COMMON_H__ +#define __BLKIF__BACKEND__COMMON_H__ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DPRINTK(_f, _a...) \ + pr_debug("(file=%s, line=%d) " _f, \ + __FILE__ , __LINE__ , ## _a ) + +struct vbd { + blkif_vdev_t handle; /* what the domain refers to this vbd as */ + unsigned char readonly; /* Non-zero -> read-only */ + unsigned char type; /* VDISK_xxx */ + u32 pdevice; /* phys device that this vbd maps to */ + struct block_device *bdev; +}; + +struct backend_info; + +typedef struct blkif_st { + /* Unique identifier for this interface. */ + domid_t domid; + unsigned int handle; + /* Physical parameters of the comms window. */ + unsigned int irq; + /* Comms information. 
*/ + enum blkif_protocol blk_protocol; + blkif_back_rings_t blk_rings; + struct vm_struct *blk_ring_area; + /* The VBD attached to this interface. */ + struct vbd vbd; + /* Back pointer to the backend_info. */ + struct backend_info *be; + /* Private fields. */ + spinlock_t blk_ring_lock; + atomic_t refcnt; + + wait_queue_head_t wq; + struct task_struct *xenblkd; + unsigned int waiting_reqs; + request_queue_t *plug; + + /* statistics */ + unsigned long st_print; + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_br_req; + int st_rd_sect; + int st_wr_sect; + + wait_queue_head_t waiting_to_free; + + grant_handle_t shmem_handle; + grant_ref_t shmem_ref; +} blkif_t; + +blkif_t *blkif_alloc(domid_t domid); +void blkif_disconnect(blkif_t *blkif); +void blkif_free(blkif_t *blkif); +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); + +#define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) +#define blkif_put(_b) \ + do { \ + if (atomic_dec_and_test(&(_b)->refcnt)) \ + wake_up(&(_b)->waiting_to_free);\ + } while (0) + +/* Create a vbd. 
*/ +int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, + unsigned minor, int readonly, int cdrom); +void vbd_free(struct vbd *vbd); + +unsigned long long vbd_size(struct vbd *vbd); +unsigned int vbd_info(struct vbd *vbd); +unsigned long vbd_secsize(struct vbd *vbd); + +struct phys_req { + unsigned short dev; + unsigned short nr_sects; + struct block_device *bdev; + blkif_sector_t sector_number; +}; + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); + +void blkif_interface_init(void); + +void blkif_xenbus_init(void); + +irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +int blkif_schedule(void *arg); + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state); + +#endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c new file mode 100644 index 000000000000..81821bdc7ef1 --- /dev/null +++ b/drivers/xen/blkback/interface.c @@ -0,0 +1,181 @@ +/****************************************************************************** + * arch/xen/drivers/blkif/backend/interface.c + * + * Block-device interface management. 
+ * + * Copyright (c) 2004, Keir Fraser + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "common.h" +#include +#include + +static kmem_cache_t *blkif_cachep; + +blkif_t *blkif_alloc(domid_t domid) +{ + blkif_t *blkif; + + blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); + if (!blkif) + return ERR_PTR(-ENOMEM); + + memset(blkif, 0, sizeof(*blkif)); + blkif->domid = domid; + spin_lock_init(&blkif->blk_ring_lock); + atomic_set(&blkif->refcnt, 1); + init_waitqueue_head(&blkif->wq); + blkif->st_print = jiffies; + init_waitqueue_head(&blkif->waiting_to_free); + + return blkif; +} + +static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, shared_page, blkif->domid); + + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) + BUG(); + + if (op.status) { + DPRINTK(" Grant table operation failure !\n"); + return op.status; + } + + blkif->shmem_ref = shared_page; + blkif->shmem_handle = op.handle; + + return 0; +} + +static void unmap_frontend_page(blkif_t *blkif) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr, + GNTMAP_host_map, blkif->shmem_handle); + + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) + BUG(); +} + +int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +{ + int err; + + /* Already connected through? 
*/ + if (blkif->irq) + return 0; + + if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + return -ENOMEM; + + err = map_frontend_page(blkif, shared_page); + if (err) { + free_vm_area(blkif->blk_ring_area); + return err; + } + + switch (blkif->blk_protocol) { + case BLKIF_PROTOCOL_NATIVE: + { + blkif_sring_t *sring; + sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_32: + { + blkif_x86_32_sring_t *sring_x86_32; + sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); + break; + } + case BLKIF_PROTOCOL_X86_64: + { + blkif_x86_64_sring_t *sring_x86_64; + sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); + break; + } + default: + BUG(); + } + + err = bind_interdomain_evtchn_to_irqhandler( + blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); + if (err < 0) + { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + return err; + } + blkif->irq = err; + + return 0; +} + +void blkif_disconnect(blkif_t *blkif) +{ + if (blkif->xenblkd) { + kthread_stop(blkif->xenblkd); + blkif->xenblkd = NULL; + } + + atomic_dec(&blkif->refcnt); + wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0); + atomic_inc(&blkif->refcnt); + + if (blkif->irq) { + unbind_from_irqhandler(blkif->irq, blkif); + blkif->irq = 0; + } + + if (blkif->blk_rings.common.sring) { + unmap_frontend_page(blkif); + free_vm_area(blkif->blk_ring_area); + blkif->blk_rings.common.sring = NULL; + } +} + +void blkif_free(blkif_t *blkif) +{ + if (!atomic_dec_and_test(&blkif->refcnt)) + BUG(); + kmem_cache_free(blkif_cachep, blkif); +} + +void __init blkif_interface_init(void) +{ + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + 0, 0, NULL, 
NULL); +} diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c new file mode 100644 index 000000000000..1fb31d0236b4 --- /dev/null +++ b/drivers/xen/blkback/vbd.c @@ -0,0 +1,118 @@ +/****************************************************************************** + * blkback/vbd.c + * + * Routines for managing virtual block devices (VBDs). + * + * Copyright (c) 2003-2005, Keir Fraser & Steve Hand + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "common.h" + +#define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ + (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + +unsigned long long vbd_size(struct vbd *vbd) +{ + return vbd_sz(vbd); +} + +unsigned int vbd_info(struct vbd *vbd) +{ + return vbd->type | (vbd->readonly?VDISK_READONLY:0); +} + +unsigned long vbd_secsize(struct vbd *vbd) +{ + return bdev_hardsect_size(vbd->bdev); +} + +int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, + unsigned minor, int readonly, int cdrom) +{ + struct vbd *vbd; + struct block_device *bdev; + + vbd = &blkif->vbd; + vbd->handle = handle; + vbd->readonly = readonly; + vbd->type = 0; + + vbd->pdevice = MKDEV(major, minor); + + bdev = open_by_devnum(vbd->pdevice, + vbd->readonly ? FMODE_READ : FMODE_WRITE); + + if (IS_ERR(bdev)) { + DPRINTK("vbd_creat: device %08x could not be opened.\n", + vbd->pdevice); + return -ENOENT; + } + + vbd->bdev = bdev; + + if (vbd->bdev->bd_disk == NULL) { + DPRINTK("vbd_creat: device %08x doesn't exist.\n", + vbd->pdevice); + vbd_free(vbd); + return -ENOENT; + } + + if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom) + vbd->type |= VDISK_CDROM; + if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE) + vbd->type |= VDISK_REMOVABLE; + + DPRINTK("Successful creation of handle=%04x (dom=%u)\n", + handle, blkif->domid); + return 0; +} + +void vbd_free(struct vbd *vbd) +{ + if (vbd->bdev) + blkdev_put(vbd->bdev); + vbd->bdev = NULL; +} + +int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +{ + struct vbd *vbd = &blkif->vbd; + int rc = -EACCES; + + if ((operation != READ) && vbd->readonly) + goto out; + + if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd))) + goto out; + + req->dev = vbd->pdevice; + req->bdev = vbd->bdev; + rc = 0; + + out: + return rc; +} diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c new file mode 100644 index 000000000000..80d9aa6e6ba3 --- /dev/null +++ b/drivers/xen/blkback/xenbus.c @@ -0,0 +1,541 @@ +/* Xenbus code for blkif backend + Copyright (C) 2005 Rusty 
Russell + Copyright (C) 2005 XenSource Ltd + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include +#include +#include +#include "common.h" + +#undef DPRINTK +#define DPRINTK(fmt, args...) \ + pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ + __FUNCTION__, __LINE__, ##args) + +struct backend_info +{ + struct xenbus_device *dev; + blkif_t *blkif; + struct xenbus_watch backend_watch; + unsigned major; + unsigned minor; + char *mode; +}; + +static void connect(struct backend_info *); +static int connect_ring(struct backend_info *); +static void backend_changed(struct xenbus_watch *, const char **, + unsigned int); + +static int blkback_name(blkif_t *blkif, char *buf) +{ + char *devpath, *devname; + struct xenbus_device *dev = blkif->be->dev; + + devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL); + if (IS_ERR(devpath)) + return PTR_ERR(devpath); + + if ((devname = strstr(devpath, "/dev/")) != NULL) + devname += strlen("/dev/"); + else + devname = devpath; + + snprintf(buf, TASK_COMM_LEN, "blkback.%d.%s", blkif->domid, devname); + kfree(devpath); + + return 0; +} + +static void update_blkif_status(blkif_t *blkif) +{ + int err; + char name[TASK_COMM_LEN]; + + /* Not ready to connect? */ + if (!blkif->irq || !blkif->vbd.bdev) + return; + + /* Already connected? 
*/ + if (blkif->be->dev->state == XenbusStateConnected) + return; + + /* Attempt to connect: exit if we fail to. */ + connect(blkif->be); + if (blkif->be->dev->state != XenbusStateConnected) + return; + + err = blkback_name(blkif, name); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "get blkback dev name"); + return; + } + + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); + if (IS_ERR(blkif->xenblkd)) { + err = PTR_ERR(blkif->xenblkd); + blkif->xenblkd = NULL; + xenbus_dev_error(blkif->be->dev, err, "start xenblkd"); + } +} + + +/**************************************************************** + * sysfs interface for VBD I/O requests + */ + +#define VBD_SHOW(name, format, args...) \ + static ssize_t show_##name(struct device *_dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct xenbus_device *dev = to_xenbus_device(_dev); \ + struct backend_info *be = dev->dev.driver_data; \ + \ + return sprintf(buf, format, ##args); \ + } \ + static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL) + +VBD_SHOW(oo_req, "%d\n", be->blkif->st_oo_req); +VBD_SHOW(rd_req, "%d\n", be->blkif->st_rd_req); +VBD_SHOW(wr_req, "%d\n", be->blkif->st_wr_req); +VBD_SHOW(br_req, "%d\n", be->blkif->st_br_req); +VBD_SHOW(rd_sect, "%d\n", be->blkif->st_rd_sect); +VBD_SHOW(wr_sect, "%d\n", be->blkif->st_wr_sect); + +static struct attribute *vbdstat_attrs[] = { + &dev_attr_oo_req.attr, + &dev_attr_rd_req.attr, + &dev_attr_wr_req.attr, + &dev_attr_br_req.attr, + &dev_attr_rd_sect.attr, + &dev_attr_wr_sect.attr, + NULL +}; + +static struct attribute_group vbdstat_group = { + .name = "statistics", + .attrs = vbdstat_attrs, +}; + +VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor); +VBD_SHOW(mode, "%s\n", be->mode); + +int xenvbd_sysfs_addif(struct xenbus_device *dev) +{ + int error; + + error = device_create_file(&dev->dev, &dev_attr_physical_device); + if (error) + goto fail1; + + error = device_create_file(&dev->dev, &dev_attr_mode); + if (error) + goto fail2; + 
+ error = sysfs_create_group(&dev->dev.kobj, &vbdstat_group); + if (error) + goto fail3; + + return 0; + +fail3: sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); +fail2: device_remove_file(&dev->dev, &dev_attr_mode); +fail1: device_remove_file(&dev->dev, &dev_attr_physical_device); + return error; +} + +void xenvbd_sysfs_delif(struct xenbus_device *dev) +{ + sysfs_remove_group(&dev->dev.kobj, &vbdstat_group); + device_remove_file(&dev->dev, &dev_attr_mode); + device_remove_file(&dev->dev, &dev_attr_physical_device); +} + +static int blkback_remove(struct xenbus_device *dev) +{ + struct backend_info *be = dev->dev.driver_data; + + DPRINTK(""); + + if (be->major || be->minor) + xenvbd_sysfs_delif(dev); + + if (be->backend_watch.node) { + unregister_xenbus_watch(&be->backend_watch); + kfree(be->backend_watch.node); + be->backend_watch.node = NULL; + } + + if (be->blkif) { + blkif_disconnect(be->blkif); + vbd_free(&be->blkif->vbd); + blkif_free(be->blkif); + be->blkif = NULL; + } + + kfree(be); + dev->dev.driver_data = NULL; + return 0; +} + +int blkback_barrier(struct xenbus_transaction xbt, + struct backend_info *be, int state) +{ + struct xenbus_device *dev = be->dev; + int err; + + err = xenbus_printf(xbt, dev->nodename, "feature-barrier", + "%d", state); + if (err) + xenbus_dev_fatal(dev, err, "writing feature-barrier"); + + return err; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures, and watch the store waiting for the hotplug scripts to tell us + * the device's physical major and minor numbers. Switch to InitWait. 
+ */ +static int blkback_probe(struct xenbus_device *dev, + const struct xenbus_device_id *id) +{ + int err; + struct backend_info *be = kzalloc(sizeof(struct backend_info), + GFP_KERNEL); + if (!be) { + xenbus_dev_fatal(dev, -ENOMEM, + "allocating backend structure"); + return -ENOMEM; + } + be->dev = dev; + dev->dev.driver_data = be; + + be->blkif = blkif_alloc(dev->otherend_id); + if (IS_ERR(be->blkif)) { + err = PTR_ERR(be->blkif); + be->blkif = NULL; + xenbus_dev_fatal(dev, err, "creating block interface"); + goto fail; + } + + /* setup back pointer */ + be->blkif->be = be; + + err = xenbus_watch_path2(dev, dev->nodename, "physical-device", + &be->backend_watch, backend_changed); + if (err) + goto fail; + + err = xenbus_switch_state(dev, XenbusStateInitWait); + if (err) + goto fail; + + return 0; + +fail: + DPRINTK("failed"); + blkback_remove(dev); + return err; +} + + +/** + * Callback received when the hotplug scripts have placed the physical-device + * node. Read it and the mode node, and create a vbd. If the frontend is + * ready, connect. + */ +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int err; + unsigned major; + unsigned minor; + struct backend_info *be + = container_of(watch, struct backend_info, backend_watch); + struct xenbus_device *dev = be->dev; + int cdrom = 0; + char *device_type; + + DPRINTK(""); + + err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x", + &major, &minor); + if (XENBUS_EXIST_ERR(err)) { + /* Since this watch will fire once immediately after it is + registered, we expect this. Ignore it, and wait for the + hotplug scripts. 
*/ + return; + } + if (err != 2) { + xenbus_dev_fatal(dev, err, "reading physical-device"); + return; + } + + if ((be->major || be->minor) && + ((be->major != major) || (be->minor != minor))) { + printk(KERN_WARNING + "blkback: changing physical device (from %x:%x to " + "%x:%x) not supported.\n", be->major, be->minor, + major, minor); + return; + } + + be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL); + if (IS_ERR(be->mode)) { + err = PTR_ERR(be->mode); + be->mode = NULL; + xenbus_dev_fatal(dev, err, "reading mode"); + return; + } + + device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL); + if (!IS_ERR(device_type)) { + cdrom = strcmp(device_type, "cdrom") == 0; + kfree(device_type); + } + + if (be->major == 0 && be->minor == 0) { + /* Front end dir is a number, which is used as the handle. */ + + char *p = strrchr(dev->otherend, '/') + 1; + long handle = simple_strtoul(p, NULL, 0); + + be->major = major; + be->minor = minor; + + err = vbd_create(be->blkif, handle, major, minor, + (NULL == strchr(be->mode, 'w')), cdrom); + if (err) { + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating vbd structure"); + return; + } + + err = xenvbd_sysfs_addif(dev); + if (err) { + vbd_free(&be->blkif->vbd); + be->major = be->minor = 0; + xenbus_dev_fatal(dev, err, "creating sysfs entries"); + return; + } + + /* We're potentially connected now */ + update_blkif_status(be->blkif); + } +} + + +/** + * Callback received when the frontend's state changes. 
+ */ +static void frontend_changed(struct xenbus_device *dev, + enum xenbus_state frontend_state) +{ + struct backend_info *be = dev->dev.driver_data; + int err; + + DPRINTK("%s", xenbus_strstate(frontend_state)); + + switch (frontend_state) { + case XenbusStateInitialising: + if (dev->state == XenbusStateClosed) { + printk(KERN_INFO "%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); + xenbus_switch_state(dev, XenbusStateInitWait); + } + break; + + case XenbusStateInitialised: + case XenbusStateConnected: + /* Ensure we connect even when two watches fire in + close successsion and we miss the intermediate value + of frontend_state. */ + if (dev->state == XenbusStateConnected) + break; + + err = connect_ring(be); + if (err) + break; + update_blkif_status(be->blkif); + break; + + case XenbusStateClosing: + blkif_disconnect(be->blkif); + xenbus_switch_state(dev, XenbusStateClosing); + break; + + case XenbusStateClosed: + xenbus_switch_state(dev, XenbusStateClosed); + if (xenbus_dev_is_online(dev)) + break; + /* fall through if not online */ + case XenbusStateUnknown: + device_unregister(&dev->dev); + break; + + default: + xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + + +/* ** Connection ** */ + + +/** + * Write the physical details regarding the block device to the store, and + * switch to Connected state. 
+ */ +static void connect(struct backend_info *be) +{ + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = be->dev; + + DPRINTK("%s", dev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + return; + } + + err = blkback_barrier(xbt, be, 1); + if (err) + goto abort; + + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", + vbd_size(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sectors", + dev->nodename); + goto abort; + } + + /* FIXME: use a typename instead */ + err = xenbus_printf(xbt, dev->nodename, "info", "%u", + vbd_info(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/info", + dev->nodename); + goto abort; + } + err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu", + vbd_secsize(&be->blkif->vbd)); + if (err) { + xenbus_dev_fatal(dev, err, "writing %s/sector-size", + dev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(dev, err, "ending transaction"); + + err = xenbus_switch_state(dev, XenbusStateConnected); + if (err) + xenbus_dev_fatal(dev, err, "switching to Connected state", + dev->nodename); + + return; + abort: + xenbus_transaction_end(xbt, 1); +} + + +static int connect_ring(struct backend_info *be) +{ + struct xenbus_device *dev = be->dev; + unsigned long ring_ref; + unsigned int evtchn; + char protocol[64] = ""; + int err; + + DPRINTK("%s", dev->otherend); + + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading %s/ring-ref and event-channel", + dev->otherend); + return err; + } + + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", + "%63s", protocol, NULL); + if 
(err) + strcpy(protocol, "unspecified, assuming native"); + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32; + else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64)) + be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64; + else { + xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); + return -1; + } + printk(KERN_INFO + "blkback: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); + + /* Map the shared frame, irq etc. */ + err = blkif_map(be->blkif, ring_ref, evtchn); + if (err) { + xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", + ring_ref, evtchn); + return err; + } + + return 0; +} + + +/* ** Driver Registration ** */ + + +static const struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + + +static struct xenbus_driver blkback = { + .name = "vbd", + .owner = THIS_MODULE, + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .otherend_changed = frontend_changed +}; + + +void blkif_xenbus_init(void) +{ + xenbus_register_backend(&blkback); +} diff --git a/include/xen/blkif.h b/include/xen/blkif.h new file mode 100644 index 000000000000..3d56b75de909 --- /dev/null +++ b/include/xen/blkif.h @@ -0,0 +1,123 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of 
the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_BLKIF_H__ +#define __XEN_BLKIF_H__ + +#include +#include +#include + +/* Not a real protocol. Used to generate ring structs which contain + * the elements common to all protocols only. This way we get a + * compiler-checkable way to use common struct elements, so we can + * avoid using switch(protocol) in a number of places. */ +struct blkif_common_request { + char dummy; +}; +struct blkif_common_response { + char dummy; +}; + +/* i386 protocol version */ +#pragma pack(push, 4) +struct blkif_x86_32_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_32_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_32_request blkif_x86_32_request_t; +typedef struct blkif_x86_32_response blkif_x86_32_response_t; +#pragma pack(pop) + +/* x86_64 protocol version */ +struct blkif_x86_64_request { + uint8_t operation; /* BLKIF_OP_??? 
*/ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t __attribute__((__aligned__(8))) id; + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +struct blkif_x86_64_response { + uint64_t __attribute__((__aligned__(8))) id; + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_x86_64_request blkif_x86_64_request_t; +typedef struct blkif_x86_64_response blkif_x86_64_response_t; + +DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, struct blkif_common_response); +DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32_response); +DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); + +union blkif_back_rings { + blkif_back_ring_t native; + blkif_common_back_ring_t common; + blkif_x86_32_back_ring_t x86_32; + blkif_x86_64_back_ring_t x86_64; +}; +typedef union blkif_back_rings blkif_back_rings_t; + +enum blkif_protocol { + BLKIF_PROTOCOL_NATIVE = 1, + BLKIF_PROTOCOL_X86_32 = 2, + BLKIF_PROTOCOL_X86_64 = 3, +}; + +static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +{ + int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; + dst->operation = src->operation; + dst->nr_segments = src->nr_segments; + dst->handle = src->handle; + dst->id = src->id; + dst->sector_number = src->sector_number; + barrier(); + if (n > dst->nr_segments) + n = 
dst->nr_segments; + for (i = 0; i < n; i++) + dst->seg[i] = src->seg[i]; +} + +#endif /* __XEN_BLKIF_H__ */ From 8812293323a79134e06c3bf82eba1e217d23382e Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 12:05:51 -0800 Subject: [PATCH 0069/3380] xen-blkback-porting --- drivers/xen/blkback/blkback.c | 30 ++++++++++++++++-------------- drivers/xen/blkback/common.h | 9 ++++----- drivers/xen/blkback/interface.c | 19 ++++++++++--------- drivers/xen/blkback/vbd.c | 4 ++-- drivers/xen/blkback/xenbus.c | 7 ++++--- include/xen/blkif.h | 13 ++++++------- 6 files changed, 42 insertions(+), 40 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 5b8d50e344b4..43fd07091d4d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -39,8 +39,12 @@ #include #include #include +#include #include -#include +#include +#include +#include +#include #include "common.h" /* @@ -106,7 +110,7 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(blkif_t *blkif); static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req); static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); @@ -153,7 +157,7 @@ static void unplug_queue(blkif_t *blkif) static void plug_queue(blkif_t *blkif, struct block_device *bdev) { - request_queue_t *q = bdev_get_queue(bdev); + struct request_queue *q = bdev_get_queue(bdev); if (q == blkif->plug) return; @@ -268,13 +272,10 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } -static int end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } @@ -288,7 +289,7 @@ static void blkif_notify_work(blkif_t *blkif) wake_up(&blkif->wq); } -irqreturn_t blkif_be_int(int 
irq, void *dev_id, struct pt_regs *regs) +irqreturn_t blkif_be_int(int irq, void *dev_id) { blkif_notify_work(dev_id); return IRQ_HANDLED; @@ -302,8 +303,8 @@ irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs) static int do_block_io_op(blkif_t *blkif) { - blkif_back_rings_t *blk_rings = &blkif->blk_rings; - blkif_request_t req; + union blkif_back_rings *blk_rings = &blkif->blk_rings; + struct blkif_request req; pending_req_t *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -379,7 +380,7 @@ static int do_block_io_op(blkif_t *blkif) } static void dispatch_rw_block_io(blkif_t *blkif, - blkif_request_t *req, + struct blkif_request *req, pending_req_t *pending_req) { extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); @@ -560,9 +561,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { - blkif_response_t resp; + struct blkif_response resp; unsigned long flags; - blkif_back_rings_t *blk_rings = &blkif->blk_rings; + union blkif_back_rings *blk_rings = &blkif->blk_rings; int more_to_do = 0; int notify; @@ -614,7 +615,8 @@ static int __init blkif_init(void) { int i, mmap_pages; - if (!is_running_on_xen()) + printk(KERN_CRIT "***blkif_init\n"); + if (!xen_pv_domain()) return -ENODEV; mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 422e935528be..1c422b00974e 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -40,8 +40,7 @@ #include #include #include -#include -#include +#include #include #define DPRINTK(_f, _a...) \ @@ -66,7 +65,7 @@ typedef struct blkif_st { unsigned int irq; /* Comms information. */ enum blkif_protocol blk_protocol; - blkif_back_rings_t blk_rings; + union blkif_back_rings blk_rings; struct vm_struct *blk_ring_area; /* The VBD attached to this interface. 
*/ struct vbd vbd; @@ -79,7 +78,7 @@ typedef struct blkif_st { wait_queue_head_t wq; struct task_struct *xenblkd; unsigned int waiting_reqs; - request_queue_t *plug; + struct request_queue *plug; /* statistics */ unsigned long st_print; @@ -130,7 +129,7 @@ void blkif_interface_init(void); void blkif_xenbus_init(void); -irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs); +irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 81821bdc7ef1..c6c3e14776b9 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -31,10 +31,11 @@ */ #include "common.h" -#include +#include +#include #include -static kmem_cache_t *blkif_cachep; +static struct kmem_cache *blkif_cachep; blkif_t *blkif_alloc(domid_t domid) { @@ -107,22 +108,22 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) switch (blkif->blk_protocol) { case BLKIF_PROTOCOL_NATIVE: { - blkif_sring_t *sring; - sring = (blkif_sring_t *)blkif->blk_ring_area->addr; + struct blkif_sring *sring; + sring = (struct blkif_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_32: { - blkif_x86_32_sring_t *sring_x86_32; - sring_x86_32 = (blkif_x86_32_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_32_sring *sring_x86_32; + sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); break; } case BLKIF_PROTOCOL_X86_64: { - blkif_x86_64_sring_t *sring_x86_64; - sring_x86_64 = (blkif_x86_64_sring_t *)blkif->blk_ring_area->addr; + struct blkif_x86_64_sring *sring_x86_64; + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); break; } @@ -177,5 +178,5 @@ void 
blkif_free(blkif_t *blkif) void __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), - 0, 0, NULL, NULL); + 0, 0, NULL); } diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 1fb31d0236b4..7e9a1cd35ade 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -33,7 +33,7 @@ #include "common.h" #define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity) + (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -94,7 +94,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, void vbd_free(struct vbd *vbd) { if (vbd->bdev) - blkdev_put(vbd->bdev); + blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE); vbd->bdev = NULL; } diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 80d9aa6e6ba3..650f4b3e9b3c 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -238,8 +238,8 @@ static int blkback_probe(struct xenbus_device *dev, /* setup back pointer */ be->blkif->be = be; - err = xenbus_watch_path2(dev, dev->nodename, "physical-device", - &be->backend_watch, backend_changed); + err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed, + "%s/%s", dev->nodename, "physical-device"); if (err) goto fail; @@ -537,5 +537,6 @@ static struct xenbus_driver blkback = { void blkif_xenbus_init(void) { - xenbus_register_backend(&blkback); + /* XXX must_check */ + (void)xenbus_register_backend(&blkback); } diff --git a/include/xen/blkif.h b/include/xen/blkif.h index 3d56b75de909..d27428046918 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -77,12 +77,11 @@ DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, struct blkif_x86_32 DEFINE_RING_TYPES(blkif_x86_64, struct blkif_x86_64_request, struct blkif_x86_64_response); union blkif_back_rings { - blkif_back_ring_t native; - 
blkif_common_back_ring_t common; - blkif_x86_32_back_ring_t x86_32; - blkif_x86_64_back_ring_t x86_64; + struct blkif_back_ring native; + struct blkif_common_back_ring common; + struct blkif_x86_32_back_ring x86_32; + struct blkif_x86_64_back_ring x86_64; }; -typedef union blkif_back_rings blkif_back_rings_t; enum blkif_protocol { BLKIF_PROTOCOL_NATIVE = 1, @@ -90,7 +89,7 @@ enum blkif_protocol { BLKIF_PROTOCOL_X86_64 = 3, }; -static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src) +static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct blkif_x86_32_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; @@ -105,7 +104,7 @@ static void inline blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_reque dst->seg[i] = src->seg[i]; } -static void inline blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src) +static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) { int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST; dst->operation = src->operation; From dd3672424caa7b302433635831afbb6787476b96 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 9 Feb 2009 16:39:58 -0800 Subject: [PATCH 0070/3380] xen/blkback: don't include xen/evtchn.h It's a user-mode header for users of /dev/evtchn Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 1c422b00974e..57b78250cfb7 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include From 8270b45bc8a45eef4a224bd256bd0997d4fd857e Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Fri, 6 Mar 2009 08:29:15 +0000 Subject: [PATCH 0071/3380] blkback: Fix potential resource leak. 
--- drivers/xen/blkback/blkback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 43fd07091d4d..8d988f4513aa 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -318,14 +318,14 @@ static int do_block_io_op(blkif_t *blkif) if (RING_REQUEST_CONS_OVERFLOW(&blk_rings->common, rc)) break; - pending_req = alloc_req(); - if (NULL == pending_req) { - blkif->st_oo_req++; + if (kthread_should_stop()) { more_to_do = 1; break; } - if (kthread_should_stop()) { + pending_req = alloc_req(); + if (NULL == pending_req) { + blkif->st_oo_req++; more_to_do = 1; break; } From 690f1b63b2db88330834d8482f3b125990c8e609 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Sat, 21 Mar 2009 23:34:19 -0700 Subject: [PATCH 0072/3380] block: export blk_get/put_queue for blkback Impact: build fix I'm not sure if blkback should be using these functions, but in the meantime export them to allow blkback to be a module. 
Signed-off-by: Jeremy Fitzhardinge --- block/blk-core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-core.c b/block/blk-core.c index 90f22cc30799..9b60e69a5400 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,6 +351,7 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } +EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -572,6 +573,7 @@ int blk_get_queue(struct request_queue *q) return 1; } +EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { From 05d43865ddc00bdb33d12c8e9d9f176ed5d3797b Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Mon, 29 Jun 2009 14:58:45 -0700 Subject: [PATCH 0073/3380] xen/blkback: deal with hardsect_size to logical_block_size rename Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 2 +- drivers/xen/blkback/vbd.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8d988f4513aa..ac5af91c393f 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -484,7 +484,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) { + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { DPRINTK("Misaligned I/O request from domain %d", blkif->domid); goto fail_put_bio; diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 7e9a1cd35ade..410c2eac5ad7 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd) unsigned long vbd_secsize(struct vbd *vbd) { - return bdev_hardsect_size(vbd->bdev); + return bdev_logical_block_size(vbd->bdev); } int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, From 
0660c7dbf228a06345392a64ebb43734875a3b91 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 9 Sep 2009 15:15:16 -0700 Subject: [PATCH 0074/3380] xen/blkback: remove spurious debug output noise Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index ac5af91c393f..31458bd07252 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -615,7 +615,6 @@ static int __init blkif_init(void) { int i, mmap_pages; - printk(KERN_CRIT "***blkif_init\n"); if (!xen_pv_domain()) return -ENODEV; From afd91d07ff72919071e37086c0664384b3875688 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Tue, 15 Sep 2009 14:12:37 -0700 Subject: [PATCH 0075/3380] xen/blkback: little cleanups Remove unused local prototype; group headers. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 31458bd07252..e9e3de119a73 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -40,6 +40,7 @@ #include #include #include + #include #include #include @@ -383,7 +384,6 @@ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) { - extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; struct { From 8770b2683f9f98d4c1d6caf2e28f625592bba4f3 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 8 Oct 2009 13:23:09 -0400 Subject: [PATCH 0076/3380] Fix compile warnings: ignoring return value of 'xenbus_register_backend' .. We neglect to check the return value of xenbus_register_backend and take actions when that fails. This patch fixes that and adds code to deal with those type of failures. 
Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 18 +++++++++++++----- drivers/xen/blkback/common.h | 4 ++-- drivers/xen/blkback/interface.c | 6 +++++- drivers/xen/blkback/xenbus.c | 5 ++--- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index e9e3de119a73..a2ac7189cc0a 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -614,6 +614,7 @@ static void make_response(blkif_t *blkif, u64 id, static int __init blkif_init(void) { int i, mmap_pages; + int rc = 0; if (!xen_pv_domain()) return -ENODEV; @@ -626,13 +627,17 @@ static int __init blkif_init(void) mmap_pages, GFP_KERNEL); pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) + if (!pending_reqs || !pending_grant_handles || !pending_pages) { + rc = -ENOMEM; goto out_of_memory; + } for (i = 0; i < mmap_pages; i++) pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkif_interface_init(); + rc = blkif_interface_init(); + if (rc) + goto failed_init; memset(pending_reqs, 0, sizeof(pending_reqs)); INIT_LIST_HEAD(&pending_free); @@ -640,16 +645,19 @@ static int __init blkif_init(void) for (i = 0; i < blkif_reqs; i++) list_add_tail(&pending_reqs[i].free_list, &pending_free); - blkif_xenbus_init(); + rc = blkif_xenbus_init(); + if (rc) + goto failed_init; return 0; out_of_memory: + printk(KERN_ERR "%s: out of memory\n", __func__); + failed_init: kfree(pending_reqs); kfree(pending_grant_handles); free_empty_pages_and_pagevec(pending_pages, mmap_pages); - printk("%s: out of memory\n", __FUNCTION__); - return -ENOMEM; + return rc; } module_init(blkif_init); diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 57b78250cfb7..aaf36485bc01 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -124,9 +124,9 @@ struct phys_req { int 
vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); -void blkif_interface_init(void); +int blkif_interface_init(void); -void blkif_xenbus_init(void); +int blkif_xenbus_init(void); irqreturn_t blkif_be_int(int irq, void *dev_id); int blkif_schedule(void *arg); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index c6c3e14776b9..e397a4134f1b 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -175,8 +175,12 @@ void blkif_free(blkif_t *blkif) kmem_cache_free(blkif_cachep, blkif); } -void __init blkif_interface_init(void) +int __init blkif_interface_init(void) { blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 0, 0, NULL); + if (!blkif_cachep) + return -ENOMEM; + + return 0; } diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 650f4b3e9b3c..04c0a12aff36 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -535,8 +535,7 @@ static struct xenbus_driver blkback = { }; -void blkif_xenbus_init(void) +int blkif_xenbus_init(void) { - /* XXX must_check */ - (void)xenbus_register_backend(&blkback); + return xenbus_register_backend(&blkback); } From e7579a99b598f8e4a2b4df4854fbda2cc961bb02 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 3 Dec 2009 21:56:18 +0000 Subject: [PATCH 0077/3380] xen: rename blkbk module xen-blkback. blkbk is rather generic for a modular distro style kernel. 
Signed-off-by: Ian Campbell Cc: Jeremy Fitzhardinge Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile index 8bab63da3b3e..f1ae1ff07a4d 100644 --- a/drivers/xen/blkback/Makefile +++ b/drivers/xen/blkback/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) := xen-blkback.o -blkbk-y := blkback.o xenbus.o interface.o vbd.o +xen-blkback-y := blkback.o xenbus.o interface.o vbd.o From 5cf6e4f6f6d5549904db6ecb3ffd5b8f71f41250 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Feb 2010 16:07:31 -0800 Subject: [PATCH 0078/3380] xen/blkback: use drv_get/set_drvdata rather than directly accessing driver_data. Direct driver_data access is obsolete and will disappear. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 04c0a12aff36..34f8e4046578 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -105,7 +105,7 @@ static void update_blkif_status(blkif_t *blkif) char *buf) \ { \ struct xenbus_device *dev = to_xenbus_device(_dev); \ - struct backend_info *be = dev->dev.driver_data; \ + struct backend_info *be = dev_get_drvdata(&dev->dev); \ \ return sprintf(buf, format, ##args); \ } \ @@ -169,7 +169,7 @@ void xenvbd_sysfs_delif(struct xenbus_device *dev) static int blkback_remove(struct xenbus_device *dev) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); DPRINTK(""); @@ -190,7 +190,7 @@ static int blkback_remove(struct xenbus_device *dev) } kfree(be); - dev->dev.driver_data = NULL; + dev_set_drvdata(&dev->dev, NULL); return 0; } @@ -225,7 +225,7 @@ static int blkback_probe(struct xenbus_device *dev, return -ENOMEM; } be->dev = 
dev; - dev->dev.driver_data = be; + dev_set_drvdata(&dev->dev, be); be->blkif = blkif_alloc(dev->otherend_id); if (IS_ERR(be->blkif)) { @@ -348,7 +348,7 @@ static void backend_changed(struct xenbus_watch *watch, static void frontend_changed(struct xenbus_device *dev, enum xenbus_state frontend_state) { - struct backend_info *be = dev->dev.driver_data; + struct backend_info *be = dev_get_drvdata(&dev->dev); int err; DPRINTK("%s", xenbus_strstate(frontend_state)); From 2ccbfe26c106a1a93a402567b7853c1484c4a0b0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 11 Mar 2010 13:39:50 -0800 Subject: [PATCH 0079/3380] xen/blkback: Propagate changed size of VBDs Support dynamic resizing of virtual block devices. This patch supports both file backed block devices as well as physical devices that can be dynamically resized on the host side. Signed-off-by: K. Y. Srinivasan Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/blkback.c | 3 +++ drivers/xen/blkback/common.h | 2 ++ drivers/xen/blkback/vbd.c | 43 +++++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a2ac7189cc0a..6d897664802d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -207,6 +207,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { blkif_t *blkif = arg; + struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -216,6 +217,8 @@ int blkif_schedule(void *arg) while (!kthread_should_stop()) { if (try_to_freeze()) continue; + if (unlikely(vbd->size != vbd_size(vbd))) + vbd_resize(blkif); wait_event_interruptible( blkif->wq, diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index aaf36485bc01..cebcc2b7e9f6 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -52,6 +52,7 @@ struct vbd { unsigned char type; /* VDISK_xxx */ u32 pdevice; /* phys device that this vbd maps to */ struct block_device *bdev; + 
sector_t size; /* Cached size parameter */ }; struct backend_info; @@ -98,6 +99,7 @@ blkif_t *blkif_alloc(domid_t domid); void blkif_disconnect(blkif_t *blkif); void blkif_free(blkif_t *blkif); int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); +void vbd_resize(blkif_t *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 410c2eac5ad7..0635c54079f8 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -73,6 +73,7 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, } vbd->bdev = bdev; + vbd->size = vbd_size(vbd); if (vbd->bdev->bd_disk == NULL) { DPRINTK("vbd_creat: device %08x doesn't exist.\n", @@ -116,3 +117,45 @@ int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) out: return rc; } + +void vbd_resize(blkif_t *blkif) +{ + struct vbd *vbd = &blkif->vbd; + struct xenbus_transaction xbt; + int err; + struct xenbus_device *dev = blkif->be->dev; + unsigned long long new_size = vbd_size(vbd); + + printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + vbd->size = new_size; +again: + err = xenbus_transaction_start(&xbt); + if (err) { + printk(KERN_WARNING "Error starting transaction"); + return; + } + err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + vbd_size(vbd)); + if (err) { + printk(KERN_WARNING "Error writing new size"); + goto abort; + } + /* + * Write the current state; we will use this to synchronize + * the front-end. If the current state is "connected" the + * front-end will get the new size information online. 
+ */ + err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); + if (err) { + printk(KERN_WARNING "Error writing the state"); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + printk(KERN_WARNING "Error ending transaction"); +abort: + xenbus_transaction_end(xbt, 1); +} From 98e036a356747cfaa225478b1e4875e190257b09 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 18 Mar 2010 15:35:05 -0700 Subject: [PATCH 0080/3380] xen/blkback: add accessor for xenbus backend device Since backend_info is hidden away now. Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/common.h | 2 ++ drivers/xen/blkback/vbd.c | 2 +- drivers/xen/blkback/xenbus.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index cebcc2b7e9f6..0f91830f18c8 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -136,4 +136,6 @@ int blkif_schedule(void *arg); int blkback_barrier(struct xenbus_transaction xbt, struct backend_info *be, int state); +struct xenbus_device *blkback_xenbus(struct backend_info *be); + #endif /* __BLKIF__BACKEND__COMMON_H__ */ diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 0635c54079f8..943ec2313522 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -123,7 +123,7 @@ void vbd_resize(blkif_t *blkif) struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; int err; - struct xenbus_device *dev = blkif->be->dev; + struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 34f8e4046578..c31e5c40b45c 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -42,6 +42,11 @@ static int connect_ring(struct backend_info *); static void 
backend_changed(struct xenbus_watch *, const char **, unsigned int); +struct xenbus_device *blkback_xenbus(struct backend_info *be) +{ + return be->dev; +} + static int blkback_name(blkif_t *blkif, char *buf) { char *devpath, *devname; From cbf462908c8080f47c2a3300072877589dd1275f Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 21 Jul 2010 12:41:45 -0700 Subject: [PATCH 0081/3380] xen/blkback: Flush blkback data when connecting. First cut at flushing blkback data when first connecting blkback. This should avoid the pygrub issues we are experiencing in (RedHat bugzilla) 466681. [ 2.6.18-xen.hg commit 63b4d7f56688 ] Signed-off-by: Chris Lalancette Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index c31e5c40b45c..a0534fc6a428 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -91,6 +91,13 @@ static void update_blkif_status(blkif_t *blkif) return; } + err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping); + if (err) { + xenbus_dev_error(blkif->be->dev, err, "block flush"); + return; + } + invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping); + blkif->xenblkd = kthread_run(blkif_schedule, blkif, name); if (IS_ERR(blkif->xenblkd)) { err = PTR_ERR(blkif->xenblkd); From a81135d90bf176e6139c352c7b96c03d00131836 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 16 Aug 2010 13:43:06 -0700 Subject: [PATCH 0082/3380] xen/blkback: Print additional information when a vbd is resized. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/vbd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 943ec2313522..dc2572338567 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -126,6 +126,8 @@ void vbd_resize(blkif_t *blkif) struct xenbus_device *dev = blkback_xenbus(blkif->be); unsigned long long new_size = vbd_size(vbd); + printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", + blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); vbd->size = new_size; again: From 313d7b003ceceb797e8c0d18ab085ed0638b4aff Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Wed, 24 Nov 2010 22:08:20 -0800 Subject: [PATCH 0083/3380] blkback: Fix CVE-2010-3699 A guest can cause the backend driver to leak a kernel thread. Such leaked threads hold references to the device, which makes the device impossible to tear down. If shut down, the guest remains a zombie domain, the xenwatch process hangs, and most xm commands will stop working. This patch tries to do the following for blkback: - identify/extract idempotent teardown operations, - add/move the invocation of said teardown operation right before we're about to allocate new resources in the Connected states. [ linux-2.6.18-xen.hg 59f097ef181b ] Signed-off-by: Laszlo Ersek Signed-off-by: Keir Fraser Signed-off-by: Jeremy Fitzhardinge --- drivers/xen/blkback/xenbus.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index a0534fc6a428..031bc3d7eec3 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -382,6 +382,11 @@ static void frontend_changed(struct xenbus_device *dev, if (dev->state == XenbusStateConnected) break; + /* Enforce precondition before potential leak point. + * blkif_disconnect() is idempotent. 
+ */ + blkif_disconnect(be->blkif); + err = connect_ring(be); if (err) break; @@ -399,6 +404,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: + /* implies blkif_disconnect() via blkback_remove() */ device_unregister(&dev->dev); break; From 248e9f7539f8351cd857d12a74bd52133a3a900f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 24 Feb 2011 17:22:41 -0500 Subject: [PATCH 0084/3380] xen/blkback: Replace WRITE_BARRIER with (REQ_FLUSH | REQ_FUA) TODO: Double check xen-blkfront.c --- drivers/xen/blkback/blkback.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 6d897664802d..cb844f734d91 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -405,7 +405,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = WRITE_BARRIER; + operation = REQ_FLUSH | REQ_FUA; break; default: operation = 0; /* make gcc happy */ @@ -414,7 +414,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || + if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -517,7 +517,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != WRITE_BARRIER); + BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -532,7 +532,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == WRITE_BARRIER) + else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) blkif->st_wr_sect += preq.nr_sects; return; From bc0c081b0e7a4afc4d2c7bc0666f5cd169e96814 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:02:39 -0500 Subject: [PATCH 0085/3380] xen/blkback: Update to use blkdev_get_by_dev instead of open_by_devnum. The API for opening a block device has changed since 2.6.32. The correct function to open a device is blkdev_get_by_dev. --- drivers/xen/blkback/vbd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index dc2572338567..8c91a2fb0019 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -63,8 +63,8 @@ int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, vbd->pdevice = MKDEV(major, minor); - bdev = open_by_devnum(vbd->pdevice, - vbd->readonly ? FMODE_READ : FMODE_WRITE); + bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ? 
+ FMODE_READ : FMODE_WRITE, NULL); if (IS_ERR(bdev)) { DPRINTK("vbd_creat: device %08x could not be opened.\n", From efe08a3eecf15ab022afba48c691d02c7de2fbbb Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 5 Feb 2010 14:19:33 -0500 Subject: [PATCH 0086/3380] xen/blkback: simplify address translations Cherry-pick and modified from 69d64727c42eecd47fdf82c15a54474d21a4012a ("blkback/blktap2: simplify address translations"): "There are quite a number of places where e.g. page->va->page translations happen. Besides yielding smaller code (source and binary), a second goal is to make it easier to determine where virtual addresses of pages allocated through alloc_empty_pages_and_pagevec() are really used (in turn in order to determine whether using highmem pages would be possible there)." The second goal is not the purpose of this patch - it is just to make it easier to read the code. linux-2.6-pvops: * Stripped drivers/xen/gntdev/* * Stripped drivers/xen/netback/* [v2: Stripped blktap off] Signed-off-by: Jan Beulich Signed-off-by: Daniel Stodden Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index cb844f734d91..7c9421cc5991 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -99,9 +99,11 @@ static inline int vaddr_pagenr(pending_req_t *req, int seg) return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } +#define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] + static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_pages[vaddr_pagenr(req, seg)]); + unsigned long pfn = page_to_pfn(pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } @@ -463,8 +465,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; - 
set_phys_to_machine(__pa(vaddr( - pending_req, i)) >> PAGE_SHIFT, + set_phys_to_machine( + page_to_pfn(pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -495,7 +497,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - virt_to_page(vaddr(pending_req, i)), + pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { From e8e28871edf0d0adb0bd7e597c044cbaf7a7f137 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 25 Feb 2011 10:51:29 -0500 Subject: [PATCH 0087/3380] xen/blkback: Move global/static variables into struct xen_blkbk. Bundle the lot of discrete variables into a single structure. This is based on what was done in the xen-netback driver: xen: netback: Move global/static variables into struct xen_netbk. (094944631cc5a9d6e623302c987f78117c0bf7ac) Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 82 ++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 7c9421cc5991..c08875b0ad64 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -84,31 +84,34 @@ typedef struct { struct list_head free_list; } pending_req_t; -static pending_req_t *pending_reqs; -static struct list_head pending_free; -static DEFINE_SPINLOCK(pending_free_lock); -static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); - #define BLKBACK_INVALID_HANDLE (~0) -static struct page **pending_pages; -static grant_handle_t *pending_grant_handles; +struct xen_blkbk { + pending_req_t *pending_reqs; + struct list_head pending_free; + spinlock_t pending_free_lock; + wait_queue_head_t pending_free_wq; + struct page **pending_pages; + grant_handle_t *pending_grant_handles; +}; + +static struct xen_blkbk *blkbk; static inline int vaddr_pagenr(pending_req_t *req, int 
seg) { - return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] static inline unsigned long vaddr(pending_req_t *req, int seg) { - unsigned long pfn = page_to_pfn(pending_page(req, seg)); + unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); } #define pending_handle(_req, _seg) \ - (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) static int do_block_io_op(blkif_t *blkif); @@ -126,12 +129,12 @@ static pending_req_t* alloc_req(void) pending_req_t *req = NULL; unsigned long flags; - spin_lock_irqsave(&pending_free_lock, flags); - if (!list_empty(&pending_free)) { - req = list_entry(pending_free.next, pending_req_t, free_list); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + if (!list_empty(&blkbk->pending_free)) { + req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); list_del(&req->free_list); } - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); return req; } @@ -140,12 +143,12 @@ static void free_req(pending_req_t *req) unsigned long flags; int was_empty; - spin_lock_irqsave(&pending_free_lock, flags); - was_empty = list_empty(&pending_free); - list_add(&req->free_list, &pending_free); - spin_unlock_irqrestore(&pending_free_lock, flags); + spin_lock_irqsave(&blkbk->pending_free_lock, flags); + was_empty = list_empty(&blkbk->pending_free); + list_add(&req->free_list, &blkbk->pending_free); + spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); if (was_empty) - wake_up(&pending_free_wq); + wake_up(&blkbk->pending_free_wq); } static void unplug_queue(blkif_t *blkif) @@ -226,8 +229,8 @@ int blkif_schedule(void *arg) blkif->wq, blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( - 
pending_free_wq, - !list_empty(&pending_free) || kthread_should_stop()); + blkbk->pending_free_wq, + !list_empty(&blkbk->pending_free) || kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -466,7 +469,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, continue; set_phys_to_machine( - page_to_pfn(pending_page(pending_req, i)), + page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); @@ -497,7 +500,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, while ((bio == NULL) || (bio_add_page(bio, - pending_page(pending_req, i), + blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { if (bio) { @@ -624,31 +627,40 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; + blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + if (!blkbk) { + printk(KERN_ALERT "%s: out of memory!\n", __func__); + return -ENOMEM; + } + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; - pending_reqs = kmalloc(sizeof(pending_reqs[0]) * + blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) * + blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * mmap_pages, GFP_KERNEL); - pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); - if (!pending_reqs || !pending_grant_handles || !pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } for (i = 0; i < mmap_pages; i++) - pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; rc = blkif_interface_init(); if (rc) goto failed_init; - memset(pending_reqs, 0, 
sizeof(pending_reqs)); - INIT_LIST_HEAD(&pending_free); + memset(blkbk->pending_reqs, 0, sizeof(blkbk->pending_reqs)); + + INIT_LIST_HEAD(&blkbk->pending_free); + spin_lock_init(&blkbk->pending_free_lock); + init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&pending_reqs[i].free_list, &pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) @@ -659,9 +671,11 @@ static int __init blkif_init(void) out_of_memory: printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: - kfree(pending_reqs); - kfree(pending_grant_handles); - free_empty_pages_and_pagevec(pending_pages, mmap_pages); + kfree(blkbk->pending_reqs); + kfree(blkbk->pending_grant_handles); + free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk); + blkbk = NULL; return rc; } From c35950bfa9abaaf16548a287a8d5d782a361414f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:22:28 -0500 Subject: [PATCH 0088/3380] xen/blkback: Union the blkif_request request specific fields Following in the steps of patch: "xen: Union the blkif_request request specific fields" this patch changes the blkback. Per the original patch: "Prepare for extending the block device ring to allow request specific fields, by moving the request specific fields for reads, writes and barrier requests to a union member." 
Cc: Owen Smith Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- include/xen/blkif.h | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index c08875b0ad64..eda50646775d 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -426,7 +426,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } preq.dev = req->handle; - preq.sector_number = req->sector_number; + preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; pending_req->blkif = blkif; @@ -438,11 +438,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, for (i = 0; i < nseg; i++) { uint32_t flags; - seg[i].nsec = req->seg[i].last_sect - - req->seg[i].first_sect + 1; + seg[i].nsec = req->u.rw.seg[i].last_sect - + req->u.rw.seg[i].first_sect + 1; - if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || - (req->seg[i].last_sect < req->seg[i].first_sect)) + if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; @@ -450,7 +450,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation != READ) flags |= GNTMAP_readonly; gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->seg[i].gref, blkif->domid); + req->u.rw.seg[i].gref, blkif->domid); } ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); @@ -472,7 +472,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, page_to_pfn(blkbk->pending_page(pending_req, i)), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | - (req->seg[i].first_sect << 9); + (req->u.rw.seg[i].first_sect << 9); } if (ret) diff --git a/include/xen/blkif.h b/include/xen/blkif.h index d27428046918..ab794269fc53 100644 --- a/include/xen/blkif.h +++ b/include/xen/blkif.h @@ -96,12 +96,12 @@ static void inline blkif_get_x86_32_req(struct blkif_request *dst, struct 
blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_x86_64_request *src) @@ -111,12 +111,12 @@ static void inline blkif_get_x86_64_req(struct blkif_request *dst, struct blkif_ dst->nr_segments = src->nr_segments; dst->handle = src->handle; dst->id = src->id; - dst->sector_number = src->sector_number; + dst->u.rw.sector_number = src->sector_number; barrier(); if (n > dst->nr_segments) n = dst->nr_segments; for (i = 0; i < n; i++) - dst->seg[i] = src->seg[i]; + dst->u.rw.seg[i] = src->seg[i]; } #endif /* __XEN_BLKIF_H__ */ From 464fb419e17083a18b636c9f4714fc49ef6857d2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:26:10 -0500 Subject: [PATCH 0089/3380] xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages. Previously we would allocate the array for page using 'kmalloc' which we can as easily do with 'vzalloc'. The pre-allocation of pages was done a bit differently in the past - it used to be that the balloon driver would export "alloc_empty_pages_and_pagevec" which would have in one function created an array, allocated the pages, ballooned the pages out (so the memory behind those pages would be non-present), and provide us those pages. This was OK as those pages were shared between other guest and the only thing we needed was to "swizzle" the MFN of those pages to point to the other guest MFN. We can still "swizzle" the MFNs using the M2P (and P2M) override API calls, but for the sake of simplicity we are dropping the balloon API calls. We can return to those later on. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index eda50646775d..d32198d1be04 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -637,18 +637,23 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = kmalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages, GFP_KERNEL); - blkbk->pending_pages = alloc_empty_pages_and_pagevec(mmap_pages); + blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages); + blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } - for (i = 0; i < mmap_pages; i++) + for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - + blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + if (blkbk->pending_pages[i] == NULL) { + rc = -ENOMEM; + goto out_of_memory; + } + } rc = blkif_interface_init(); if (rc) goto failed_init; @@ -672,8 +677,12 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - kfree(blkbk->pending_grant_handles); - free_empty_pages_and_pagevec(blkbk->pending_pages, mmap_pages); + vfree(blkbk->pending_grant_handles); + for (i = 0; i < mmap_pages; i++) { + if (blkbk->pending_pages[i]) + __free_page(blkbk->pending_pages[i]); + } + vfree(blkbk->pending_pages); vfree(blkbk); blkbk = NULL; return rc; From 5dc03639cc903f887931831d69895facb5260f4b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 1 Mar 2011 16:46:45 -0500 Subject: [PATCH 0090/3380] xen/blkback: Utilize the M2P override mechanism for 
GNTMAP_host_map Instead of doing copy grants lets do mapping grants using the M2P(and P2M) override mechanism. Signed-off-by: Konrad Rzeszutek Wilk Conflicts: drivers/xen/blkback/blkback.c --- drivers/xen/blkback/blkback.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d32198d1be04..15790ae96f33 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -41,7 +41,6 @@ #include #include -#include #include #include #include @@ -192,6 +191,17 @@ static void fast_flush_area(pending_req_t *req) ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } } /****************************************************************** @@ -467,10 +477,15 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + continue; + } - set_phys_to_machine( - page_to_pfn(blkbk->pending_page(pending_req, i)), - FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); seg[i].buf = map[i].dev_bus_addr | (req->u.rw.seg[i].first_sect << 9); } From a742b02c75e6e76bd0833f9b6e702f1be7d7e008 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 14 Mar 2011 12:41:26 -0400 Subject: [PATCH 0091/3380] xen/blkback: Use kzalloc's, and GFP_KERNEL for data structures. The patch titled:"xen/blkback: Use 'vzalloc' for page arrays and pre-allocate pages." 
allocates the structures and its member variables using the 'vzalloc'. Daniel Stodden pointed out that vzalloc is good when we use big number of pages - while these are at the max two pages. We can do this using kzalloc. Also the GFP_HIGHMEM does not work properly with Xen, so take that out. We will have to revisit this when a "get_empty_pages_and_pagevec" type API shows up to leverage that. BugLink: http://mid.gmane.org/1299898639.11681.227.camel@agari.van.xensource.com CC: Daniel Stodden Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 15790ae96f33..a6f8f1338118 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -642,7 +642,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)vmalloc(sizeof(struct xen_blkbk)); + blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -652,9 +652,10 @@ static int __init blkif_init(void) blkbk->pending_reqs = kmalloc(sizeof(blkbk->pending_reqs[0]) * blkif_reqs, GFP_KERNEL); - blkbk->pending_grant_handles = vzalloc(sizeof(blkbk->pending_grant_handles[0]) * - mmap_pages); - blkbk->pending_pages = vzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages); + blkbk->pending_grant_handles = kzalloc(sizeof(blkbk->pending_grant_handles[0]) * + mmap_pages, GFP_KERNEL); + blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * + mmap_pages, GFP_KERNEL); if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { rc = -ENOMEM; @@ -663,7 +664,7 @@ static int __init blkif_init(void) for (i = 0; i < mmap_pages; i++) { blkbk->pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; - blkbk->pending_pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); + 
blkbk->pending_pages[i] = alloc_page(GFP_KERNEL); if (blkbk->pending_pages[i] == NULL) { rc = -ENOMEM; goto out_of_memory; @@ -692,13 +693,13 @@ static int __init blkif_init(void) printk(KERN_ERR "%s: out of memory\n", __func__); failed_init: kfree(blkbk->pending_reqs); - vfree(blkbk->pending_grant_handles); + kfree(blkbk->pending_grant_handles); for (i = 0; i < mmap_pages; i++) { if (blkbk->pending_pages[i]) __free_page(blkbk->pending_pages[i]); } - vfree(blkbk->pending_pages); - vfree(blkbk); + kfree(blkbk->pending_pages); + kfree(blkbk); blkbk = NULL; return rc; } From 314146e515710f8a7d7eaf7a58b7ed590c9c14c3 Mon Sep 17 00:00:00 2001 From: Tom Goetz Date: Thu, 17 Mar 2011 12:14:29 -0400 Subject: [PATCH 0092/3380] xen/blkback: Fix the WRITE_BARRIER The WRITE_BARRIER was missing the REQ_WRITE option. This was causing the blktap to die. Signed-off-by: Tom Goetz Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a6f8f1338118..4cd5b49de0c1 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -47,6 +47,8 @@ #include #include "common.h" +#define WRITE_BARRIER (REQ_WRITE | REQ_FLUSH | REQ_FUA) + /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of @@ -420,7 +422,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, operation = WRITE; break; case BLKIF_OP_WRITE_BARRIER: - operation = REQ_FLUSH | REQ_FUA; + operation = WRITE_BARRIER; break; default: operation = 0; /* make gcc happy */ @@ -429,7 +431,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* Check that number of segments is sane. 
*/ nseg = req->nr_segments; - if (unlikely(nseg == 0 && operation != (REQ_FLUSH | REQ_FUA)) || + if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); goto fail_response; @@ -537,7 +539,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, } if (!bio) { - BUG_ON(operation != (REQ_FLUSH | REQ_FUA)); + BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -552,7 +554,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (operation == READ) blkif->st_rd_sect += preq.nr_sects; - else if (operation == WRITE || operation == (REQ_FLUSH | REQ_FUA)) + else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; return; From a1397fa3090c25c6c51c04b4101f2786d16b615f Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:05:23 -0400 Subject: [PATCH 0093/3380] xen/blkback: Add some comments. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 88 ++++++++++++++++++++++++++------- drivers/xen/blkback/common.h | 1 + drivers/xen/blkback/interface.c | 2 - drivers/xen/blkback/vbd.c | 2 - drivers/xen/blkback/xenbus.c | 2 +- 5 files changed, 71 insertions(+), 24 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 4cd5b49de0c1..8a4b1e8eeb62 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -1,11 +1,10 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/main.c * * Back-end of the driver for virtual block devices. This portion of the * driver exports a 'unified' block-device interface that can be accessed * by any operating system that implements a compatible front end. 
A * reference front-end implementation can be found in: - * arch/xen/drivers/blkif/frontend + * drivers/block/xen-blkfront.c * * Copyright (c) 2003-2004, Keir Fraser & Steve Hand * Copyright (c) 2005, Christopher Clark @@ -88,16 +87,25 @@ typedef struct { #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + pending_req_t *pending_reqs; + /* List of all 'pending_req' available */ struct list_head pending_free; + /* And its spinlock. */ spinlock_t pending_free_lock; wait_queue_head_t pending_free_wq; + /* The list of all pages that are available. */ struct page **pending_pages; + /* And the grant handles that are available. */ grant_handle_t *pending_grant_handles; }; static struct xen_blkbk *blkbk; +/* + * Little helpful macro to figure out the index and virtual address of the + * pending_pages[..]. For each 'pending_req' we have have up to + * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through + * 10 and would index in the pending_pages[..]. */ static inline int vaddr_pagenr(pending_req_t *req, int seg) { return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; @@ -122,8 +130,8 @@ static void dispatch_rw_block_io(blkif_t *blkif, static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st); -/****************************************************************** - * misc small helpers +/* + * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ static pending_req_t* alloc_req(void) { @@ -139,6 +147,10 @@ static pending_req_t* alloc_req(void) return req; } +/* + * Return the 'pending_req' structure back to the freepool. We also + * wake up the thread if it was waiting for a free page. + */ static void free_req(pending_req_t *req) { unsigned long flags; @@ -152,6 +164,11 @@ static void free_req(pending_req_t *req) wake_up(&blkbk->pending_free_wq); } +/* + * Give back a reference count on the underlaying storage. 
+ * It is OK to make multiple calls in this function as it + * resets the plug to NULL when it is done on the first call. + */ static void unplug_queue(blkif_t *blkif) { if (blkif->plug == NULL) @@ -162,6 +179,12 @@ static void unplug_queue(blkif_t *blkif) blkif->plug = NULL; } +/* + * Take a reference count on the underlaying storage. + * It is OK to call this multiple times as we check to make sure + * not to double reference. We also give back a reference count + * if it corresponds to another queue. + */ static void plug_queue(blkif_t *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -173,6 +196,10 @@ static void plug_queue(blkif_t *blkif, struct block_device *bdev) blkif->plug = q; } +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ static void fast_flush_area(pending_req_t *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; @@ -266,8 +293,8 @@ int blkif_schedule(void *arg) return 0; } -/****************************************************************** - * COMPLETION CALLBACK -- Called as bh->b_end_io() +/* + * Completion callback on the bio's. Called as bh->b_end_io() */ static void __end_block_io_op(pending_req_t *pending_req, int error) @@ -284,6 +311,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) pending_req->status = BLKIF_RSP_ERROR; } + /* If all of the bio's have completed it is time to unmap + * the grant references associated with 'request' and provide + * the proper response on the ring. */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -293,6 +323,9 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) } } +/* + * bio callback. 
+ */ static void end_block_io_op(struct bio *bio, int error) { __end_block_io_op(bio->bi_private, error); @@ -300,8 +333,8 @@ static void end_block_io_op(struct bio *bio, int error) } -/****************************************************************************** - * NOTIFICATION FROM GUEST OS. +/* + * Notification from the guest OS. */ static void blkif_notify_work(blkif_t *blkif) @@ -318,10 +351,11 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) -/****************************************************************** - * DOWNWARD CALLS -- These interface with the block-device layer proper. +/* + * Function to copy the from the ring buffer the 'struct blkif_request' + * (which has the sectors we want, number of them, grant references, etc), + * and transmute it to the block API to hand it over to the proper block disk. */ - static int do_block_io_op(blkif_t *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; @@ -400,6 +434,10 @@ static int do_block_io_op(blkif_t *blkif) return more_to_do; } +/* + * Transumation of the 'struct blkif_request' to a proper 'struct bio' + * and call the 'submit_bio' to pass it to the underlaying storage. + */ static void dispatch_rw_block_io(blkif_t *blkif, struct blkif_request *req, pending_req_t *pending_req) @@ -429,7 +467,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, BUG(); } - /* Check that number of segments is sane. */ + /* Check that the number of segments is sane. */ nseg = req->nr_segments; if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { @@ -447,12 +485,14 @@ static void dispatch_rw_block_io(blkif_t *blkif, pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] 
with the PFN of the page in our domain with the + * corresponding grant reference for each page.*/ for (i = 0; i < nseg; i++) { uint32_t flags; seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; - if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; @@ -468,6 +508,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); BUG_ON(ret); + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of the + * page from the other domain. */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -485,6 +528,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ continue; } @@ -492,6 +536,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, (req->u.rw.seg[i].first_sect << 9); } + /* If we have failed at this point, we need to undo the M2P override, set + * gnttab_set_unmap_op on all of the grant references and perform the + * hypercall to unmap the grants - that is all done in fast_flush_area. */ if (ret) goto fail_flush; @@ -503,7 +550,11 @@ static void dispatch_rw_block_io(blkif_t *blkif, goto fail_flush; } + /* Get a reference count for the disk queue and start sending I/O */ plug_queue(blkif, preq.bdev); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. 
*/ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -524,7 +575,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -538,6 +589,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, preq.sector_number += seg[i].nsec; } + /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); bio = bio_alloc(GFP_KERNEL, 0); @@ -578,11 +630,9 @@ static void dispatch_rw_block_io(blkif_t *blkif, -/****************************************************************** - * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING +/* + * Put a response on the ring on how the operation fared. */ - - static void make_response(blkif_t *blkif, u64 id, unsigned short op, int st) { diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 0f91830f18c8..4c140c8e75bd 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -76,6 +76,7 @@ typedef struct blkif_st { atomic_t refcnt; wait_queue_head_t wq; + /* One thread per one blkif. */ struct task_struct *xenblkd; unsigned int waiting_reqs; struct request_queue *plug; diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index e397a4134f1b..a4a15350737f 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -1,6 +1,4 @@ /****************************************************************************** - * arch/xen/drivers/blkif/backend/interface.c - * * Block-device interface management. * * Copyright (c) 2004, Keir Fraser diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 8c91a2fb0019..95156c95ab2f 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -1,6 +1,4 @@ /****************************************************************************** - * blkback/vbd.c - * * Routines for managing virtual block devices (VBDs). 
* * Copyright (c) 2003-2005, Keir Fraser & Steve Hand diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 031bc3d7eec3..e9c4f80ef1c8 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -107,7 +107,7 @@ static void update_blkif_status(blkif_t *blkif) } -/**************************************************************** +/* * sysfs interface for VBD I/O requests */ From 5489377ce40d52fb722dcd811617114cebad7bba Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:21:50 -0400 Subject: [PATCH 0094/3380] xen/blkback: blkif->struct blkif_st checkpatch.pl suggested that we don't use the typdef in common.h and this triggered this avalanche of patches. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 24 ++++++++++++------------ drivers/xen/blkback/common.h | 23 ++++++++++++----------- drivers/xen/blkback/interface.c | 16 ++++++++-------- drivers/xen/blkback/vbd.c | 6 +++--- drivers/xen/blkback/xenbus.c | 6 +++--- 5 files changed, 38 insertions(+), 37 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 8a4b1e8eeb62..d07ad5318a85 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -75,7 +75,7 @@ module_param(debug_lvl, int, 0644); * response queued for it, with the saved 'id' passed back. 
*/ typedef struct { - blkif_t *blkif; + struct blkif_st *blkif; u64 id; int nr_pages; atomic_t pendcnt; @@ -123,11 +123,11 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) (blkbk->pending_grant_handles[vaddr_pagenr(_req, _seg)]) -static int do_block_io_op(blkif_t *blkif); -static void dispatch_rw_block_io(blkif_t *blkif, +static int do_block_io_op(struct blkif_st *blkif); +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req); -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* @@ -169,7 +169,7 @@ static void free_req(pending_req_t *req) * It is OK to make multiple calls in this function as it * resets the plug to NULL when it is done on the first call. */ -static void unplug_queue(blkif_t *blkif) +static void unplug_queue(struct blkif_st *blkif) { if (blkif->plug == NULL) return; @@ -185,7 +185,7 @@ static void unplug_queue(blkif_t *blkif) * not to double reference. We also give back a reference count * if it corresponds to another queue. */ -static void plug_queue(blkif_t *blkif, struct block_device *bdev) +static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); @@ -237,7 +237,7 @@ static void fast_flush_area(pending_req_t *req) * SCHEDULER FUNCTIONS */ -static void print_stats(blkif_t *blkif) +static void print_stats(struct blkif_st *blkif) { printk(KERN_DEBUG "%s: oo %3d | rd %4d | wr %4d | br %4d\n", current->comm, blkif->st_oo_req, @@ -250,7 +250,7 @@ static void print_stats(blkif_t *blkif) int blkif_schedule(void *arg) { - blkif_t *blkif = arg; + struct blkif_st *blkif = arg; struct vbd *vbd = &blkif->vbd; blkif_get(blkif); @@ -337,7 +337,7 @@ static void end_block_io_op(struct bio *bio, int error) * Notification from the guest OS. 
*/ -static void blkif_notify_work(blkif_t *blkif) +static void blkif_notify_work(struct blkif_st *blkif) { blkif->waiting_reqs = 1; wake_up(&blkif->wq); @@ -356,7 +356,7 @@ irqreturn_t blkif_be_int(int irq, void *dev_id) * (which has the sectors we want, number of them, grant references, etc), * and transmute it to the block API to hand it over to the proper block disk. */ -static int do_block_io_op(blkif_t *blkif) +static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; @@ -438,7 +438,7 @@ static int do_block_io_op(blkif_t *blkif) * Transumation of the 'struct blkif_request' to a proper 'struct bio' * and call the 'submit_bio' to pass it to the underlaying storage. */ -static void dispatch_rw_block_io(blkif_t *blkif, +static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, pending_req_t *pending_req) { @@ -633,7 +633,7 @@ static void dispatch_rw_block_io(blkif_t *blkif, /* * Put a response on the ring on how the operation fared. */ -static void make_response(blkif_t *blkif, u64 id, +static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st) { struct blkif_response resp; diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index 4c140c8e75bd..be3fc93d8a31 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include @@ -44,7 +44,7 @@ #define DPRINTK(_f, _a...) \ pr_debug("(file=%s, line=%d) " _f, \ - __FILE__ , __LINE__ , ## _a ) + __FILE__ , __LINE__ , ## _a) struct vbd { blkif_vdev_t handle; /* what the domain refers to this vbd as */ @@ -57,7 +57,7 @@ struct vbd { struct backend_info; -typedef struct blkif_st { +struct blkif_st { /* Unique identifier for this interface. 
*/ domid_t domid; unsigned int handle; @@ -94,13 +94,14 @@ typedef struct blkif_st { grant_handle_t shmem_handle; grant_ref_t shmem_ref; -} blkif_t; +}; -blkif_t *blkif_alloc(domid_t domid); -void blkif_disconnect(blkif_t *blkif); -void blkif_free(blkif_t *blkif); -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn); -void vbd_resize(blkif_t *blkif); +struct blkif_st *blkif_alloc(domid_t domid); +void blkif_disconnect(struct blkif_st *blkif); +void blkif_free(struct blkif_st *blkif); +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn); +void vbd_resize(struct blkif_st *blkif); #define blkif_get(_b) (atomic_inc(&(_b)->refcnt)) #define blkif_put(_b) \ @@ -110,7 +111,7 @@ void vbd_resize(blkif_t *blkif); } while (0) /* Create a vbd. */ -int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t vdevice, unsigned major, unsigned minor, int readonly, int cdrom); void vbd_free(struct vbd *vbd); @@ -125,7 +126,7 @@ struct phys_req { blkif_sector_t sector_number; }; -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation); int blkif_interface_init(void); diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index a4a15350737f..7d59f13115cf 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -35,9 +35,9 @@ static struct kmem_cache *blkif_cachep; -blkif_t *blkif_alloc(domid_t domid) +struct blkif_st *blkif_alloc(domid_t domid) { - blkif_t *blkif; + struct blkif_st *blkif; blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL); if (!blkif) @@ -54,7 +54,7 @@ blkif_t *blkif_alloc(domid_t domid) return blkif; } -static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) +static int map_frontend_page(struct blkif_st *blkif, unsigned long shared_page) { struct gnttab_map_grant_ref op; @@ -75,7 
+75,7 @@ static int map_frontend_page(blkif_t *blkif, unsigned long shared_page) return 0; } -static void unmap_frontend_page(blkif_t *blkif) +static void unmap_frontend_page(struct blkif_st *blkif) { struct gnttab_unmap_grant_ref op; @@ -86,7 +86,7 @@ static void unmap_frontend_page(blkif_t *blkif) BUG(); } -int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) { int err; @@ -143,7 +143,7 @@ int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn) return 0; } -void blkif_disconnect(blkif_t *blkif) +void blkif_disconnect(struct blkif_st *blkif) { if (blkif->xenblkd) { kthread_stop(blkif->xenblkd); @@ -166,7 +166,7 @@ void blkif_disconnect(blkif_t *blkif) } } -void blkif_free(blkif_t *blkif) +void blkif_free(struct blkif_st *blkif) { if (!atomic_dec_and_test(&blkif->refcnt)) BUG(); @@ -175,7 +175,7 @@ void blkif_free(blkif_t *blkif) int __init blkif_interface_init(void) { - blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), + blkif_cachep = kmem_cache_create("blkif_cache", sizeof(struct blkif_st), 0, 0, NULL); if (!blkif_cachep) return -ENOMEM; diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 95156c95ab2f..26a37df8173a 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -48,7 +48,7 @@ unsigned long vbd_secsize(struct vbd *vbd) return bdev_logical_block_size(vbd->bdev); } -int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major, +int vbd_create(struct blkif_st *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) { struct vbd *vbd; @@ -97,7 +97,7 @@ void vbd_free(struct vbd *vbd) vbd->bdev = NULL; } -int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) +int vbd_translate(struct phys_req *req, struct blkif_st *blkif, int operation) { struct vbd *vbd = &blkif->vbd; int rc = -EACCES; @@ -116,7 +116,7 @@ int 
vbd_translate(struct phys_req *req, blkif_t *blkif, int operation) return rc; } -void vbd_resize(blkif_t *blkif) +void vbd_resize(struct blkif_st *blkif) { struct vbd *vbd = &blkif->vbd; struct xenbus_transaction xbt; diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index e9c4f80ef1c8..67462c4e9ab4 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -30,7 +30,7 @@ struct backend_info { struct xenbus_device *dev; - blkif_t *blkif; + struct blkif_st *blkif; struct xenbus_watch backend_watch; unsigned major; unsigned minor; @@ -47,7 +47,7 @@ struct xenbus_device *blkback_xenbus(struct backend_info *be) return be->dev; } -static int blkback_name(blkif_t *blkif, char *buf) +static int blkback_name(struct blkif_st *blkif, char *buf) { char *devpath, *devname; struct xenbus_device *dev = blkif->be->dev; @@ -67,7 +67,7 @@ static int blkback_name(blkif_t *blkif, char *buf) return 0; } -static void update_blkif_status(blkif_t *blkif) +static void update_blkif_status(struct blkif_st *blkif) { int err; char name[TASK_COMM_LEN]; From 3c64b58cd614c976dcb19e16fa59ab620b3fe130 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:24:45 -0400 Subject: [PATCH 0095/3380] xen/blkback: Fix checkpatch warnings in vbd.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/vbd.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c index 26a37df8173a..d0ff4cf91a34 100644 --- a/drivers/xen/blkback/vbd.c +++ b/drivers/xen/blkback/vbd.c @@ -30,8 +30,9 @@ #include "common.h" -#define vbd_sz(_v) ((_v)->bdev->bd_part ? \ - (_v)->bdev->bd_part->nr_sects : get_capacity((_v)->bdev->bd_disk)) +#define vbd_sz(_v) ((_v)->bdev->bd_part ? 
\ + (_v)->bdev->bd_part->nr_sects : \ + get_capacity((_v)->bdev->bd_disk)) unsigned long long vbd_size(struct vbd *vbd) { @@ -40,7 +41,7 @@ unsigned long long vbd_size(struct vbd *vbd) unsigned int vbd_info(struct vbd *vbd) { - return vbd->type | (vbd->readonly?VDISK_READONLY:0); + return vbd->type | (vbd->readonly ? VDISK_READONLY : 0); } unsigned long vbd_secsize(struct vbd *vbd) @@ -126,7 +127,7 @@ void vbd_resize(struct blkif_st *blkif) printk(KERN_INFO "VBD Resize: Domid: %d, Device: (%d, %d)\n", blkif->domid, MAJOR(vbd->pdevice), MINOR(vbd->pdevice)); - printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + printk(KERN_INFO "VBD Resize: new size %llu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); @@ -134,7 +135,7 @@ void vbd_resize(struct blkif_st *blkif) printk(KERN_WARNING "Error starting transaction"); return; } - err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", + err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu", vbd_size(vbd)); if (err) { printk(KERN_WARNING "Error writing new size"); From e5f4b3c498623fc3d83f6d92e00a2b2dbf500cd0 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:27:29 -0400 Subject: [PATCH 0096/3380] xen/blkback: Fix interface.c checkpatch warnings .. except + sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring_area->addr; WARNING: line over 80 characters + BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); as breaking them up really does not help that much. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/interface.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c index 7d59f13115cf..163aed41e825 100644 --- a/drivers/xen/blkback/interface.c +++ b/drivers/xen/blkback/interface.c @@ -86,7 +86,8 @@ static void unmap_frontend_page(struct blkif_st *blkif) BUG(); } -int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int evtchn) +int blkif_map(struct blkif_st *blkif, unsigned long shared_page, + unsigned int evtchn) { int err; @@ -94,7 +95,8 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev if (blkif->irq) return 0; - if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL ) + blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE); + if (!blkif->blk_ring_area) return -ENOMEM; err = map_frontend_page(blkif, shared_page); @@ -131,8 +133,7 @@ int blkif_map(struct blkif_st *blkif, unsigned long shared_page, unsigned int ev err = bind_interdomain_evtchn_to_irqhandler( blkif->domid, evtchn, blkif_be_int, 0, "blkif-backend", blkif); - if (err < 0) - { + if (err < 0) { unmap_frontend_page(blkif); free_vm_area(blkif->blk_ring_area); blkif->blk_rings.common.sring = NULL; From d6091b217dd4fdabc4a8cd6fa61775f1e3eb6efe Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:33:30 -0400 Subject: [PATCH 0097/3380] xen/blkback: Fix checkpatch warnings of xenbus.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/xenbus.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/xen/blkback/xenbus.c b/drivers/xen/blkback/xenbus.c index 67462c4e9ab4..b41ed65db2d3 100644 --- a/drivers/xen/blkback/xenbus.c +++ b/drivers/xen/blkback/xenbus.c @@ -25,10 +25,9 @@ #undef DPRINTK #define DPRINTK(fmt, args...) 
\ pr_debug("blkback/xenbus (%s:%d) " fmt ".\n", \ - __FUNCTION__, __LINE__, ##args) + __func__, __LINE__, ##args) -struct backend_info -{ +struct backend_info { struct xenbus_device *dev; struct blkif_st *blkif; struct xenbus_watch backend_watch; @@ -56,7 +55,8 @@ static int blkback_name(struct blkif_st *blkif, char *buf) if (IS_ERR(devpath)) return PTR_ERR(devpath); - if ((devname = strstr(devpath, "/dev/")) != NULL) + devname = strstr(devpath, "/dev/"); + if (devname != NULL) devname += strlen("/dev/"); else devname = devpath; @@ -153,7 +153,7 @@ int xenvbd_sysfs_addif(struct xenbus_device *dev) int error; error = device_create_file(&dev->dev, &dev_attr_physical_device); - if (error) + if (error) goto fail1; error = device_create_file(&dev->dev, &dev_attr_mode); @@ -327,7 +327,10 @@ static void backend_changed(struct xenbus_watch *watch, /* Front end dir is a number, which is used as the handle. */ char *p = strrchr(dev->otherend, '/') + 1; - long handle = simple_strtoul(p, NULL, 0); + long handle; + err = strict_strtoul(p, 0, &handle); + if (err) + return; be->major = major; be->minor = minor; @@ -369,7 +372,7 @@ static void frontend_changed(struct xenbus_device *dev, case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + __func__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -494,8 +497,8 @@ static int connect_ring(struct backend_info *be) DPRINTK("%s", dev->otherend); - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", &ring_ref, - "event-channel", "%u", &evtchn, NULL); + err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", + &ring_ref, "event-channel", "%u", &evtchn, NULL); if (err) { xenbus_dev_fatal(dev, err, "reading %s/ring-ref and event-channel", From 2e9977c21f7679d5f616132ae1f7857e932ccd19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:42:07 -0400 Subject: [PATCH 
0098/3380] xen/blkback: Fix checkpatch warnings in blkback.c Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 81 ++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index d07ad5318a85..2d413930f235 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -63,8 +63,8 @@ module_param_named(reqs, blkif_reqs, int, 0); MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); /* Run-time switchable: /sys/module/blkback/parameters/ */ -static unsigned int log_stats = 0; -static unsigned int debug_lvl = 0; +static unsigned int log_stats; +static unsigned int debug_lvl; module_param(log_stats, int, 0644); module_param(debug_lvl, int, 0644); @@ -74,7 +74,7 @@ module_param(debug_lvl, int, 0644); * the pendcnt towards zero. When it hits zero, the specified domain has a * response queued for it, with the saved 'id' passed back. */ -typedef struct { +struct pending_req { struct blkif_st *blkif; u64 id; int nr_pages; @@ -82,12 +82,12 @@ typedef struct { unsigned short operation; int status; struct list_head free_list; -} pending_req_t; +}; #define BLKBACK_INVALID_HANDLE (~0) struct xen_blkbk { - pending_req_t *pending_reqs; + struct pending_req *pending_reqs; /* List of all 'pending_req' available */ struct list_head pending_free; /* And its spinlock. */ @@ -106,14 +106,15 @@ static struct xen_blkbk *blkbk; * pending_pages[..]. For each 'pending_req' we have have up to * BLKIF_MAX_SEGMENTS_PER_REQUEST (11) pages. The seg would be from 0 through * 10 and would index in the pending_pages[..]. 
*/ -static inline int vaddr_pagenr(pending_req_t *req, int seg) +static inline int vaddr_pagenr(struct pending_req *req, int seg) { - return (req - blkbk->pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; + return (req - blkbk->pending_reqs) * + BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } #define pending_page(req, seg) pending_pages[vaddr_pagenr(req, seg)] -static inline unsigned long vaddr(pending_req_t *req, int seg) +static inline unsigned long vaddr(struct pending_req *req, int seg) { unsigned long pfn = page_to_pfn(blkbk->pending_page(req, seg)); return (unsigned long)pfn_to_kaddr(pfn); @@ -126,21 +127,22 @@ static inline unsigned long vaddr(pending_req_t *req, int seg) static int do_block_io_op(struct blkif_st *blkif); static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req); + struct pending_req *pending_req); static void make_response(struct blkif_st *blkif, u64 id, unsigned short op, int st); /* * Retrieve from the 'pending_reqs' a free pending_req structure to be used. */ -static pending_req_t* alloc_req(void) +static struct pending_req *alloc_req(void) { - pending_req_t *req = NULL; + struct pending_req *req = NULL; unsigned long flags; spin_lock_irqsave(&blkbk->pending_free_lock, flags); if (!list_empty(&blkbk->pending_free)) { - req = list_entry(blkbk->pending_free.next, pending_req_t, free_list); + req = list_entry(blkbk->pending_free.next, struct pending_req, + free_list); list_del(&req->free_list); } spin_unlock_irqrestore(&blkbk->pending_free_lock, flags); @@ -151,7 +153,7 @@ static pending_req_t* alloc_req(void) * Return the 'pending_req' structure back to the freepool. We also * wake up the thread if it was waiting for a free page. 
*/ -static void free_req(pending_req_t *req) +static void free_req(struct pending_req *req) { unsigned long flags; int was_empty; @@ -200,7 +202,7 @@ static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(pending_req_t *req) +static void fast_flush_area(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -221,7 +223,8 @@ static void fast_flush_area(pending_req_t *req) GNTTABOP_unmap_grant_ref, unmap, invcount); BUG_ON(ret); /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). */ + * using vaddr(req, i). + */ for (i = 0; i < invcount; i++) { ret = m2p_remove_override( virt_to_page(unmap[i].host_addr), false); @@ -233,7 +236,7 @@ static void fast_flush_area(pending_req_t *req) } } -/****************************************************************** +/* * SCHEDULER FUNCTIONS */ @@ -269,7 +272,8 @@ int blkif_schedule(void *arg) blkif->waiting_reqs || kthread_should_stop()); wait_event_interruptible( blkbk->pending_free_wq, - !list_empty(&blkbk->pending_free) || kthread_should_stop()); + !list_empty(&blkbk->pending_free) || + kthread_should_stop()); blkif->waiting_reqs = 0; smp_mb(); /* clear flag *before* checking for work */ @@ -297,7 +301,7 @@ int blkif_schedule(void *arg) * Completion callback on the bio's. Called as bh->b_end_io() */ -static void __end_block_io_op(pending_req_t *pending_req, int error) +static void __end_block_io_op(struct pending_req *pending_req, int error) { /* An error fails the entire request. */ if ((pending_req->operation == BLKIF_OP_WRITE_BARRIER) && @@ -313,7 +317,8 @@ static void __end_block_io_op(pending_req_t *pending_req, int error) /* If all of the bio's have completed it is time to unmap * the grant references associated with 'request' and provide - * the proper response on the ring. 
*/ + * the proper response on the ring. + */ if (atomic_dec_and_test(&pending_req->pendcnt)) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, @@ -360,7 +365,7 @@ static int do_block_io_op(struct blkif_st *blkif) { union blkif_back_rings *blk_rings = &blkif->blk_rings; struct blkif_request req; - pending_req_t *pending_req; + struct pending_req *pending_req; RING_IDX rc, rp; int more_to_do = 0; @@ -440,7 +445,7 @@ static int do_block_io_op(struct blkif_st *blkif) */ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, - pending_req_t *pending_req) + struct pending_req *pending_req) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; @@ -487,7 +492,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* Fill out preq.nr_sects with proper amount of sectors, and setup * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page.*/ + * corresponding grant reference for each page. + */ for (i = 0; i < nseg; i++) { uint32_t flags; @@ -509,8 +515,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, BUG_ON(ret); /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of the - * page from the other domain. */ + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. 
+ */ for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTK("invalid buffer -- could not remap it\n"); @@ -522,12 +529,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (ret) continue; - + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), blkbk->pending_page(pending_req, i), false); if (ret) { printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long)map[i].dev_bus_addr, ret); + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); /* We could switch over to GNTTABOP_copy */ continue; } @@ -536,9 +544,11 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].first_sect << 9); } - /* If we have failed at this point, we need to undo the M2P override, set - * gnttab_set_unmap_op on all of the grant references and perform the - * hypercall to unmap the grants - that is all done in fast_flush_area. */ + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * fast_flush_area. + */ if (ret) goto fail_flush; @@ -554,7 +564,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, plug_queue(blkif, preq.bdev); /* We set it one so that the last submit_bio does not have to call - * atomic_inc. */ + * atomic_inc. 
+ */ atomic_set(&pending_req->pendcnt, 1); blkif_get(blkif); @@ -575,7 +586,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_inc(&pending_req->pendcnt); submit_bio(operation, bio); } - + bio = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -694,7 +705,7 @@ static int __init blkif_init(void) if (!xen_pv_domain()) return -ENODEV; - blkbk = (struct xen_blkbk *)kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); + blkbk = kzalloc(sizeof(struct xen_blkbk), GFP_KERNEL); if (!blkbk) { printk(KERN_ALERT "%s: out of memory!\n", __func__); return -ENOMEM; @@ -709,7 +720,8 @@ static int __init blkif_init(void) blkbk->pending_pages = kzalloc(sizeof(blkbk->pending_pages[0]) * mmap_pages, GFP_KERNEL); - if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || !blkbk->pending_pages) { + if (!blkbk->pending_reqs || !blkbk->pending_grant_handles || + !blkbk->pending_pages) { rc = -ENOMEM; goto out_of_memory; } @@ -733,7 +745,8 @@ static int __init blkif_init(void) init_waitqueue_head(&blkbk->pending_free_wq); for (i = 0; i < blkif_reqs; i++) - list_add_tail(&blkbk->pending_reqs[i].free_list, &blkbk->pending_free); + list_add_tail(&blkbk->pending_reqs[i].free_list, + &blkbk->pending_free); rc = blkif_xenbus_init(); if (rc) From 0faa8cca883bbc6a0919e3c89128672659b75820 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Apr 2011 17:58:19 -0400 Subject: [PATCH 0099/3380] xen/blkback: remove per-queue plugging commit 7eaceaccab5f40bbfda044629a6298616aeaed50 ("block: remove per-queue plugging") added two new interfaces to plug and unplug: blk_start_plug and blk_finish_plug. Lets use those. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 44 +++++++---------------------------- drivers/xen/blkback/common.h | 1 - 2 files changed, 9 insertions(+), 36 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 2d413930f235..464f2e0b5a61 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -166,38 +166,6 @@ static void free_req(struct pending_req *req) wake_up(&blkbk->pending_free_wq); } -/* - * Give back a reference count on the underlaying storage. - * It is OK to make multiple calls in this function as it - * resets the plug to NULL when it is done on the first call. - */ -static void unplug_queue(struct blkif_st *blkif) -{ - if (blkif->plug == NULL) - return; - if (blkif->plug->unplug_fn) - blkif->plug->unplug_fn(blkif->plug); - blk_put_queue(blkif->plug); - blkif->plug = NULL; -} - -/* - * Take a reference count on the underlaying storage. - * It is OK to call this multiple times as we check to make sure - * not to double reference. We also give back a reference count - * if it corresponds to another queue. - */ -static void plug_queue(struct blkif_st *blkif, struct block_device *bdev) -{ - struct request_queue *q = bdev_get_queue(bdev); - - if (q == blkif->plug) - return; - unplug_queue(blkif); - blk_get_queue(q); - blkif->plug = q; -} - /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
@@ -280,7 +248,6 @@ int blkif_schedule(void *arg) if (do_block_io_op(blkif)) blkif->waiting_reqs = 1; - unplug_queue(blkif); if (log_stats && time_after(jiffies, blkif->st_print)) print_stats(blkif); @@ -456,6 +423,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct bio *bio = NULL; int ret, i; int operation; + struct blk_plug plug; + struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -561,7 +530,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } /* Get a reference count for the disk queue and start sending I/O */ - plug_queue(blkif, preq.bdev); + blk_get_queue(q); + blk_start_plug(&plug); /* We set it one so that the last submit_bio does not have to call * atomic_inc. @@ -620,11 +590,14 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; + blk_finish_plug(&plug); + blk_put_queue(q); return; fail_flush: fast_flush_area(pending_req); fail_response: + /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); msleep(1); /* back off a bit */ @@ -634,7 +607,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, __end_block_io_op(pending_req, -EINVAL); if (bio) bio_put(bio); - unplug_queue(blkif); + blk_finish_plug(&plug); + blk_put_queue(q); msleep(1); /* back off a bit */ return; } diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h index be3fc93d8a31..6257c1106591 100644 --- a/drivers/xen/blkback/common.h +++ b/drivers/xen/blkback/common.h @@ -79,7 +79,6 @@ struct blkif_st { /* One thread per one blkif. 
*/ struct task_struct *xenblkd; unsigned int waiting_reqs; - struct request_queue *plug; /* statistics */ unsigned long st_print; From ae038af12c2bc0859279a1a62b5f8cb0ef00f5f8 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Thu, 7 Apr 2011 15:28:47 +0300 Subject: [PATCH 0100/3380] OMAP: DSS2: DSI: fix use_sys_clk & highfreq use_sys_clk and highfreq fields in dsi.current_cinfo were never set. Luckily they weren't used anywhere so it didn't cause any problems. This patch fixes those fields and they are now set at the same time as the rest of the fields. Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dsi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 0a7f1a47f8e3..86041535f34a 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1276,6 +1276,9 @@ int dsi_pll_set_clock_div(struct dsi_clock_info *cinfo) DSSDBGF(); + dsi.current_cinfo.use_sys_clk = cinfo->use_sys_clk; + dsi.current_cinfo.highfreq = cinfo->highfreq; + dsi.current_cinfo.fint = cinfo->fint; dsi.current_cinfo.clkin4ddr = cinfo->clkin4ddr; dsi.current_cinfo.dsi_pll_hsdiv_dispc_clk = From 4eb68edb7d21aff81765a065680270693c23fbfc Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Mon, 4 Apr 2011 10:02:53 +0300 Subject: [PATCH 0101/3380] OMAP: DSS2: DSI: fix dsi_dump_clocks() On OMAP4, reading DSI_PLL_CONFIGURATION2 register requires the L3 clock (CIO_CLK_ICG) to PLL. Currently dsi_dump_clocks() tries to read that register without enabling the L3 clock, leading to crash if DSI is not in use. The status of the bit being read from DSI_PLL_CONFIGURATION2 is available from dsi_clock_info->use_sys_clk, so we can avoid the whole problem by just using that. 
Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dsi.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 86041535f34a..1464ac4f6f58 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1491,7 +1491,6 @@ void dsi_pll_uninit(void) void dsi_dump_clocks(struct seq_file *s) { - int clksel; struct dsi_clock_info *cinfo = &dsi.current_cinfo; enum dss_clk_source dispc_clk_src, dsi_clk_src; @@ -1500,13 +1499,10 @@ void dsi_dump_clocks(struct seq_file *s) enable_clocks(1); - clksel = REG_GET(DSI_PLL_CONFIGURATION2, 11, 11); - seq_printf(s, "- DSI PLL -\n"); seq_printf(s, "dsi pll source = %s\n", - clksel == 0 ? - "dss_sys_clk" : "pclkfree"); + cinfo->use_sys_clk ? "dss_sys_clk" : "pclkfree"); seq_printf(s, "Fint\t\t%-16luregn %u\n", cinfo->fint, cinfo->regn); From 6553b2105c8871dae8dfff244440e793f3a6bdb9 Mon Sep 17 00:00:00 2001 From: Archit Taneja Date: Thu, 31 Mar 2011 13:23:35 +0530 Subject: [PATCH 0102/3380] OMAP: DSS2: Fix: Return correct lcd clock source for OMAP2/3 dss.lcd_clk_source is set to the default value DSS_CLK_SRC_FCK at dss_init. For OMAP2 and OMAP3, the dss.lcd_clk_source should always be the same as dss.dispc_clk_source. The function dss_get_lcd_clk_source() always returns the default value DSS_CLK_SRC_FCK for OMAP2/3. This leads to wrong clock dumps when dispc_clk_source is not DSS_CLK_SRC_FCK. Correct this function to always return dss.dispc_clk_source for OMAP2/3. 
Signed-off-by: Archit Taneja Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dss.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c index 3f1fee63c678..c3b48a0fcf35 100644 --- a/drivers/video/omap2/dss/dss.c +++ b/drivers/video/omap2/dss/dss.c @@ -385,8 +385,14 @@ enum dss_clk_source dss_get_dsi_clk_source(void) enum dss_clk_source dss_get_lcd_clk_source(enum omap_channel channel) { - int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1; - return dss.lcd_clk_source[ix]; + if (dss_has_feature(FEAT_LCD_CLK_SRC)) { + int ix = channel == OMAP_DSS_CHANNEL_LCD ? 0 : 1; + return dss.lcd_clk_source[ix]; + } else { + /* LCD_CLK source is the same as DISPC_FCLK source for + * OMAP2 and OMAP3 */ + return dss.dispc_clk_source; + } } /* calculate clock rates using dividers in cinfo */ From 0b41136c0d2f544b9b771643f849f82ed86f765e Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 15 Apr 2011 10:42:59 +0300 Subject: [PATCH 0103/3380] OMAP: DSS2: DSI: Fix DSI PLL power bug OMAP3630 has a HW bug causing DSI PLL power command POWER_ON_DIV (0x3) to not work properly. The bug prevents us from enabling DSI PLL power only to HS divider block. This patch adds a dss feature for the bug and converts POWER_ON_DIV requests to POWER_ON_ALL (0x2). 
Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/dss/dsi.c | 5 +++++ drivers/video/omap2/dss/dss_features.c | 2 +- drivers/video/omap2/dss/dss_features.h | 2 ++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index 1464ac4f6f58..cbd9ca48d6ec 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -1059,6 +1059,11 @@ static int dsi_pll_power(enum dsi_pll_power_state state) { int t = 0; + /* DSI-PLL power command 0x3 is not working */ + if (dss_has_feature(FEAT_DSI_PLL_PWR_BUG) && + state == DSI_PLL_POWER_ON_DIV) + state = DSI_PLL_POWER_ON_ALL; + REG_FLD_MOD(DSI_CLK_CTRL, state, 31, 30); /* PLL_PWR_CMD */ /* PLL_PWR_STATUS */ diff --git a/drivers/video/omap2/dss/dss_features.c b/drivers/video/omap2/dss/dss_features.c index aa1622241d0d..8c50e18bc0b0 100644 --- a/drivers/video/omap2/dss/dss_features.c +++ b/drivers/video/omap2/dss/dss_features.c @@ -271,7 +271,7 @@ static struct omap_dss_features omap3630_dss_features = { FEAT_LCDENABLESIGNAL | FEAT_PCKFREEENABLE | FEAT_PRE_MULT_ALPHA | FEAT_FUNCGATED | FEAT_ROWREPEATENABLE | FEAT_LINEBUFFERSPLIT | - FEAT_RESIZECONF, + FEAT_RESIZECONF | FEAT_DSI_PLL_PWR_BUG, .num_mgrs = 2, .num_ovls = 3, diff --git a/drivers/video/omap2/dss/dss_features.h b/drivers/video/omap2/dss/dss_features.h index 12e9c4ef0dec..37922ce6b8b1 100644 --- a/drivers/video/omap2/dss/dss_features.h +++ b/drivers/video/omap2/dss/dss_features.h @@ -40,6 +40,8 @@ enum dss_feat_id { /* Independent core clk divider */ FEAT_CORE_CLK_DIV = 1 << 11, FEAT_LCD_CLK_SRC = 1 << 12, + /* DSI-PLL power command 0x3 is not working */ + FEAT_DSI_PLL_PWR_BUG = 1 << 13, }; /* DSS register field id */ From 0fd08060f1bb9d7d0d712f39257dc3574a632271 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 Apr 2011 09:30:27 +0300 Subject: [PATCH 0104/3380] OMAP: DSS2: fix panel Kconfig dependencies All DPI panels were missing dependency to OMAP2_DSS_DPI. 
Add the dependency. Signed-off-by: Tomi Valkeinen --- drivers/video/omap2/displays/Kconfig | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/video/omap2/displays/Kconfig b/drivers/video/omap2/displays/Kconfig index d18ad6b2372a..609a28073178 100644 --- a/drivers/video/omap2/displays/Kconfig +++ b/drivers/video/omap2/displays/Kconfig @@ -3,6 +3,7 @@ menu "OMAP2/3 Display Device Drivers" config PANEL_GENERIC_DPI tristate "Generic DPI Panel" + depends on OMAP2_DSS_DPI help Generic DPI panel driver. Supports DVI output for Beagle and OMAP3 SDP. @@ -11,20 +12,20 @@ config PANEL_GENERIC_DPI config PANEL_LGPHILIPS_LB035Q02 tristate "LG.Philips LB035Q02 LCD Panel" - depends on OMAP2_DSS && SPI + depends on OMAP2_DSS_DPI && SPI help LCD Panel used on the Gumstix Overo Palo35 config PANEL_SHARP_LS037V7DW01 tristate "Sharp LS037V7DW01 LCD Panel" - depends on OMAP2_DSS + depends on OMAP2_DSS_DPI select BACKLIGHT_CLASS_DEVICE help LCD Panel used in TI's SDP3430 and EVM boards config PANEL_NEC_NL8048HL11_01B tristate "NEC NL8048HL11-01B Panel" - depends on OMAP2_DSS + depends on OMAP2_DSS_DPI help This NEC NL8048HL11-01B panel is TFT LCD used in the Zoom2/3/3630 sdp boards. @@ -37,7 +38,7 @@ config PANEL_TAAL config PANEL_TPO_TD043MTEA1 tristate "TPO TD043MTEA1 LCD Panel" - depends on OMAP2_DSS && SPI + depends on OMAP2_DSS_DPI && SPI help LCD Panel used in OMAP3 Pandora From f094f8a1b2737a4f3ca46742ff9aaf460d39285e Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 24 Feb 2011 19:36:42 +0100 Subject: [PATCH 0105/3380] kconfig: allow multiple inclusion of the same file Allow 'source'ing the same file from multiple places (eg. from different files, and/or under different conditions). To avoid circular inclusion, scan the source-ancestry of the current file, and abort if already sourced in this branch. Regenerate the pre-parsed lex.zconf.c_shipped file. Signed-off-by: "Yann E. 
MORIN" Signed-off-by: Michal Marek --- scripts/kconfig/lex.zconf.c_shipped | 29 +++++++++++++++++++---------- scripts/kconfig/zconf.l | 29 +++++++++++++++++++---------- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped index 6eb039718259..f4b3b1a15e21 100644 --- a/scripts/kconfig/lex.zconf.c_shipped +++ b/scripts/kconfig/lex.zconf.c_shipped @@ -2368,6 +2368,7 @@ void zconf_initscan(const char *name) void zconf_nextfile(const char *name) { + struct file *iter; struct file *file = file_lookup(name); struct buffer *buf = malloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); @@ -2383,16 +2384,24 @@ void zconf_nextfile(const char *name) buf->parent = current_buf; current_buf = buf; - if (file->flags & FILE_BUSY) { - printf("%s:%d: do not source '%s' from itself\n", - zconf_curname(), zconf_lineno(), name); - exit(1); - } - if (file->flags & FILE_SCANNED) { - printf("%s:%d: file '%s' is already sourced from '%s'\n", - zconf_curname(), zconf_lineno(), name, - file->parent->name); - exit(1); + for (iter = current_file->parent; iter; iter = iter->parent ) { + if (!strcmp(current_file->name,iter->name) ) { + printf("%s:%d: recursive inclusion detected. 
" + "Inclusion path:\n current file : '%s'\n", + zconf_curname(), zconf_lineno(), + zconf_curname()); + iter = current_file->parent; + while (iter && \ + strcmp(iter->name,current_file->name)) { + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno-1); + iter = iter->parent; + } + if (iter) + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno+1); + exit(1); + } } file->flags |= FILE_BUSY; file->lineno = 1; diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index 3dbaec185cc4..f23e3affa9b5 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -299,6 +299,7 @@ void zconf_initscan(const char *name) void zconf_nextfile(const char *name) { + struct file *iter; struct file *file = file_lookup(name); struct buffer *buf = malloc(sizeof(*buf)); memset(buf, 0, sizeof(*buf)); @@ -314,16 +315,24 @@ void zconf_nextfile(const char *name) buf->parent = current_buf; current_buf = buf; - if (file->flags & FILE_BUSY) { - printf("%s:%d: do not source '%s' from itself\n", - zconf_curname(), zconf_lineno(), name); - exit(1); - } - if (file->flags & FILE_SCANNED) { - printf("%s:%d: file '%s' is already sourced from '%s'\n", - zconf_curname(), zconf_lineno(), name, - file->parent->name); - exit(1); + for (iter = current_file->parent; iter; iter = iter->parent ) { + if (!strcmp(current_file->name,iter->name) ) { + printf("%s:%d: recursive inclusion detected. " + "Inclusion path:\n current file : '%s'\n", + zconf_curname(), zconf_lineno(), + zconf_curname()); + iter = current_file->parent; + while (iter && \ + strcmp(iter->name,current_file->name)) { + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno-1); + iter = iter->parent; + } + if (iter) + printf(" included from: '%s:%d'\n", + iter->name, iter->lineno+1); + exit(1); + } } file->flags |= FILE_BUSY; file->lineno = 1; From 2b2112f617e8ca600ec24271c93bbd49aa2acce4 Mon Sep 17 00:00:00 2001 From: "Yann E. 
MORIN" Date: Thu, 24 Feb 2011 19:36:43 +0100 Subject: [PATCH 0106/3380] kconfig: get rid of unused flags Now that we detect recursion of sourced files, get rid of now unused flags. Regenerate lex.zconf.c_shipped file. Signed-off-by: "Yann E. MORIN" Signed-off-by: Michal Marek --- scripts/kconfig/expr.h | 4 ---- scripts/kconfig/lex.zconf.c_shipped | 4 ---- scripts/kconfig/zconf.l | 4 ---- 3 files changed, 12 deletions(-) diff --git a/scripts/kconfig/expr.h b/scripts/kconfig/expr.h index 3d238db49764..16bfae2d3217 100644 --- a/scripts/kconfig/expr.h +++ b/scripts/kconfig/expr.h @@ -20,12 +20,8 @@ struct file { struct file *parent; const char *name; int lineno; - int flags; }; -#define FILE_BUSY 0x0001 -#define FILE_SCANNED 0x0002 - typedef enum tristate { no, mod, yes } tristate; diff --git a/scripts/kconfig/lex.zconf.c_shipped b/scripts/kconfig/lex.zconf.c_shipped index f4b3b1a15e21..d9182916f724 100644 --- a/scripts/kconfig/lex.zconf.c_shipped +++ b/scripts/kconfig/lex.zconf.c_shipped @@ -2363,7 +2363,6 @@ void zconf_initscan(const char *name) current_file = file_lookup(name); current_file->lineno = 1; - current_file->flags = FILE_BUSY; } void zconf_nextfile(const char *name) @@ -2403,7 +2402,6 @@ void zconf_nextfile(const char *name) exit(1); } } - file->flags |= FILE_BUSY; file->lineno = 1; file->parent = current_file; current_file = file; @@ -2413,8 +2411,6 @@ static void zconf_endfile(void) { struct buffer *parent; - current_file->flags |= FILE_SCANNED; - current_file->flags &= ~FILE_BUSY; current_file = current_file->parent; parent = current_buf->parent; diff --git a/scripts/kconfig/zconf.l b/scripts/kconfig/zconf.l index f23e3affa9b5..b22f884f9022 100644 --- a/scripts/kconfig/zconf.l +++ b/scripts/kconfig/zconf.l @@ -294,7 +294,6 @@ void zconf_initscan(const char *name) current_file = file_lookup(name); current_file->lineno = 1; - current_file->flags = FILE_BUSY; } void zconf_nextfile(const char *name) @@ -334,7 +333,6 @@ void zconf_nextfile(const char *name) 
exit(1); } } - file->flags |= FILE_BUSY; file->lineno = 1; file->parent = current_file; current_file = file; @@ -344,8 +342,6 @@ static void zconf_endfile(void) { struct buffer *parent; - current_file->flags |= FILE_SCANNED; - current_file->flags &= ~FILE_BUSY; current_file = current_file->parent; parent = current_buf->parent; From 71a83ec7da8910f374a1c82e96d2704aa45d9238 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 13:24:57 +0300 Subject: [PATCH 0107/3380] Kconfig: improve KALLSYMS_ALL documentation Dumb users like myself are not able to grasp from the existing KALLSYMS_ALL documentation that this option is not what they need. Improve the help message and make it clearer that KALLSYMS is enough in the majority of use cases, and KALLSYMS_ALL should really be used very rarely. Signed-off-by: Artem Bityutskiy Signed-off-by: Michal Marek --- init/Kconfig | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 56240e724d9a..563065df29a5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -968,12 +968,18 @@ config KALLSYMS_ALL bool "Include all symbols in kallsyms" depends on DEBUG_KERNEL && KALLSYMS help - Normally kallsyms only contains the symbols of functions, for nicer - OOPS messages. Some debuggers can use kallsyms for other - symbols too: say Y here to include all symbols, if you need them - and you don't care about adding 300k to the size of your kernel. + Normally kallsyms only contains the symbols of functions for nicer + OOPS messages and backtraces (i.e., symbols from the text and inittext + sections). This is sufficient for most cases. And only in very rare + cases (e.g., when a debugger is used) all symbols are required (e.g., + names of variables from the data sections, etc). - Say N. 
+ This option makes sure that all symbols are loaded into the kernel + image (i.e., symbols from all sections) in cost of increased kernel + size (depending on the kernel configuration, it may be 300KiB or + something like this). + + Say N unless you really need all symbols. config KALLSYMS_EXTRA_PASS bool "Do an extra kallsyms pass" From 1e2795a1191bb5ff05e80d77feffd51ac875c06d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 13:24:58 +0300 Subject: [PATCH 0108/3380] kbuild: move KALLSYMS_EXTRA_PASS from Kconfig to Makefile At the moment we have the CONFIG_KALLSYMS_EXTRA_PASS Kconfig switch, which users can enable or disable while configuring the kernel. This option is then used by 'make' to determine whether an extra kallsyms pass is needed or not. However, this approach is not nice and is confusing, and this patch moves CONFIG_KALLSYMS_EXTRA_PASS from Kconfig to Makefile instead. The rationale is below. 1. CONFIG_KALLSYMS_EXTRA_PASS is really about the build time, not run-time. There is no real need for it to be in Kconfig. It is just an additional work-around which should be used only in rare cases, when someone breaks kallsyms, so Kbuild/Makefile is a much better place for this option. 2. Grepping CONFIG_KALLSYMS_EXTRA_PASS shows that many defconfigs have it enabled, probably not because they try to work-around a kallsyms bug, but just because the Kconfig help text is confusing and does not really make it clear that this option should not be used except when kallsyms is broken. 3. And since many people have CONFIG_KALLSYMS_EXTRA_PASS enabled in their Kconfig, we might fail to notice kallsyms bugs in time. E.g., many testers use "make allyesconfig" to test builds, which will enable CONFIG_KALLSYMS_EXTRA_PASS and kallsyms breakage will not be noticed. To address that, this patch: 1. Kills CONFIG_KALLSYMS_EXTRA_PASS 2. Changes Makefile so that people can use "make KALLSYMS_EXTRA_PASS=1" to enable the extra pass if needed. 
Additionally, they may define KALLSYMS_EXTRA_PASS as an environment variable. 3. By default KALLSYMS_EXTRA_PASS is disabled and if kallsyms has issues, "make" should print a warning and suggest using KALLSYMS_EXTRA_PASS Signed-off-by: Artem Bityutskiy [mmarek: Removed make help text, is not necessary] Signed-off-by: Michal Marek --- Makefile | 13 ++++++++----- init/Kconfig | 12 ------------ 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index ba7a55ccd890..c3bd316b16f4 100644 --- a/Makefile +++ b/Makefile @@ -797,15 +797,17 @@ ifdef CONFIG_KALLSYMS # o The correct .tmp_kallsyms2.o is linked into the final vmlinux. # o Verify that the System.map from vmlinux matches the map from # .tmp_vmlinux2, just in case we did not generate kallsyms correctly. -# o If CONFIG_KALLSYMS_EXTRA_PASS is set, do an extra pass using +# o If 'make KALLSYMS_EXTRA_PASS=1" was used, do an extra pass using # .tmp_vmlinux3 and .tmp_kallsyms3.o. This is only meant as a # temporary bypass to allow the kernel to be built while the # maintainers work out what went wrong with kallsyms. -ifdef CONFIG_KALLSYMS_EXTRA_PASS -last_kallsyms := 3 -else last_kallsyms := 2 + +ifdef KALLSYMS_EXTRA_PASS +ifneq ($(KALLSYMS_EXTRA_PASS),0) +last_kallsyms := 3 +endif endif kallsyms.o := .tmp_kallsyms$(last_kallsyms).o @@ -816,7 +818,8 @@ define verify_kallsyms $(cmd_sysmap) .tmp_vmlinux$(last_kallsyms) .tmp_System.map $(Q)cmp -s System.map .tmp_System.map || \ (echo Inconsistent kallsyms data; \ - echo Try setting CONFIG_KALLSYMS_EXTRA_PASS; \ + echo This is a bug - please report about it; \ + echo Try "make KALLSYMS_EXTRA_PASS=1" as a workaround; \ rm .tmp_kallsyms* ; /bin/false ) endef diff --git a/init/Kconfig b/init/Kconfig index 563065df29a5..0edda616640f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -981,18 +981,6 @@ config KALLSYMS_ALL Say N unless you really need all symbols. 
-config KALLSYMS_EXTRA_PASS - bool "Do an extra kallsyms pass" - depends on KALLSYMS - help - If kallsyms is not working correctly, the build will fail with - inconsistent kallsyms data. If that occurs, log a bug report and - turn on KALLSYMS_EXTRA_PASS which should result in a stable build. - Always say N here unless you find a bug in kallsyms, which must be - reported. KALLSYMS_EXTRA_PASS is only a temporary workaround while - you wait for kallsyms to be fixed. - - config HOTPLUG bool "Support for hot-pluggable devices" if EXPERT default y From 7708992616487c00d5ca8ed7612111180d8e1b68 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:51:27 -0400 Subject: [PATCH 0109/3380] xen/blkback: Separate the bio allocation and the bio submission. We separate the bio allocation (bio_alloc) from the bio submission so that the error paths are much easier, and also so that the bio submission can be done in one tight loop. It also makes the plug/unplug calls much much easier. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 45 ++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 464f2e0b5a61..3c10499d61a7 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -421,7 +421,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; - int ret, i; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int ret, i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -529,14 +530,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_flush; } - /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); - blk_start_plug(&plug); - - /* We set it one so that the last submit_bio does not have to call - * atomic_inc. 
- */ - atomic_set(&pending_req->pendcnt, 1); + /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); for (i = 0; i < nseg; i++) { @@ -552,12 +546,8 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkbk->pending_page(pending_req, i), seg[i].nsec << 9, seg[i].buf & ~PAGE_MASK) == 0)) { - if (bio) { - atomic_inc(&pending_req->pendcnt); - submit_bio(operation, bio); - } - bio = bio_alloc(GFP_KERNEL, nseg-i); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -573,7 +563,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* This will be hit if the operation was a barrier. */ if (!bio) { BUG_ON(operation != WRITE_BARRIER); - bio = bio_alloc(GFP_KERNEL, 0); + bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) goto fail_put_bio; @@ -583,15 +573,28 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, bio->bi_sector = -1; } - submit_bio(operation, bio); + + /* We set it one so that the last submit_bio does not have to call + * atomic_inc. + */ + atomic_set(&pending_req->pendcnt, nbio); + + /* Get a reference count for the disk queue and start sending I/O */ + blk_get_queue(q); + blk_start_plug(&plug); + + for (i = 0; i < nbio; i++) + submit_bio(operation, biolist[i]); + + blk_finish_plug(&plug); + /* Let the I/Os go.. 
*/ + blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; else if (operation == WRITE || operation == WRITE_BARRIER) blkif->st_wr_sect += preq.nr_sects; - blk_finish_plug(&plug); - blk_put_queue(q); return; fail_flush: @@ -604,11 +607,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_put_bio: + for (i = 0; i < (nbio-1); i++) + bio_put(biolist[i]); __end_block_io_op(pending_req, -EINVAL); - if (bio) - bio_put(bio); - blk_finish_plug(&plug); - blk_put_queue(q); msleep(1); /* back off a bit */ return; } From b0aef17924a06646403cae8eecf6c73219a63c19 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 10:58:05 -0400 Subject: [PATCH 0110/3380] xen/blkback: Cleanup move the code a bit around. Moving it so that the code that 'fast_flush_area' code is close to the code that deals with it so that the reader won't lose focus. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 97 +++++++++++++++++------------------ 1 file changed, 47 insertions(+), 50 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3c10499d61a7..f282463d7b5c 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -167,41 +167,18 @@ static void free_req(struct pending_req *req) } /* - * Unmap the grant references, and also remove the M2P over-rides - * used in the 'pending_req'. -*/ -static void fast_flush_area(struct pending_req *req) + * Notification from the guest OS. 
+ */ +static void blkif_notify_work(struct blkif_st *blkif) { - struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - unsigned int i, invcount = 0; - grant_handle_t handle; - int ret; + blkif->waiting_reqs = 1; + wake_up(&blkif->wq); +} - for (i = 0; i < req->nr_pages; i++) { - handle = pending_handle(req, i); - if (handle == BLKBACK_INVALID_HANDLE) - continue; - gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), - GNTMAP_host_map, handle); - pending_handle(req, i) = BLKBACK_INVALID_HANDLE; - invcount++; - } - - ret = HYPERVISOR_grant_table_op( - GNTTABOP_unmap_grant_ref, unmap, invcount); - BUG_ON(ret); - /* Note, we use invcount, so nr->pages, so we can't index - * using vaddr(req, i). - */ - for (i = 0; i < invcount; i++) { - ret = m2p_remove_override( - virt_to_page(unmap[i].host_addr), false); - if (ret) { - printk(KERN_ALERT "Failed to remove M2P override for " \ - "%lx\n", (unsigned long)unmap[i].host_addr); - continue; - } - } +irqreturn_t blkif_be_int(int irq, void *dev_id) +{ + blkif_notify_work(dev_id); + return IRQ_HANDLED; } /* @@ -264,6 +241,43 @@ int blkif_schedule(void *arg) return 0; } +/* + * Unmap the grant references, and also remove the M2P over-rides + * used in the 'pending_req'. +*/ +static void fast_flush_area(struct pending_req *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + gnttab_set_unmap_op(&unmap[invcount], vaddr(req, i), + GNTMAP_host_map, handle); + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + BUG_ON(ret); + /* Note, we use invcount, so nr->pages, so we can't index + * using vaddr(req, i). 
+ */ + for (i = 0; i < invcount; i++) { + ret = m2p_remove_override( + virt_to_page(unmap[i].host_addr), false); + if (ret) { + printk(KERN_ALERT "Failed to remove M2P override for " \ + "%lx\n", (unsigned long)unmap[i].host_addr); + continue; + } + } +} /* * Completion callback on the bio's. Called as bh->b_end_io() */ @@ -305,23 +319,6 @@ static void end_block_io_op(struct bio *bio, int error) } -/* - * Notification from the guest OS. - */ - -static void blkif_notify_work(struct blkif_st *blkif) -{ - blkif->waiting_reqs = 1; - wake_up(&blkif->wq); -} - -irqreturn_t blkif_be_int(int irq, void *dev_id) -{ - blkif_notify_work(dev_id); - return IRQ_HANDLED; -} - - /* * Function to copy the from the ring buffer the 'struct blkif_request' From 1a95fe6e42cefc52c62c471ad87d7fe8643231df Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:35:13 -0400 Subject: [PATCH 0111/3380] xen/blkback: Shuffle code around (vbd_translate moved higher). We take out the chunk of code dealing with mapping to the guest of pages into the xen_blk_map_buf code. And we also move the vbd_translate to be done much earlier. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 131 ++++++++++++++++++---------------- 1 file changed, 71 insertions(+), 60 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index f282463d7b5c..211b2005f963 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -241,6 +241,10 @@ int blkif_schedule(void *arg) return 0; } +struct seg_buf { + unsigned long buf; + unsigned int nsec; +}; /* * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. 
@@ -278,6 +282,62 @@ static void fast_flush_area(struct pending_req *req) } } } +static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int i; + int nseg = req->nr_segments; + int ret = 0; + /* Fill out preq.nr_sects with proper amount of sectors, and setup + * assign map[..] with the PFN of the page in our domain with the + * corresponding grant reference for each page. + */ + for (i = 0; i < nseg; i++) { + uint32_t flags; + + flags = GNTMAP_host_map; + if (pending_req->operation != BLKIF_OP_READ) + flags |= GNTMAP_readonly; + gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, + req->u.rw.seg[i].gref, pending_req->blkif->domid); + } + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + BUG_ON(ret); + + /* Now swizzel the MFN in our domain with the MFN from the other domain + * so that when we access vaddr(pending_req,i) it has the contents of + * the page from the other domain. + */ + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTK("invalid buffer -- could not remap it\n"); + map[i].handle = BLKBACK_INVALID_HANDLE; + ret |= 1; + } + + pending_handle(pending_req, i) = map[i].handle; + + if (ret) + continue; + + ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), + blkbk->pending_page(pending_req, i), false); + if (ret) { + printk(KERN_ALERT "Failed to install M2P override for"\ + " %lx (ret: %d)\n", (unsigned long) + map[i].dev_bus_addr, ret); + /* We could switch over to GNTTABOP_copy */ + continue; + } + + seg[i].buf = map[i].dev_bus_addr | + (req->u.rw.seg[i].first_sect << 9); + } + return ret; +} + /* * Completion callback on the bio's. 
Called as bh->b_end_io() */ @@ -411,15 +471,12 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, struct blkif_request *req, struct pending_req *pending_req) { - struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct phys_req preq; - struct { - unsigned long buf; unsigned int nsec; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct seg_buf seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg; struct bio *bio = NULL; struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; - int ret, i, nbio = 0; + int i, nbio = 0; int operation; struct blk_plug plug; struct request_queue *q; @@ -444,6 +501,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, if (unlikely(nseg == 0 && operation != WRITE_BARRIER) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTK("Bad number of segments in request (%d)\n", nseg); + /* Haven't submitted any bio's yet. */ goto fail_response; } @@ -456,76 +514,29 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; - - /* Fill out preq.nr_sects with proper amount of sectors, and setup - * assign map[..] with the PFN of the page in our domain with the - * corresponding grant reference for each page. 
- */ for (i = 0; i < nseg; i++) { - uint32_t flags; - seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; if ((req->u.rw.seg[i].last_sect >= (PAGE_SIZE >> 9)) || (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; - - flags = GNTMAP_host_map; - if (operation != READ) - flags |= GNTMAP_readonly; - gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags, - req->u.rw.seg[i].gref, blkif->domid); } - ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); - BUG_ON(ret); - - /* Now swizzel the MFN in our domain with the MFN from the other domain - * so that when we access vaddr(pending_req,i) it has the contents of - * the page from the other domain. - */ - for (i = 0; i < nseg; i++) { - if (unlikely(map[i].status != 0)) { - DPRINTK("invalid buffer -- could not remap it\n"); - map[i].handle = BLKBACK_INVALID_HANDLE; - ret |= 1; - } - - pending_handle(pending_req, i) = map[i].handle; - - if (ret) - continue; - - ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr), - blkbk->pending_page(pending_req, i), false); - if (ret) { - printk(KERN_ALERT "Failed to install M2P override for"\ - " %lx (ret: %d)\n", (unsigned long) - map[i].dev_bus_addr, ret); - /* We could switch over to GNTTABOP_copy */ - continue; - } - - seg[i].buf = map[i].dev_bus_addr | - (req->u.rw.seg[i].first_sect << 9); - } - - /* If we have failed at this point, we need to undo the M2P override, - * set gnttab_set_unmap_op on all of the grant references and perform - * the hypercall to unmap the grants - that is all done in - * fast_flush_area. - */ - if (ret) - goto fail_flush; - if (vbd_translate(&preq, blkif, operation) != 0) { DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", operation == READ ? 
"read" : "write", preq.sector_number, preq.sector_number + preq.nr_sects, preq.dev); - goto fail_flush; + goto fail_response; } + /* If we have failed at this point, we need to undo the M2P override, + * set gnttab_set_unmap_op on all of the grant references and perform + * the hypercall to unmap the grants - that is all done in + * fast_flush_area. + */ + if (xen_blk_map_buf(req, pending_req, seg)) + goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ blkif_get(blkif); From 976222e05ea5a9959ccf880d7a24efbf79b3c6cf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:38:29 -0400 Subject: [PATCH 0112/3380] xen/blkback: Move the check for misaligned I/O higher. We move it up higher to be in same loop that actually computes the sector number. This way, all of the code that deals with verifying that the request is correct is all done before we do any of the page mapping, I/O submission, etc. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 211b2005f963..9598e0fd0f9e 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -521,6 +521,13 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, (req->u.rw.seg[i].last_sect < req->u.rw.seg[i].first_sect)) goto fail_response; preq.nr_sects += seg[i].nsec; + + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } } if (vbd_translate(&preq, blkif, operation) != 0) { @@ -542,13 +549,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blkif_get(blkif); for (i = 0; i < nseg; i++) { - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain 
%d", - blkif->domid); - goto fail_put_bio; - } - while ((bio == NULL) || (bio_add_page(bio, blkbk->pending_page(pending_req, i), From 9f3aedf573dd034d59e7eb6c4ee97648d5be8fc6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 15 Apr 2011 11:50:34 -0400 Subject: [PATCH 0113/3380] xen/blkback: Change fast_flush_area to xen_blkbk_unmap, and tweak xen_blk_map_seg. The previous name ('fast_flush_area') had nothing to do with what it does right now. Changing the names so that the code dealing with mapping pages in and out of the guest is called xen_blkbk_[map|unmap]. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 9598e0fd0f9e..c645c83f900b 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -249,7 +249,7 @@ struct seg_buf { * Unmap the grant references, and also remove the M2P over-rides * used in the 'pending_req'. */ -static void fast_flush_area(struct pending_req *req) +static void xen_blkbk_unmap(struct pending_req *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; @@ -282,8 +282,8 @@ static void fast_flush_area(struct pending_req *req) } } } -static int xen_blk_map_buf(struct blkif_request *req, struct pending_req *pending_req, - struct seg_buf seg[]) +static int xen_blkbk_map(struct blkif_request *req, struct pending_req *pending_req, + struct seg_buf seg[]) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int i; @@ -361,7 +361,7 @@ static void __end_block_io_op(struct pending_req *pending_req, int error) * the proper response on the ring. 
*/ if (atomic_dec_and_test(&pending_req->pendcnt)) { - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); @@ -540,9 +540,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in - * fast_flush_area. + * xen_blkbk_unmap. */ - if (xen_blk_map_buf(req, pending_req, seg)) + if (xen_blkbk_map(req, pending_req, seg)) goto fail_flush; /* This corresponding blkif_put is done in __end_block_io_op */ @@ -606,7 +606,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, return; fail_flush: - fast_flush_area(pending_req); + xen_blkbk_unmap(pending_req); fail_response: /* Haven't submitted any bio's yet. */ make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); From 26626f1172fb4f3f323239a6a5cf4e082643fa46 Mon Sep 17 00:00:00 2001 From: Yang Ruirui Date: Sat, 16 Apr 2011 19:17:48 -0400 Subject: [PATCH 0114/3380] ext4: release page cache in ext4_mb_load_buddy error path Add missing page_cache_release in the error path of ext4_mb_load_buddy Signed-off-by: Yang Ruirui Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/mballoc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d8a16eecf1d5..15bfa44abd29 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -1273,6 +1273,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, return 0; err: + if (page) + page_cache_release(page); if (e4b->bd_bitmap_page) page_cache_release(e4b->bd_bitmap_page); if (e4b->bd_buddy_page) From a7f800131f35925299860a95259453c9bc0c272f Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Thu, 14 Apr 2011 17:13:22 +0900 Subject: [PATCH 0115/3380] ARM: mach-shmobile: 
clock-sh7372: remove status check from fsidiv_recalc clock status check is not needed in recalc function. clk->rate will be 0 in clk_set_rate without this patch. Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index e9731b5a73ed..6c79b4019667 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -421,9 +421,6 @@ static unsigned long fsidiv_recalc(struct clk *clk) value = __raw_readl(clk->mapping->base); - if ((value & 0x3) != 0x3) - return 0; - value >>= 16; if (value < 2) return 0; From c33724a43875786719f51916311308f2752d846e Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 28 Apr 2009 15:05:20 +0200 Subject: [PATCH 0116/3380] kconfig: Do not record timestamp in auto.conf and autoconf.h Timestamps in file data are useless and there is already one in .config Signed-off-by: Michal Marek --- scripts/kconfig/confdata.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 61c35bf2d9cb..834eecb010ba 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -784,7 +784,6 @@ int conf_write_autoconf(void) const char *str; const char *name; FILE *out, *tristate, *out_h; - time_t now; int i; sym_clear_all_valid(); @@ -811,22 +810,19 @@ int conf_write_autoconf(void) return 1; } - time(&now); fprintf(out, "#\n" "# Automatically generated make config: don't edit\n" "# %s\n" - "# %s" "#\n", - rootmenu.prompt->text, ctime(&now)); + rootmenu.prompt->text); fprintf(tristate, "#\n" "# Automatically generated - do not edit\n" "\n"); fprintf(out_h, "/*\n" " * Automatically generated C config: don't edit\n" " * %s\n" - " * %s" " */\n", - rootmenu.prompt->text, ctime(&now)); + rootmenu.prompt->text); for_all_symbols(i, sym) { sym_calc_value(sym); From 
6ae9ecb86188cc8419024cdb299f18d4ae4f5713 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 15:47:55 +0200 Subject: [PATCH 0117/3380] kbuild: Call gzip with -n The timestamps recorded in the .gz files add no value. Signed-off-by: Michal Marek --- scripts/Makefile.lib | 2 +- scripts/gen_initramfs_list.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 1c702ca8aac8..93b2b5938a2e 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -197,7 +197,7 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ # --------------------------------------------------------------------------- quiet_cmd_gzip = GZIP $@ -cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \ +cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -n -f -9 > $@) || \ (rm -f $@ ; false) # DTC diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 55caecdad995..4a43fe12d179 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -226,7 +226,7 @@ cpio_list= output="/dev/stdout" output_file="" is_cpio_compressed= -compr="gzip -9 -f" +compr="gzip -n -9 -f" arg="$1" case "$arg" in @@ -240,7 +240,7 @@ case "$arg" in output_file="$1" cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" output=${cpio_list} - echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f" + echo "$output_file" | grep -q "\.gz$" && compr="gzip -n -9 -f" echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f" echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f" echo "$output_file" | grep -q "\.xz$" && \ From 09ff9fecc039d60fff6c11d47522af61e89fff56 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 16:09:47 +0200 Subject: [PATCH 0118/3380] kbuild: Use the deterministic mode of ar Signed-off-by: Michal Marek --- scripts/Makefile.build | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Makefile.build 
b/scripts/Makefile.build index d5f925abe4d2..5f87d3709f9f 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -345,7 +345,7 @@ quiet_cmd_link_o_target = LD $@ cmd_link_o_target = $(if $(strip $(obj-y)),\ $(LD) $(ld_flags) -r -o $@ $(filter $(obj-y), $^) \ $(cmd_secanalysis),\ - rm -f $@; $(AR) rcs $@) + rm -f $@; $(AR) rcsD $@) $(builtin-target): $(obj-y) FORCE $(call if_changed,link_o_target) @@ -371,7 +371,7 @@ $(modorder-target): $(subdir-ym) FORCE # ifdef lib-target quiet_cmd_link_l_target = AR $@ -cmd_link_l_target = rm -f $@; $(AR) rcs $@ $(lib-y) +cmd_link_l_target = rm -f $@; $(AR) rcsD $@ $(lib-y) $(lib-target): $(lib-y) FORCE $(call if_changed,link_l_target) From 061296dc2c14f852604fc6849669fe0b78bb1eda Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 17:13:55 +0200 Subject: [PATCH 0119/3380] kbuild: Drop unused LINUX_COMPILE_TIME and LINUX_COMPILE_DOMAIN macros Signed-off-by: Michal Marek --- scripts/mkcompile_h | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 50ad317a4bf9..82416a81df5e 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -63,21 +63,9 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_TIME \"`date +%T`\" echo \#define LINUX_COMPILE_BY \"`whoami`\" echo \#define LINUX_COMPILE_HOST \"`hostname | $UTS_TRUNCATE`\" - domain=`dnsdomainname 2> /dev/null` - if [ -z "$domain" ]; then - domain=`domainname 2> /dev/null` - fi - - if [ -n "$domain" ]; then - echo \#define LINUX_COMPILE_DOMAIN \"`echo $domain | $UTS_TRUNCATE`\" - else - echo \#define LINUX_COMPILE_DOMAIN - fi - echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile @@ -91,8 +79,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" # first line. 
if [ -r $TARGET ] && \ - grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' $TARGET > .tmpver.1 && \ - grep -v 'UTS_VERSION\|LINUX_COMPILE_TIME' .tmpcompile > .tmpver.2 && \ + grep -v 'UTS_VERSION' $TARGET > .tmpver.1 && \ + grep -v 'UTS_VERSION' .tmpcompile > .tmpver.2 && \ cmp -s .tmpver.1 .tmpver.2; then rm -f .tmpcompile else From 53e6892c0411006848882eacfcfea9e93681b55d Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Tue, 5 Apr 2011 14:32:30 +0200 Subject: [PATCH 0120/3380] kbuild: Allow to override LINUX_COMPILE_BY and LINUX_COMPILE_HOST macros Make it possible to override the user@host string displayed during boot and in /proc/version by the environment variables KBUILD_BUILD_USER and KBUILD_BUILD_HOST. Several distributions patch scripts/mkcompile_h to achieve this, so let's provide an official way. Also, document the KBUILD_BUILD_TIMESTAMP variable while at it. Signed-off-by: Michal Marek --- Documentation/kbuild/kbuild.txt | 12 ++++++++++++ scripts/mkcompile_h | 14 ++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index f1431d099fce..f11ebb33e4a6 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -201,3 +201,15 @@ KBUILD_ENABLE_EXTRA_GCC_CHECKS -------------------------------------------------- If enabled over the make command line with "W=1", it turns on additional gcc -W... options for more extensive build-time checking. + +KBUILD_BUILD_TIMESTAMP +-------------------------------------------------- +Setting this to a date string overrides the timestamp used in the +UTS_VERSION definition (uname -v in the running kernel). The default value +is the output of the date command at one point during build. + +KBUILD_BUILD_USER, KBUILD_BUILD_HOST +-------------------------------------------------- +These two variables allow to override the user@host string displayed during +boot and in /proc/version. 
The default value is the output of the commands +whoami and host, respectively. diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h index 82416a81df5e..7ad6bf7a09ff 100755 --- a/scripts/mkcompile_h +++ b/scripts/mkcompile_h @@ -42,6 +42,16 @@ if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then else TIMESTAMP=$KBUILD_BUILD_TIMESTAMP fi +if test -z "$KBUILD_BUILD_USER"; then + LINUX_COMPILE_BY=`whoami` +else + LINUX_COMPILE_BY=$KBUILD_BUILD_USER +fi +if test -z "$KBUILD_BUILD_HOST"; then + LINUX_COMPILE_HOST=`hostname` +else + LINUX_COMPILE_HOST=$KBUILD_BUILD_HOST +fi UTS_VERSION="#$VERSION" CONFIG_FLAGS="" @@ -63,8 +73,8 @@ UTS_TRUNCATE="cut -b -$UTS_LEN" echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\" - echo \#define LINUX_COMPILE_BY \"`whoami`\" - echo \#define LINUX_COMPILE_HOST \"`hostname | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\" + echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\" echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | tail -n 1`\" ) > .tmpcompile From a8b8017c34fefcb763d8b06c294b58d1c480b2e4 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 31 Mar 2011 23:16:42 +0200 Subject: [PATCH 0121/3380] initramfs: Use KBUILD_BUILD_TIMESTAMP for generated entries gen_init_cpio gets the current time and uses it for each symlink, special file, and directory. Grab the current time once and make it possible to override it with the KBUILD_BUILD_TIMESTAMP variable for reproducible builds. Signed-off-by: Michal Marek --- Documentation/kbuild/kbuild.txt | 3 +- scripts/gen_initramfs_list.sh | 9 +++++- usr/gen_init_cpio.c | 53 +++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 15 deletions(-) diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt index f11ebb33e4a6..646e2c114fff 100644 --- a/Documentation/kbuild/kbuild.txt +++ b/Documentation/kbuild/kbuild.txt @@ -205,7 +205,8 @@ gcc -W... 
options for more extensive build-time checking. KBUILD_BUILD_TIMESTAMP -------------------------------------------------- Setting this to a date string overrides the timestamp used in the -UTS_VERSION definition (uname -v in the running kernel). The default value +UTS_VERSION definition (uname -v in the running kernel). The value has to +be a string that can be passed to date -d. The default value is the output of the date command at one point during build. KBUILD_BUILD_USER, KBUILD_BUILD_HOST diff --git a/scripts/gen_initramfs_list.sh b/scripts/gen_initramfs_list.sh index 4a43fe12d179..d44cf675bc22 100644 --- a/scripts/gen_initramfs_list.sh +++ b/scripts/gen_initramfs_list.sh @@ -287,8 +287,15 @@ done # we are carefull to delete tmp files if [ ! -z ${output_file} ]; then if [ -z ${cpio_file} ]; then + timestamp= + if test -n "$KBUILD_BUILD_TIMESTAMP"; then + timestamp="$(date -d"$KBUILD_BUILD_TIMESTAMP" +%s || :)" + if test -n "$timestamp"; then + timestamp="-t $timestamp" + fi + fi cpio_tfile="$(mktemp ${TMPDIR:-/tmp}/cpiofile.XXXXXX)" - usr/gen_init_cpio ${cpio_list} > ${cpio_tfile} + usr/gen_init_cpio $timestamp ${cpio_list} > ${cpio_tfile} else cpio_tfile=${cpio_file} fi diff --git a/usr/gen_init_cpio.c b/usr/gen_init_cpio.c index 7f06884ecd41..af0f22fb1ef7 100644 --- a/usr/gen_init_cpio.c +++ b/usr/gen_init_cpio.c @@ -22,6 +22,7 @@ static unsigned int offset; static unsigned int ino = 721; +static time_t default_mtime; struct file_handler { const char *type; @@ -102,7 +103,6 @@ static int cpio_mkslink(const char *name, const char *target, unsigned int mode, uid_t uid, gid_t gid) { char s[256]; - time_t mtime = time(NULL); if (name[0] == '/') name++; @@ -114,7 +114,7 @@ static int cpio_mkslink(const char *name, const char *target, (long) uid, /* uid */ (long) gid, /* gid */ 1, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ (unsigned)strlen(target)+1, /* filesize */ 3, /* major */ 1, /* minor */ @@ -152,7 +152,6 @@ static int 
cpio_mkgeneric(const char *name, unsigned int mode, uid_t uid, gid_t gid) { char s[256]; - time_t mtime = time(NULL); if (name[0] == '/') name++; @@ -164,7 +163,7 @@ static int cpio_mkgeneric(const char *name, unsigned int mode, (long) uid, /* uid */ (long) gid, /* gid */ 2, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ 0, /* filesize */ 3, /* major */ 1, /* minor */ @@ -242,7 +241,6 @@ static int cpio_mknod(const char *name, unsigned int mode, unsigned int maj, unsigned int min) { char s[256]; - time_t mtime = time(NULL); if (dev_type == 'b') mode |= S_IFBLK; @@ -259,7 +257,7 @@ static int cpio_mknod(const char *name, unsigned int mode, (long) uid, /* uid */ (long) gid, /* gid */ 1, /* nlink */ - (long) mtime, /* mtime */ + (long) default_mtime, /* mtime */ 0, /* filesize */ 3, /* major */ 1, /* minor */ @@ -460,7 +458,7 @@ static int cpio_mkfile_line(const char *line) static void usage(const char *prog) { fprintf(stderr, "Usage:\n" - "\t%s \n" + "\t%s [-t ] \n" "\n" " is a file containing newline separated entries that\n" "describe the files to be included in the initramfs archive:\n" @@ -491,7 +489,11 @@ static void usage(const char *prog) "nod /dev/console 0600 0 0 c 5 1\n" "dir /root 0700 0 0\n" "dir /sbin 0755 0 0\n" - "file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n", + "file /sbin/kinit /usr/src/klibc/kinit/kinit 0755 0 0\n" + "\n" + " is time in seconds since Epoch that will be used\n" + "as mtime for symlinks, special files and directories. 
The default\n" + "is to use the current time for these entries.\n", prog); } @@ -529,17 +531,42 @@ int main (int argc, char *argv[]) char *args, *type; int ec = 0; int line_nr = 0; + const char *filename; - if (2 != argc) { + default_mtime = time(NULL); + while (1) { + int opt = getopt(argc, argv, "t:h"); + char *invalid; + + if (opt == -1) + break; + switch (opt) { + case 't': + default_mtime = strtol(optarg, &invalid, 10); + if (!*optarg || *invalid) { + fprintf(stderr, "Invalid timestamp: %s\n", + optarg); + usage(argv[0]); + exit(1); + } + break; + case 'h': + case '?': + usage(argv[0]); + exit(opt == 'h' ? 0 : 1); + } + } + + if (argc - optind != 1) { usage(argv[0]); exit(1); } - - if (!strcmp(argv[1], "-")) + filename = argv[optind]; + if (!strcmp(filename, "-")) cpio_list = stdin; - else if (! (cpio_list = fopen(argv[1], "r"))) { + else if (!(cpio_list = fopen(filename, "r"))) { fprintf(stderr, "ERROR: unable to open '%s': %s\n\n", - argv[1], strerror(errno)); + filename, strerror(errno)); usage(argv[0]); exit(1); } From e93504933ee6982bdc005fa5c24e1ea330faaf8b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 11:34:55 -0400 Subject: [PATCH 0122/3380] xen/blkback: Move the check for misaligned I/O once more. The commit 976222e05ea5a9959ccf880d7a24efbf79b3c6cf xen/blkback: Move the check for misaligned I/O higher. moved it a bit to high. The preq->vbdev was not set, so the check for misaligned I/O would cause a NULL pointer derefence. 
Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index c645c83f900b..a0d3227955c9 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -514,6 +514,7 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; + for (i = 0; i < nseg; i++) { seg[i].nsec = req->u.rw.seg[i].last_sect - req->u.rw.seg[i].first_sect + 1; @@ -522,12 +523,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; preq.nr_sects += seg[i].nsec; - if (((int)preq.sector_number|(int)seg[i].nsec) & - ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { - DPRINTK("Misaligned I/O request from domain %d", - blkif->domid); - goto fail_response; - } } if (vbd_translate(&preq, blkif, operation) != 0) { @@ -537,6 +532,16 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, preq.sector_number + preq.nr_sects, preq.dev); goto fail_response; } + /* This check _MUST_ be done after vbd_translate as the preq.bdev + * is set there. */ + for (i = 0; i < nseg; i++) { + if (((int)preq.sector_number|(int)seg[i].nsec) & + ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) { + DPRINTK("Misaligned I/O request from domain %d", + blkif->domid); + goto fail_response; + } + } /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in From 6fd17b5643bf05c29fc226a5aee96328056fca10 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 12:04:17 -0400 Subject: [PATCH 0123/3380] xen/blkback: Get the 'requeust_queue' properly. 
After the commit 0faa8cca883bbc6a0919e3c89128672659b75820 (" xen/blkback: remove per-queue plugging") we forgot to retrieve the 'struct request_queue' from the block device. This puts the functionality back in and fixes a NULL pointer bug. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/blkback/blkback.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index a0d3227955c9..3751325bfc32 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -542,6 +542,9 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } + q = bdev_get_queue(preq.bdev); + if (!q) + goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in From d2436eda2e81f1993bfe6349f17f52503bffeff5 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:17:49 -0400 Subject: [PATCH 0124/3380] block, xen/blkback: remove blk_[get|put]_queue calls. They were used to check if the queue does not have QUEUE_FLAG_DEAD set. That is not necessary anymore as the 'submit_io' call ends up doing that for us. 
Signed-off-by: Konrad Rzeszutek Wilk --- block/blk-core.c | 2 -- drivers/xen/blkback/blkback.c | 6 ------ 2 files changed, 8 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 9b60e69a5400..90f22cc30799 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -351,7 +351,6 @@ void blk_put_queue(struct request_queue *q) { kobject_put(&q->kobj); } -EXPORT_SYMBOL_GPL(blk_put_queue); /* * Note: If a driver supplied the queue lock, it should not zap that lock @@ -573,7 +572,6 @@ int blk_get_queue(struct request_queue *q) return 1; } -EXPORT_SYMBOL_GPL(blk_get_queue); static inline void blk_free_request(struct request_queue *q, struct request *rq) { diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c index 3751325bfc32..59a2bae0f35e 100644 --- a/drivers/xen/blkback/blkback.c +++ b/drivers/xen/blkback/blkback.c @@ -479,7 +479,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, int i, nbio = 0; int operation; struct blk_plug plug; - struct request_queue *q; switch (req->operation) { case BLKIF_OP_READ: @@ -542,9 +541,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, goto fail_response; } } - q = bdev_get_queue(preq.bdev); - if (!q) - goto fail_response; /* If we have failed at this point, we need to undo the M2P override, * set gnttab_set_unmap_op on all of the grant references and perform * the hypercall to unmap the grants - that is all done in @@ -596,7 +592,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, atomic_set(&pending_req->pendcnt, nbio); /* Get a reference count for the disk queue and start sending I/O */ - blk_get_queue(q); blk_start_plug(&plug); for (i = 0; i < nbio; i++) @@ -604,7 +599,6 @@ static void dispatch_rw_block_io(struct blkif_st *blkif, blk_finish_plug(&plug); /* Let the I/Os go.. 
*/ - blk_put_queue(q); if (operation == READ) blkif->st_rd_sect += preq.nr_sects; From dfc07b13dcacefda6ebdea14584ed8724dc980ef Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 18 Apr 2011 14:24:23 -0400 Subject: [PATCH 0125/3380] xen/blkback: Move it from drivers/xen to drivers/block .. and modify the Makefile and Kconfig files appropriately. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 8 ++++++++ drivers/block/Makefile | 1 + drivers/{xen/blkback => block/xen-blkback}/Makefile | 0 drivers/{xen/blkback => block/xen-blkback}/blkback.c | 0 drivers/{xen/blkback => block/xen-blkback}/common.h | 0 drivers/{xen/blkback => block/xen-blkback}/interface.c | 0 drivers/{xen/blkback => block/xen-blkback}/vbd.c | 0 drivers/{xen/blkback => block/xen-blkback}/xenbus.c | 0 drivers/xen/Kconfig | 8 -------- drivers/xen/Makefile | 1 - 10 files changed, 9 insertions(+), 9 deletions(-) rename drivers/{xen/blkback => block/xen-blkback}/Makefile (100%) rename drivers/{xen/blkback => block/xen-blkback}/blkback.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/common.h (100%) rename drivers/{xen/blkback => block/xen-blkback}/interface.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/vbd.c (100%) rename drivers/{xen/blkback => block/xen-blkback}/xenbus.c (100%) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 83c32cb72582..9abb64689712 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -470,6 +470,14 @@ config XEN_BLKDEV_FRONTEND block device driver. It communicates with a back-end driver in another domain which drives the actual block device. +config XEN_BLKDEV_BACKEND + tristate "Block-device backend driver" + depends on XEN_BACKEND + help + The block-device backend driver allows the kernel to export its + block devices to other guests via a high-performance shared-memory + interface. 
+ config VIRTIO_BLK tristate "Virtio block driver (EXPERIMENTAL)" depends on EXPERIMENTAL && VIRTIO diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 40528ba56d1b..76646e9a1c91 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_BLK_DEV_UB) += ub.o obj-$(CONFIG_BLK_DEV_HD) += hd.o obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o diff --git a/drivers/xen/blkback/Makefile b/drivers/block/xen-blkback/Makefile similarity index 100% rename from drivers/xen/blkback/Makefile rename to drivers/block/xen-blkback/Makefile diff --git a/drivers/xen/blkback/blkback.c b/drivers/block/xen-blkback/blkback.c similarity index 100% rename from drivers/xen/blkback/blkback.c rename to drivers/block/xen-blkback/blkback.c diff --git a/drivers/xen/blkback/common.h b/drivers/block/xen-blkback/common.h similarity index 100% rename from drivers/xen/blkback/common.h rename to drivers/block/xen-blkback/common.h diff --git a/drivers/xen/blkback/interface.c b/drivers/block/xen-blkback/interface.c similarity index 100% rename from drivers/xen/blkback/interface.c rename to drivers/block/xen-blkback/interface.c diff --git a/drivers/xen/blkback/vbd.c b/drivers/block/xen-blkback/vbd.c similarity index 100% rename from drivers/xen/blkback/vbd.c rename to drivers/block/xen-blkback/vbd.c diff --git a/drivers/xen/blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c similarity index 100% rename from drivers/xen/blkback/xenbus.c rename to drivers/block/xen-blkback/xenbus.c diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index fb1af628cbfc..a59638b37c1a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -37,14 +37,6 @@ config XEN_BACKEND Support for backend device drivers that provide I/O services to other virtual machines. 
-config XEN_BLKDEV_BACKEND - tristate "Block-device backend driver" - depends on XEN_BACKEND && BLOCK - help - The block-device backend driver allows the kernel to export its - block devices to other guests via a high-performance shared-memory - interface. - config XENFS tristate "Xen filesystem" default y diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 29c0a416f082..f420f1ff7f13 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o -obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o From 2035e776050aea57fb5255557216473e82793f2c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 18 Apr 2011 17:29:14 -0400 Subject: [PATCH 0126/3380] ext4: check for ext[23] file system features when mounting as ext[23] Provide better emulation for ext[23] mode by enforcing that the file system does not have any unsupported file system features as defined by ext[23] when emulating the ext[23] file system driver when CONFIG_EXT4_USE_FOR_EXT23 is defined. This causes the file system type information in /proc/mounts to be correct for the automatically mounted root file system. This also means that "mount -t ext2 /dev/sda /mnt" will fail if /dev/sda contains an ext3 or ext4 file system, just as one would expect if the original ext2 file system driver were in use. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 ++++++++++ fs/ext4/super.c | 74 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4daaf2b753f4..076c5d212a3c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1351,6 +1351,21 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ +#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + +#define EXT3_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) + #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8553dfb310af..cb22783a4377 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -75,11 +75,27 @@ static void ext4_write_super(struct super_block *sb); static int ext4_freeze(struct super_block *sb); static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data); +static inline int ext2_feature_set_ok(struct super_block *sb); +static inline int ext3_feature_set_ok(struct super_block *sb); static int ext4_feature_set_ok(struct super_block *sb, int readonly); static void ext4_destroy_lazyinit_thread(void); static 
void ext4_unregister_li_request(struct super_block *sb); static void ext4_clear_request_list(void); +#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) +static struct file_system_type ext2_fs_type = { + .owner = THIS_MODULE, + .name = "ext2", + .mount = ext4_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; +#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type) +#else +#define IS_EXT2_SB(sb) (0) +#endif + + #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) static struct file_system_type ext3_fs_type = { .owner = THIS_MODULE, @@ -3187,6 +3203,28 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "feature flags set on rev 0 fs, " "running e2fsck is recommended"); + if (IS_EXT2_SB(sb)) { + if (ext2_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext2 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + + if (IS_EXT3_SB(sb)) { + if (ext3_feature_set_ok(sb)) + ext4_msg(sb, KERN_INFO, "mounting ext3 file system " + "using the ext4 subsystem"); + else { + ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " + "to feature incompatibilities"); + goto failed_mount; + } + } + /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, @@ -4772,14 +4810,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, } #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) -static struct file_system_type ext2_fs_type = { - .owner = THIS_MODULE, - .name = "ext2", - .mount = ext4_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - static inline void register_as_ext2(void) { int err = register_filesystem(&ext2_fs_type); @@ 
-4792,10 +4822,22 @@ static inline void unregister_as_ext2(void) { unregister_filesystem(&ext2_fs_type); } + +static inline int ext2_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT2_FEATURE_INCOMPAT_SUPP)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT2_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext2"); #else static inline void register_as_ext2(void) { } static inline void unregister_as_ext2(void) { } +static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; } #endif #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23) @@ -4811,10 +4853,24 @@ static inline void unregister_as_ext3(void) { unregister_filesystem(&ext3_fs_type); } + +static inline int ext3_feature_set_ok(struct super_block *sb) +{ + if (EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP)) + return 0; + if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) + return 0; + if (sb->s_flags & MS_RDONLY) + return 1; + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP)) + return 0; + return 1; +} MODULE_ALIAS("ext3"); #else static inline void register_as_ext3(void) { } static inline void unregister_as_ext3(void) { } +static inline int ext3_feature_set_ok(struct super_block *sb) { return 0; } #endif static struct file_system_type ext4_fs_type = { @@ -4898,8 +4954,8 @@ static int __init ext4_init_fs(void) err = init_inodecache(); if (err) goto out1; - register_as_ext2(); register_as_ext3(); + register_as_ext2(); err = register_filesystem(&ext4_fs_type); if (err) goto out; From 64a14b51bed6427a2e6d68ed687027f065f5a156 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0127/3380] cyclades: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek --- drivers/tty/cyclades.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c index c99728f0cd9f..d6f6dd28b424 100644 --- a/drivers/tty/cyclades.c +++ b/drivers/tty/cyclades.c @@ -4099,8 +4099,7 @@ static int __init cy_init(void) if (!cy_serial_driver) goto err; - printk(KERN_INFO "Cyclades driver " CY_VERSION " (built %s %s)\n", - __DATE__, __TIME__); + printk(KERN_INFO "Cyclades driver " CY_VERSION "\n"); /* Initialize the tty_driver structure */ From 2cae8de7b0464cc4c246517fca10f04593f46a3b Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0128/3380] nozomi: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek --- drivers/tty/nozomi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/nozomi.c b/drivers/tty/nozomi.c index f4f11164efe5..db16048484c0 100644 --- a/drivers/tty/nozomi.c +++ b/drivers/tty/nozomi.c @@ -61,8 +61,7 @@ #include -#define VERSION_STRING DRIVER_DESC " 2.1d (build date: " \ - __DATE__ " " __TIME__ ")" +#define VERSION_STRING DRIVER_DESC " 2.1d" /* Macros definitions */ From 372c05c4959aaccdd671f75e0fd332629cf15964 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0129/3380] media/radio-maxiradio: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Acked-by: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Signed-off-by: Michal Marek --- drivers/media/radio/radio-maxiradio.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/radio/radio-maxiradio.c b/drivers/media/radio/radio-maxiradio.c index 5c2a9058c09f..e83e84003025 100644 --- a/drivers/media/radio/radio-maxiradio.c +++ b/drivers/media/radio/radio-maxiradio.c @@ -412,8 +412,7 @@ static int __devinit maxiradio_init_one(struct pci_dev *pdev, const struct pci_d goto err_out_free_region; } - v4l2_info(v4l2_dev, "version " DRIVER_VERSION - " time " __TIME__ " " __DATE__ "\n"); + v4l2_info(v4l2_dev, "version " DRIVER_VERSION "\n"); v4l2_info(v4l2_dev, "found Guillemot MAXI Radio device (io = 0x%x)\n", dev->io); From 7cf444639101ad8671cb210addbffbdebd07c068 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0130/3380] media/cx231xx: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Acked-by: Mauro Carvalho Chehab Cc: linux-media@vger.kernel.org Signed-off-by: Michal Marek --- drivers/media/video/cx231xx/cx231xx-avcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/video/cx231xx/cx231xx-avcore.c b/drivers/media/video/cx231xx/cx231xx-avcore.c index 62843d39817c..98e876553824 100644 --- a/drivers/media/video/cx231xx/cx231xx-avcore.c +++ b/drivers/media/video/cx231xx/cx231xx-avcore.c @@ -1354,7 +1354,7 @@ void cx231xx_dump_SC_reg(struct cx231xx *dev) { u8 value[4] = { 0, 0, 0, 0 }; int status = 0; - cx231xx_info("cx231xx_dump_SC_reg %s!\n", __TIME__); + cx231xx_info("cx231xx_dump_SC_reg!\n"); status = cx231xx_read_ctrl_reg(dev, VRT_GET_REGISTER, BOARD_CFG_STAT, value, 4); From 571b16da39922cf71db41c10852d798a44686c15 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0131/3380] aacraid: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Cc: Adaptec OEM Raid Solutions Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek --- drivers/scsi/aacraid/linit.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 4ff26521d75f..3382475dc22d 100644 --- a/drivers/scsi/aacraid/linit.c +++ b/drivers/scsi/aacraid/linit.c @@ -59,7 +59,6 @@ #ifndef AAC_DRIVER_BRANCH #define AAC_DRIVER_BRANCH "" #endif -#define AAC_DRIVER_BUILD_DATE __DATE__ " " __TIME__ #define AAC_DRIVERNAME "aacraid" #ifdef AAC_DRIVER_BUILD @@ -67,7 +66,7 @@ #define str(x) _str(x) #define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION "[" str(AAC_DRIVER_BUILD) "]" AAC_DRIVER_BRANCH #else -#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION AAC_DRIVER_BRANCH " " AAC_DRIVER_BUILD_DATE +#define AAC_DRIVER_FULL_VERSION AAC_DRIVER_VERSION AAC_DRIVER_BRANCH #endif MODULE_AUTHOR("Red Hat Inc and Adaptec"); From 4c315a5d57300db83d2eb7b8d5b6019e1fba99fc Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0132/3380] scsi/in2000: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek --- drivers/scsi/in2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/in2000.c b/drivers/scsi/in2000.c index 6568aab745a0..6096d9340407 100644 --- a/drivers/scsi/in2000.c +++ b/drivers/scsi/in2000.c @@ -2228,7 +2228,7 @@ static int in2000_proc_info(struct Scsi_Host *instance, char *buf, char **start, bp = buf; *bp = '\0'; if (hd->proc & PR_VERSION) { - sprintf(tbuf, "\nVersion %s - %s. 
Compiled %s %s", IN2000_VERSION, IN2000_DATE, __DATE__, __TIME__); + sprintf(tbuf, "\nVersion %s - %s.", IN2000_VERSION, IN2000_DATE); strcat(bp, tbuf); } if (hd->proc & PR_INFO) { From 565502f8894739ce7b5fd131f39c930dc4351710 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0133/3380] scsi/wd33c93: Drop __TIME__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek --- drivers/scsi/wd33c93.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/wd33c93.c b/drivers/scsi/wd33c93.c index 5f697e0bd009..6c983a2ffcbe 100644 --- a/drivers/scsi/wd33c93.c +++ b/drivers/scsi/wd33c93.c @@ -2052,8 +2052,7 @@ wd33c93_init(struct Scsi_Host *instance, const wd33c93_regs regs, for (i = 0; i < MAX_SETUP_ARGS; i++) printk("%s,", setup_args[i]); printk("\n"); - printk(" Version %s - %s, Compiled %s at %s\n", - WD33C93_VERSION, WD33C93_DATE, __DATE__, __TIME__); + printk(" Version %s - %s\n", WD33C93_VERSION, WD33C93_DATE); } int @@ -2133,8 +2132,8 @@ wd33c93_proc_info(struct Scsi_Host *instance, char *buf, char **start, off_t off bp = buf; *bp = '\0'; if (hd->proc & PR_VERSION) { - sprintf(tbuf, "\nVersion %s - %s. Compiled %s %s", - WD33C93_VERSION, WD33C93_DATE, __DATE__, __TIME__); + sprintf(tbuf, "\nVersion %s - %s.", + WD33C93_VERSION, WD33C93_DATE); strcat(bp, tbuf); } if (hd->proc & PR_INFO) { From 9f3ad1cab2a0357d5866d45413fa2ee3e88e496f Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0134/3380] rio: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. As the buildDate field is part of the userspace API, I replaced it with the date of the last code change. 
Acked-by: Greg Kroah-Hartman Signed-off-by: Michal Marek --- drivers/staging/generic_serial/rio/rioinit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/generic_serial/rio/rioinit.c b/drivers/staging/generic_serial/rio/rioinit.c index 24a282bb89d4..fb62b383f1de 100644 --- a/drivers/staging/generic_serial/rio/rioinit.c +++ b/drivers/staging/generic_serial/rio/rioinit.c @@ -381,7 +381,7 @@ struct rioVersion *RIOVersid(void) { strlcpy(stVersion.version, "RIO driver for linux V1.0", sizeof(stVersion.version)); - strlcpy(stVersion.buildDate, __DATE__, + strlcpy(stVersion.buildDate, "Aug 15 2010", sizeof(stVersion.buildDate)); return &stVersion; From 152ba3942276c2a240703669ae4a3099e0a79451 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0135/3380] edac: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Cc: Doug Thompson Cc: bluesmoke-devel@lists.sourceforge.net Cc: linux-edac@vger.kernel.org Acked-by: Mauro Carvalho Chehab Signed-off-by: Michal Marek --- drivers/edac/amd76x_edac.c | 2 +- drivers/edac/amd8111_edac.c | 2 +- drivers/edac/amd8131_edac.c | 2 +- drivers/edac/cpc925_edac.c | 2 +- drivers/edac/e752x_edac.c | 2 +- drivers/edac/e7xxx_edac.c | 2 +- drivers/edac/edac_module.c | 2 +- drivers/edac/i5000_edac.c | 2 +- drivers/edac/i5400_edac.c | 2 +- drivers/edac/i7300_edac.c | 2 +- drivers/edac/i7core_edac.c | 2 +- drivers/edac/i82860_edac.c | 2 +- drivers/edac/i82875p_edac.c | 2 +- drivers/edac/i82975x_edac.c | 2 +- drivers/edac/mpc85xx_edac.h | 2 +- drivers/edac/mv64x60_edac.h | 2 +- drivers/edac/ppc4xx_edac.c | 2 +- drivers/edac/r82600_edac.c | 2 +- 18 files changed, 18 insertions(+), 18 deletions(-) diff --git a/drivers/edac/amd76x_edac.c b/drivers/edac/amd76x_edac.c index cace0a7b707a..e47e73bbbcc5 100644 --- a/drivers/edac/amd76x_edac.c +++ b/drivers/edac/amd76x_edac.c @@ -19,7 +19,7 @@ #include #include "edac_core.h" -#define AMD76X_REVISION " Ver: 2.0.2 " __DATE__ +#define AMD76X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "amd76x_edac" #define amd76x_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index 35b78d04bbfa..ddd890052ce2 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -33,7 +33,7 @@ #include "edac_module.h" #include "amd8111_edac.h" -#define AMD8111_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define AMD8111_EDAC_REVISION " Ver: 1.0.0" #define AMD8111_EDAC_MOD_STR "amd8111_edac" #define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c index b432d60c622a..a5c680561c73 100644 --- a/drivers/edac/amd8131_edac.c +++ b/drivers/edac/amd8131_edac.c @@ -33,7 +33,7 @@ #include "edac_module.h" #include "amd8131_edac.h" -#define AMD8131_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define AMD8131_EDAC_REVISION " Ver: 1.0.0" #define AMD8131_EDAC_MOD_STR "amd8131_edac" /* Wrapper functions for accessing PCI configuration space */ diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index b9a781c47e3c..3400ae34795a 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -30,7 +30,7 @@ #include "edac_core.h" #include "edac_module.h" -#define CPC925_EDAC_REVISION " Ver: 1.0.0 " __DATE__ +#define CPC925_EDAC_REVISION " Ver: 1.0.0" #define CPC925_EDAC_MOD_STR "cpc925_edac" #define cpc925_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index ec302d426589..1af531a11d21 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -24,7 +24,7 @@ #include #include "edac_core.h" -#define E752X_REVISION " Ver: 2.0.2 " __DATE__ +#define E752X_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e752x_edac" static int report_non_memory_errors; diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 1731d7245816..6ffb6d23281f 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -29,7 +29,7 @@ #include #include "edac_core.h" -#define E7XXX_REVISION " Ver: 2.0.2 " __DATE__ +#define E7XXX_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "e7xxx_edac" #define e7xxx_printk(level, fmt, arg...) \ diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index be4b075c3098..5ddaa86d6a6e 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -15,7 +15,7 @@ #include "edac_core.h" #include "edac_module.h" -#define EDAC_VERSION "Ver: 2.1.0 " __DATE__ +#define EDAC_VERSION "Ver: 2.1.0" #ifdef CONFIG_EDAC_DEBUG /* Values of 0 to 4 will generate output */ diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index a5cefab8d65d..3d0b726304fe 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -27,7 +27,7 @@ /* * Alter this version for the I5000 module when modifications are made */ -#define I5000_REVISION " Ver: 2.0.12 " __DATE__ +#define I5000_REVISION " Ver: 2.0.12" #define EDAC_MOD_STR "i5000_edac" #define i5000_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/i5400_edac.c b/drivers/edac/i5400_edac.c index 38a9be9e1c7c..fd362b4c2a8e 100644 --- a/drivers/edac/i5400_edac.c +++ b/drivers/edac/i5400_edac.c @@ -33,7 +33,7 @@ /* * Alter this version for the I5400 module when modifications are made */ -#define I5400_REVISION " Ver: 1.0.0 " __DATE__ +#define I5400_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i5400_edac" diff --git a/drivers/edac/i7300_edac.c b/drivers/edac/i7300_edac.c index 76d1f576cdc8..ff320c0b9cac 100644 --- a/drivers/edac/i7300_edac.c +++ b/drivers/edac/i7300_edac.c @@ -31,7 +31,7 @@ /* * Alter this version for the I7300 module when modifications are made */ -#define I7300_REVISION " Ver: 1.0.0 " __DATE__ +#define I7300_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i7300_edac" diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 81154ab296b6..3f320ba5445f 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -59,7 +59,7 @@ MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices"); /* * Alter this version for the module when modifications are made */ -#define I7CORE_REVISION " Ver: 1.0.0 " __DATE__ +#define I7CORE_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i7core_edac" /* diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index b8a95cf50718..931a05775049 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -16,7 +16,7 @@ #include #include "edac_core.h" -#define I82860_REVISION " Ver: 2.0.2 " __DATE__ +#define I82860_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82860_edac" #define i82860_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/i82875p_edac.c b/drivers/edac/i82875p_edac.c index b2fd1e899142..33864c63c684 100644 --- a/drivers/edac/i82875p_edac.c +++ b/drivers/edac/i82875p_edac.c @@ -20,7 +20,7 @@ #include #include "edac_core.h" -#define I82875P_REVISION " Ver: 2.0.2 " __DATE__ +#define I82875P_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "i82875p_edac" #define i82875p_printk(level, fmt, arg...) \ diff --git a/drivers/edac/i82975x_edac.c b/drivers/edac/i82975x_edac.c index 92e65e7038e9..a5da732fe5b2 100644 --- a/drivers/edac/i82975x_edac.c +++ b/drivers/edac/i82975x_edac.c @@ -16,7 +16,7 @@ #include #include "edac_core.h" -#define I82975X_REVISION " Ver: 1.0.0 " __DATE__ +#define I82975X_REVISION " Ver: 1.0.0" #define EDAC_MOD_STR "i82975x_edac" #define i82975x_printk(level, fmt, arg...) \ diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index cb24df839460..932016f2cf06 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -11,7 +11,7 @@ #ifndef _MPC85XX_EDAC_H_ #define _MPC85XX_EDAC_H_ -#define MPC85XX_REVISION " Ver: 2.0.0 " __DATE__ +#define MPC85XX_REVISION " Ver: 2.0.0" #define EDAC_MOD_STR "MPC85xx_edac" #define mpc85xx_printk(level, fmt, arg...) \ diff --git a/drivers/edac/mv64x60_edac.h b/drivers/edac/mv64x60_edac.h index e042e2daa8f4..c7f209c92a1a 100644 --- a/drivers/edac/mv64x60_edac.h +++ b/drivers/edac/mv64x60_edac.h @@ -12,7 +12,7 @@ #ifndef _MV64X60_EDAC_H_ #define _MV64X60_EDAC_H_ -#define MV64x60_REVISION " Ver: 2.0.0 " __DATE__ +#define MV64x60_REVISION " Ver: 2.0.0" #define EDAC_MOD_STR "MV64x60_edac" #define mv64x60_printk(level, fmt, arg...) 
\ diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045ceb8e..208244e4a6f2 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -113,7 +113,7 @@ #define EDAC_OPSTATE_UNKNOWN_STR "unknown" #define PPC4XX_EDAC_MODULE_NAME "ppc4xx_edac" -#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0 " __DATE__ +#define PPC4XX_EDAC_MODULE_REVISION "v1.0.0" #define PPC4XX_EDAC_MESSAGE_SIZE 256 diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index 6a822c631ef5..387997a3fab5 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -22,7 +22,7 @@ #include #include "edac_core.h" -#define R82600_REVISION " Ver: 2.0.2 " __DATE__ +#define R82600_REVISION " Ver: 2.0.2" #define EDAC_MOD_STR "r82600_edac" #define r82600_printk(level, fmt, arg...) \ From a1b666657c3a691c4f8a0025905e88e8b4baa360 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Fri, 1 Apr 2011 12:41:20 +0200 Subject: [PATCH 0136/3380] pmcraid: Drop __DATE__ usage The kernel already prints its build timestamp during boot, no need to repeat it in random drivers and produce different object files each time. 
Cc: Anil Ravindranath Cc: linux-scsi@vger.kernel.org Signed-off-by: Michal Marek --- drivers/scsi/pmcraid.c | 9 ++++----- drivers/scsi/pmcraid.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index bcf858e88c64..c83bc5e7753b 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -4249,8 +4249,8 @@ static ssize_t pmcraid_show_drv_version( char *buf ) { - return snprintf(buf, PAGE_SIZE, "version: %s, build date: %s\n", - PMCRAID_DRIVER_VERSION, PMCRAID_DRIVER_DATE); + return snprintf(buf, PAGE_SIZE, "version: %s\n", + PMCRAID_DRIVER_VERSION); } static struct device_attribute pmcraid_driver_version_attr = { @@ -6093,9 +6093,8 @@ static int __init pmcraid_init(void) dev_t dev; int error; - pmcraid_info("%s Device Driver version: %s %s\n", - PMCRAID_DRIVER_NAME, - PMCRAID_DRIVER_VERSION, PMCRAID_DRIVER_DATE); + pmcraid_info("%s Device Driver version: %s\n", + PMCRAID_DRIVER_NAME, PMCRAID_DRIVER_VERSION); error = alloc_chrdev_region(&dev, 0, PMCRAID_MAX_ADAPTERS, diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h index 4db210d93947..d302737c279c 100644 --- a/drivers/scsi/pmcraid.h +++ b/drivers/scsi/pmcraid.h @@ -43,7 +43,6 @@ #define PMCRAID_DRIVER_NAME "PMC MaxRAID" #define PMCRAID_DEVFILE "pmcsas" #define PMCRAID_DRIVER_VERSION "1.0.3" -#define PMCRAID_DRIVER_DATE __DATE__ #define PMCRAID_FW_VERSION_1 0x002 From 2a086e5d3a23570735f75b784d29b93068070833 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:09:26 +0900 Subject: [PATCH 0137/3380] TOMOYO: Fix race on updating profile's comment line. In tomoyo_write_profile() since 2.6.34, a lock was by error missing when replacing profile's comment line. If multiple threads attempted echo '0-COMMENT=comment' > /sys/kernel/security/tomoyo/profile in parallel, garbage collector will fail to kfree() the old value. Protect the replacement using a lock. 
Also, keep the old value rather than replacing it with an empty string when an out-of-memory error occurs. Signed-off-by: Xiaochen Wang Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- security/tomoyo/common.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 7556315c1978..2b7b1a123600 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -459,8 +459,16 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head) if (profile == &tomoyo_default_profile) return -EINVAL; if (!strcmp(data, "COMMENT")) { - const struct tomoyo_path_info *old_comment = profile->comment; - profile->comment = tomoyo_get_name(cp); + static DEFINE_SPINLOCK(lock); + const struct tomoyo_path_info *new_comment + = tomoyo_get_name(cp); + const struct tomoyo_path_info *old_comment; + if (!new_comment) + return -ENOMEM; + spin_lock(&lock); + old_comment = profile->comment; + profile->comment = new_comment; + spin_unlock(&lock); tomoyo_put_name(old_comment); return 0; } From e4f5f26d8336318a5aa0858223c81cf29fcf5f68 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:11:50 +0900 Subject: [PATCH 0138/3380] TOMOYO: Don't add / for allow_unmount permission check. "mount --bind /path/to/file1 /path/to/file2" is legal. Therefore, "umount /path/to/file2" is also legal. Do not automatically append trailing '/' if pathname to be unmounted does not end with '/'.
Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- security/tomoyo/file.c | 1 - 1 file changed, 1 deletion(-) diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index cb09f1fce910..d64e8ecb6fb3 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -1011,7 +1011,6 @@ int tomoyo_path_perm(const u8 operation, struct path *path) break; case TOMOYO_TYPE_RMDIR: case TOMOYO_TYPE_CHROOT: - case TOMOYO_TYPE_UMOUNT: tomoyo_add_slash(&buf); break; } From c0fa797ae6cd02ff87c0bfe0d509368a3b45640e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Sun, 3 Apr 2011 00:12:54 +0900 Subject: [PATCH 0139/3380] TOMOYO: Fix infinite loop bug when reading /sys/kernel/security/tomoyo/audit In tomoyo_flush(), head->r.w[0] holds a pointer to the string data to be printed. But head->r.w[0] was updated only when the string data was partially printed (because head->r.w[0] will be updated by head->r.w[1] later if completely printed). However, regarding /sys/kernel/security/tomoyo/query , an additional '\0' is printed after the string data was completely printed. But if the free space in the read buffer became 0 before printing the additional '\0', tomoyo_flush() was returning without updating head->r.w[0]. As a result, tomoyo_flush() reprints the already printed string data forever. Signed-off-by: Tetsuo Handa Signed-off-by: James Morris --- security/tomoyo/common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index 2b7b1a123600..a0d09e56874b 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -108,10 +108,9 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head) head->read_user_buf += len; w += len; } - if (*w) { - head->r.w[0] = w; + head->r.w[0] = w; + if (*w) return false; - } /* Add '\0' for query.
*/ if (head->poll) { if (!head->read_user_buf_avail || From 86c0f043a737dadf034a4e6f29aefb074f4a1146 Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Mon, 18 Apr 2011 10:14:57 +0000 Subject: [PATCH 0140/3380] s3fb: add DDC support Add I2C support for the DDC bus and also default mode initialization by reading monitor EDID to the s3fb driver. Tested on Trio64V+ (2 cards), Trio64V2/DX, Virge (3 cards), Virge/DX (3 cards), Virge/GX2, Trio3D/2X (4 cards), Trio3D. Will probably not work on Trio32 - my 2 cards have DDC support in BIOS that looks different from the other cards but the DDC pins on the VGA connector are not connected. Signed-off-by: Ondrej Zary Signed-off-by: Paul Mundt --- drivers/video/Kconfig | 8 ++ drivers/video/s3fb.c | 206 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 209 insertions(+), 5 deletions(-) diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index e6a8d8c0101d..e2126b5af1de 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -1463,6 +1463,14 @@ config FB_S3 ---help--- Driver for graphics boards with S3 Trio / S3 Virge chip. +config FB_S3_DDC + bool "DDC for S3 support" + depends on FB_S3 + select FB_DDC + default y + help + Say Y here if you want DDC support for your S3 graphics card. + config FB_SAVAGE tristate "S3 Savage support" depends on FB && PCI && EXPERIMENTAL diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c index c4482f2e5799..9a345209535b 100644 --- a/drivers/video/s3fb.c +++ b/drivers/video/s3fb.c @@ -25,6 +25,9 @@ #include /* Why should fb driver call console functions? because console_lock() */ #include